# Fixmatch
This algorithm has been used for semi-supervised training data with unlabeled data from Dataset A for tasks like:
- Noise vs Xray segregation
- AP vs ML view classification

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install --quiet optuna

In [3]:
import os
import PIL

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from torch.utils.data import DataLoader, Dataset

import optuna
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split as tts

# Data Utilities

In [4]:
class ImageDatasetFromDF(Dataset):
  def __init__(self, root_directory, dataframe, image_file_label, labels, mapping, transforms=None):
    self.df = dataframe.reset_index(drop=True)
    self.dir = root_directory
    self.tfrm = transforms
    self.labels = labels
    self.ifl = image_file_label

    self.class_to_label = mapping

  def __len__(self):
    return len(self.df)

  def __getitem__(self, index):
    filename = os.path.join(str(self.dir), str(self.df.loc[index, self.ifl]))
    labels = torch.tensor(self.class_to_label[self.df.loc[index, self.labels]], dtype=torch.float32)
    image = PIL.Image.open(filename).convert('RGB')
    if self.tfrm:
      image = self.tfrm(image)
    
    return image, labels

class UnlabeledDatasetFromDF(Dataset):
  def __init__(self, root_directory, dataframe, image_file_label, strong_transforms, weak_transforms):
    self.df = dataframe.reset_index(drop=True)
    self.dir = root_directory
    self.wt = weak_transforms
    self.st = strong_transforms
    self.ifl = image_file_label

    self.class_to_label = mapping

  def __len__(self):
    return len(self.df)

  def __getitem__(self, index):
    filename = os.path.join(str(self.dir), str(self.df.loc[index, self.ifl]))
    image = PIL.Image.open(filename).convert('RGB')
    
    return self.st(image), self.wt(image)

class UnlabeledDataset(Dataset):
  def __init__(self, img_dir, strong_transforms, weak_transforms):
    self.dir = img_dir
    self.image_files = [f for f in os.listdir(img_dir) if f.endswith(".png") or f.endswith(".jpg") or f.endswith(".jpeg")]
    self.wt = weak_transforms
    self.st = strong_transforms

  def __len__(self):
    return len(self.image_files)

  def __getitem__(self, index):
    filename = os.path.join(str(self.dir), str(self.image_files[index]))
    image = PIL.Image.open(filename).convert('RGB')
    
    return self.st(image), self.wt(image)

In [5]:
def get_default_device():
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
    
def to_device(data, device):
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

In [6]:
class DeviceDataLoader():
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        for batch in self.dl:
            yield to_device(batch, self.device)

    def __len__(self):
        return len(self.dl)

# FixMatch Model, Training & Evaluation Routine

In [7]:
class MultiClassWideResNet50(nn.Module):
  def __init__(self, num_features,dropout):
    super().__init__()

    self.model = torch.hub.load('pytorch/vision:v0.6.0', 'wide_resnet50_2', pretrained=True)
    self.model.fc = nn.Sequential(
        nn.Dropout(dropout),
        nn.Linear(self.model.fc.in_features, num_features)
        )
    self.sigmoid = nn.Sigmoid()


  def forward(self, x):
    return self.sigmoid(self.model(x))


In [8]:
def interleave(x, size):
    size = int(size)
    s = list(x.shape)
    return x.reshape([-1, size] + s[1:]).transpose(0, 1).reshape([-1] + s[1:])


def de_interleave(x, size):
    size = int(size)
    s = list(x.shape)
    return x.reshape([size, -1] + s[1:]).transpose(0, 1).reshape([-1] + s[1:])

  
def accuracy(output, labels):
  _, preds = torch.max(output, dim=1)
  return torch.tensor(torch.sum(preds == labels).item() / len(preds))

In [9]:
def train(model, optimizer, train_labeled_dl, train_unlabeled_dl, mu, threshold, lmbda, device):
  model.train()

  losses = 0

  labeled_iter = iter(train_labeled_dl)
  unlabed_iter = iter(train_unlabeled_dl)


  for batch_idx in tqdm(range(len(train_labeled_dl))):

    # batch of labeled data
    try:
      train_X_l, train_y_l = labeled_iter.__next__()
    except:
      labeled_iter = iter(train_labeled_dl)
      train_X_l, train_y_l = labeled_iter.__next__()
    
    # batch of unlabeled data
    try:
      train_X_us, train_X_uw = unlabed_iter.__next__()
    except:
      unlabed_iter = iter(train_unlabeled_dl)
      train_X_us, train_X_uw = unlabed_iter.__next__()

    inputs = interleave(
          torch.cat((train_X_l, train_X_us, train_X_uw)), 
          2*mu+1
        )
    
    preds = model(inputs)
    preds = de_interleave(preds, 2*mu+1)
    preds_X_l = preds[:train_X_l.shape[0]]
    preds_X_us, preds_X_uw = preds[train_X_l.shape[0]:].chunk(2)
    del preds

    loss_X_l = F.cross_entropy(preds_X_l, to_device(train_y_l.type(torch.LongTensor),device), reduction="mean")

    pseudo_labels = torch.softmax(preds_X_uw.detach(), dim=-1)
    max_probs, train_y_u = torch.max(pseudo_labels, dim=-1)
    mask = max_probs.ge(threshold).float()


    loss_X_u = (F.cross_entropy(preds_X_us, train_y_u, reduction='none') * mask).mean()

    loss = loss_X_l + loss_X_u * lmbda

    loss.backward()

    optimizer.step()
    optimizer.zero_grad()

    losses += loss.item()

  return losses / len(train_labeled_dl)    
    

@torch.no_grad()
def evaluate(model, val_dl, device):
  pbar = tqdm(range(len(val_dl)))
  losses = 0
  accuracies = 0
  
  model.eval()
  for images, labels in val_dl:
    preds = model(images)
    loss = F.cross_entropy(preds, to_device(labels.type(torch.LongTensor),device))
    losses += loss.item()
    accuracies += accuracy(preds, to_device(labels.type(torch.LongTensor),device))
    pbar.update()

  return losses / len(val_dl), accuracies / len(val_dl)


In [10]:
@torch.no_grad()
def predict(model, images):
  activation = {}
  def get_activation(name):
    def hook(model, input, output):
      output = output.reshape(output.size(0), -1)
      activation[name] = output.cpu().detach().numpy()
    return hook
  model.model.avgpool.register_forward_hook(get_activation('avgpool'))
  
  model.eval()
  output = model(images)
  _, preds = torch.max(output, dim=1)
  return preds.cpu().detach(), activation['avgpool']

# Preparing Labeled and Unlabeled Data

This section needs to be modified for path of labeled and unlabeled data, weak and strong transforms

In [65]:
# loading labeled datasets
sample_idx = 5
root_img_dir =  ""
labeled_df = pd.read_csv(f"/content/drive/MyDrive/Fellows Resources/Data set A/dataset_samples/sample_{sample_idx}.csv")
labeled_df = labeled_df[["image_filepath", "product"]]
labeled_df["product"] = labeled_df["product"].apply(lambda x: "data" if x != "Others" else x)
mapping = {label:idx for idx, label in enumerate(list(labeled_df["product"].value_counts().index))}
print("Classes: ", mapping)

train_X_l, test_X_l, train_y_l, test_y_l = tts(labeled_df["image_filepath"], labeled_df["product"], stratify=labeled_df["product"], test_size=0.2)


mean = (0.5071, 0.4867, 0.4408)
std = (0.2675, 0.2565, 0.2761)
img_size = (128, 128)

transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3,1,1) if x.shape[0] == 1 else x),
    transforms.Normalize(mean=mean, std=std)
  ])

transform_val = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3,1,1) if x.shape[0] == 1 else x),
    transforms.Normalize(mean=mean, std=std)
    ])

labeled_train_ds = ImageDatasetFromDF(root_img_dir,pd.concat([train_X_l, train_y_l],axis=1),"image_filepath", "product", mapping, transform_val)
labeled_val_ds= ImageDatasetFromDF(root_img_dir,pd.concat([test_X_l, test_y_l],axis=1),"image_filepath", "product",mapping, transform_val)


# loading unlabeled data
unlabeled_img_dir = "/content/drive/MyDrive/Fellows Resources/Data set A/segregated/xray"
strong_transforms = transforms.Compose([
    transforms.RandomCrop(300, pad_if_needed=True, padding_mode="reflect"),
    transforms.Resize(img_size),
    transforms.ColorJitter(0, 0.6, 0.9, 0.3),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
  ])
weak_transforms = transforms.Compose([
    transforms.RandomCrop(300, pad_if_needed=True, padding_mode="reflect"),
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std)
  ])
unlabeled_ds = UnlabeledDataset(unlabeled_img_dir, strong_transforms, weak_transforms) #UnlabeledDatasetFromDF(unlabeled_img_dir, unlabeled_df, "image",strong_transforms, weak_transforms)

Classes:  {'Others': 0, 'data': 1}


# Fixmatch algorithm

In [66]:
def fixmatch(params, epochs, labeled_train_ds, unlabeled_ds, labeled_val_ds = None):
  lr = params["lr"] 
  dropout = params["dropout"]
  mu = params["mu"]
  lmbda = params["lambda"]

  momentum = 0.95
  threshold = 0.99
  BS = 16
  device = get_default_device()

  # dataloaders
  train_labeled_dl = DeviceDataLoader(DataLoader(labeled_train_ds, batch_size=BS,shuffle=True,num_workers=4), device)
  unlabeled_dl = DeviceDataLoader(DataLoader(unlabeled_ds, batch_size=BS*2,shuffle=True,num_workers=4), device)


  if labeled_val_ds:
    val_labeled_dl = DeviceDataLoader(DataLoader(labeled_val_ds, batch_size=BS*2,shuffle=True,num_workers=4), device)
    best_loss = np.inf

  model = to_device(MultiClassWideResNet50(len(mapping), dropout), device)
  opt = optim.SGD(model.parameters(), lr=lr, momentum=momentum)



  for epoch in range(epochs):
    train_loss = train(model, opt, train_labeled_dl, unlabeled_dl, mu, threshold, lmbda, device)

    if labeled_val_ds:
      val_loss, val_acc = evaluate(model, val_labeled_dl, device)
      if val_loss < best_loss: best_loss = val_loss
      print(f"Epoch[{epoch+1}]:: train_loss: {train_loss}, val_loss: {val_loss}, val_acc: {val_acc}")

    else:
      print(f"Epoch[{epoch+1}]:: train_loss: {train_loss}")

  if labeled_val_ds:
    return model, best_loss
  else: 
    return model
  
  

# Hyperparameter Optimizer

In [67]:
def objective(trial):
  params = {
      "lr": trial.suggest_loguniform("lr", 1e-6, 1e-2),
      "dropout": trial.suggest_uniform("dropout", 0.1, 0.7),
      "mu": trial.suggest_float("mu", 0, 1),
      "lambda": trial.suggest_float("lambda", 0, 1)
  }
  _, loss = fixmatch(params, 10, labeled_train_ds, unlabeled_ds, labeled_val_ds)

  return loss


In [68]:
study = optuna.create_study(direction="minimize")
study.optimize(objective,n_trials=5)
best_trial = study.best_trial
params = best_trial.params

[32m[I 2021-02-04 16:59:00,277][0m A new study created in memory with name: no-name-3624a773-55ab-443c-83e8-6ed199b61792[0m
Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[1]:: train_loss: 0.690544273853302, val_loss: 0.6733535081148148, val_acc: 0.6041666865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[2]:: train_loss: 0.6829502439498901, val_loss: 0.6608505696058273, val_acc: 0.7109375


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[3]:: train_loss: 0.6747012376785279, val_loss: 0.672283723950386, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[4]:: train_loss: 0.6671806526184082, val_loss: 0.6576412618160248, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[5]:: train_loss: 0.6565085458755493, val_loss: 0.6270231157541275, val_acc: 0.734375


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[6]:: train_loss: 0.64995441198349, val_loss: 0.6595252305269241, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[7]:: train_loss: 0.6464131021499634, val_loss: 0.6520100384950638, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[8]:: train_loss: 0.6398909449577331, val_loss: 0.6177679300308228, val_acc: 0.734375


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[9]:: train_loss: 0.6347104954719544, val_loss: 0.69561967253685, val_acc: 0.5078125


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

[32m[I 2021-02-04 17:02:00,289][0m Trial 0 finished with value: 0.6177679300308228 and parameters: {'lr': 2.8720159047162155e-05, 'dropout': 0.5319329566553338, 'mu': 0.06334434175385129, 'lambda': 0.5994818840123749}. Best is trial 0 with value: 0.6177679300308228.[0m


Epoch[10]:: train_loss: 0.6377123546600342, val_loss: 0.6779174208641052, val_acc: 0.5833333134651184



Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[1]:: train_loss: 0.6963261580467224, val_loss: 0.6812562793493271, val_acc: 0.5130208134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[2]:: train_loss: 0.6759304547309876, val_loss: 0.6844955384731293, val_acc: 0.4921875


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[3]:: train_loss: 0.6524958419799805, val_loss: 0.6470665633678436, val_acc: 0.6666666865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[4]:: train_loss: 0.6414481782913208, val_loss: 0.6750506907701492, val_acc: 0.5833333134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[5]:: train_loss: 0.6332577919960022, val_loss: 0.6674011200666428, val_acc: 0.5833333134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[6]:: train_loss: 0.6283681893348694, val_loss: 0.6309536248445511, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[7]:: train_loss: 0.623597400188446, val_loss: 0.6872300803661346, val_acc: 0.5833333134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[8]:: train_loss: 0.6155095839500427, val_loss: 0.6445576995611191, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[9]:: train_loss: 0.607990460395813, val_loss: 0.6320706456899643, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

[32m[I 2021-02-04 17:05:00,512][0m Trial 1 finished with value: 0.5901577994227409 and parameters: {'lr': 5.789647585271609e-05, 'dropout': 0.28284227774308085, 'mu': 0.6064293737801547, 'lambda': 0.8349240495399686}. Best is trial 1 with value: 0.5901577994227409.[0m


Epoch[10]:: train_loss: 0.6021814608573913, val_loss: 0.5901577994227409, val_acc: 0.734375



Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[1]:: train_loss: 0.6949031615257263, val_loss: 0.6826955676078796, val_acc: 0.5833333134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[2]:: train_loss: 0.6339946913719178, val_loss: 0.7080651372671127, val_acc: 0.5078125


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[3]:: train_loss: 0.6115635347366333, val_loss: 0.6708458364009857, val_acc: 0.5833333134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[4]:: train_loss: 0.5824772226810455, val_loss: 0.6054089963436127, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[5]:: train_loss: 0.556297744512558, val_loss: 0.5817481353878975, val_acc: 0.7578125


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[6]:: train_loss: 0.5284537076950073, val_loss: 0.6009363830089569, val_acc: 0.6979166865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[7]:: train_loss: 0.49265708804130554, val_loss: 0.6380796581506729, val_acc: 0.6067708134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[8]:: train_loss: 0.45828901886940004, val_loss: 0.6547124981880188, val_acc: 0.6302083134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[9]:: train_loss: 0.41753769040107724, val_loss: 0.5940995365381241, val_acc: 0.734375


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))










HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

[32m[I 2021-02-04 17:08:02,966][0m Trial 2 finished with value: 0.5817481353878975 and parameters: {'lr': 0.0003649734801249711, 'dropout': 0.2660674339332634, 'mu': 0.20964634526667902, 'lambda': 0.7051856654665141}. Best is trial 2 with value: 0.5817481353878975.[0m


Epoch[10]:: train_loss: 0.3890040278434753, val_loss: 0.5829330086708069, val_acc: 0.6744791865348816



Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[1]:: train_loss: 0.653122673034668, val_loss: 0.6412464827299118, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[2]:: train_loss: 0.5866785621643067, val_loss: 0.6198582202196121, val_acc: 0.6822916865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[3]:: train_loss: 0.4861744749546051, val_loss: 0.5973839610815048, val_acc: 0.6979166865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[4]:: train_loss: 0.3863595795631409, val_loss: 0.5271298512816429, val_acc: 0.7734375


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[5]:: train_loss: 0.33463836431503297, val_loss: 0.649533748626709, val_acc: 0.6145833134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[6]:: train_loss: 0.32187308549880983, val_loss: 0.659199520945549, val_acc: 0.5833333134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))










HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[7]:: train_loss: 0.32099042654037474, val_loss: 0.6187605559825897, val_acc: 0.6666666865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[8]:: train_loss: 0.32067630290985105, val_loss: 0.5452872291207314, val_acc: 0.7421875


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[9]:: train_loss: 0.3194160628318787, val_loss: 0.6079920679330826, val_acc: 0.6666666865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

[32m[I 2021-02-04 17:11:06,889][0m Trial 3 finished with value: 0.5271298512816429 and parameters: {'lr': 0.0019178470346239889, 'dropout': 0.6000459439060641, 'mu': 0.8341616715563207, 'lambda': 0.9213171429593524}. Best is trial 3 with value: 0.5271298512816429.[0m


Epoch[10]:: train_loss: 0.31713643312454226, val_loss: 0.5910055637359619, val_acc: 0.6666666865348816



Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[1]:: train_loss: 0.6364621663093567, val_loss: 0.6425023674964905, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[2]:: train_loss: 0.49905635952949523, val_loss: 0.595333606004715, val_acc: 0.6901041865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[3]:: train_loss: 0.3625841200351715, val_loss: 0.6313303709030151, val_acc: 0.6510416865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[4]:: train_loss: 0.363455308675766, val_loss: 0.6777587532997131, val_acc: 0.5833333134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[5]:: train_loss: 0.38356221675872804, val_loss: 0.547462597489357, val_acc: 0.7421875


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[6]:: train_loss: 0.424215327501297, val_loss: 0.5812375321984291, val_acc: 0.71875


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[7]:: train_loss: 0.45637715101242066, val_loss: 0.637931153178215, val_acc: 0.6666666865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[8]:: train_loss: 0.482531955242157, val_loss: 0.5868079885840416, val_acc: 0.71875


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[9]:: train_loss: 0.46253244042396546, val_loss: 0.5371684804558754, val_acc: 0.7734375


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

[32m[I 2021-02-04 17:14:11,167][0m Trial 4 finished with value: 0.5371684804558754 and parameters: {'lr': 0.006575839965407297, 'dropout': 0.31179086216268825, 'mu': 0.5016658349676054, 'lambda': 0.2277279222530404}. Best is trial 3 with value: 0.5271298512816429.[0m


Epoch[10]:: train_loss: 0.42869691729545595, val_loss: 0.6774204820394516, val_acc: 0.6510416865348816


# Training with Optimal Parameters & Making Predictions

In [69]:
print(f"Optimized Parameters: {params}")
model = fixmatch(params,10, labeled_train_ds, unlabeled_ds)

Optimized Parameters: {'lr': 0.0019178470346239889, 'dropout': 0.6000459439060641, 'mu': 0.8341616715563207, 'lambda': 0.9213171429593524}



Using cache found in /root/.cache/torch/hub/pytorch_vision_v0.6.0


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[1]:: train_loss: 0.6585631489753723, val_loss: 0.6451102942228317, val_acc: 0.6588541865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[2]:: train_loss: 0.5930120289325714, val_loss: 0.5788783580064774, val_acc: 0.765625


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))












HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[3]:: train_loss: 0.4956603538990021, val_loss: 0.6138445883989334, val_acc: 0.6822916865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[4]:: train_loss: 0.38804282903671267, val_loss: 0.53815708309412, val_acc: 0.7734375


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[5]:: train_loss: 0.3306989634037018, val_loss: 0.6421440243721008, val_acc: 0.6223958134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[6]:: train_loss: 0.3206179678440094, val_loss: 0.600345104932785, val_acc: 0.6666666865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))










HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[7]:: train_loss: 0.3213345217704773, val_loss: 0.5904466509819031, val_acc: 0.6744791865348816


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[8]:: train_loss: 0.3197435283660889, val_loss: 0.6666418612003326, val_acc: 0.5989583134651184


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))











HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[9]:: train_loss: 0.3189841747283936, val_loss: 0.5601472333073616, val_acc: 0.765625


HBox(children=(FloatProgress(value=0.0, max=25.0), HTML(value='')))













HBox(children=(FloatProgress(value=0.0, max=4.0), HTML(value='')))

Epoch[10]:: train_loss: 0.31930047512054444, val_loss: 0.5887602716684341, val_acc: 0.6744791865348816


In [70]:
img_size = (128, 128)
transform_val = transforms.Compose([
    transforms.Resize(img_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3,1,1) if x.shape[0] == 1 else x),
    transforms.Normalize(mean=mean, std=std)
    ])

output_df = []
device =  get_default_device()
img_files = [f for f in os.listdir(unlabeled_img_dir) if f.endswith(".png") or f.endswith(".jpg") or f.endswith(".jpeg")]
for index in tqdm(range(len(img_files))):
  filename = os.path.join(str(unlabeled_img_dir), str(img_files[index]))
  image = transform_val(PIL.Image.open(filename).convert('RGB'))
  image = to_device(image.unsqueeze(0), device)
  label, embedding = predict(model, image)
  output_df.append([str(img_files[index]), label, embedding])

output_df = pd.DataFrame(output_df, columns=["image_filename", "product", "embedding"])

HBox(children=(FloatProgress(value=0.0, max=9859.0), HTML(value='')))





In [71]:
class_to_labels = {idx: label for label, idx in mapping.items()}
output_df["product"] = output_df["product"].apply(lambda x: class_to_labels[x.item()])
output_df["product"].value_counts()

data      8893
Others     966
Name: product, dtype: int64

In [72]:
data_size = output_df["product"].value_counts()["data"] 
save_df = output_df
if data_size > 1000:
  frac_size = (data_size - 1000)/data_size
  save_df = save_df.drop(save_df[save_df['product'] == 'data'].sample(frac=frac_size).index)

In [73]:
save_df = save_df[save_df["product"]=="data"]
save_df[["image_filename", "product"]].to_csv(f"/content/drive/MyDrive/Fellows Resources/Data set A/fixmatch_approach/generated_samples_{sample_idx}.csv", index=False)

# T-SNE Vizualization

In [52]:
!pip install --quiet plotly-express
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
import plotly_express as px

%matplotlib inline

In [53]:
img_repr_matrix = [list(x[0]) for x in output_df['embedding'].values]
tsne = TSNE(n_components=3, verbose=10) # default settings with 3-dimensions
tsne_results_3d = tsne.fit_transform(img_repr_matrix)

[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Indexed 9859 samples in 3.587s...
[t-SNE] Computed neighbors for 9859 samples in 406.415s...
[t-SNE] Computed conditional probabilities for sample 1000 / 9859
[t-SNE] Computed conditional probabilities for sample 2000 / 9859
[t-SNE] Computed conditional probabilities for sample 3000 / 9859
[t-SNE] Computed conditional probabilities for sample 4000 / 9859
[t-SNE] Computed conditional probabilities for sample 5000 / 9859
[t-SNE] Computed conditional probabilities for sample 6000 / 9859
[t-SNE] Computed conditional probabilities for sample 7000 / 9859
[t-SNE] Computed conditional probabilities for sample 8000 / 9859
[t-SNE] Computed conditional probabilities for sample 9000 / 9859
[t-SNE] Computed conditional probabilities for sample 9859 / 9859
[t-SNE] Mean sigma: 3.066956
[t-SNE] Computed conditional probabilities in 0.564s
[t-SNE] Iteration 50: error = 94.5143509, gradient norm = 0.0363030 (50 iterations in 99.160s)
[t-SNE] Iteration 10

In [54]:
output_df['tsne1'] = tsne_results_3d[:,0]
output_df['tsne2'] = tsne_results_3d[:,1]
output_df['tsne3'] = tsne_results_3d[:,2]

In [55]:
px.scatter_3d(output_df, x='tsne1', y='tsne2', z='tsne3', 
                    color='product')

In [None]:
fig.write_html("/content/drive/MyDrive/Fellows Resources/Data set A/FixMatch Approach/tsne.html")

In [None]:
def scale_to_01_range(x):
    # compute the distribution range
    value_range = (np.max(x) - np.min(x))
    # move the distribution so that it starts from zero
    # by extracting the minimal value from all its values

    starts_from_zero = x - np.min(x)
    # make the distribution fit [0; 1] by dividing by its range
    return starts_from_zero / value_range

In [None]:
output_df['tsne1'] = output_df['tsne1'].apply(scale_to_01_range)
output_df['tsne2'] = output_df['tsne2'].apply(scale_to_01_range)
output_df['tsne3'] = output_df['tsne3'].apply(scale_to_01_range)

In [None]:
px.scatter_3d(output_df, x='tsne1', y='tsne2', z='tsne3', 
                    color='product')

In [None]:
fig.write_html("/content/drive/MyDrive/Fellows Resources/Data set A/FixMatch Approach/tsne_scaled.html")