## Correlation Prediction
#### Solve correlation prediction with convolutional neural networks(CNN).

In [None]:
# check GPU type.
!nvidia-smi

### Import Packages

In [2]:
_exp_name = "sample"

In [3]:
# Import necessary packages.
import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import efficientnet_v2_s
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset
# This is for the progress bar.
from tqdm.auto import tqdm
from sklearn.model_selection import KFold
import random

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)

### Transforms

In [5]:
# Normally, We don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here.
    
    # transforms.RandomResizedCrop((128,128), scale=(0.7,1.0)),
    # transforms.RandomHorizontalFlip(p=0.5),
    # transforms.RandomVerticalFlip(p=0.5),
    # #transforms.RandomRotation(20),
    # transforms.RandomAffine(30),
    #transforms.RandomPerspective(distortion_scale=0.2, p=0.2, fill=0),
    # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
    # transforms.RandomGrayscale(p=0.2),
    #transforms.GaussianBlur(7, sigma=(0.1, 2.0)),
    
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
    #transforms.Normalize([0.4766, 0.4527, 0.3926], [0.2275, 0.2224, 0.2210]),
])

### Datasets

In [6]:
class CorrDataset1(Dataset):

    def __init__(self,path, csv_path, tfm=test_tfm, files = None):
        super(CorrDataset1).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".png")])
        if files != None:
            self.files = files
            
        self.transform = tfm
               
        df = pd.read_csv(csv_path)
        self.label_map = dict(zip(df['id'], df['corr']))

  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname).convert("RGB")
        im = self.transform(im)
        
        if self.label_map is not None:
            try:
                value = float(self.label_map[os.path.splitext(fname)[0].replace('./correlation_assignment/train/', '')])
            except KeyError:
                value = -1.0
        else:
            value = -1.0
            
        return im,value

In [7]:
class CorrDataset2(Dataset):

    def __init__(self,path, csv_path, tfm=test_tfm, files = None):
        super(CorrDataset2).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".png")])
        if files != None:
            self.files = files
            
        self.transform = tfm
               
        df = pd.read_csv(csv_path)
        self.label_map = dict(zip(df['id'], df['corr']))

  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname).convert("RGB")
        im = self.transform(im)
        
        if self.label_map is not None:
            try:
                value = float(self.label_map[os.path.splitext(fname)[0].replace('./correlation_assignment/valid/', '')])
            except KeyError:
                value = -1.0
        else:
            value = -1.0
            
        return im,value

### Model

In [8]:
class Regressor(nn.Module):
    def __init__(self):
        super(Regressor, self).__init__()

        # Load pretrained EfficientNetB0
        self.base_model = efficientnet_v2_s(weights = None)

        # Get number of features in last layer
        in_features = self.base_model.classifier[1].in_features

        # Replace the classifier with custom regression head
        self.base_model.classifier = nn.Sequential(
            nn.Linear(in_features, 256),  # First dense layer
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 64),           # Optional second dense layer
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 1),
            nn.Tanh()
        )

    def forward(self, x):
        return self.base_model(x)

### Configurations

In [11]:
# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Initialize a model, and put it on the device specified.
model = Regressor().to(device)

# The number of batch size.
batch_size = 128

# The number of training epochs.
n_epochs = 100

# If no improvement in 'patience' epochs, early stop.
patience = 20

# For the classification task, we use cross-entropy as the measurement of performance.
criterion = nn.MSELoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

# K-fold
k_folds = 5

### Dataloader

In [10]:
# Construct train and valid datasets.
# The argument "loader" tells how torchvision reads the data.
train_set = CorrDataset1("./correlation_assignment/train", "./correlation_assignment/responses.csv", tfm=train_tfm)
valid_set = CorrDataset2("./correlation_assignment/valid", "./correlation_assignment/responses.csv", tfm=test_tfm)
dataset = ConcatDataset([train_set, valid_set])

kfold = KFold(n_splits=k_folds, shuffle=True)
for fold, (train_ids, test_ids) in enumerate(kfold.split(dataset)):
    train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
    test_subsampler = torch.utils.data.SubsetRandomSampler(test_ids)
    train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_subsampler, shuffle=False, num_workers=0, pin_memory=True)
    valid_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_subsampler, shuffle=False, num_workers=0, pin_memory=True)

### Start Training

In [12]:
# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_loss = float('inf')

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []

    for batch in tqdm(train_loader):

        # A batch consists of image data and corresponding labels.
        imgs, values = batch

        # Forward the data. (Make sure data and model are on the same device.)
        preds = model(imgs.to(device))
        preds = preds.float()
        values = values.float()

        loss = criterion(preds, values.to(device).view(-1, 1))
        
        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)

        # Update the parameters with computed gradients.
        optimizer.step()

        # Record the loss.
        train_loss.append(loss.item())
        
    train_loss = sum(train_loss) / len(train_loss)

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.8f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_losses = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, values = batch

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            preds = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(preds, values.to(device).view(-1, 1))

        # Record the loss.
        valid_losses.append(loss.item())


    # The average loss for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_losses) / len(valid_losses)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.8f}")

    # save models
    if valid_loss < best_loss:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_loss = valid_loss
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break

100%|██████████| 250/250 [01:02<00:00,  4.03it/s]


[ Train | 001/100 ] loss = 0.02483268


100%|██████████| 63/63 [00:09<00:00,  6.76it/s]


[ Valid | 001/100 ] loss = 0.00616661
Best model found at epoch 0, saving model


100%|██████████| 250/250 [00:52<00:00,  4.76it/s]


[ Train | 002/100 ] loss = 0.00582825


100%|██████████| 63/63 [00:06<00:00,  9.72it/s]


[ Valid | 002/100 ] loss = 0.00177934
Best model found at epoch 1, saving model


100%|██████████| 250/250 [00:52<00:00,  4.80it/s]


[ Train | 003/100 ] loss = 0.00378802


100%|██████████| 63/63 [00:06<00:00,  9.75it/s]


[ Valid | 003/100 ] loss = 0.00148069
Best model found at epoch 2, saving model


100%|██████████| 250/250 [00:51<00:00,  4.86it/s]


[ Train | 004/100 ] loss = 0.00293090


100%|██████████| 63/63 [00:06<00:00,  9.68it/s]


[ Valid | 004/100 ] loss = 0.00179549


100%|██████████| 250/250 [00:51<00:00,  4.85it/s]


[ Train | 005/100 ] loss = 0.00253347


100%|██████████| 63/63 [00:06<00:00,  9.53it/s]


[ Valid | 005/100 ] loss = 0.00124492
Best model found at epoch 4, saving model


100%|██████████| 250/250 [00:51<00:00,  4.83it/s]


[ Train | 006/100 ] loss = 0.00232684


100%|██████████| 63/63 [00:06<00:00,  9.61it/s]


[ Valid | 006/100 ] loss = 0.00089024
Best model found at epoch 5, saving model


100%|██████████| 250/250 [00:51<00:00,  4.84it/s]


[ Train | 007/100 ] loss = 0.00213943


100%|██████████| 63/63 [00:06<00:00,  9.55it/s]


[ Valid | 007/100 ] loss = 0.00093510


100%|██████████| 250/250 [00:51<00:00,  4.82it/s]


[ Train | 008/100 ] loss = 0.00209976


100%|██████████| 63/63 [00:06<00:00,  9.56it/s]


[ Valid | 008/100 ] loss = 0.00073751
Best model found at epoch 7, saving model


100%|██████████| 250/250 [00:52<00:00,  4.80it/s]


[ Train | 009/100 ] loss = 0.00197168


100%|██████████| 63/63 [00:06<00:00,  9.64it/s]


[ Valid | 009/100 ] loss = 0.00070077
Best model found at epoch 8, saving model


100%|██████████| 250/250 [00:51<00:00,  4.84it/s]


[ Train | 010/100 ] loss = 0.00193767


100%|██████████| 63/63 [00:06<00:00,  9.45it/s]


[ Valid | 010/100 ] loss = 0.00075162


100%|██████████| 250/250 [00:54<00:00,  4.60it/s]


[ Train | 011/100 ] loss = 0.00178100


100%|██████████| 63/63 [00:03<00:00, 16.60it/s]


[ Valid | 011/100 ] loss = 0.00055835
Best model found at epoch 10, saving model


100%|██████████| 250/250 [00:54<00:00,  4.62it/s]


[ Train | 012/100 ] loss = 0.00172099


100%|██████████| 63/63 [00:03<00:00, 15.93it/s]


[ Valid | 012/100 ] loss = 0.00046932
Best model found at epoch 11, saving model


100%|██████████| 250/250 [00:54<00:00,  4.62it/s]


[ Train | 013/100 ] loss = 0.00163855


100%|██████████| 63/63 [00:03<00:00, 16.25it/s]


[ Valid | 013/100 ] loss = 0.00061293


100%|██████████| 250/250 [00:54<00:00,  4.61it/s]


[ Train | 014/100 ] loss = 0.00171983


100%|██████████| 63/63 [00:04<00:00, 15.72it/s]


[ Valid | 014/100 ] loss = 0.00102439


100%|██████████| 250/250 [00:54<00:00,  4.63it/s]


[ Train | 015/100 ] loss = 0.00166544


100%|██████████| 63/63 [00:06<00:00,  9.75it/s]


[ Valid | 015/100 ] loss = 0.00041705
Best model found at epoch 14, saving model


100%|██████████| 250/250 [00:51<00:00,  4.86it/s]


[ Train | 016/100 ] loss = 0.00153341


100%|██████████| 63/63 [00:06<00:00,  9.57it/s]


[ Valid | 016/100 ] loss = 0.00135570


100%|██████████| 250/250 [00:51<00:00,  4.83it/s]


[ Train | 017/100 ] loss = 0.00153457


100%|██████████| 63/63 [00:06<00:00,  9.47it/s]


[ Valid | 017/100 ] loss = 0.00053067


100%|██████████| 250/250 [00:52<00:00,  4.79it/s]


[ Train | 018/100 ] loss = 0.00145933


100%|██████████| 63/63 [00:06<00:00,  9.32it/s]


[ Valid | 018/100 ] loss = 0.00054247


100%|██████████| 250/250 [00:52<00:00,  4.79it/s]


[ Train | 019/100 ] loss = 0.00149922


100%|██████████| 63/63 [00:06<00:00,  9.30it/s]


[ Valid | 019/100 ] loss = 0.00087997


100%|██████████| 250/250 [00:52<00:00,  4.80it/s]


[ Train | 020/100 ] loss = 0.00146010


100%|██████████| 63/63 [00:06<00:00,  9.11it/s]


[ Valid | 020/100 ] loss = 0.00037405
Best model found at epoch 19, saving model


100%|██████████| 250/250 [00:52<00:00,  4.80it/s]


[ Train | 021/100 ] loss = 0.00139091


100%|██████████| 63/63 [00:06<00:00,  9.78it/s]


[ Valid | 021/100 ] loss = 0.00079142


100%|██████████| 250/250 [00:51<00:00,  4.87it/s]


[ Train | 022/100 ] loss = 0.00140865


100%|██████████| 63/63 [00:06<00:00,  9.55it/s]


[ Valid | 022/100 ] loss = 0.00130811


100%|██████████| 250/250 [00:54<00:00,  4.60it/s]


[ Train | 023/100 ] loss = 0.00144435


100%|██████████| 63/63 [00:06<00:00,  9.40it/s]


[ Valid | 023/100 ] loss = 0.00079198


100%|██████████| 250/250 [00:54<00:00,  4.55it/s]


[ Train | 024/100 ] loss = 0.00133628


100%|██████████| 63/63 [00:06<00:00,  9.35it/s]


[ Valid | 024/100 ] loss = 0.00058006


100%|██████████| 250/250 [00:54<00:00,  4.57it/s]


[ Train | 025/100 ] loss = 0.00132348


100%|██████████| 63/63 [00:06<00:00,  9.11it/s]


[ Valid | 025/100 ] loss = 0.00042178


100%|██████████| 250/250 [00:54<00:00,  4.62it/s]


[ Train | 026/100 ] loss = 0.00128405


100%|██████████| 63/63 [00:06<00:00,  9.27it/s]


[ Valid | 026/100 ] loss = 0.00036760
Best model found at epoch 25, saving model


100%|██████████| 250/250 [00:54<00:00,  4.62it/s]


[ Train | 027/100 ] loss = 0.00124479


100%|██████████| 63/63 [00:06<00:00,  9.35it/s]


[ Valid | 027/100 ] loss = 0.00059842


100%|██████████| 250/250 [00:53<00:00,  4.64it/s]


[ Train | 028/100 ] loss = 0.00124226


100%|██████████| 63/63 [00:06<00:00,  9.71it/s]


[ Valid | 028/100 ] loss = 0.00026575
Best model found at epoch 27, saving model


100%|██████████| 250/250 [00:54<00:00,  4.59it/s]


[ Train | 029/100 ] loss = 0.00118909


100%|██████████| 63/63 [00:06<00:00,  9.26it/s]


[ Valid | 029/100 ] loss = 0.00096968


100%|██████████| 250/250 [00:54<00:00,  4.55it/s]


[ Train | 030/100 ] loss = 0.00119820


100%|██████████| 63/63 [00:06<00:00,  9.34it/s]


[ Valid | 030/100 ] loss = 0.00032072


100%|██████████| 250/250 [00:54<00:00,  4.58it/s]


[ Train | 031/100 ] loss = 0.00117312


100%|██████████| 63/63 [00:04<00:00, 15.01it/s]


[ Valid | 031/100 ] loss = 0.00026554
Best model found at epoch 30, saving model


100%|██████████| 250/250 [00:58<00:00,  4.25it/s]


[ Train | 032/100 ] loss = 0.00117678


100%|██████████| 63/63 [00:04<00:00, 15.12it/s]


[ Valid | 032/100 ] loss = 0.00056496


100%|██████████| 250/250 [00:56<00:00,  4.44it/s]


[ Train | 033/100 ] loss = 0.00112518


100%|██████████| 63/63 [00:06<00:00,  9.31it/s]


[ Valid | 033/100 ] loss = 0.00063643


100%|██████████| 250/250 [00:55<00:00,  4.52it/s]


[ Train | 034/100 ] loss = 0.00111793


100%|██████████| 63/63 [00:06<00:00,  9.30it/s]


[ Valid | 034/100 ] loss = 0.00168812


100%|██████████| 250/250 [00:55<00:00,  4.52it/s]


[ Train | 035/100 ] loss = 0.00109281


100%|██████████| 63/63 [00:06<00:00,  9.31it/s]


[ Valid | 035/100 ] loss = 0.00029736


100%|██████████| 250/250 [00:55<00:00,  4.51it/s]


[ Train | 036/100 ] loss = 0.00107129


100%|██████████| 63/63 [00:06<00:00,  9.74it/s]


[ Valid | 036/100 ] loss = 0.00031656


100%|██████████| 250/250 [00:53<00:00,  4.65it/s]


[ Train | 037/100 ] loss = 0.00112077


100%|██████████| 63/63 [00:06<00:00,  9.29it/s]


[ Valid | 037/100 ] loss = 0.00024205
Best model found at epoch 36, saving model


100%|██████████| 250/250 [00:53<00:00,  4.63it/s]


[ Train | 038/100 ] loss = 0.00099878


100%|██████████| 63/63 [00:06<00:00,  9.38it/s]


[ Valid | 038/100 ] loss = 0.00042784


100%|██████████| 250/250 [00:52<00:00,  4.74it/s]


[ Train | 039/100 ] loss = 0.00109285


100%|██████████| 63/63 [00:06<00:00,  9.70it/s]


[ Valid | 039/100 ] loss = 0.00066239


100%|██████████| 250/250 [00:52<00:00,  4.73it/s]


[ Train | 040/100 ] loss = 0.00104038


100%|██████████| 63/63 [00:06<00:00,  9.42it/s]


[ Valid | 040/100 ] loss = 0.00039007


100%|██████████| 250/250 [00:53<00:00,  4.67it/s]


[ Train | 041/100 ] loss = 0.00099417


100%|██████████| 63/63 [00:06<00:00,  9.39it/s]


[ Valid | 041/100 ] loss = 0.00035623


100%|██████████| 250/250 [00:53<00:00,  4.66it/s]


[ Train | 042/100 ] loss = 0.00103502


100%|██████████| 63/63 [00:06<00:00,  9.26it/s]


[ Valid | 042/100 ] loss = 0.00032681


100%|██████████| 250/250 [00:54<00:00,  4.60it/s]


[ Train | 043/100 ] loss = 0.00103994


100%|██████████| 63/63 [00:06<00:00,  9.33it/s]


[ Valid | 043/100 ] loss = 0.00046849


100%|██████████| 250/250 [00:54<00:00,  4.63it/s]


[ Train | 044/100 ] loss = 0.00096080


100%|██████████| 63/63 [00:06<00:00,  9.25it/s]


[ Valid | 044/100 ] loss = 0.00087155


100%|██████████| 250/250 [00:54<00:00,  4.57it/s]


[ Train | 045/100 ] loss = 0.00094847


100%|██████████| 63/63 [00:06<00:00,  9.38it/s]


[ Valid | 045/100 ] loss = 0.00020596
Best model found at epoch 44, saving model


100%|██████████| 250/250 [00:53<00:00,  4.65it/s]


[ Train | 046/100 ] loss = 0.00093527


100%|██████████| 63/63 [00:06<00:00,  9.62it/s]


[ Valid | 046/100 ] loss = 0.00038967


100%|██████████| 250/250 [00:51<00:00,  4.82it/s]


[ Train | 047/100 ] loss = 0.00096908


100%|██████████| 63/63 [00:06<00:00,  9.60it/s]


[ Valid | 047/100 ] loss = 0.00026987


100%|██████████| 250/250 [00:52<00:00,  4.76it/s]


[ Train | 048/100 ] loss = 0.00092973


100%|██████████| 63/63 [00:06<00:00,  9.09it/s]


[ Valid | 048/100 ] loss = 0.00021898


100%|██████████| 250/250 [00:53<00:00,  4.65it/s]


[ Train | 049/100 ] loss = 0.00092639


100%|██████████| 63/63 [00:07<00:00,  8.99it/s]


[ Valid | 049/100 ] loss = 0.00062261


100%|██████████| 250/250 [00:54<00:00,  4.61it/s]


[ Train | 050/100 ] loss = 0.00089071


100%|██████████| 63/63 [00:06<00:00,  9.15it/s]


[ Valid | 050/100 ] loss = 0.00146070


100%|██████████| 250/250 [00:54<00:00,  4.62it/s]


[ Train | 051/100 ] loss = 0.00088286


100%|██████████| 63/63 [00:06<00:00,  9.56it/s]


[ Valid | 051/100 ] loss = 0.00036679


100%|██████████| 250/250 [00:52<00:00,  4.73it/s]


[ Train | 052/100 ] loss = 0.00090603


100%|██████████| 63/63 [00:06<00:00,  9.19it/s]


[ Valid | 052/100 ] loss = 0.00041438


100%|██████████| 250/250 [00:53<00:00,  4.69it/s]


[ Train | 053/100 ] loss = 0.00090471


100%|██████████| 63/63 [00:06<00:00,  9.65it/s]


[ Valid | 053/100 ] loss = 0.00024832


100%|██████████| 250/250 [00:51<00:00,  4.86it/s]


[ Train | 054/100 ] loss = 0.00084840


100%|██████████| 63/63 [00:06<00:00,  9.87it/s]


[ Valid | 054/100 ] loss = 0.00023528


100%|██████████| 250/250 [00:50<00:00,  4.91it/s]


[ Train | 055/100 ] loss = 0.00088942


100%|██████████| 63/63 [00:06<00:00,  9.74it/s]


[ Valid | 055/100 ] loss = 0.00032324


100%|██████████| 250/250 [00:51<00:00,  4.89it/s]


[ Train | 056/100 ] loss = 0.00090620


100%|██████████| 63/63 [00:06<00:00,  9.85it/s]


[ Valid | 056/100 ] loss = 0.00062154


100%|██████████| 250/250 [00:51<00:00,  4.83it/s]


[ Train | 057/100 ] loss = 0.00088301


100%|██████████| 63/63 [00:06<00:00,  9.54it/s]


[ Valid | 057/100 ] loss = 0.00044138


100%|██████████| 250/250 [04:19<00:00,  1.04s/it]


[ Train | 058/100 ] loss = 0.00088732


100%|██████████| 63/63 [00:05<00:00, 12.37it/s]


[ Valid | 058/100 ] loss = 0.00056280


100%|██████████| 250/250 [00:59<00:00,  4.23it/s]


[ Train | 059/100 ] loss = 0.00087677


100%|██████████| 63/63 [00:07<00:00,  8.98it/s]


[ Valid | 059/100 ] loss = 0.00058236


100%|██████████| 250/250 [01:06<00:00,  3.74it/s]


[ Train | 060/100 ] loss = 0.00083743


100%|██████████| 63/63 [00:08<00:00,  7.67it/s]


[ Valid | 060/100 ] loss = 0.00030221


100%|██████████| 250/250 [01:04<00:00,  3.85it/s]


[ Train | 061/100 ] loss = 0.00084016


100%|██████████| 63/63 [00:05<00:00, 11.05it/s]


[ Valid | 061/100 ] loss = 0.00025243


100%|██████████| 250/250 [01:08<00:00,  3.66it/s]


[ Train | 062/100 ] loss = 0.00085957


100%|██████████| 63/63 [00:08<00:00,  7.54it/s]


[ Valid | 062/100 ] loss = 0.00021888


100%|██████████| 250/250 [01:04<00:00,  3.90it/s]


[ Train | 063/100 ] loss = 0.00085126


100%|██████████| 63/63 [00:05<00:00, 10.98it/s]


[ Valid | 063/100 ] loss = 0.00100690


100%|██████████| 250/250 [01:07<00:00,  3.71it/s]


[ Train | 064/100 ] loss = 0.00084053


100%|██████████| 63/63 [00:07<00:00,  8.83it/s]


[ Valid | 064/100 ] loss = 0.00029838


100%|██████████| 250/250 [01:07<00:00,  3.68it/s]


[ Train | 065/100 ] loss = 0.00081455


100%|██████████| 63/63 [00:05<00:00, 11.05it/s]


[ Valid | 065/100 ] loss = 0.00025118


100%|██████████| 250/250 [01:01<00:00,  4.08it/s]


[ Train | 066/100 ] loss = 0.00086776


100%|██████████| 63/63 [00:08<00:00,  7.61it/s]

[ Valid | 066/100 ] loss = 0.00022492
No improvment 20 consecutive epochs, early stopping





### Dataloader for test

In [None]:
class CorrDataset(Dataset):

    def __init__(self,path, csv_path, tfm=test_tfm, files = None):
        super(CorrDataset).__init__()
        self.path = path
        self.files = sorted([os.path.join(path,x) for x in os.listdir(path) if x.endswith(".png")])
        if files != None:
            self.files = files
            
        self.transform = tfm
               
        df = pd.read_csv(csv_path)
        self.label_map = dict(zip(df['id'], df['corr']))

  
    def __len__(self):
        return len(self.files)
  
    def __getitem__(self,idx):
        fname = self.files[idx]
        im = Image.open(fname).convert("RGB")
        im = self.transform(im)
        
        if self.label_map is not None:
            try:
                value = float(self.label_map[os.path.splitext(fname)[0].replace('./correlation_assignment/images/', '')])
            except KeyError:
                value = -1.0
        else:
            value = -1.0
            
        return im,value,fname

In [None]:
# Construct test datasets.
# The argument "loader" tells how torchvision reads the data.
test_set = CorrDataset("./correlation_assignment/test", "./correlation_assignment/responses.csv", tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=128, shuffle=False, num_workers=0, pin_memory=True)

### Dirty Data Cleaning

In [None]:
import shutil

model_best = Regressor().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()

loss_fn = nn.L1Loss(reduction='mean')

target_dir = './correlation_assignment/clean'

img_clean = []
with torch.no_grad():
    for batch in tqdm(test_loader):
        imgs, values, fname = batch
        print(fname)
        # print(imgs)

        preds = model(imgs.to(device))
        preds = preds.float()
        values = values.float()
        
        loss = loss_fn(preds, values.to(device))
        print(loss.item())
        if (loss.item() <= 0.3):
            filename = os.path.basename(fname[0])  # Extracts 'image1.jpg'
            target_path = os.path.join(target_dir, filename)
            shutil.copy2(fname[0], target_path)  # copy2 preserves metadata




NameError: name 'Regressor' is not defined

## Testing

In [None]:
model_best = Regressor().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()

test_losses = []

# Iterate the validation set by batches.
for batch in tqdm(valid_loader):

    # A batch consists of image data and corresponding labels.
    imgs, values = batch

    # Using torch.no_grad() accelerates the forward process.
    with torch.no_grad():
        preds = model(imgs.to(device))

    loss = criterion(preds, values.to(device))

    test_losses.append(loss.item())

# The average loss for entire validation set is the average of the recorded values.
test_loss = sum(test_losses) / len(test_losses)

# Print the information.
print(f"[ Test ] loss = {test_loss:.8f}")