In [6]:
import os
import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, random_split, DataLoader
from PIL import Image
import torchvision.models as models
import torchvision.transforms as transforms
from tqdm.notebook import tqdm
import torch.nn.functional as F

# Train Model

In [2]:
# Project root holding the images and the CSV label files.  The default is the
# original author's machine-specific location; set CIS519_PROJECT_ROOT to run
# the notebook elsewhere without editing this cell.
path = os.environ.get("CIS519_PROJECT_ROOT", "C:\\Users\\czahi\\Desktop\\CIS519-Final-Project")

In [3]:
class CUBDataset(Dataset):
    """Multi-label image dataset described by a CSV file.

    The CSV must contain an ``image_path`` column holding a path relative to
    ``root_dir``; every other column is read as one numeric attribute label.

    Args:
        csv_file: Path of the CSV file, read with ``pandas.read_csv``.
        root_dir: Directory the ``image_path`` entries are relative to.
        transform: Optional callable applied to the loaded PIL image.
    """

    PATH_COLUMN = 'image_path'

    def __init__(self, csv_file, root_dir, transform=None):
        self.df = pd.read_csv(csv_file)

        # For a quick smoke test, uncomment to keep only the first rows:
        # self.df = self.df.head(1000)

        self.transform = transform
        self.root_dir = root_dir

    def __len__(self):
        """Number of rows (images) listed in the CSV."""
        return len(self.df)

    @staticmethod
    def _resolve_path(root_dir, rel_path):
        """Join ``rel_path`` onto ``root_dir`` using the host OS separators."""
        # normpath turns "/" into "\\" on Windows and leaves POSIX paths
        # alone; the former unconditional replace("/", "\\") produced
        # unusable paths on every non-Windows system.
        return os.path.normpath(os.path.join(root_dir, rel_path))

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for the row at position ``idx``.

        ``labels`` is a float32 tensor built from every column except
        ``image_path``; ``image`` is the transformed image, or the RGB PIL
        image when no transform was given.
        """
        # Dataset indices are positions, so index positionally.
        row = self.df.iloc[idx]
        # Drop the path column by name rather than assuming it is column 0.
        label_values = row.drop(labels=self.PATH_COLUMN).astype('float32').values
        img_label = torch.tensor(label_values)

        img_path = self._resolve_path(self.root_dir, row[self.PATH_COLUMN])
        # The context manager closes the file handle; convert('RGB') returns a
        # loaded 3-channel copy so grayscale/RGBA files still match a network
        # whose first convolution expects 3 input channels.
        with Image.open(img_path) as handle:
            img = handle.convert('RGB')
        if self.transform:
            img = self.transform(img)
        return img, img_label

In [4]:
def F_score(output, label, threshold=0.5, beta=1):
    """Batch-level F-beta score for multi-label predictions.

    Precision and recall are computed per sample (reducing over dim 1),
    averaged over the batch, and then combined into a single F-beta value.

    Args:
        output: Tensor of predicted scores; must have at least two dims.
        label: Tensor of 0/1 ground-truth labels with the same shape.
        threshold: Scores strictly above this value count as positive.
        beta: Weight of recall relative to precision (1 gives F1).

    Returns:
        A 0-dim tensor holding the F-beta score.
    """
    eps = 1e-12  # guards the divisions when a sample has no positives
    predicted = output > threshold
    # Ground truth is binarized at a fixed 0.5 so that tuning the prediction
    # threshold never changes which labels count as positive.
    actual = label > 0.5

    true_pos = (predicted & actual).sum(1).float()
    false_pos = (predicted & ~actual).sum(1).float()
    false_neg = (~predicted & actual).sum(1).float()

    precision = torch.mean(true_pos / (true_pos + false_pos + eps))
    recall = torch.mean(true_pos / (true_pos + false_neg + eps))
    beta_sq = beta ** 2
    return (1 + beta_sq) * precision * recall / (beta_sq * precision + recall + eps)

In [5]:
class MultilabelImageClassificationBase(torch.nn.Module):
    """Shared training / validation plumbing for multi-label classifiers.

    Subclasses provide ``forward``.  Its output is passed straight to
    ``F.binary_cross_entropy``, so it must already be probabilities.
    """

    def training_step(self, batch):
        """Return the binary cross-entropy loss of the model on ``batch``."""
        inputs, labels = batch
        return F.binary_cross_entropy(self(inputs), labels)

    def validation_step(self, batch):
        """Return the detached loss and F-score of the model on ``batch``."""
        inputs, labels = batch
        predictions = self(inputs)
        return {
            'val_loss': F.binary_cross_entropy(predictions, labels).detach(),
            'val_score': F_score(predictions, labels).detach(),
        }

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation metrics into plain floats."""
        def _epoch_mean(key):
            # Unweighted mean over batches, as a Python float.
            return torch.stack([step[key] for step in outputs]).mean().item()

        return {key: _epoch_mean(key) for key in ('val_loss', 'val_score')}

    def epoch_end(self, epoch, result):
        """Print a one-line summary of the epoch's metrics."""
        template = "Epoch [{}], train_loss: {:.4f}, val_loss: {:.4f}, val_score: {:.4f}"
        metrics = [result[name] for name in ('train_loss', 'val_loss', 'val_score')]
        print(template.format(epoch, *metrics))

In [6]:
class CNN(MultilabelImageClassificationBase):
    """Wide ResNet-50-2 backbone with its final layer replaced for multi-label output.

    Args:
        num_classes: Number of output attributes (defaults to 288, the value
            that used to be hard-coded).
        pretrained: Forwarded to torchvision.  Pass ``False`` when a
            checkpoint is loaded right after construction, to skip fetching
            the ImageNet weights.
    """

    def __init__(self, num_classes=288, pretrained=True):
        super().__init__()
        # torchvision names this architecture `wide_resnet50_2`; it has no
        # `wide_resnet50` attribute, so the previous call raised
        # AttributeError.
        # NOTE(review): the saved model reprs later in this notebook show
        # BasicBlock layers, which this backbone does not use — confirm which
        # architecture the existing checkpoints were trained with.
        self.network = models.wide_resnet50_2(pretrained=pretrained)
        # Replace last layer
        num_ftrs = self.network.fc.in_features
        self.network.fc = torch.nn.Linear(num_ftrs, num_classes)

    def forward(self, xb):
        """Return per-attribute probabilities (sigmoid of the network output)."""
        return torch.sigmoid(self.network(xb))

In [7]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    return torch.device(device_name)
    
def to_device(data, device):
    """Move a tensor, or a (nested) list/tuple of tensors, to ``device``.

    Lists and tuples are both returned as lists.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    return [to_device(item, device) for item in data]

class DeviceDataLoader():
    """Thin wrapper around a dataloader that moves each batch to ``device``."""

    def __init__(self, dl, device):
        self.dl, self.device = dl, device

    def __len__(self):
        """Number of batches in the wrapped dataloader."""
        return len(self.dl)

    def __iter__(self):
        """Yield each batch of the wrapped dataloader after moving it to the device."""
        yield from (to_device(batch, self.device) for batch in self.dl)

In [8]:
def evaluate(model, val_loader):
    """Run the model's validation step on every batch and aggregate the results.

    The model is put in eval mode and gradient tracking is disabled for the
    whole call.  Returns whatever ``model.validation_epoch_end`` returns.
    """
    with torch.no_grad():
        model.eval()
        step_outputs = list(map(model.validation_step, val_loader))
        return model.validation_epoch_end(step_outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate passed to the optimizer.
        model: Module providing ``training_step`` and ``epoch_end``, plus the
            validation hooks used by ``evaluate``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches.
        opt_func: Optimizer class, called as ``opt_func(model.parameters(), lr)``.

    Returns:
        A list with one result dict per epoch: the ``evaluate`` output plus a
        ``'train_loss'`` entry (mean of the per-batch training losses).
    """
    torch.cuda.empty_cache()
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):
        # Training Phase
        model.train()
        train_losses = []
        for batch in tqdm(train_loader):
            loss = model.training_step(batch)
            # Store only the detached value: keeping the live loss tensor
            # would hold a reference to every batch's autograd graph until
            # the end of the epoch.
            train_losses.append(loss.detach())
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
        # Validation phase
        result = evaluate(model, val_loader)
        result['train_loss'] = torch.stack(train_losses).mean().item()
        model.epoch_end(epoch, result)
        history.append(result)
    return history

In [9]:
# load dataset
# Only ToTensor is applied here: no resizing, augmentation or normalization.
transform = transforms.Compose([transforms.ToTensor()])
# Pass the CSV path as separate components so os.path.join picks the host OS
# separator; the former "data\\csv\\..." literal only resolved on Windows.
dataset = CUBDataset(os.path.join(path, "data", "csv", "image_path_to_attrs.csv"), path, transform=transform)

In [10]:
# split dataset into train and validation datasets
val_pct = 0.15
val_size = int(val_pct * len(dataset))
train_size = len(dataset) - val_size
# Seed the split so the same images land in the validation set on every run;
# otherwise a checkpoint reloaded in a later session would be scored on a
# different split than the one it was validated on during training.
train_ds, val_ds = random_split(
    dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42)
)

In [11]:
batch_size = 16
# Training batches are reshuffled each epoch; validation uses batches twice as large.
train_dl = DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    pin_memory=True,
)
val_dl = DataLoader(
    dataset=val_ds,
    batch_size=batch_size * 2,
    num_workers=0,
    pin_memory=True,
)

In [12]:
# Run on GPU when CUDA is available; get_default_device() falls back to the CPU otherwise.
device = get_default_device()
device

device(type='cuda')

In [13]:
# Wrap both loaders so every batch is moved to `device` as it is yielded.
# NOTE: the names are rebound in place, so re-running this cell wraps the
# already-wrapped loaders a second time.
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)

In [14]:
# Build the network and move its parameters to the selected device.
model = to_device(CNN(), device)

In [15]:
# Training hyperparameters, passed to fit() below.
num_epochs = 10  # number of passes over the training loader
opt_func = torch.optim.Adam  # optimizer class; fit() calls opt_func(model.parameters(), lr)
lr = 1e-5  # learning rate handed to the optimizer

In [16]:
# Total number of samples (rows in the CSV) before the train/validation split.
len(dataset)

5263

In [17]:
# evaluate before training: baseline validation loss / F-score of the model as constructed
evaluate(model, val_dl)

{'val_loss': 0.7723822593688965, 'val_score': 0.18556448817253113}

In [1]:
# train
# `history` receives one result dict per epoch (val_loss, val_score, train_loss).
# NOTE(review): this cell's execution count is out of sequence with its
# neighbours and it has no saved output — re-run the notebook top to bottom.
history = fit(num_epochs, lr, model, train_dl, val_dl, opt_func)

# Train Model on Original (Non-Resized) Images

In [None]:
# Only ToTensor is applied, so images keep whatever resolution they have on disk.
# NOTE(review): the CSV is addressed with a Colab-style "/content/..." path
# while `path` (used as root_dir) is set to a Windows directory earlier in the
# notebook — confirm which environment this section is meant to run in.
# NOTE(review): if the original images differ in size, the default DataLoader
# collation cannot stack them into one batch — TODO confirm, or add a resize.
transform = transforms.Compose([transforms.ToTensor()])
origin_dataset = CUBDataset("/content/origin_image_path_to_attrs.csv", path, transform=transform)

In [None]:
# Hold out 15% of the original-resolution dataset for validation.
val_pct = 0.15
val_size = int(val_pct * len(origin_dataset))
train_size = len(origin_dataset) - val_size
# Seed the split so the validation set is the same on every run.
train_ds, val_ds = random_split(
    origin_dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42)
)

In [None]:
batch_size = 16
# Same loader setup as the first section, but with two worker processes per loader.
train_dl = DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    pin_memory=True,
)
val_dl = DataLoader(
    dataset=val_ds,
    batch_size=batch_size * 2,
    num_workers=2,
    pin_memory=True,
)

In [None]:
# Re-select the compute device (same call as in the first training section).
device = get_default_device()
device

device(type='cuda')

In [None]:
# Wrap the new loaders so batches are moved to `device` on iteration.
# NOTE: this rebinds train_dl / val_dl, replacing the loaders of the first section.
train_dl = DeviceDataLoader(train_dl, device)
val_dl = DeviceDataLoader(val_dl, device)

In [None]:
# Fresh network for the original-resolution images.
# NOTE(review): no fit() call for origin_model is visible in this notebook — confirm whether training it was intended.
origin_model = to_device(CNN(), device)

In [None]:
# Hyperparameters for this section (same values as the first training section).
num_epochs = 10  # number of training epochs
opt_func = torch.optim.Adam  # optimizer class
lr = 1e-5  # learning rate

# Predict

In [None]:
def predict_single(model, image):
    """Run ``model`` on a single un-batched image tensor.

    The model is switched to eval mode for the call (so BatchNorm/Dropout
    behave deterministically) and restored afterwards; no gradients are
    tracked.

    Args:
        model: A ``torch.nn.Module``.
        image: Input tensor without a batch dimension; one is added here.

    Returns:
        The first (only) row of the model output, not attached to autograd.
    """
    xb = image.unsqueeze(0)
    # Follow the model's own device instead of a notebook-global `device`,
    # so the function works for any model regardless of cell order.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        xb = xb.to(first_param.device)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            preds = model(xb)
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)
    return preds[0]

In [None]:
def decode_pred(pred, threshold=0.5):
    """Binarize a tensor of scores into a NumPy array of 0/1 labels.

    Args:
        pred: Tensor of scores (any device; may require grad).
        threshold: Scores strictly above this value become 1, all others 0.

    Returns:
        An integer NumPy array with the same shape as ``pred``.
    """
    scores = pred.detach().cpu().numpy()
    # Honour the caller's threshold; it used to be ignored in favour of a
    # hard-coded 0.5.
    return np.where(scores > threshold, 1, 0)

In [None]:
# Predict the attributes of the first dataset sample, then binarize the scores
# with decode_pred's default threshold.  Note that `pred` is rebound from a
# tensor to a NumPy array.
pred = predict_single(model, dataset[0][0])
pred = decode_pred(pred)
pred

Prediction:  tensor([9.5776e-04, 9.8298e-01, 5.9659e-03, 1.9908e-03, 7.5923e-04, 5.8100e-03,
        3.1481e-02, 1.5744e-03, 7.0058e-05, 8.4130e-05, 1.0583e-02, 2.3193e-02,
        8.8994e-03, 9.9501e-01, 9.8486e-01, 1.2546e-02, 8.6315e-03, 2.2253e-04,
        4.1445e-04, 3.0190e-03, 3.1699e-03, 9.8071e-01, 1.6737e-02, 1.0108e-02,
        6.5905e-03, 5.2788e-03, 1.0029e-02, 1.3767e-03, 1.4587e-04, 1.8061e-04,
        5.3763e-03, 1.2758e-04, 6.4290e-03, 1.0123e-02, 2.2790e-02, 9.5540e-01,
        2.3320e-03, 2.2011e-03, 9.8673e-01, 9.1794e-03, 9.4037e-01, 1.5800e-03,
        2.1446e-02, 1.9408e-02, 2.2303e-02, 1.0921e-02, 6.6283e-04, 8.1149e-03,
        1.0944e-04, 4.7125e-04, 1.3761e-07, 8.9951e-01, 1.3504e-02, 5.3039e-04,
        1.3596e-02, 2.2791e-03, 9.2812e-04, 4.3537e-04, 3.5519e-02, 4.1149e-03,
        9.8722e-01, 2.2692e-03, 6.6331e-02, 3.4055e-02, 3.7743e-02, 1.3034e-03,
        1.5173e-03, 1.6025e-03, 5.0034e-04, 2.4763e-03, 8.3332e-02, 8.2951e-02,
        9.8644e-01, 9.3568e

array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1,
       0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0])

In [2]:
# NOTE(review): repeats the prediction cell above without displaying the
# result, and its execution count is out of sequence — likely a leftover.
pred = predict_single(model, dataset[0][0])
pred = decode_pred(pred)

In [None]:
# Label vector of the same sample (index 0) as a NumPy array — presumably for comparison with `pred`.
ground_truth = dataset[0][1].numpy()

# Save Model

In [19]:
# save model
# File name of the state-dict checkpoint; the load cell below reads this same file.
PATH = "cnn_model.pt"
# NOTE(review): the save call is commented out, so this cell writes nothing;
# a checkpoint from an earlier run must already exist for the load to succeed.
# torch.save(model.state_dict(), PATH)

In [20]:
# load model
# Restore onto the device selected earlier.  `device2` used to be hard-coded
# to "cuda" and was then never used.
device2 = device
model2 = CNN()
# map_location lets a checkpoint saved from GPU tensors load on a machine
# without CUDA.
model2.load_state_dict(torch.load(PATH, map_location=device2))
model2.eval()
model2.to(device2)

# model = model2

CNN(
  (network): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running

## Load back the model

In [22]:
# Score the restored model on the current validation loader.
# NOTE(review): val_dl is whatever split the current session produced; if the
# checkpoint was trained on a different split, this set may contain images the
# model was trained on — confirm before trusting the score.
evaluate(model2, val_dl)

{'val_loss': 0.03831261023879051, 'val_score': 0.9257423281669617}

In [23]:
# Load a checkpoint that stores the whole pickled model object.
# SECURITY: unpickling a full model executes code from the file — only load
# checkpoints you created yourself.
# NOTE(review): no cell in this notebook writes "whole_model.pt"; recent
# PyTorch releases may also require weights_only=False for this kind of
# checkpoint — confirm against the installed version.
# map_location keeps the load working on machines without the GPU the model
# was saved from.
model3 = torch.load("whole_model.pt", map_location=device)
model3.eval()

CNN(
  (network): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): BasicBlock(
        (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running

In [24]:
# Score the whole-model checkpoint on the same validation loader as model2.
evaluate(model3, val_dl)

{'val_loss': 0.03831261023879051, 'val_score': 0.9257423281669617}