In [2]:
import logging
import os
import sys

import numpy as np
import scipy as sp

%reload_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import seaborn as sns
sns.set_context("poster")
sns.set(rc={'figure.figsize': (16, 9.)})
sns.set_style("whitegrid")

import pandas as pd
pd.set_option("display.max_rows", 120)
pd.set_option("display.max_columns", 120)

logging.basicConfig(level=logging.INFO, stream=sys.stdout)

In [3]:
from PIL import Image
import torch
from torch import nn
from torchvision import transforms, models
from torch.utils.data import Dataset, DataLoader


class TripletsDataset(Dataset):
    """Dataset of labelled image triplets listed in a text file.

    Every non-blank line of ``triplets_path`` is split on whitespace; the last
    field is parsed as an integer label and the preceding fields are image
    identifiers. An identifier ``x`` is loaded from ``<img_dir>/x.jpg``.

    Args:
        triplets_path: Path of the text file described above.
        img_dir: Directory containing the ``<id>.jpg`` image files.
        transform: Optional callable applied to every loaded image.

    Raises:
        ValueError: If the last field of a non-blank line is not an integer.
    """

    def __init__(self, triplets_path, img_dir, transform=None):
        self.triplets_path = triplets_path
        self.img_dir = img_dir
        self.transform = transform

        with open(self.triplets_path) as f:
            # Skip blank lines (e.g. a trailing newline at the end of the file),
            # which would otherwise crash on int(""). Argument-less split()
            # also tolerates tabs and repeated spaces between fields.
            rows = [line.split() for line in f if line.strip()]
        self.labels = [int(row[-1]) for row in rows]
        self.triplets = [row[:-1] for row in rows]

    def __len__(self):
        """Return the number of samples (non-blank lines in the file)."""
        return len(self.triplets)

    def __getitem__(self, idx):
        """Load the images of sample ``idx``.

        Returns:
            A tuple ``(images, label)`` where ``images`` is a list with one
            entry per identifier (transformed if ``transform`` was given) and
            ``label`` is a scalar ``torch.Tensor``.
        """
        label = torch.tensor(self.labels[idx])

        images = []
        for image_id in self.triplets[idx]:
            path = os.path.join(self.img_dir, f"{image_id}.jpg")
            # The context manager closes the file handle deterministically.
            # convert("RGB") forces the pixel data to be read before the file
            # is closed and guarantees three channels, so grayscale / RGBA /
            # CMYK files do not break a 3-channel Normalize transform.
            with Image.open(path) as raw_image:
                image = raw_image.convert("RGB")
            if self.transform is not None:
                image = self.transform(image)
            images.append(image)

        return images, label


In [4]:
import os
from torch import nn
from torchvision import models
import torch.nn.functional as F
from torch.optim import SGD
import pytorch_lightning as pl


class Classifier(pl.LightningModule):
    """Binary classifier over image triplets on top of a frozen EfficientNet-B0.

    Each of the three images is passed through the pretrained backbone (without
    its classification head); the three feature vectors are concatenated and
    fed to a small MLP that outputs two logits.

    Args:
        train_dataset: Dataset served by ``train_dataloader``.
        val_dataset: Dataset served by ``val_dataloader``.
        lr: Learning rate handed to the optimizer.
        batch_size: Batch size of both dataloaders.
        num_workers: Worker processes per dataloader. Previously this was read
            from a notebook-global variable of the same name; the default
            matches the value that global was set to.
    """

    def __init__(self, train_dataset, val_dataset, lr=1e-3, batch_size=64, num_workers=8):
        super().__init__()

        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.batch_size = batch_size
        self.learning_rate = lr
        self.num_workers = num_workers

        self.efficientnet = models.efficientnet_b0(pretrained=True)
        self.efficientnet.requires_grad_(False)  # no fine-tuning
        # Frozen weights alone are not enough: BatchNorm running statistics and
        # dropout are controlled by train/eval mode (see `train` below).
        self.efficientnet.eval()
        self.embedding = nn.Sequential(
            *list(self.efficientnet.children())[:-1]
        )  # this is efficientnet without its classification layers

        self.classifier = nn.Sequential(
            nn.Linear(in_features=3 * 1280, out_features=100),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(100, 2),
        )  # the classifier we are training

        self.loss = nn.CrossEntropyLoss()

    def train(self, mode=True):
        """Switch train/eval mode while keeping the frozen backbone in eval mode.

        Without this, putting the whole module in train mode would also let the
        backbone's BatchNorm layers update their running statistics and enable
        its dropout, i.e. the "frozen" feature extractor would still change.
        """
        super().train(mode)
        self.efficientnet.eval()  # `embedding` shares these submodules
        return self

    def forward(self, images):
        """Return logits for a batch of triplets.

        Args:
            images: Sequence of exactly three image batches.
                # assumes each is (batch, 3, H, W) - TODO confirm against caller

        Returns:
            Tensor of shape ``(batch, 2)`` with unnormalized class scores.
        """
        img_a, img_b, img_c = images

        # flatten(start_dim=1) keeps the batch axis even when the batch holds a
        # single sample; a bare .squeeze() would drop it and break the
        # concatenation along dim=1.
        phi_a = torch.flatten(self.embedding(img_a), start_dim=1)
        phi_b = torch.flatten(self.embedding(img_b), start_dim=1)
        phi_c = torch.flatten(self.embedding(img_c), start_dim=1)
        phi = torch.cat((phi_a, phi_b, phi_c), dim=1)

        return self.classifier(phi)

    def configure_optimizers(self):
        """Return the optimizer: Adam with the configured learning rate.

        The class used to define this method twice (SGD first, Adam second);
        only the later Adam definition was ever in effect, so that is the one
        kept. It now honours ``self.learning_rate`` instead of a hard-coded
        value equal to the constructor default.
        """
        return torch.optim.Adam(self.parameters(), lr=self.learning_rate)

    def training_step(self, batch, batch_idx=None):
        """Compute and log the cross-entropy loss of one training batch."""
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y)

        self.log("train_loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one validation batch."""
        x, y = batch
        y_hat = self(x)
        # Same criterion object as in training_step, for consistency.
        loss = self.loss(y_hat, y)
        self.log("val_loss", loss)

    def train_dataloader(self):
        """Return the training dataloader (reshuffled every epoch)."""
        return DataLoader(
            dataset=self.train_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=True,
        )

    def val_dataloader(self):
        """Return the validation dataloader (fixed order)."""
        return DataLoader(
            dataset=self.val_dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
        )

In [7]:
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import TQDMProgressBar, EarlyStopping
from sklearn.model_selection import train_test_split

img_dir = "../food"
train_triplets_path = "../train_triplets.txt"
triplets_path = "../triplets.txt"
train_split_path = "../train_split.txt"
val_split_path = "../val_split.txt"

AVAIL_GPUS = min(1, torch.cuda.device_count())
learning_rate = 0.063  # NOTE: not passed to Classifier below
batch_size = 64
num_workers = 8


def _with_swapped_copies(raw_triplets):
    """Return two labelled rows per triplet: the original (label "1") and a
    copy with the second and third image swapped (label "0")."""
    labelled = []
    for first, second, third in raw_triplets:
        labelled.append([first, second, third, "1"])
        labelled.append([first, third, second, "0"])
    return labelled


def _write_rows(path, rows):
    """Write each row to ``path`` as one space-separated line."""
    with open(path, "w") as f:
        f.writelines(" ".join(row) + "\n" for row in rows)


# get triplets (blank lines are ignored)
with open(train_triplets_path) as f:
    raw_triplets = [line.split() for line in f if line.strip()]

# All labelled samples (original + swapped copy per triplet), kept on disk and
# in `triplets` as before.
triplets = _with_swapped_copies(raw_triplets)
_write_rows(triplets_path, triplets)

# Split the ORIGINAL triplets first and only then add the swapped copies.
# Splitting after augmentation would let a triplet land in the training set
# while its mirrored copy lands in the validation set, which leaks information
# into `val_loss` (the quantity early stopping monitors).
train_raw, val_raw = train_test_split(
    raw_triplets, test_size=0.1, random_state=489, shuffle=True
)
train_triplets = _with_swapped_copies(train_raw)
val_triplets = _with_swapped_copies(val_raw)

# write training and validation set to txt (so that DataSet can access it)
_write_rows(train_split_path, train_triplets)
_write_rows(val_split_path, val_triplets)


tfms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)  # could also try augmentation

train_dataset = TripletsDataset(train_split_path, img_dir, tfms)
val_dataset = TripletsDataset(val_split_path, img_dir, tfms)
model = Classifier(train_dataset=train_dataset, val_dataset=val_dataset, batch_size=batch_size)


bar = TQDMProgressBar(refresh_rate=1)
# Stop once `val_loss` has not improved for 5 consecutive validation runs.
early_stop_callback = EarlyStopping(
    monitor="val_loss", min_delta=0.0, patience=5, verbose=True
)


trainer = Trainer(
    gpus=AVAIL_GPUS,
    min_epochs=1,
    max_epochs=250,
    callbacks=[bar, early_stop_callback],
    auto_lr_find=False,
    auto_scale_batch_size=False,
)
# trainer.tune(model)
trainer.fit(model)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name         | Type             | Params
--------------------------------------------------
0 | efficientnet | EfficientNet     | 5.3 M 
1 | embedding    | Sequential       | 4.0 M 
2 | classifier   | Sequential       | 384 K 
3 | loss         | CrossEntropyLoss | 0     
--------------------------------------------------
384 K     Trainable params
5.3 M     Non-trainable params
5.7 M     Total params
22.691    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]