In [None]:
# Representation Learning with SimCLR

![Overview of the SimCLR framework](figures/simclr_framework.png)

### Imports

In [None]:
import torch
import random
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST
from torchvision import transforms as T
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

from representation_learning.augment import get_augment
from representation_learning.compute_embeddings import compute_embeddings
from representation_learning.loss import simclr_loss
from representation_learning.train import train
from representation_learning.model_makers import create_encoder, create_projection_head
from representation_learning.view_transform import ViewTransform

### Device

In [None]:
# Device identifier handed to compute_embeddings in the evaluation cell;
# this notebook is pinned to the CPU.
device = 'cpu'

### Load Data

In [None]:
# FashionMNIST train/test splits stored under data_root; each image is
# converted with ToTensor. download=True is passed so torchvision may fetch
# the files when needed.
data_root = './data'
train_dataset = FashionMNIST(
    root=data_root,
    train=True,
    download=True,
    transform=T.ToTensor(),
)
test_dataset = FashionMNIST(
    root=data_root,
    train=False,
    download=True,
    transform=T.ToTensor(),
)

In [None]:
# Show the first h * w training images in an h-by-w grid of axis-free panels.
h, w = 5, 10
fig, ax = plt.subplots(nrows=h, ncols=w)
fig.set_size_inches((w, h))
ax = ax.ravel()  # flatten the 2-D axes array so panels can be walked in order
for i, panel in enumerate(ax):
    img, label = train_dataset[i]
    # Move the leading axis to the end before plotting (the ToTensor output
    # is presumably channel-first, while imshow wants channels last).
    panel.imshow(img.permute(1, 2, 0), cmap='gray')
    panel.axis('off')
plt.show()

### Create Encoder and Projection Head

In [None]:
# Build a fresh encoder and projection head from the project factory helpers.
encoder, projection_head = create_encoder(), create_projection_head()

### Test SimCLR Loss Function

$$
\begin{aligned}
\mathcal{L} = -\frac{1}{N} \sum_{i, j \in MB} \log\frac{\exp\left(\text{sim}\left(\mathbf{z}_{i}, \mathbf{z}_{j}\right)/\tau\right)}{\sum^{2N}_{k=1} \mathbf{1}_{[k \neq i]}\exp\left(\text{sim}\left(\mathbf{z}_{i}, \mathbf{z}_{k}\right)/\tau\right)}
\end{aligned}
$$

where $\mathbf{z}_i$ and $\mathbf{z}_j$ are the hidden representations of two augmented views of the same example, $\text{sim}(\mathbf{u}, \mathbf{v}) = \frac{\mathbf{u}^T \mathbf{v}}{\lVert \mathbf{u} \rVert \lVert \mathbf{v} \rVert}$ is the cosine similarity between two vectors, $\tau$ is a temperature scalar, and $MB$ is a randomly sampled mini-batch of $N$ augmented image pairs (i.e. $2N$ views in total).

In [None]:
# Sanity check for simclr_loss on synthetic embeddings: first with two
# independent random batches, then with a batch compared against itself.
torch.manual_seed(0)
embedding_shape = (1024, 32)
# torch.rand samples from [0, 1); subtracting 0.5 centres the values on zero.
z1 = torch.rand(embedding_shape).sub(0.5)
z2 = torch.rand(embedding_shape).sub(0.5)

unrelated_views = simclr_loss(z1, z2)
random_loss = unrelated_views.item()
print('random loss', random_loss)

identical_views = simclr_loss(z1, z1)
loss = identical_views.item()
print('views matching exactly', loss)

### Make Augmentation Transform

In [None]:
# Build the augmentation object via the project helper get_augment().
# NOTE(review): `augment` is not referenced by any later visible cell
# (ViewTransform() is constructed without arguments) -- confirm it is needed.
augment = get_augment()

In [None]:
# Swap the training set's transform in place: from here on, indexing
# train_dataset goes through ViewTransform instead of the ToTensor transform
# set at load time (the next cell unpacks each item as ((x1, x2), y)).
train_dataset.transform = ViewTransform()

### Test Augment

In [None]:
# Seed both RNGs, then draw one training item and plot its two views
# side by side.
torch.manual_seed(0)
random.seed(0)

(x1, x2), y = train_dataset[0]
fig, ax = plt.subplots(nrows=1, ncols=2)
for panel, view, title in zip(ax, (x1, x2), ('View 1', 'View 2')):
    # Index 0 selects the first slice along the leading axis for display.
    panel.imshow(view[0], cmap='gray')
    panel.set_title(title)
    panel.axis('off')
plt.show()

### Run the Training

In [None]:
# Wrap the training set in a shuffling loader and pass it to the project's
# train() helper; its return value replaces `encoder`.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=1024)
encoder = train(train_loader)

### Train a Linear Classifier on Top of the Encoder

In [None]:
# Evaluation copies of both FashionMNIST splits: each image is converted with
# ToTensor and then normalised with mean 0.5 and std 0.5.
transform = T.Compose([
    T.ToTensor(),
    T.Normalize((0.5,), (0.5,)),
])
train_eval = FashionMNIST(
    root=data_root,
    train=True,
    download=True,
    transform=transform,
)
test_eval = FashionMNIST(
    root=data_root,
    train=False,
    download=True,
    transform=transform,
)

In [None]:
# Linear evaluation: for each saved checkpoint, embed the train and test
# splits with the encoder, fit a logistic-regression classifier on the train
# embeddings and report its accuracy on the test embeddings.
for k in [0, 4]:
    encoder = create_encoder()
    # map_location remaps the stored tensors onto `device`, so a checkpoint
    # written from another device (e.g. a GPU) still loads here.
    # NOTE: torch.load unpickles the file -- only load checkpoints you trust.
    state_dict = torch.load(f'models/model_{k}.pt', map_location=device)
    encoder.load_state_dict(state_dict)
    encoder.eval()  # switch the module to evaluation mode before embedding

    xtrain, ytrain = compute_embeddings(train_eval, encoder, device)
    xtest, ytest = compute_embeddings(test_eval, encoder, device)

    clf = LogisticRegression(random_state=0, max_iter=1000, C=0.3)
    clf.fit(xtrain, ytrain)
    ypred = clf.predict(xtest)
    acc = accuracy_score(ytest, ypred)
    # Name the checkpoint so the two printed results can be told apart.
    print(f"Accuracy (model_{k}): {acc}")