In [5]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [7]:
import zipfile
import os

zip_ref = zipfile.ZipFile('/content/drive/MyDrive/low-resolution.zip', 'r') #Opens the zip file in read mode
zip_ref.extractall('/tmp') #Extracts the files into the /tmp folder
zip_ref.close()

zip_ref = zipfile.ZipFile('/content/drive/MyDrive/stanford-dog-images.zip', 'r') #Opens the zip file in read mode
zip_ref.extractall('/tmp') #Extracts the files into the /tmp folder
zip_ref.close()

### Imports

In [8]:
import torch
from torch.utils.data import Dataset, DataLoader, Subset, random_split
import torchvision
from torchvision import datasets, transforms
import numpy as np
import matplotlib.pyplot as plt

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from torchvision import models

from collections import Counter


### Connect to Device

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

### Data Pre-Processing

In [51]:
# my_transform = transforms.Compose([
#     transforms.Resize((100, 100)),
#     transforms.ToTensor(),
#     transforms.Grayscale(),
#     transforms.Normalize([0.485], [0.229])])

# dataset = datasets.ImageFolder("../tmp/low-resolution", my_transform)  # your dataset
# train_dataset, valid_dataset, test_dataset = random_split(dataset, [0.8, 0.1, 0.1], generator=torch.Generator().manual_seed(42))

# train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
# valid_loader = DataLoader(valid_dataset, batch_size=50, shuffle=True, num_workers=2)
# test_loader = DataLoader(test_dataset, batch_size=50, shuffle=True, num_workers=2)


In [57]:
my_transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
    transforms.Grayscale(),
    transforms.Normalize([0.485], [0.229])])

dataset = datasets.ImageFolder("../tmp/low-resolution", my_transform)

train_indices, val_indices = train_test_split(
    range(len(dataset.targets)),
    test_size=0.4,
    stratify=dataset.targets,
    random_state=0
)

val_indices, test_indices = train_test_split(
    val_indices,
    test_size=0.5,
    stratify=[dataset.targets[i] for i in val_indices],
    random_state=0
)

train_data = torch.utils.data.Subset(dataset, train_indices)
train_loader = DataLoader(train_data, batch_size=32, shuffle=True, num_workers=2)

valid_data = torch.utils.data.Subset(dataset, val_indices)
valid_loader = DataLoader(valid_data, batch_size=32, num_workers=2)

test_data = torch.utils.data.Subset(dataset, test_indices)
test_loader = DataLoader(test_data, batch_size=32, num_workers=2)

print(len(train_loader) * 32, len(valid_loader) * 32, len(test_loader) * 32)
print((len(train_loader) * 32 + len(valid_loader) * 32 + len(test_loader) * 32))
print(len(dataset))

# train_features_raw, train_labels = next(iter(train_loader))
# print(train_labels)
# valid_features_raw, valid_labels = next(iter(valid_loader))
# print(valid_labels)
# test_features_raw, test_labels = next(iter(test_loader))
# print(test_labels)

42272 14112 14112
70496
70432
tensor([115, 124, 125, 113,   4, 125,  92,   5,  29,   2, 122,  30,   2,  11,
        119, 117,  58,  72,  71, 125,   1, 119, 128, 114,  92, 121, 119, 104,
        121, 110,  74, 114])
tensor([  1,  96, 119, 115,  83,  92, 127, 127,  96, 107,  11,  60,  95, 127,
         32, 116, 104,  92, 114, 114, 114,  82,  91,  98,  58, 108,  92,  69,
         19, 113,  22,  58])
tensor([125, 129, 125, 106, 127,  71, 119, 124, 127, 127,  41,  33, 113, 127,
         92,  37,   4, 113, 127,  58,   6,  34, 113, 114, 104, 119,  71, 119,
          4,  92,  73,  20])


### CNN Model

In [61]:
# https://medium.com/analytics-vidhya/complete-guide-to-build-cnn-in-pytorch-and-keras-abc9ed8b8160

class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        self.conv1 = nn.Conv2d(1,8,3,1)
        self.conv2 = nn.Conv2d(8,16,3,1)
        
        self.fc1 = nn.Linear(36864, 200)
        self.fc2 = nn.Linear(200, 130)

    def forward(self,x):
        x=self.conv1(x)
        x=F.relu(x)
        x=self.conv2(x)
        x=F.relu(x)
        x=F.max_pool2d(x,2)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output

### ANN Model

In [62]:
class MLP(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(10000, 5000)
        self.fc2 = nn.Linear(5000, 500)
        self.fc3 = nn.Linear(500, 130)

    def forward(self, x):
        x = x.view(x.shape[0], -1)
        layer1 = F.relu(self.fc1(x))
        layer2 = F.relu(self.fc2(layer1))
        layer3 = self.fc3(layer2)
        return layer3

### Creating and Training Ensembling Models

### Testing Ensembles

#### Create Ensemble

In [63]:
ensemble = []

checkpoint1 = torch.load('50.pth')
cnn_50_epochs = NeuralNet()
cnn_50_epochs.load_state_dict(checkpoint1['model'])
cnn_50_epochs = cnn_50_epochs.to(device)
ensemble.append(cnn_50_epochs)

checkpoint2 = torch.load('ann_40epochs_all_batched.pth')
ann = MLP()
ann.load_state_dict(checkpoint2['model'])
ann = ann.to(device)
ensemble.append(ann)

checkpoint3 = torch.load('19.pth')
cnn_20_epochs = NeuralNet()
cnn_20_epochs.load_state_dict(checkpoint3['model'])
cnn_20_epochs = cnn_20_epochs.to(device)
ensemble.append(cnn_20_epochs)



#### Test Ensemble Models

In [None]:
y_predictions = []
y_true = []

for i, data in enumerate(test_loader):
    inputs, labels = data
    inputs = inputs.to(device)
    labels = labels.to(device)

    votes = []
    
    for model in ensemble:
        log_probabilities = model(inputs)
        y_prediction = log_probabilities.argmax(dim=1, keepdim=True)
        votes.append(y_prediction)

    # print(votes)
    counter = Counter(votes)
    # print(counter.most_common(1)[0][0])
    # break
    ensemble_y_prediction = counter.most_common(1)[0][0]

    y_predictions.extend(list(np.concatenate(ensemble_y_prediction.tolist()).flat))
    y_true.extend(labels.tolist())

tensor([  2, 128,  70, 117, 125,  25,   0, 112,  95, 113, 114, 121, 125,  88,
        127,  95,   1, 104, 126,  90,  58, 113, 105, 115,  98,   4,  92,  30,
        119, 104, 113,  68,   4, 113, 122,  13, 115,  18, 127,  71,  66,   1,
          9,  49, 101, 108,  77,  85, 121,   2])


In [None]:
accuracy_score(y_true, y_predictions)

0.9663495669459037

### Testing Ensemble on Stanford Dogs Dataset

#### Data Processing

In [None]:
stanford_dog_list = os.listdir("../tmp/stanford-dog-images")
for i in range(len(stanford_dog_list)):
    if "-" in stanford_dog_list[i]:
        stanford_dog_list[i] = stanford_dog_list[i].lower()[stanford_dog_list[i].index("-")+1::]

print(len(stanford_dog_list))
stanford_dog_list

In [None]:
tsinghua_dog_list = os.listdir("../tmp/low-resolution")
for i in range(len(tsinghua_dog_list)):
    if "-" in tsinghua_dog_list[i]:
        tsinghua_dog_list[i] = tsinghua_dog_list[i].lower()[tsinghua_dog_list[i].rfind("-")+1::]
tsinghua_dog_list.remove("~$directoryfilecount.xlsx")

print(len(tsinghua_dog_list))
tsinghua_dog_list

In [None]:
stanford_to_tsinghua = {}
for s_breed in stanford_dog_list:
    if s_breed in tsinghua_dog_list:
        stanford_to_tsinghua[stanford_dog_list.index(s_breed)] = tsinghua_dog_list.index(s_breed)

stanford_to_tsinghua

#### Testing

In [40]:
# my_transform = transforms.Compose([
#     transforms.Resize((100, 100)),
#     transforms.ToTensor(),
#     transforms.Grayscale(),
#     transforms.Normalize([0.485], [0.229])])

# stanford_dataset = datasets.ImageFolder("../tmp/stanford-dog-images", my_transform)  # Stanford dogs dataset
# train_dataset, valid_dataset, test_dataset = random_split(stanford_dataset, [0.8, 0.1, 0.1], generator=torch.Generator().manual_seed(42))

# test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True, num_workers=2)

# # test_loader = DataLoader(orig_set, batch_size=100, shuffle=True, num_workers=2)
# # test_features_raw, test_labels = next(iter(test_loader))

# # test_features = test_features_raw.reshape(-1, 10000).squeeze()
# # print(test_features.size())

In [65]:
my_transform = transforms.Compose([
    transforms.Resize((100, 100)),
    transforms.ToTensor(),
    transforms.Grayscale(),
    transforms.Normalize([0.485], [0.229])])

dataset = datasets.ImageFolder("../tmp/stanford-dog-images", my_transform)

train_indices, val_indices = train_test_split(
    range(len(dataset.targets)),
    test_size=0.4,
    stratify=dataset.targets,
    random_state=0
)

val_indices, test_indices = train_test_split(
    val_indices,
    test_size=0.5,
    stratify=[dataset.targets[i] for i in val_indices],
    random_state=0
)

test_data = torch.utils.data.Subset(dataset, test_indices)
test_loader = DataLoader(test_data, batch_size=32, num_workers=2)

In [None]:
y_predictions = []
y_true = []

for i, data in enumerate(test_loader):
    inputs, labels = data

    for label_i in range(len(labels)):
        if labels[label_i].tolist() in stanford_to_tsinghua:
            labels[label_i] = torch.tensor(stanford_to_tsinghua[labels[label_i].tolist()])
    
    votes = []
    
    for model in ensemble:
        log_probabilities = model(inputs)
        y_prediction = log_probabilities.argmax(dim=1, keepdim=True)
        votes.append(y_prediction)

    print(votes)
    counter = Counter(votes)
    print(counter.most_common(1)[0][0])
    break
    ensemble_y_prediction = counter.most_common(1)[0][0]

    y_predictions.extend(list(np.concatenate(ensemble_y_prediction.tolist()).flat))
    y_true.extend(labels.tolist())

In [69]:
accuracy_score(y_true, y_predictions)

0.007288629737609329