In [24]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms, utils
import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# Load the MNIST training and test datasets.
# Both splits go through the same preprocessing: convert each image to a
# tensor, then normalize it with mean 0.1307 and std 0.3081.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_dataset = datasets.MNIST(root='data', train=True, download=True,
                               transform=mnist_transform)
test_dataset = datasets.MNIST(root='data', train=False, download=True,
                              transform=mnist_transform)

In [3]:
#split training dataset into training and validation sets
#NOTE: this rebinds `train_dataset` to the 50000-sample split, so the cell is
#not idempotent -- re-run the dataset-loading cell before re-running this one,
#otherwise random_split is asked to cut 60000 samples out of 50000.
train_dataset, val_dataset = torch.utils.data.random_split(
    train_dataset,
    [50000, 10000],
    generator=torch.Generator().manual_seed(11))


def _indices_with_labels(subset, wanted_labels):
    """Return the positions inside `subset` whose label is in `wanted_labels`.

    Fast path: when `subset` is a Subset of a dataset that exposes a `targets`
    sequence and applies no `target_transform`, the labels are read directly
    from `targets`, which avoids loading and transforming every image.
    Otherwise every sample is visited and its second element is compared, as
    the original list comprehension did.

    Returns a list of Python ints, in ascending order.
    """
    base = getattr(subset, 'dataset', None)
    targets = getattr(base, 'targets', None)
    positions = getattr(subset, 'indices', None)
    can_use_targets = (targets is not None
                       and positions is not None
                       and getattr(base, 'target_transform', None) is None)
    if can_use_targets:
        # labels[k] is the label of subset[k]
        labels = torch.as_tensor(targets)[torch.as_tensor(positions, dtype=torch.long)]
        keep = torch.zeros_like(labels, dtype=torch.bool)
        for wanted in wanted_labels:
            keep |= labels == wanted
        return keep.nonzero().flatten().tolist()
    return [idx for idx, sample in enumerate(subset) if sample[1] in wanted_labels]


#create filters for datasets so only 5s and 8s are included
train_filter = _indices_with_labels(train_dataset, [5, 8])
val_filter = _indices_with_labels(val_dataset, [5, 8])


#create dataloaders using the filtered training and validation datasets
train_dataloader = torch.utils.data.DataLoader(torch.utils.data.Subset(train_dataset, train_filter),
                                               batch_size = 64)
val_dataloader = torch.utils.data.DataLoader(torch.utils.data.Subset(val_dataset, val_filter),
                                             batch_size = 64)

In [4]:
# Sanity check: print the labels of the first batch drawn from each
# dataloader so the 5/8 filtering can be confirmed by eye.

first_train_batch = next(iter(train_dataloader))
train_labels = first_train_batch[1]
print('training labels: ', train_labels)

first_val_batch = next(iter(val_dataloader))
val_labels = first_val_batch[1]
print('val labels: ', val_labels)

training labels:  tensor([8, 8, 5, 5, 8, 8, 5, 5, 8, 5, 5, 8, 5, 5, 5, 5, 5, 5, 8, 5, 5, 8, 5, 8,
        8, 5, 5, 8, 5, 8, 8, 8, 5, 5, 8, 8, 5, 5, 5, 5, 8, 8, 8, 8, 5, 8, 5, 8,
        8, 8, 8, 8, 5, 5, 5, 5, 8, 8, 8, 8, 5, 8, 5, 5])
val labels:  tensor([5, 5, 5, 5, 5, 8, 8, 8, 5, 5, 8, 8, 5, 5, 8, 5, 8, 8, 5, 8, 5, 8, 8, 5,
        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 5, 8, 5, 5, 5, 8, 8, 8, 8, 8, 5, 8, 5,
        8, 8, 8, 5, 5, 8, 5, 5, 8, 5, 5, 8, 8, 5, 5, 8])


In [5]:
#define network for image processing
#outputs are in the form of (log class probabilities, hidden features)

class CNN(nn.Module):
    """Two-convolution binary classifier that also exposes its hidden layer.

    Layer stack: conv(1->10, k=5) -> ReLU -> maxpool(2) -> conv(10->20, k=5)
    -> ReLU -> maxpool(2) -> flatten -> linear(320->10) -> ReLU -> linear(10->2)
    -> log_softmax.

    The 320 inputs of `fc1` correspond to 20 channels of 4x4 maps, which is
    what a 1x28x28 image produces (28 -> 24 -> 12 -> 8 -> 4).
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)   # 10 channels in first convolution layer
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)  # 20 channels in second conv. layer
        self.fc1 = nn.Linear(320, 10)                  # 10 hidden units in first fully-connected layer
        self.fc2 = nn.Linear(10, 2)                    # 2 output units

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: tensor of shape (batch, 1, 28, 28).

        Returns:
            A tuple `(output, h_fc1)` where `output` holds the log class
            probabilities, shape (batch, 2), and `h_fc1` holds the post-ReLU
            activations of `fc1`, shape (batch, 10).

        Raises:
            RuntimeError: if the flattened feature size per sample is not 320
                (i.e. the spatial size of `x` is not 28x28).
        """
        # first convolutional layer
        h_conv1_pool = F.max_pool2d(F.relu(self.conv1(x)), 2)

        # second convolutional layer
        h_conv2_pool = F.max_pool2d(F.relu(self.conv2(h_conv1_pool)), 2)

        # A 3-D tensor here means the input had no batch axis; add one so the
        # result matches what the previous `view(-1, 320)` returned for it.
        if h_conv2_pool.dim() == 3:
            h_conv2_pool = h_conv2_pool.unsqueeze(0)

        # Flatten per sample. Unlike `view(-1, 320)`, this never moves
        # elements across the batch axis, so a wrongly sized input fails in
        # `fc1` instead of silently yielding a different batch size.
        flat = h_conv2_pool.flatten(start_dim=1)

        # fully-connected layer
        h_fc1 = F.relu(self.fc1(flat))

        # classifier output
        output = F.log_softmax(self.fc2(h_fc1), dim=1)
        return output, h_fc1

In [6]:
#define training loop

def train_one_epoch():
    """Train the global `model` for one pass over the global `train_dataloader`.

    Uses the module-level `optimizer` and `criterion`. Labels are remapped to
    class indices before the loss is computed: 5 -> 0, anything else -> 1.

    Every 10 batches the mean loss of those 10 batches is printed and the
    running sum is reset.

    Returns:
        The mean loss of the most recent complete 10-batch window. If the
        epoch ended before any window completed (fewer than 10 batches), the
        mean loss over the batches that did run; 0.0 if there were none.
    """
    running_loss = 0.
    last_loss = 0.
    batches_seen = 0
    window_reported = False

    for i, batch in enumerate(train_dataloader):
        data = batch[0]
        target = batch[1]

        optimizer.zero_grad()
        output = model(data)[0]

        #relabeling target values so 5-->0 and 8-->1 (vectorized, int64)
        new_target = (target != 5).long()

        loss = criterion(output, new_target)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_seen += 1

        if i % 10 == 9:
            last_loss = running_loss / 10
            print('batch {} loss: {}'.format(i+1, last_loss))
            running_loss = 0.
            window_reported = True

    # Without this, a short epoch would report a misleading loss of exactly 0.
    if not window_reported and batches_seen:
        last_loss = running_loss / batches_seen

    return last_loss

In [7]:
#set training parameters
model = CNN()
criterion = torch.nn.NLLLoss()
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
num_epochs = 5

#any finite validation loss beats the initial value
best_val_loss = float('inf')

#train model
for epoch in range(num_epochs):
    print('EPOCH {}'.format(epoch+1))

    model.train(True)
    avg_loss = train_one_epoch()

    #check validation loss
    model.train(False)
    running_val_loss = 0.
    num_val_batches = 0
    #no_grad: validation needs no autograd graph; .item() keeps the running
    #sum a plain float so no per-batch graph is kept alive
    with torch.no_grad():
        for val_inputs, val_target in val_dataloader:
            val_outputs = model(val_inputs)[0]

            #relabeling target values so 5-->0 and 8-->1 (vectorized, int64)
            new_val_target = (val_target != 5).long()

            running_val_loss += criterion(val_outputs, new_val_target).item()
            num_val_batches += 1

    #an empty validation loader gives NaN, which never counts as an improvement
    avg_val_loss = running_val_loss / num_val_batches if num_val_batches else float('nan')
    print('LOSS train {} valid {}'.format(avg_loss, avg_val_loss))

    #keep the weights of the epoch with the lowest validation loss
    #(best_val_loss is now a Python float rather than a 0-dim tensor)
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), 'cnn')

EPOCH 1
batch 10 loss: 0.6140370398759842
batch 20 loss: 0.3843510627746582
batch 30 loss: 0.18872250467538834
batch 40 loss: 0.1199959971010685
batch 50 loss: 0.11840065531432628
batch 60 loss: 0.05159692857414484
batch 70 loss: 0.08677796442061662
batch 80 loss: 0.06210407223552465
batch 90 loss: 0.03817955674603581
batch 100 loss: 0.04360020002350211
batch 110 loss: 0.03200101349502802
batch 120 loss: 0.03342755315825343
batch 130 loss: 0.0369915752671659
batch 140 loss: 0.038491040095686914
LOSS train 0.038491040095686914 valid 0.03981615602970123
EPOCH 2
batch 10 loss: 0.03209939012303949
batch 20 loss: 0.046838431153446436
batch 30 loss: 0.027229642495512962
batch 40 loss: 0.031206665188074113
batch 50 loss: 0.04585299580357969
batch 60 loss: 0.015964054223150014
batch 70 loss: 0.04624083014205098
batch 80 loss: 0.039967200998216865
batch 90 loss: 0.01574304080568254
batch 100 loss: 0.015619005309417844
batch 110 loss: 0.02012655879370868
batch 120 loss: 0.015366165060549974
batc

In [33]:
#extract hidden features from training dataset

#column layout: the digit label followed by the 10 fc1 activations
cols = ['label'] + ['feature_{}'.format(k) for k in range(1, 11)]

#iterate through training dataloader and collect hidden features
#one (batch_len, 11) float64 array per batch; concatenating them yields exactly
#one row per sample, with no dependence on the batch size and no
#uninitialized padding rows
batch_tables = []
model.eval()
with torch.no_grad():
    for batch in train_dataloader:
        data = batch[0]
        target = batch[1]

        log_probs, hidden_features = model(data)

        batch_tables.append(np.column_stack([
            target.detach().cpu().numpy().astype(np.float64),
            hidden_features.detach().cpu().numpy().astype(np.float64),
        ]))

if batch_tables:
    hidden_features_results = np.concatenate(batch_tables, axis=0)
else:
    hidden_features_results = np.empty((0, len(cols)))

#store results in dataframe
hidden_features_df = pd.DataFrame(data = hidden_features_results, columns = cols)
hidden_features_df['label'] = hidden_features_df['label'].astype(int)
#kept from the original as a guard: drops any row whose label is not 5 or 8
hidden_features_df = hidden_features_df[hidden_features_df['label'].isin([5, 8])]
print(hidden_features_df.head())

#save dataframe as csv
hidden_features_df.to_csv('hidden_features_df.csv')

   label  feature_1  feature_2  feature_3  feature_4  feature_5  feature_6  \
0      8   9.703371        0.0   2.013049   0.000000        0.0        0.0   
1      8  13.691728        0.0   0.000000   0.000000        0.0        0.0   
2      5   0.000000        0.0  13.714252  12.262377        0.0        0.0   
3      5   0.000000        0.0  11.346357   8.525431        0.0        0.0   
4      8   5.615644        0.0   7.408042   3.080934        0.0        0.0   

   feature_7  feature_8  feature_9  feature_10  
0   3.053184  10.997141        0.0         0.0  
1   0.305161  14.574522        0.0         0.0  
2  16.002954   0.000000        0.0         0.0  
3  12.898945   0.332094        0.0         0.0  
4   6.832582   6.443026        0.0         0.0  
