In [1]:
import pickle
import matplotlib.pyplot as plt
import random
import numpy as np
import networkx as nx
from sklearn.decomposition import PCA
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

## Dataset

In [27]:
class FpsDataset(Dataset):
    """PCA-reduced fingerprints with a held-out test partition.

    The fingerprints are projected with PCA, converted to float32 tensors and
    split once into a training and a test partition. Iterating the dataset
    (``__len__`` / ``__getitem__``, and therefore any ``DataLoader`` built on
    it) yields training samples only.

    Attributes:
        data_tens / label_tens: every sample (features / labels), unsplit.
        training_tens / tr_label_tens: training partition.
        test_tens / te_label_tens: held-out test partition.
    """

    def __init__(self, fingerprints, labels, n_components=32, test_size=781, seed=0):
        """Reduce, convert and split the data.

        Args:
            fingerprints: 2-D array-like accepted by ``PCA.fit_transform``.
            labels: NumPy array with one label per fingerprint row.
            n_components: Length of each feature vector after PCA.
            test_size: Number of samples held out for testing.
            seed: Seed for the shuffle that picks the test samples, so that
                repeated constructions produce the same split.

        Raises:
            ValueError: If ``test_size`` is negative or would leave no
                training samples.
        """
        # NOTE(review): PCA is fitted on all samples, including those that end
        # up in the test partition.
        pca = PCA(n_components=n_components)  # reduce data set vector length
        pca_fps = pca.fit_transform(fingerprints)
        # torch.nn.Linear parameters are float32 by default, while PCA returns
        # float64; cast here so the tensors can be fed to a model directly.
        self.data_tens = torch.from_numpy(pca_fps).float()
        self.label_tens = torch.from_numpy(labels).float()

        n_samples = self.label_tens.shape[0]
        if not 0 <= test_size < n_samples:
            raise ValueError(
                "test_size must be in [0, %d), got %d" % (n_samples, test_size)
            )

        # Seeded shuffle: the first `test_size` indices become the test set.
        shuffle_rng = torch.Generator().manual_seed(seed)
        order = torch.randperm(n_samples, generator=shuffle_rng)
        test_idx, train_idx = order[:test_size], order[test_size:]

        self.training_tens = self.data_tens[train_idx]
        self.tr_label_tens = self.label_tens[train_idx]
        self.test_tens = self.data_tens[test_idx]
        self.te_label_tens = self.label_tens[test_idx]

    def __len__(self):
        """Return the number of training samples."""
        return self.tr_label_tens.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair of training sample ``idx``."""
        return self.training_tens[idx], self.tr_label_tens[idx]

In [None]:
def unpickle_fps_data(path='fps_and_labels.pickle'):
    """Load a pickled ``(fingerprints, labels)`` pair.

    Args:
        path: Location of the pickle file. The file must contain a
            two-element sequence ``(fingerprints, labels)``.

    Returns:
        The tuple ``(fingerprints, labels)`` exactly as stored in the file.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only point this at files from a trusted source.
    with open(path, 'rb') as f:  # honour the caller-supplied path
        fingerprints, labels = pickle.load(f)
    return (fingerprints, labels)

# NN Implementation
## Setup

In [None]:
# Setup: Data, Error func, NN classifier, optimizer
# 1. Data
fingerprints, labels = unpickle_fps_data()
dataset = FpsDataset(fingerprints, labels)  # initialize dataset obj
# creates a dataloader to batch data
dataloader = DataLoader(dataset=dataset, batch_size=256, shuffle=True)
# 2. Error
# Binary cross-entropy: the training loop feeds sigmoid outputs to this loss,
# the loss plot is labelled BCE, and test_classifiers uses the same criterion.
err_func = torch.nn.BCELoss()
# 3. Classifier
# Take the input width from the data so the first layer always matches the
# length of the PCA-reduced feature vectors.
n_features = dataset.data_tens.shape[1]
classifier = torch.nn.Sequential(
    torch.nn.Linear(n_features, 64),
    torch.nn.SiLU(),
    torch.nn.Linear(64, 32),
    torch.nn.SiLU(),
    torch.nn.Linear(32, 16),
    torch.nn.SiLU(),
    torch.nn.Linear(16, 1)  # single logit; sigmoid is applied by the caller
)
# 4. Optimizer
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

## Training Loop

In [None]:
epochs = 2000
loss_list = []      # loss on the training partition after each epoch
accuracy_list = []  # accuracy (%) on the test partition after each epoch

# Training loop:
for epoch in range(epochs):
    # loop through training batches
    for data, y in dataloader:
        optimizer.zero_grad()
        predictions = torch.sigmoid(classifier(data.float()))
        # targets need the same (batch, 1) shape and float dtype as predictions
        error = err_func(predictions, y.reshape(-1, 1).float())
        error.backward()
        optimizer.step()

    # Evaluation only reads the model, so skip building the autograd graph.
    with torch.no_grad():
        # track error over whole training partition per epoch
        train_predictions = torch.sigmoid(classifier(dataset.training_tens.float()))
        train_targets = dataset.tr_label_tens.reshape(-1, 1).float()
        loss_list.append(err_func(train_predictions, train_targets).numpy())
        # track accuracy over tests per epoch
        test_probabilities = torch.sigmoid(
            classifier(dataset.test_tens.float())).reshape(-1)

    # Threshold at 0.5. Casting the probabilities straight to int would
    # truncate every value below 1.0 to 0 and predict class 0 for everything.
    test_predictions = (test_probabilities >= 0.5).numpy().astype(int)
    test_labels = dataset.te_label_tens.detach().numpy().reshape(-1)
    total_correct = np.count_nonzero(test_predictions == test_labels)
    # percentage over the actual number of test samples
    accuracy_list.append(100.0 * total_correct / len(test_labels))

In [None]:
def get_accuracy(clss, fps_dataset):
    """Return the classification accuracy of ``clss`` on the test data, in percent.

    The model output is passed through a sigmoid and thresholded at 0.5 to
    obtain a 0/1 prediction, which is compared element-wise with the labels.

    Args:
        clss: Callable model mapping ``fps_dataset.test_tens`` to one logit
            per sample.
        fps_dataset: Object exposing ``test_tens`` (features) and
            ``te_label_tens`` (0/1 labels) tensors.

    Returns:
        Percentage (0-100) of test samples classified correctly, as a float.

    Raises:
        ValueError: If the test partition contains no samples.
    """
    # Inference only: no gradients needed.
    with torch.no_grad():
        probabilities = torch.sigmoid(clss(fps_dataset.test_tens.float())).reshape(-1)
    # Threshold at 0.5. Casting the probabilities straight to int would
    # truncate every value below 1.0 to 0.
    test_predictions = (probabilities >= 0.5).numpy().astype(int)
    test_labels = fps_dataset.te_label_tens.detach().numpy().reshape(-1)
    if test_labels.size == 0:
        raise ValueError("cannot compute accuracy: the test partition is empty")
    total_correct = np.count_nonzero(test_predictions == test_labels)
    return 100.0 * total_correct / test_labels.size

### Results

In [1]:
# Report the accuracy get_accuracy() computes for the trained classifier.
print("Final Accuracy: %f" % (get_accuracy(classifier, dataset),))

SyntaxError: unexpected EOF while parsing (4017072579.py, line 1)

In [None]:
# Loss curve: one value per epoch, with epochs numbered from 1 on the x-axis.
plt.plot(range(1, epochs + 1), loss_list)
plt.title('Training Loss')
plt.ylabel('Loss (BCE)')
plt.xlabel('Epoch')
plt.show()

In [None]:
# Accuracy curve: one value per epoch, with epochs numbered from 1 on the x-axis.
plt.plot(range(1, epochs + 1), accuracy_list)
plt.title('Test Scores')
plt.ylabel('Accuracy (%)')
plt.xlabel('Epoch')
plt.show()

# Classifier Hyperparameter Testing

### Classifier Testing Helper Method

In [None]:
def test_classifiers(classifier, epochs, batch_size):
    """Train ``classifier`` on the fingerprint data and score it on the test data.

    Builds a fresh ``FpsDataset`` from the notebook-level ``fingerprints`` and
    ``labels``, trains with Adam (lr 0.001) and binary cross-entropy on the
    sigmoid of the model output, and records the training loss after every
    epoch. The model is trained in place: the caller's ``classifier`` object
    holds the learned weights afterwards.

    Args:
        classifier: ``torch.nn.Module`` producing one logit per sample.
        epochs: Number of passes over the training data.
        batch_size: Mini-batch size for the ``DataLoader``.

    Returns:
        Tuple ``(loss_list, accuracy)``: ``loss_list`` has one training-loss
        value per epoch; ``accuracy`` is the percentage (0-100) of test
        samples classified correctly after the final epoch.
    """
    # Setup: Data, Error func, optimizer
    # 1. Data
    dataset = FpsDataset(fingerprints, labels)  # initialize/load dataset obj
    # creates a dataloader to batch data
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)
    # 2. Error
    err_func = torch.nn.BCELoss()
    # 3. Optimizer
    optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)
    loss_list = []  # training error after each epoch

    # Training loop:
    for _ in range(epochs):
        # loop through training batches
        for data, y in dataloader:
            optimizer.zero_grad()
            predictions = torch.sigmoid(classifier(data.float()))
            # BCELoss needs float targets shaped like the predictions
            error = err_func(predictions, y.reshape(-1, 1).float())
            error.backward()
            optimizer.step()
        # track error over the whole training partition per epoch
        with torch.no_grad():
            train_predictions = torch.sigmoid(classifier(dataset.training_tens.float()))
            train_targets = dataset.tr_label_tens.reshape(-1, 1).float()
            loss_list.append(err_func(train_predictions, train_targets).numpy())

    # calc % correct on the test partition
    with torch.no_grad():
        test_probabilities = torch.sigmoid(
            classifier(dataset.test_tens.float())).reshape(-1)
    # Threshold at 0.5. Casting the probabilities straight to int would
    # truncate every value below 1.0 to 0.
    test_predictions = (test_probabilities >= 0.5).numpy().astype(int)
    test_labels = dataset.te_label_tens.detach().numpy().reshape(-1)
    total_correct = np.count_nonzero(test_predictions == test_labels)
    # return: per-epoch training loss and percent correct on test
    return loss_list, 100.0 * total_correct / len(test_labels)

## Non-Linearity (Activation Function) Tests

In [None]:
# Compare activation functions on otherwise identical networks.
# Input width follows the data so the first layer matches the feature vectors.
n_features = dataset.data_tens.shape[1]


def build_activation_mlp(activation_cls, in_features=n_features):
    """Return the in_features-64-32-16-1 network with ``activation_cls`` between layers."""
    return torch.nn.Sequential(
        torch.nn.Linear(in_features, 64),
        activation_cls(),
        torch.nn.Linear(64, 32),
        activation_cls(),
        torch.nn.Linear(32, 16),
        activation_cls(),
        torch.nn.Linear(16, 1)
    )


relu_cl = build_activation_mlp(torch.nn.ReLU)
elu_cl = build_activation_mlp(torch.nn.ELU)
silu_cl = build_activation_mlp(torch.nn.SiLU)

non_lin_list = [relu_cl, elu_cl, silu_cl]
# Each list collects one (loss_list, accuracy) tuple per trial.
relu = []
elu = []
silu = []
results = [relu, elu, silu]
for trial in range(10):  # 10 independent trials per activation
    for cl, trial_results in zip(non_lin_list, results):
        # test_classifiers trains the model in place. Re-initialise the weights
        # so each trial starts untrained rather than continuing the last one.
        for layer in cl:
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()
        trial_results.append(test_classifiers(cl, 150, 32))  # 150 epochs, batch size 32

In [None]:
# relu / elu / silu each hold (loss_list, accuracy) tuples returned by
# test_classifiers; average only the accuracy component of every trial.
mean_accuracies = [
    np.mean([accuracy for _, accuracy in trials])
    for trials in (relu, elu, silu)
]
plt.bar([1, 2, 3], mean_accuracies, tick_label=['ReLU', 'ELU', 'SiLU'])
plt.xlabel('Activation function')
plt.ylabel('Mean test accuracy (%)')
plt.title('Mean Test Accuracy by Activation Function')
plt.show()

## Hidden Layer Number Tests

In [None]:
# Compare network depth using SiLU activations throughout.
# Input width follows the data so the first layer matches the feature vectors.
n_features = dataset.data_tens.shape[1]


def build_silu_mlp(layer_widths):
    """Return Linear layers for consecutive ``layer_widths``, with SiLU between them."""
    layers = []
    for fan_in, fan_out in zip(layer_widths[:-1], layer_widths[1:]):
        layers.append(torch.nn.Linear(fan_in, fan_out))
        layers.append(torch.nn.SiLU())
    layers.pop()  # no activation after the output layer
    return torch.nn.Sequential(*layers)


thr_cl = build_silu_mlp((n_features, 64, 32, 1))
four_cl = build_silu_mlp((n_features, 64, 32, 16, 1))
five_cl = build_silu_mlp((n_features, 64, 32, 16, 8, 1))
layer_list = [thr_cl, four_cl, five_cl]
# Each list collects one (loss_list, accuracy) tuple per trial.
thr = []
four = []
five = []
results = [thr, four, five]
for trial in range(10):  # 10 independent trials per depth
    for cl, trial_results in zip(layer_list, results):
        # test_classifiers trains the model in place. Re-initialise the weights
        # so each trial starts untrained rather than continuing the last one.
        for layer in cl:
            if hasattr(layer, 'reset_parameters'):
                layer.reset_parameters()
        trial_results.append(test_classifiers(cl, 150, 32))  # 150 epochs, batch size 32