In [1]:
import torch
from torch import embedding, nn
import torch.nn.functional as F
import torch.optim as optim
import random
import time
from tqdm import tqdm
from preprocessing import *
from baseline import FaceRecognizer
# train_data: tuple of length 320, each is a matrix represents a picture
# train_target: tuple of length 320, each is a label
# For training, there are 40 classes, each has 8 pictures
# For testing, there are 40 classes, each has 2 pictures

In [2]:
fr = FaceRecognizer()
fr.forward(train_data[1])

tensor([[-0.0388, -0.0385,  0.0564,  0.0487,  0.0286,  0.0472,  0.0689,  0.0234,
          0.0072, -0.0092, -0.0242,  0.0578,  0.0495, -0.0365,  0.1204,  0.0399,
         -0.0766, -0.0688, -0.0020,  0.0140,  0.0095, -0.0499,  0.0827,  0.0842,
         -0.0542, -0.0289, -0.0310,  0.1083,  0.0326, -0.1038, -0.0453, -0.0194,
          0.0414, -0.0735,  0.0734, -0.0104,  0.0238, -0.0836, -0.0186,  0.0104]],
       grad_fn=<AddmmBackward>)

In [3]:


num_epochs_train = 0

def train(model, data, targets, optimizer, criterion,  num_epochs=0):  
    global num_epochs_train 
#     if num_epochs_train == 1:
#         tmp = optimizer.state_dict()
#         tmp["param_groups"][0]["lr"] = 0.0005
    model.train()
    epoch_loss = 0
    sampling = list(range(train_data.shape[0]))
    random.shuffle(sampling)
    print("training ...")
    for i, selected_batch_index in tqdm(enumerate(sampling)):
        optimizer.zero_grad()
        z = fr.forward(data[selected_batch_index])
        loss = 0       
        loss=criterion(z,targets[selected_batch_index].long())
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    num_epochs_train += 1
    return epoch_loss

confusion_matrix = []
num_epochs = 0
def evaluate(model, data, targets, criterion, num_targets): 
    model.eval()
    epoch_loss = 0
    sampling = list(range(data.shape[0]))
    random.shuffle(sampling)
    confusion_matrix.append(torch.zeros(num_targets,num_targets))
    global num_epochs
    for i, selected_batch_index in tqdm(enumerate(sampling)):
        z = fr.forward(data[selected_batch_index])
        loss = 0
        loss=criterion(z,targets[selected_batch_index].long())
        print(targets[selected_batch_index].long())
        epoch_loss += loss.item()
        # Load in confusion_matrix
        for i in range(data[selected_batch_index].shape[0]):
            row = targets[selected_batch_index].long()
            col = torch.argmax(z[i])
            
#             print(num_epochs,row.item(),col.item())
#             print(confusion_matrix)
            confusion_matrix[num_epochs][row.item()][col.item()] += 1
     
    num_epochs += 1
        
    return epoch_loss
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [4]:
# optimizer = optim.Adam(fr.parameters())
# criterion = nn.CrossEntropyLoss(reduction = 'sum')
training_losses = []
test_losses = []

In [5]:
def train_eps(ep, lr, reg):
    optimizer = optim.SGD(fr.parameters(), lr=lr, momentum=0.9, weight_decay=reg, nesterov=False)
    criterion = nn.CrossEntropyLoss(reduction = 'sum')
    N_EPOCHS = ep
    best_test_loss = 999999
    for epoch in range(N_EPOCHS):  
        print("epoch start: ", epoch)  
        start_time = time.time()
        training_loss = train(fr, train_data, train_target, optimizer, criterion)
        training_losses.append(training_loss)
        test_loss = evaluate(fr, test_data, test_target, criterion, 40)
        test_losses.append(test_loss)  
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)
        if test_loss < best_test_loss:
            best_test_loss = test_loss 
            torch.save(fr.state_dict(), 'no_best_model.pt')


        print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s', end='')
        print(f'\tTrain Loss: {training_loss:.3f} | Test Loss: {test_loss:.3f}')

In [8]:
train_data.shape

torch.Size([280, 1, 1, 64, 64])

In [9]:
train_eps(1000, 0.0001, 0.002)

epoch start:  0
training ...


280it [00:00, 434.41it/s]
120it [00:00, 1013.07it/s]


tensor([20])
tensor([19])
tensor([21])
tensor([24])
tensor([35])
tensor([22])
tensor([18])
tensor([31])
tensor([19])
tensor([21])
tensor([30])
tensor([11])
tensor([17])
tensor([30])
tensor([13])
tensor([29])
tensor([31])
tensor([3])
tensor([14])
tensor([9])
tensor([28])
tensor([17])
tensor([34])
tensor([1])
tensor([8])
tensor([36])
tensor([39])
tensor([18])
tensor([32])
tensor([3])
tensor([10])
tensor([14])
tensor([26])
tensor([14])
tensor([26])
tensor([6])
tensor([29])
tensor([9])
tensor([32])
tensor([15])
tensor([20])
tensor([18])
tensor([37])
tensor([24])
tensor([11])
tensor([19])
tensor([25])
tensor([5])
tensor([13])
tensor([23])
tensor([0])
tensor([2])
tensor([12])
tensor([38])
tensor([35])
tensor([28])
tensor([21])
tensor([17])
tensor([32])
tensor([13])
tensor([27])
tensor([27])
tensor([10])
tensor([25])
tensor([36])
tensor([4])
tensor([1])
tensor([39])
tensor([27])
tensor([33])
tensor([15])
tensor([36])
tensor([12])
tensor([15])
tensor([1])
tensor([33])
tensor([16])
tensor([6])


280it [00:00, 496.09it/s]
120it [00:00, 1019.99it/s]


tensor([17])
tensor([21])
tensor([4])
tensor([15])
tensor([31])
tensor([7])
tensor([23])
tensor([37])
tensor([26])
tensor([25])
tensor([32])
tensor([27])
tensor([22])
tensor([18])
tensor([7])
tensor([36])
tensor([38])
tensor([11])
tensor([28])
tensor([30])
tensor([32])
tensor([37])
tensor([35])
tensor([6])
tensor([13])
tensor([22])
tensor([29])
tensor([39])
tensor([7])
tensor([0])
tensor([13])
tensor([39])
tensor([3])
tensor([35])
tensor([4])
tensor([18])
tensor([14])
tensor([2])
tensor([20])
tensor([21])
tensor([8])
tensor([25])
tensor([25])
tensor([19])
tensor([17])
tensor([28])
tensor([1])
tensor([36])
tensor([36])
tensor([26])
tensor([30])
tensor([24])
tensor([8])
tensor([33])
tensor([14])
tensor([11])
tensor([14])
tensor([16])
tensor([20])
tensor([5])
tensor([39])
tensor([26])
tensor([18])
tensor([30])
tensor([31])
tensor([10])
tensor([9])
tensor([34])
tensor([21])
tensor([8])
tensor([23])
tensor([35])
tensor([23])
tensor([15])
tensor([0])
tensor([27])
tensor([29])
tensor([16])
te

43it [00:00, 484.14it/s]


KeyboardInterrupt: 

In [7]:
confusion_matrix[0]

IndexError: list index out of range

In [8]:
confusion_matrix[-1]

IndexError: list index out of range

In [9]:
sum = 0
matrix = -1
for i in range(confusion_matrix[matrix].shape[0]):
    sum = sum + confusion_matrix[matrix][i][i]
sum

IndexError: list index out of range

In [15]:
optimizer = optim.SGD(fr.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001, nesterov=False)
criterion = nn.CrossEntropyLoss(reduction = 'sum')
evaluate(fr, test_data, test_target, criterion, 40)

120it [00:00, 765.11it/s]

tensor([10])
tensor([22])
tensor([23])
tensor([39])
tensor([15])
tensor([22])
tensor([9])
tensor([9])
tensor([32])
tensor([14])
tensor([15])
tensor([26])
tensor([31])
tensor([20])
tensor([7])
tensor([5])
tensor([8])
tensor([5])
tensor([21])
tensor([12])
tensor([34])
tensor([3])
tensor([24])
tensor([19])
tensor([30])
tensor([5])
tensor([14])
tensor([0])
tensor([25])
tensor([35])
tensor([2])
tensor([23])
tensor([12])
tensor([11])
tensor([37])
tensor([38])
tensor([39])
tensor([4])
tensor([23])
tensor([16])
tensor([20])
tensor([1])
tensor([22])
tensor([14])
tensor([34])
tensor([35])
tensor([18])
tensor([33])
tensor([29])
tensor([31])
tensor([18])
tensor([11])
tensor([33])
tensor([36])
tensor([30])
tensor([31])
tensor([4])
tensor([36])
tensor([21])
tensor([19])
tensor([28])
tensor([8])
tensor([19])
tensor([17])
tensor([34])
tensor([1])
tensor([28])
tensor([6])
tensor([3])
tensor([27])
tensor([13])
tensor([27])
tensor([28])
tensor([12])
tensor([36])
tensor([4])
tensor([24])
tensor([18])
tens




442.7039303779602