In [86]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.utils.data import TensorDataset, DataLoader
from PIL import Image

In [87]:
# Run this cell to mount your own Google Drive at /content/drive.
# NOTE: google.colab is only importable inside the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [88]:
######################################################################
# OneLayerNetwork
######################################################################

class OneLayerNetwork(torch.nn.Module):
    """Linear classifier: a single fully connected layer producing raw logits.

    Args:
        n_features: length of each flattened input vector (default 784).
        n_classes: number of output logits per example (default 3).
    """

    def __init__(self, n_features=784, n_classes=3):
        super().__init__()

        ### part d: implement OneLayerNetwork with torch.nn.Linear
        self.layer = torch.nn.Linear(n_features, n_classes)

    def forward(self, x):
        """Map x of shape (n_batch, n_features) to logits of shape (n_batch, n_classes).

        No softmax is applied; the outputs are the raw values of the linear layer.
        """
        ### part d: implement the forward function
        return self.layer(x)

In [89]:
######################################################################
# TwoLayerNetwork
######################################################################

class TwoLayerNetwork(torch.nn.Module):
    """Two fully connected layers with a sigmoid in between, producing raw logits.

    Args:
        n_features: length of each flattened input vector (default 784).
        n_hidden: width of the hidden layer (default 400).
        n_classes: number of output logits per example (default 3).
    """

    def __init__(self, n_features=784, n_hidden=400, n_classes=3):
        super().__init__()
        ### part g: implement TwoLayerNetwork with torch.nn.Linear
        # Attribute names are kept as-is so state_dict keys stay the same.
        self.layer_1 = torch.nn.Linear(n_features, n_hidden)
        self.sig_1 = torch.nn.Sigmoid()
        self.layer_2 = torch.nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Map x of shape (n_batch, n_features) to logits of shape (n_batch, n_classes).

        The hidden activations go through a sigmoid; the output layer is left
        linear (no softmax is applied here).
        """
        ### part g: implement the forward function
        hidden = self.sig_1(self.layer_1(x))
        return self.layer_2(hidden)

In [90]:
# load data from csv
# X.shape = (n_examples, n_features), y.shape = (n_examples, )
def load_data(filename):
    """Read a labelled data file into a feature matrix and a label vector.

    The file is parsed with np.loadtxt using its default delimiter, i.e. the
    columns must be separated by whitespace. The first column holds the label,
    the remaining columns hold the features.

    Args:
        filename: path (or any source accepted by np.loadtxt) of the data file.

    Returns:
        (X, y) where X is a float32 array of shape (n_examples, n_features)
        holding the feature columns divided by 255, and y is an int64 array of
        shape (n_examples, ) holding the first column.
    """
    # ndmin=2 keeps a one-example file as shape (1, n_columns); without it
    # loadtxt returns a 1-D array and the column slicing below raises.
    data = np.loadtxt(filename, ndmin=2)
    # Explicit int64: plain `int` maps to int32 on some platforms, and the
    # labels are later used as class-index targets for CrossEntropyLoss.
    y = data[:, 0].astype(np.int64)
    X = data[:, 1:].astype(np.float32) / 255
    return X, y

In [91]:
# plot one example
# x.shape = (features, )
def plot_img(x):
    """Display one example as a 28x28 image in a new matplotlib figure.

    Args:
        x: array with 784 entries; it is reshaped to (28, 28) and multiplied by
           255 before being handed to PIL.
    """
    pixels = x.reshape(28, 28) * 255
    picture = Image.fromarray(pixels)
    # Open a fresh figure so consecutive calls do not draw over each other.
    plt.figure()
    plt.imshow(picture)

In [92]:
def evaluate_loss(model, criterion, dataloader):
    """Return the average per-batch loss of `model` over `dataloader`.

    The model is switched to eval mode and left there. Each batch's loss is
    reduced to a Python float with `.item()` and the floats are averaged over
    the number of batches (batches are weighted equally, regardless of size).

    Args:
        model: callable module mapping a batch of inputs to outputs.
        criterion: callable `criterion(outputs, targets)` returning a scalar tensor.
        dataloader: sized iterable of `(batch_X, batch_y)` pairs.

    Returns:
        Python float: sum of the batch losses divided by `len(dataloader)`.

    Raises:
        ZeroDivisionError: if `dataloader` has length 0.
    """
    model.eval()
    total_loss = 0.0
    # Nothing is back-propagated here, so skip building the autograd graph.
    with torch.no_grad():
        for batch_X, batch_y in dataloader:
            outputs = model(batch_X)
            total_loss += criterion(outputs, batch_y).item()

    return total_loss / len(dataloader)

In [93]:
def evaluate_acc(model, dataloader):
    """Return the fraction of examples in `dataloader` that `model` classifies correctly.

    The predicted class of an example is the argmax of the model output along
    dim 1. The model is switched to eval mode and left there.

    Args:
        model: callable module mapping a batch of inputs to per-class scores.
        dataloader: iterable of `(batch_X, batch_y)` pairs exposing a sized
            `.dataset` attribute (e.g. a torch DataLoader).

    Returns:
        Number of correct predictions divided by `len(dataloader.dataset)`.
        Because the per-batch counts are accumulated as tensors, the result is
        a 0-dim tensor whenever at least one batch was processed.
    """
    model.eval()
    total_acc = 0.0
    # Nothing is back-propagated here, so skip building the autograd graph.
    with torch.no_grad():
        for batch_X, batch_y in dataloader:
            predictions = torch.argmax(model(batch_X), dim=1)
            total_acc += (predictions == batch_y).sum()

    return total_acc / len(dataloader.dataset)

In [94]:
def train(model, criterion, optimizer, train_loader, valid_loader, n_epochs=30, verbose=False):
    """Train `model` for `n_epochs` epochs and record metrics after every epoch.

    Each epoch performs one optimizer step per batch of `train_loader`, then
    evaluates loss and accuracy on both loaders with evaluate_loss and
    evaluate_acc.

    Args:
        model: module to optimise.
        criterion: loss callable `criterion(outputs, targets)`.
        optimizer: optimizer already bound to the model parameters.
        train_loader: batches used for the updates and for the train metrics.
        valid_loader: batches used for the validation metrics.
        n_epochs: number of passes over `train_loader`.
        verbose: when true, print one summary line per epoch.

    Returns:
        Tuple of four lists with one entry per epoch:
        (train losses, valid losses, train accuracies, valid accuracies).
    """
    # One list per recorded metric, in the order they are returned.
    history = ([], [], [], [])

    for epoch in range(1, n_epochs+1):
        model.train()
        for inputs, targets in train_loader:
            ### part f: one optimisation step per mini-batch
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()

        # Metrics are measured after the epoch's updates, in eval mode.
        train_loss = evaluate_loss(model, criterion, train_loader)
        valid_loss = evaluate_loss(model, criterion, valid_loader)
        train_acc = evaluate_acc(model, train_loader)
        valid_acc = evaluate_acc(model, valid_loader)
        for series, value in zip(history, (train_loss, valid_loss, train_acc, valid_acc)):
            series.append(value)

        if verbose:
            print(f"| epoch {epoch:2d} | train loss {train_loss:.6f} | train acc {train_acc:.6f} | valid loss {valid_loss:.6f} | valid acc {valid_acc:.6f} |")

    return history

In [95]:
######################################################################
# main
######################################################################

def main(data_directory_path="/content/drive/My Drive/Colab Notebooks/CS 146/PS3", parts=()):
    """Load the PS3 data, build the data loaders and run the requested parts.

    Args:
        data_directory_path: directory containing ps3_train.csv, ps3_valid.csv
            and ps3_test.csv. Defaults to the original Google Drive location.
        parts: iterable of part letters to run, in the given order (a string
            such as "ai" works too). Recognised letters:
              "a" - plot one random training image for each label 0, 1, 2
              "f" - train the one-layer network
              "h" - train the two-layer network
              "i" - plot train/valid loss curves of both networks
              "j" - plot train/valid accuracy curves of both networks
              "k" - print the final validation accuracy of both networks
              "l" - run i, j and k with the Adam optimizer
            The default runs no part: the data is loaded and the loaders are
            built, nothing else, which is what main() did before this
            parameter existed.

    Raises:
        ValueError: if `parts` contains an unrecognised letter.
    """
    # Validate first so a typo fails before any file is read.
    parts = tuple(parts)
    known_parts = ("a", "f", "h", "i", "j", "k", "l")
    unknown = [part for part in parts if part not in known_parts]
    if unknown:
        raise ValueError(f"unknown part(s) {unknown}; expected any of {known_parts}")

    # fix random seed
    np.random.seed(0)
    torch.manual_seed(0)

    # X.shape = (n_examples, n_features)
    # y.shape = (n_examples, )
    # The NumPy arrays keep their own names: the part-a helper below is a
    # closure, and it must still see NumPy data after the tensors are created.
    X_train_np, y_train_np = load_data(os.path.join(data_directory_path, "ps3_train.csv"))
    X_valid_np, y_valid_np = load_data(os.path.join(data_directory_path, "ps3_valid.csv"))
    X_test_np, y_test_np = load_data(os.path.join(data_directory_path, "ps3_test.csv"))

    def plot_samples():
        """part a: plot one randomly chosen training image per label 0, 1, 2."""
        indices = [np.random.choice(np.where(y_train_np == label)[0]) for label in (0, 1, 2)]
        for index in indices:
            plot_img(X_train_np[index])

    ### part b: convert numpy arrays to tensors
    X_train = torch.from_numpy(X_train_np)
    y_train = torch.from_numpy(y_train_np)
    X_valid = torch.from_numpy(X_valid_np)
    y_valid = torch.from_numpy(y_valid_np)
    X_test = torch.from_numpy(X_test_np)
    y_test = torch.from_numpy(y_test_np)

    ### part c: prepare train_loader, valid_loader, and test_loader
    def make_data_loader(X, y):
        """Wrap (X, y) in a DataLoader with batches of 10, in dataset order."""
        return DataLoader(TensorDataset(X, y), batch_size=10)

    train_loader = make_data_loader(X_train, y_train)
    valid_loader = make_data_loader(X_valid, y_valid)
    # Built for part c; none of the parts below read it.
    test_loader = make_data_loader(X_test, y_test)

    def modules_1(Optimizer=torch.optim.SGD):
        """part e: fresh OneLayerNetwork, cross-entropy loss, optimizer (lr=5e-4)."""
        model_one = OneLayerNetwork()
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = Optimizer(params=model_one.parameters(), lr=5e-4)
        return model_one, criterion, optimizer

    def train_1(**kwargs):
        """part f: train a new OneLayerNetwork; kwargs are forwarded to modules_1."""
        print("Start training OneLayerNetwork...")
        results_one = train(*modules_1(**kwargs), train_loader, valid_loader)
        print("Done!")
        return results_one

    def modules_2(Optimizer=torch.optim.SGD):
        """Fresh TwoLayerNetwork, cross-entropy loss, optimizer (lr=5e-4)."""
        model_two = TwoLayerNetwork()
        criterion = torch.nn.CrossEntropyLoss()
        optimizer = Optimizer(params=model_two.parameters(), lr=5e-4)
        return model_two, criterion, optimizer

    def train_2(**kwargs):
        """part h: train a new TwoLayerNetwork; kwargs are forwarded to modules_2."""
        print("Start training TwoLayerNetwork...")
        results_two = train(*modules_2(**kwargs), train_loader, valid_loader)
        print("Done!")
        return results_two

    def plot_curves(first, quantity, ylabel, train_kwargs):
        """Train both networks and plot one metric's train/valid curves.

        `first` is the position of the train curve in the tuple returned by
        train(); the matching valid curve sits right after it.
        """
        one_results = train_1(**train_kwargs)
        two_results = train_2(**train_kwargs)
        plt.figure()
        for prefix, results in (("1-Layer", one_results), ("2-Layer", two_results)):
            for offset, split in enumerate(("Train", "Valid")):
                curve = results[first + offset]
                plt.plot(np.arange(len(curve)), curve, label=f"{prefix} {split} {quantity}")
        plt.title(f"{quantity} Over Training Epochs for 1 and 2-layer NN")
        plt.xlabel("Epoch")
        plt.ylabel(ylabel)
        plt.legend()

    # NOTE: parts i, j and k each train both networks again from a new random
    # initialisation, so their outputs come from different training runs.
    def plot_losses(**kwargs):
        """part i: loss curves of both networks."""
        plot_curves(0, "Loss", "Cross-Entropy Loss", kwargs)

    def plot_accuracies(**kwargs):
        """part j: accuracy curves of both networks."""
        plot_curves(2, "Accuracy", "Accuracy", kwargs)

    def report_valid_acc(**kwargs):
        """part k: print the last-epoch validation accuracy of both networks."""
        one_valid_acc = train_1(**kwargs)[3][-1]
        two_valid_acc = train_2(**kwargs)[3][-1]
        print(f'One-layer network accuracy: {round(float(one_valid_acc), 4)}')
        print(f'Two-layer network accuracy: {round(float(two_valid_acc), 4)}')

    def run_with_adam():
        """part l: repeat parts i, j and k with Adam instead of SGD."""
        kwargs = {'Optimizer': torch.optim.Adam}
        plot_losses(**kwargs)
        plot_accuracies(**kwargs)
        report_valid_acc(**kwargs)

    runners = {
        "a": plot_samples,
        "f": train_1,
        "h": train_2,
        "i": plot_losses,
        "j": plot_accuracies,
        "k": report_valid_acc,
        "l": run_with_adam,
    }
    for part in parts:
        runners[part]()

# Entry point: call main() when this code is executed as the top-level module.
if __name__ == "__main__":
    main()