# Visualistion of Learning NN

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
# from tqdm import tqdm

import pathlib

sns.set_style('darkgrid')


import numpy as np
import argparse
import math
import os
import torch
import torch.optim as optim
import torch.utils.data as data
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms

import torch
from torchmetrics import Accuracy
from torchmetrics import ConfusionMatrix
from torchmetrics import F1Score

n_c = 10 # Number of classes
dims = 300 # Dimension
samp_size = 1000 # Sample Size
test_prop = 0.2 # Test Proportion

path = str(pathlib.Path().resolve())
path_train = path + '/syn_data' + '/' + str(n_c) + 'c' + str(dims) + 'd' + str(samp_size) + 's' + '_train' + '.csv'
path_test = path + '/syn_data' + '/' + str(n_c) + 'c' + str(dims) + 'd' + str(samp_size)+ 's' + '_test' + '.csv'
train = pd.read_csv(path_train, index_col=0)
test = pd.read_csv(path_test, index_col=0)

def csv_torch(df):

    # Creating Tuples
    cols = (df.columns)
    rows = np.unique(df.index)

    samps = []

    for i in rows:
        s  = df.loc[int(i),:]
        for j in cols:
            t = torch.Tensor(np.array(s[str(j)]))
            tup = (t, int(i))
            samps.append(tup)
    return samps

dat_train = csv_torch(train)
dat_test = csv_torch(test)

trainset = torch.utils.data.DataLoader(dat_train, batch_size = 10, shuffle = True)
testset = torch.utils.data.DataLoader(dat_test, batch_size = 10, shuffle = True)

In [2]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(dims, 300) # Input Layer
        self.fc2 = nn.Linear(300, 300) # Hidden Layer 1
        self.fc3 = nn.Linear(300, 300) # Hidden Layer 2
        self.fc4 = nn.Linear(300, n_c) # Output Layer
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x, dim=1) # We want to sum the classes - across columns

net = Net()
print(net)

Net(
  (fc1): Linear(in_features=300, out_features=300, bias=True)
  (fc2): Linear(in_features=300, out_features=300, bias=True)
  (fc3): Linear(in_features=300, out_features=300, bias=True)
  (fc4): Linear(in_features=300, out_features=10, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)


In [3]:
import torch.optim as optim

loss_function = nn.CrossEntropyLoss() # nn.MSELoss() <- is also an option
optimizer = optim.Adam(net.parameters(), lr=1e-3)

In [4]:
EPOCHS = 3

for epoch in range(EPOCHS): # 3 full passes over the data
    for data in trainset:  # `data` is a batch of data
        X, y = data  # X is the batch of features, y is the batch of targets.
        net.zero_grad()  # sets gradients to 0 before loss calc. You will do this likely every step.
        output = net(X.view(-1,dims))  # pass in the reshaped batch (recall they are 28x28 atm)
        loss = F.nll_loss(output, y)  # calc and grab the loss value
        loss.backward()  # apply this loss backwards thru the network's parameters
        optimizer.step()  # attempt to optimize weights to account for loss/gradients
    print(loss)  # print loss. We hope loss (a measure of wrong-ness) declines! 

tensor(0.5493, grad_fn=<NllLossBackward0>)
tensor(0.0408, grad_fn=<NllLossBackward0>)
tensor(0.0095, grad_fn=<NllLossBackward0>)


In [5]:
a = 4

In [1]:
def get_acc(data):
    target =  [] 
    preds =  [] 

    with torch.no_grad():
        for data in testset:
            X, y = data
            output = net(X.view(-1,dims))
            for idx, i in enumerate(output):
                preds.append(int(torch.argmax(i)))
                target.append(int(y[idx]))

    preds = torch.tensor(preds)
    target = torch.tensor(target)

    accuracy = Accuracy()
    a = accuracy(preds, target)
    return a

In [5]:
def fwd_pass(X, y, train = False): # By default, we will not update weights
    if train:
        net.zero_grad()

    outputs = net(X)
    matches = [torch.argmax(i) == torch.argmax(j) for i,j in zip(outputs,y)] # Comparng the argmax of both of those vectors when they are the same, it will be true, otherwise false
    acc = matches.count(True)/len(matches)
    loss = loss_function(outputs, y)

    if train:
        loss.backward()
        optimizer.step()
    return acc, loss

def test(data, size=32):
    test_X, test_y = data[0], data[1]
    random_start = np.random.randint(len(test_X) - size)
    X, y = test_X[random_start:random_start+size], test_y[random_start:random_start+size]
    with torch.no_grad():
        val_acc, val_loss = fwd_pass(X.view(-1, dims),y) #.to(device), y.to(device) for cloud computing
    return val_acc, val_loss


In [6]:
import time

MODEL_NAME = f"model-{int(time.time())}"  # gives a dynamic model name, to just help with things getting messy over time. 

net = Net() # .to(device) # <- for cloud GPU

def train(train, trest, net):
    train_X, train_y = train
    test_X, train_y = test
    BATCH_SIZE = 10
    EPOCHS = 3


    for epoch in range(EPOCHS):
        for i in tqdm(range(0, len(train_X), BATCH_SIZE)):
            batch_X = train_X[i:i+BATCH_SIZE] # .view(-1,1,50,50)
            batch_y = train_y[i:i+BATCH_SIZE]

            batch_X, batch_y = batch_X, batch_y # batch_X.to(device), batch_y.to(device)

            acc, loss = fwd_pass(batch_X, batch_y, train=True)

            if i % 50 == 0:
                val_acc, val_loss = test(size = 100)
                print(f"{MODEL_NAME},{round(time.time(),3)},{round(float(acc),2)},{round(float(loss),4)},{round(float(val_acc),2)},{round(float(val_loss),4)}\n")

train(trainset, testset, net)

ValueError: too many values to unpack (expected 2)

In [None]:

for epoch in range(EPOCHS): # 3 full passes over the data
    for data in trainset:  # `data` is a batch of data
        X, y = data  # X is the batch of features, y is the batch of targets.
        net.zero_grad()  # sets gradients to 0 before loss calc. You will do this likely every step.
        output = net(X.view(-1,dims))  # pass in the reshaped batch (recall they are 28x28 atm)
        loss = F.nll_loss(output, y)  # calc and grab the loss value
        loss.backward()  # apply this loss backwards thru the network's parameters
        optimizer.step()  # attempt to optimize weights to account for loss/gradients
    print(loss)  # print loss. We hope loss (a measure of wrong-ness) declines! 