In [2]:
import numpy as np
import matplotlib.pyplot as plt
from numpy import linalg as LA
import plotly
import plotly.graph_objects as go
import random, time
from collections import Counter
import os
from sklearn.model_selection import train_test_split
import pandas as pd
def plot_costs(costs):
    """Draw the sequence of recorded costs against the iteration index."""
    # Label the axes after drawing the curve, then render the figure.
    plt.plot(costs)
    plt.ylabel("cost")
    plt.xlabel("iterations")
    plt.show()
    
# function to plot gradient norms
def plot_grads(grads):
    """Draw the sequence of recorded gradient norms against the iteration index."""
    # Label the axes after drawing the curve, then render the figure.
    plt.plot(grads)
    plt.ylabel("gradient norm")
    plt.xlabel("iterations")
    plt.show()

# defining a function to plot data
def plot_data(X,Y,size_ = 2):
    """Show an interactive 3D scatter plot of the data.

    Parameters
    ----------
    X : 2-D array; columns 0 and 1 are used as the x and y coordinates.
    Y : sequence of rows; the first entry of each row is used as the z coordinate.
    size_ : marker size passed to plotly.
    """
    scatter = go.Scatter3d(
        x=X[:,0],
        y=X[:,1],
        z=[r[0] for r in Y],
        mode='markers',
        marker=dict(size=size_),
    )
    plot_figure = go.Figure(data=[scatter])
    plotly.offline.iplot(plot_figure)

# defining a function to plot the model's fit (data points plus the fitted surface)
def plot_fit(X,Y,W,B,G,size_ = 2):
    """Plot the data points together with the surface predicted by the network.

    Parameters
    ----------
    X : 2-D array; columns 0 and 1 are the two input features.
    Y : sequence of rows; the first entry of each row is the plotted target.
    W, B, G : weights, biases and activation functions passed to ``feedforward``.
    size_ : marker size for the scatter points.
    """
    grid_n = 30  # number of grid points per axis for the surface
    xs, ys = X[:,0], X[:,1]
    zs = [r[0] for r in Y]
    trace = go.Scatter3d(x=xs, y=ys, z=zs, mode='markers', marker=dict(size=size_))

    xxx = np.outer(np.linspace(min(xs), max(xs), grid_n), np.ones(grid_n))
    yyy = np.outer(np.linspace(min(ys), max(ys), grid_n), np.ones(grid_n)).T
    zzz = np.zeros([grid_n, grid_n])
    for i in range(grid_n):
        for j in range(grid_n):
            # The last entry of the feedforward result is the network output;
            # its first component is the plotted prediction.
            zzz[i,j] = feedforward(W,B,G,np.array([xxx[i,j],yyy[i,j]]))[-1][0]

    # Configure the layout.
    layout = go.Layout(margin={'l': 0, 'r': 0, 'b': 0, 't': 0})
    data = [trace,go.Surface(x=xxx, y=yyy, z=zzz, showscale=False, opacity=0.5)]
    # Render the plot.
    plot_figure = go.Figure(data=data, layout=layout)
    plot_figure.update_layout(
        scene = dict(
            xaxis = dict(nticks=4, range=[min(xs),max(xs)],),
            yaxis = dict(nticks=4, range=[min(ys),max(ys)],),
            # Use the scalar z-values that are actually plotted; min(Y)/max(Y) on
            # an array of rows would give array-valued (or ambiguous) bounds.
            zaxis = dict(nticks=4, range=[min(zs),max(zs)],),),
        width=700,
        margin=dict(r=20, l=10, b=10, t=10))
    plotly.offline.iplot(plot_figure)


In [3]:
# Display the kernel's current working directory; the relative paths used by
# the following cells are resolved against it.
os.getcwd()

'/Users/guoyihan/Documents/GitHub/MAT_180_ML_Projects/machine-learning-assisted-khovanov-homology/notebooks'

In [4]:
# Move the working directory up one level (presumably so that the relative
# "data/..." path used when loading the CSV resolves — TODO confirm).
# NOTE(review): the target is relative, so every re-run of this cell moves up
# one more level; run it only once per kernel session.
os.chdir("../")

In [5]:
# Load the dataset from a path relative to the current working directory.
df = pd.read_csv("data/50000_9_clean.csv")

In [220]:
import math
def ReLU(x,deriv = False):
    """Rectified linear unit applied element-wise.

    Parameters
    ----------
    x : array-like of numbers (a vector).
    deriv : when true, return the Jacobian instead of the activation.

    Returns
    -------
    numpy.ndarray
        ``deriv`` false: array shaped like ``x`` with non-positive entries set to 0.
        ``deriv`` true: square diagonal matrix with 1.0 where ``x > 0`` and 0.0 elsewhere.
    """
    x = np.asarray(x)
    if deriv:
        # The Jacobian of an element-wise function is diagonal.
        return np.diag((x.ravel() > 0).astype(float))
    # np.where (rather than np.maximum) maps NaN to 0, like an explicit `x > 0` test.
    return np.where(x > 0, x, 0)
def Linear(x,deriv = False):
    """Identity activation.

    Returns ``x`` itself, or the identity matrix of size ``len(x)`` when
    ``deriv`` is True.
    """
    size = len(x)
    return np.identity(size) if deriv == True else x
def Sigmoid(x,deriv = False):
    """Logistic sigmoid applied element-wise.

    Parameters
    ----------
    x : array-like of numbers (a vector).
    deriv : when true, return the Jacobian instead of the activation.

    Returns
    -------
    numpy.ndarray
        ``deriv`` false: array shaped like ``x`` holding ``1 / (1 + exp(-x))``.
        ``deriv`` true: diagonal matrix whose entries are ``s * (1 - s)``.
    """
    x = np.asarray(x, dtype=float)
    s = np.empty(x.shape)
    # Evaluate each half with the formula whose exponent is non-positive so
    # exp() can never overflow, however large |x| is.
    nonneg = x >= 0
    s[nonneg] = 1.0 / (1.0 + np.exp(-x[nonneg]))
    exp_neg = np.exp(x[~nonneg])
    s[~nonneg] = exp_neg / (1.0 + exp_neg)
    if deriv:
        # exp(-x) / (1 + exp(-x))**2 simplifies to s * (1 - s).
        return np.diag((s * (1.0 - s)).ravel())
    return s
def Squared(x,deriv = False):
    """Element-wise square activation.

    Returns ``x**2``, or the diagonal matrix built from ``2*x`` when ``deriv``
    is True.
    """
    return np.diag(2*x) if deriv == True else x**2
def Softmax(x,deriv = False):
    """Softmax of a vector.

    Parameters
    ----------
    x : array-like of numbers (a vector of logits).
    deriv : when true, return the Jacobian instead of the probabilities.

    Returns
    -------
    numpy.ndarray
        ``deriv`` false: vector ``exp(x) / sum(exp(x))``.
        ``deriv`` true: matrix ``diag(p) - outer(p, p)`` where ``p`` is the softmax.
    """
    x = np.asarray(x, dtype=float)
    if x.size == 0:
        # Nothing to normalise; keep the empty shapes the plain formula would give.
        return np.zeros((0, 0)) if deriv else x
    # Softmax is invariant to adding a constant; subtracting the maximum keeps
    # every exponent <= 0 so exp() cannot overflow to inf (and produce nan).
    shifted = np.exp(x - np.max(x))
    probs = shifted / np.sum(shifted)
    if deriv:
        return np.diag(probs) - np.outer(probs, probs)
    return probs
# Note: Nx and y are always numpy arrays (for 'bce' they always have only one entry)
# when deriv = False the output must be a number and when deriv = True the output must be a vector
def loss(Nx,y,cost_type,deriv = False):
    """Evaluate a loss, or its derivative with respect to the prediction.

    Parameters
    ----------
    Nx : numpy array holding the network output.
    y : numpy array holding the target.
    cost_type : 'se' (squared error), 'ce' (cross-entropy) or
        'bce' (binary cross-entropy).
    deriv : when true, return the derivative with respect to ``Nx``.

    Returns
    -------
    The loss value when ``deriv`` is false, otherwise the derivative vector.

    Raises
    ------
    ValueError
        If ``cost_type`` is not one of the supported names.
    """
    if cost_type == 'se':
        diff = Nx - y
        return 2 * diff if deriv else diff.T @ diff
    if cost_type == 'ce':
        if deriv:
            # Element-wise -y / Nx; equal to -y.T @ diag(1 / Nx) without
            # building an n-by-n matrix (where 0 * inf terms could turn into nan).
            return -y.T / Nx
        return -y.T @ np.log(Nx)
    if cost_type == 'bce':
        if deriv:
            return (1 - y) / (1 - Nx) - y / Nx
        return (y - 1) * np.log(1 - Nx) - y * np.log(Nx)
    raise ValueError(f"unknown cost_type {cost_type!r}; expected 'se', 'ce' or 'bce'")

In [221]:
def feedforward(W,B,G,x):
    """Run one input through the network and record every intermediate value.

    Parameters
    ----------
    W, B : lists of weight matrices and bias vectors, one per layer.
    G : list of activation callables; ``G[l]`` is applied to the input of
        layer ``l`` and ``G[-1]`` produces the final output.
    x : input vector.

    Returns
    -------
    list
        For each layer ``l`` a pair ``[a_l, z_l]`` with ``a_l`` the activated
        input of the layer and ``z_l = W[l] @ a_l + B[l]``, followed by one
        last entry holding the network output ``G[-1](z)``.
    """
    feeds = []
    signal = x
    for layer in range(len(G) - 1):
        activation = G[layer](signal)
        # Every layer, including the first, consumes its *activated* input so the
        # stored activation is exactly what was multiplied by the weights.
        signal = W[layer] @ activation + B[layer]
        feeds.append([activation, signal])
    feeds.append(G[-1](signal))
    return feeds

In [222]:
def deltas(X_feeds,Y,W,B,G,verbose = False, cost_type = 'se'):
    """Back-propagate the loss derivative through the stored forward passes.

    ``X_feeds`` maps a sample key to the list returned by ``feedforward``.
    For every key the result holds one vector per layer, ordered from the
    first layer to the last. ``B`` and ``verbose`` are accepted but not used.
    """
    last = len(G) - 1
    per_sample = {}
    for key, feeds in X_feeds.items():
        # Output layer: loss derivative times the Jacobian of the final activation.
        chain = [loss(feeds[-1], Y[key], cost_type, deriv = True) @ G[last](feeds[last-1][1], deriv = True)]
        # Walk the hidden layers from the last one down to layer 1.
        for layer in reversed(range(1, last)):
            chain.append(chain[-1] @ W[layer] @ G[layer](feeds[layer-1][1], deriv = True))
        # The chain was built output-first; store it input-first.
        per_sample[key] = chain[::-1]
    return per_sample

In [223]:
def grads(X,Y,W,B,G,batch, lambda_ = 0, verbose = False,cost_type = 'se'):
    """Average the weight and bias gradients over a mini-batch.

    Returns ``(dWs, dBs, X_feeds)``: per-layer weight gradients (including the
    ``2 * lambda_ * W[l]`` regularisation term), per-layer bias gradients, and
    the forward-pass records keyed by sample index.
    """
    n_layers = len(W)
    n_batch = len(batch)
    # Forward pass for every sample in the batch, then back-propagate.
    X_feeds = {idx: feedforward(W,B,G,X[idx]) for idx in batch}
    X_deltas = deltas(X_feeds,Y,W,B,G,verbose,cost_type)

    dWs, dBs = [], []
    for layer in range(n_layers):
        weight_total = sum(np.outer(X_deltas[idx][layer], X_feeds[idx][layer][0]) for idx in batch)
        bias_total = sum(X_deltas[idx][layer] for idx in batch)
        dWs.append(weight_total/n_batch + 2*lambda_*W[layer])
        dBs.append(bias_total/n_batch)
    return dWs,dBs,X_feeds

In [224]:
def fit(X,Y,arch,G,alpha = 1e-9, momentum = .01, batch_size = 100, 
        lambda_ = 0, max_iters = 100,verbose = False, cost_type = 'se',print_costs = True):
    """Train the network with mini-batch gradient descent and momentum.

    Parameters
    ----------
    X : 2-D array of inputs, one sample per row.
    Y : targets, indexable by sample position.
    arch : list with the number of units of each layer.
    G : list of activation callables (``len(G) - 1`` layers are created).
    alpha : learning rate.
    momentum : momentum coefficient applied to the previous update.
    batch_size : samples drawn (without replacement) per iteration; capped at ``len(X)``.
    lambda_ : regularisation strength forwarded to ``grads``.
    max_iters : the loop runs while ``epochs <= max_iters``, i.e. ``max_iters + 1`` updates.
    verbose : forwarded to ``grads``.
    cost_type : loss name forwarded to ``grads`` and ``loss``.
    print_costs : print the batch cost roughly 30 times over the run.

    Returns
    -------
    W, B, costs, grad_norms
    """
    W, B, VW, VB = [], [], [], []
    costs, grad_norms = [], []
    D,m = len(G)-1,len(X)
    rng = np.random.default_rng()
    prev = X.shape[1]
    for l in range(D):
        W.append(rng.normal(loc=0, scale = 2 / (prev + arch[l]), size = (arch[l],prev)))
        B.append(np.zeros(arch[l]))
        VW.append(np.zeros(W[l].shape))
        VB.append(np.zeros(B[l].shape))
        prev = arch[l]

    # random.sample raises if asked for more items than exist.
    batch_size = min(batch_size, m)
    # Guard against a zero reporting interval when max_iters < 30.
    report_every = max(1, int(max_iters // 30))

    epochs = 0
    while epochs<=max_iters:
        batch = random.sample(range(m),batch_size)
        dWs,dBs,feeds = grads(X,Y,W,B,G,batch, lambda_ , verbose,cost_type)
        grad_norms.append(sum([ LA.norm(dw) for dw in dWs]) + sum([ LA.norm(db) for db in dBs ]))
        # Score the full output vector; taking only its first component gives a
        # wrong cost for multi-output networks and fails for 'ce'.
        costs.append(sum([ loss( feeds[i][D], Y[i], cost_type ) for i in batch]) / batch_size)
        if print_costs and epochs % report_every == 0:
            print(f'epoch: {epochs}')
            print(f'           cost: {costs[-1]}')
        for l in range(D):
            # Momentum update: blend the previous step with the new gradient step.
            VW[l] = momentum*VW[l]-alpha*dWs[l]
            VB[l] = momentum*VB[l]-alpha*dBs[l]
            W[l] = W[l]+VW[l]
            B[l] = B[l]+VB[l]
        epochs += 1
    return W,B,costs,grad_norms

In [7]:
def predict(W,B,G,x,output_type = 'vector'):
    """Predict the class of ``x`` from the network output.

    Parameters
    ----------
    W, B, G : network parameters passed to ``feedforward``.
    x : input vector.
    output_type : ``'vector'`` returns a one-hot array shaped like the network
        output; any other value returns the index of the largest output.
    """
    # Run the forward pass once and reuse the result.
    output = np.asarray(feedforward(W,B,G,x)[-1])
    winner = np.argmax(output)
    if output_type == 'vector':
        one_hot = np.zeros(output.shape)
        one_hot[winner] = 1
        return one_hot
    return winner

In [7]:
# Drop the leftover "Unnamed: 0" column; errors='ignore' keeps the cell
# re-runnable once the column is already gone.
df = df.drop("Unnamed: 0", axis = 'columns', errors = 'ignore')

In [11]:
# Scan every free_part entry and track the smallest and largest second key component.
find_max_column = float('-inf')
find_min_column = float('inf')
for index, row in df.iterrows():
    # NOTE(review): eval executes whatever text is stored in the CSV; only use on trusted data.
    for key in eval(row.free_part):
        find_min_column = min(find_min_column, key[1])
        find_max_column = max(find_max_column, key[1])

find_max_column, find_min_column

(9, -9)

In [12]:
# Scan every free_part entry and track the smallest and largest first key component.
find_max_row = float('-inf')
find_min_row = float('inf')
for index, row in df.iterrows():
    # NOTE(review): eval executes whatever text is stored in the CSV; only use on trusted data.
    for key in eval(row.free_part):
        find_min_row = min(find_min_row, key[0])
        find_max_row = max(find_max_row, key[0])

find_max_row, find_min_row

(25, -25)

In [13]:
# Scan every torsion_part entry and track the smallest and largest second key component.
find_max_column = float('-inf')
find_min_column = float('inf')
for index, row in df.iterrows():
    # NOTE(review): eval executes whatever text is stored in the CSV; only use on trusted data.
    for key in eval(row.torsion_part):
        find_min_column = min(find_min_column, key[1])
        find_max_column = max(find_max_column, key[1])

find_max_column, find_min_column

(9, -8)

In [14]:
# Scan every torsion_part entry and track the smallest and largest first key component.
find_max_row = float('-inf')
find_min_row = float('inf')
for index, row in df.iterrows():
    # NOTE(review): eval executes whatever text is stored in the CSV; only use on trusted data.
    for key in eval(row.torsion_part):
        find_min_row = min(find_min_row, key[0])
        find_max_row = max(find_max_row, key[0])

find_max_row, find_min_row

(23, -23)

In [37]:
def gen_matrix_FP(X,col, row):
    """Turn dictionary strings into flattened ``row`` x ``col`` grids.

    Parameters
    ----------
    X : sequence (e.g. a pandas Series or list) of strings, each evaluating to
        a dict that maps ``(i, j)`` keys to a number.
    col, row : number of columns and rows of the grid built per entry.

    Returns
    -------
    numpy.ndarray of shape ``(len(X), col * row)``; row ``k`` is the flattened
    grid of the k-th entry with ``grid[i, j] = value``. Negative ``i``/``j``
    follow NumPy indexing and count from the end of the axis.
    """
    m = np.zeros([len(X), col*row])
    # Iterate by position so a Series whose index is not 0..n-1 still works.
    for position, entry in enumerate(X):
        grid = np.zeros([row,col])
        # NOTE(review): eval executes arbitrary code; only feed trusted data.
        for key,value in eval(entry).items():
            grid[key[0],key[1]] = value
        m[position,:] = grid.flatten()
    return m

In [48]:
def gen_matrix_TP(X,col, row):
    """Turn dictionary strings into flattened ``row`` x ``col`` grids.

    Parameters
    ----------
    X : sequence (e.g. a pandas Series or list) of strings, each evaluating to
        a dict that maps ``(i, j)`` keys to an indexable value; the element at
        index 2 of that value is stored.
    col, row : number of columns and rows of the grid built per entry.

    Returns
    -------
    numpy.ndarray of shape ``(len(X), col * row)``; row ``k`` is the flattened
    grid of the k-th entry with ``grid[i, j] = value[2]``. Negative ``i``/``j``
    follow NumPy indexing and count from the end of the axis.
    """
    m = np.zeros([len(X), col*row])
    # Iterate by position so a Series whose index is not 0..n-1 still works.
    for position, entry in enumerate(X):
        grid = np.zeros([row,col])
        # NOTE(review): eval executes arbitrary code; only feed trusted data.
        for key,value in eval(entry).items():
            grid[key[0],key[1]] = value[2]
        m[position,:] = grid.flatten()
    return m

In [35]:
# 19 columns span the key[1] range -9..9 and 51 rows span the key[0] range
# -25..25 reported by the free_part scans; negative keys index from the end.
matrix_free_part = gen_matrix_FP(df.free_part, col=19, row=51)
matrix_free_part.shape

(1081, 969)

In [49]:
# 18 columns span the key[1] range -8..9 and 47 rows span the key[0] range
# -23..23 reported by the torsion_part scans; negative keys index from the end.
matrix_free_torsion = gen_matrix_TP(df.torsion_part, col=18, row=47)
matrix_free_torsion.shape

(1081, 846)