In [None]:
############   IMPORTS AND CONFIG   ############   

import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import bernoulli
import pandas as pd
import random
import seaborn as sns
from functools import reduce

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random

# Make numpy printouts easier to read.
np.set_printoptions(precision=3, suppress=True)

# prettier Matplotlib.
#import matplotlib.style as style
#style.use('seaborn-white') #sets the size of the charts
plt.style.use('ggplot')
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [None]:
# Toy dataset: four items, each described by seven binary features.
NUM_ITEMS = 4
NUM_FEATURES = 7

features = ['Grow', 'Move', 'Roots', 'Fly', 'Swim', 'Leaves', 'Petals']
items = ['Canary', 'Salmon', 'Oak', 'Rose']
#dimensions = ['Base', 'Plant-Animal', 'Tree-Flower', 'Bird-Fish']

# One-hot code per item (identity matrix).
train_input = np.eye(NUM_ITEMS)

# Written one row per item, then transposed so train_output is (features x items).
train_output = np.array([
    [1, 1, 0, 1, 0, 0, 0],   # Canary: Grow, Move, Fly
    [1, 1, 0, 0, 1, 0, 0],   # Salmon: Grow, Move, Swim
    [1, 0, 1, 0, 0, 1, 0],   # Oak:    Grow, Roots, Leaves
    [1, 0, 1, 0, 0, 0, 1],   # Rose:   Grow, Roots, Petals
]).T.astype('int64')

# Human-readable view of the dataset: rows are items, columns are features.
a = pd.DataFrame(train_output.T, index = items, columns = features)
#dfi.export(a, 'toy_dataset.pdf')
#sns.heatmap(np.corrcoef(a), cmap = 'bwr', vmin = 0, vmax = 1,center =0, linecolor='black', linewidths=0.5, square = True, xticklabels= items, yticklabels= items)
#plt.xlabel('Items', weight = 'bold')
#plt.ylabel('Items', weight = 'bold')
#plt.savefig('toycovariance.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
# Input-output correlation matrix (features x items). train_input is the identity,
# so this product has the same entries as train_output.
sigma_31 = train_output @ train_input.T
# Thin SVD: sigma_31 = u @ diag(s) @ v_T. u, s and v_T are reused by later cells.
u, s, v_T = np.linalg.svd(sigma_31, full_matrices = False)

# Styling shared by all four panels.
heatmap_kwargs = dict(cmap = 'bwr', square = True, cbar = False, linewidths = 1, linecolor = 'black', annot = False, fmt = '.2f')

fig, axes = plt.subplots(nrows = 1, ncols = 4, figsize = (10,6))
sns.heatmap(sigma_31, vmin = -1, vmax = 1, ax = axes[0], yticklabels = features, xticklabels = items, **heatmap_kwargs)
# u and v_T are negated for display only; flipping the sign of both leaves the product unchanged.
sns.heatmap(-u, vmin = -1, vmax = 1, ax = axes[1], yticklabels = features, **heatmap_kwargs)
sns.heatmap(np.diag(s), vmin = 0, center = 0, ax = axes[2], **heatmap_kwargs)
sns.heatmap(-v_T, vmin = -1, vmax = 1, ax = axes[3], xticklabels = items, **heatmap_kwargs)

# (y label, x label, title) per panel. Panel 0 has features on its rows, so its
# y axis is labelled 'Features' (it used to say 'Modes'). Titles are raw strings
# so that LaTeX backslashes are not treated as Python escape sequences.
panel_text = [
    ('Features', 'Items', r'$\Sigma_{yx}$'),
    ('Features', 'Modes', r'$U$'),
    ('Modes', 'Modes', r'$S$'),
    ('Modes', 'Items', r'$V^T$'),
]
for panel, (y_text, x_text, title_text) in zip(axes, panel_text):
    panel.set_ylabel(y_text, weight = 'bold')
    panel.set_xlabel(x_text, weight = 'bold')
    panel.set_title(title_text, weight = 'bold', fontsize = 20)

fig.tight_layout()
plt.savefig('svd.pdf', bbox_inches = 'tight', pad_inches = 0)
print(s)

In [None]:
# Width of each hidden layer; default for FeedForwardNet's `num_hidden_units`.
HIDDEN_UNITS = 4

class Activation(nn.Module):
    """Identity activation: hands its input back untouched.

    Used as the default non-linearity of FeedForwardNet, which makes the
    resulting network purely linear. The module has no parameters.
    """

    def forward(self, x):
        # No transformation is applied; the very same object is returned.
        return x

class FeedForwardNet(nn.Module):
    """Bias-free fully connected network with a configurable number of hidden layers.

    Every linear layer is followed by the same ``activation`` module object.
    With ``num_hidden_layers == 0`` the network is a single linear map from
    input to output; otherwise it has ``num_hidden_layers`` hidden layers of
    ``num_hidden_units`` units each. All linear weights are drawn from a normal
    distribution with mean 0 and standard deviation 0.01.

    Args:
        num_hidden_layers: number of hidden layers (0 gives a shallow network).
        activation: module applied after every linear layer. The default
            instance is created once, when the class is defined, and is
            therefore shared by every network built with the default.
        net_input_size: dimensionality of the input vector.
        net_output_size: dimensionality of the output vector.
        num_hidden_units: width of each hidden layer.

    Attributes:
        model: the (nested) ``nn.Sequential`` holding the layers.
        modes, illusory: lists that start empty; ``learn_properties`` fills them.
    """

    def __init__(self, num_hidden_layers, activation = Activation(), net_input_size = NUM_ITEMS, net_output_size = NUM_FEATURES, num_hidden_units = HIDDEN_UNITS):
        super().__init__()
        self.net_input_size = net_input_size
        self.net_output_size = net_output_size
        self.num_hidden_layers = num_hidden_layers
        self.num_hidden_units = num_hidden_units
        self.activation = activation
        self.modes = []
        self.illusory = []

        n_in, n_hid, n_out = self.net_input_size, self.num_hidden_units, self.net_output_size
        if num_hidden_layers == 0:
            stack = nn.Sequential(nn.Linear(n_in, n_out, bias = False), activation)
        else:
            # Input layer first, then wrap the stack once per additional layer.
            stack = nn.Sequential(nn.Linear(n_in, n_hid, bias = False), activation)
            for _ in range(self.num_hidden_layers - 1):
                stack = nn.Sequential(stack, nn.Linear(n_hid, n_hid, bias = False), activation)
            stack = nn.Sequential(stack, nn.Linear(n_hid, n_out, bias = False), activation)
        self.model = stack

        # Replace PyTorch's default initialisation with small Gaussian weights.
        self.model.apply(self.normal_init)

    def normal_init(self, m):
        """Re-initialise the weights of ``m`` in place if it is an ``nn.Linear``."""
        if not isinstance(m, nn.Linear):
            return
        m.weight.data.normal_(0, 0.01)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the network output."""
        return self.model(x)
    

In [None]:
# Scratch list; it is not referenced by any of the visible cells.
illusory_activation = [] 
# Mean-squared-error loss that learn_properties applies to every training example.
criterion = nn.MSELoss()

def compose(l):
    """Multiply a sequence of matrices into one composite map.

    ``l`` is ordered from the first map applied to the last one, so the result
    is ``l[-1] @ (l[-2] @ (... @ l[0]))``.

    The previous recursive version had no base case: every call eventually
    recursed on an empty list and failed with ``IndexError``.

    Args:
        l: non-empty sequence of objects supporting the ``@`` operator.

    Returns:
        The product described above; ``l[0]`` itself when ``l`` has one element.

    Raises:
        ValueError: if ``l`` is empty.
    """
    if len(l) == 0:
        raise ValueError('compose() needs at least one matrix')
    product = l[0]
    # Left-multiply by each later map, matching the association order of the
    # original recursion.
    for later in l[1:]:
        product = later @ product
    return product

def learn_properties(model, max_epochs, step_size, optimiser, inp_ = train_input, out_ = train_output, momentum = 0, x = train_input[0]):
    """Train ``model`` one example at a time with SGD and record its trajectory.

    Before every parameter update the weight matrices of ``model`` are
    multiplied into one composite matrix ``W`` (activations are not part of
    this product) and two quantities are appended:

    * ``model.modes``    - the diagonal of ``u.T @ W @ v_T.T``, using the
      module-level SVD factors ``u`` and ``v_T``;
    * ``model.illusory`` - the vector ``W @ x``.

    Both lists are reset at the start of the call, so each ends up with
    ``max_epochs * NUM_ITEMS`` entries.

    Args:
        model: network to train; its ``modes`` / ``illusory`` attributes are overwritten.
        max_epochs: number of passes over the items.
        step_size: SGD learning rate.
        optimiser: name of the optimiser; only ``'SGD'`` is implemented.
        inp_: array whose row ``i`` is the input for item ``i``.
        out_: array whose column ``i`` is the target for item ``i``.
        momentum: momentum coefficient handed to ``optim.SGD``.
        x: probe input multiplied by the composite weight matrix.

    Returns:
        None. Results are stored on ``model``.

    Raises:
        ValueError: if ``optimiser`` is not ``'SGD'``. Previously such a call
            returned silently without training.
    """
    if optimiser != 'SGD':
        raise ValueError("Unsupported optimiser %r; only 'SGD' is implemented" % (optimiser,))

    # Kept from the original. This seeds Python's `random` module only; it does
    # not touch the torch or NumPy generators.
    random.seed(123)
    model.modes = []
    model.illusory = []

    sgd_optim = optim.SGD(model.parameters(), lr = step_size, momentum = momentum)
    for epoch in range(max_epochs):
        for item_count in range(NUM_ITEMS):
            outputs = model(torch.Tensor(inp_[item_count]))
            loss = criterion(outputs, torch.Tensor(out_.T[item_count]))

            # Snapshot the weights before this step's update. parameters() yields
            # the first layer first, so reversing gives W_last @ ... @ W_first.
            weight_list = [w.detach().numpy() for w in model.parameters()]
            weight_compose = reduce((lambda later, earlier: later @ earlier), reversed(weight_list))
            model.modes.append(np.diag(u.T @ weight_compose @ (v_T).T))
            model.illusory.append(weight_compose @ x)

            sgd_optim.zero_grad()
            loss.backward()
            sgd_optim.step()

In [None]:
fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (10,4))
# Three independent lists. The chained form `storeelu = storerelu = storetanh = []`
# bound all three names to one list, mixing the final modes of every activation.
storeelu, storerelu, storetanh = [], [], []
for i in range(500):
    # Fresh one-hidden-layer networks for every run.
    kk = FeedForwardNet(num_hidden_layers = 1, activation = nn.ELU())
    deep_relu_3_net = FeedForwardNet(num_hidden_layers = 1, activation = nn.ReLU())
    # Plotted under the label 'Tanh'; the module used is nn.Hardtanh.
    deep_tanh_3_net = FeedForwardNet(num_hidden_layers = 1, activation = nn.Hardtanh())
    learn_properties(model = kk, max_epochs = 250, optimiser = 'SGD', step_size = 1/(NUM_ITEMS))
    learn_properties(model = deep_relu_3_net, max_epochs = 250, optimiser = 'SGD', step_size = 1/(NUM_ITEMS))
    learn_properties(model = deep_tanh_3_net, max_epochs = 250, optimiser = 'SGD', step_size = 1/(NUM_ITEMS))

    # One row per training step, one column per mode.
    b = np.reshape(deep_relu_3_net.modes, (len(deep_relu_3_net.modes), NUM_ITEMS))
    c = np.reshape(deep_tanh_3_net.modes, (len(deep_tanh_3_net.modes), NUM_ITEMS))
    ee = np.reshape(kk.modes, (len(kk.modes), NUM_ITEMS))

    # Keep the mode values recorded at the last training step of this run.
    storerelu.append(b[-1])
    storeelu.append(ee[-1])
    storetanh.append(c[-1])

    for j in range(NUM_ITEMS):
        # Only the first curve of the first run gets a legend entry.
        first_curve = (i == 0) and (j == 0)
        axes[0].plot(b[:,j], color = 'red', alpha = 0.3, label = 'ReLU' if first_curve else None)
        axes[1].plot(c[:,j], color = 'red', alpha = 0.6, label = 'Tanh' if first_curve else None)
        axes[1].plot(ee[:,j], color = 'green', alpha = 0.3, label = 'ELU' if first_curve else None)

axes[0].legend(loc = 'best')
axes[1].legend(loc = 'best')
for (m), subplot in np.ndenumerate(axes):
    subplot.set_xlabel('Time (Examples)', weight = 'bold')
    subplot.set_ylabel('Effective Singular Value', weight = 'bold')

fig.tight_layout()
plt.savefig('777.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
##### 3LLN vs Shallow #####
# NOTE: this cell calls original_sol and shallow_sol, which are defined in the
# momentum-section cell further down; that cell has to be executed first.

new_t = np.linspace(0, 800, 800)
fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (10,4))
for i in range(10):
    deep_linear_3_net = FeedForwardNet(num_hidden_layers = 1)  ## Inefficient, but resets the architecture of the neural network everytime
    shallow_net = FeedForwardNet(num_hidden_layers = 0)
    learn_properties(model = deep_linear_3_net, max_epochs = 200, optimiser = 'SGD', step_size = 1/NUM_ITEMS)
    learn_properties(model = shallow_net, max_epochs = 200, optimiser = 'SGD', step_size = 1/NUM_ITEMS)
    # One row per training step, one column per mode.
    b = np.reshape(deep_linear_3_net.modes, (len(deep_linear_3_net.modes), NUM_ITEMS))
    a = np.reshape(shallow_net.modes, (len(shallow_net.modes), NUM_ITEMS))
    for j in range(NUM_ITEMS):
        # Only the first curve of the first run gets a legend entry.
        sim_label = 'Simulated' if (i == 0 and j == 0) else None
        axes[0].plot(b[:,j], color = 'red', alpha = 0.3, label = sim_label)
        axes[1].plot(a[:,j], color = 'red', alpha = 0.3, label = sim_label)

# Exact curves do not depend on the run, so they are integrated and drawn once.
# The old code re-integrated and re-drew modes 1-3 on every run after the first.
exact_deep = original_sol(s)
for k in range(NUM_ITEMS):
    exact_label = 'Exact' if k == 0 else None
    axes[0].plot(new_t, exact_deep[k], color = 'blue', alpha = 1, label = exact_label)
    axes[1].plot(new_t, shallow_sol(s[k], new_t, 0.01, tau = 50), color = 'blue', alpha = 1, label = exact_label)

axes[0].legend(loc = 'best')
axes[1].legend(loc = 'best')
for (m), subplot in np.ndenumerate(axes):
    subplot.set_xlabel('Time (Examples)', weight = 'bold')
    subplot.set_ylabel('Effective Singular Value', weight = 'bold')

fig.tight_layout()
plt.savefig('trajec.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
###### SECTION ON MOMENTUM ######
# ODE integrator used by find_trajectory and original_sol below.
from scipy.integrate import odeint
# Seed NumPy's global generator so the random initial conditions w0 and i0
# drawn later in this cell are reproducible.
np.random.seed(123)

def shallow_sol(s, t, b_0, tau):
    """Exponential relaxation from ``b_0`` at ``t = 0`` towards ``s``.

    Evaluates ``s*(1 - exp(-t/tau)) + b_0*exp(-t/tau)``.

    Args:
        s: value approached as ``t`` grows.
        t: time, scalar or NumPy array.
        b_0: value at ``t = 0``.
        tau: time constant of the exponential.

    Returns:
        The expression above, with the same shape as ``t``.
    """
    decay = np.exp(-t/tau)
    return s*(1 - decay) + b_0*decay


def original_ode(w, t, p):
    """Right-hand side of the two-variable system integrated by ``original_sol``.

    With state ``w = [c, d]`` the derivatives are
    ``dc/dt = d*(s - c*d)`` and ``dd/dt = c*(s - c*d)``.

    Args:
        w: current state ``[c, d]``.
        t: time; required by ``odeint`` but not used here.
        p: pair ``[alpha, s]``. ``alpha`` is unpacked but does not enter the equations.

    Returns:
        ``[dc/dt, dd/dt]`` as a list.
    """
    _alpha, s = p
    c, d = w
    residual = s - c*d
    return [d*residual, c*residual]

def momentum_ode(w, t, p):
    """Right-hand side of the second-order system integrated by ``find_trajectory``.

    The state is ``w = [c, c', d, d']``. With ``m = beta/alpha`` and
    ``mu = (1 - beta)/alpha`` the accelerations are
    ``c'' = -mu*c'/m + 4*d*(s - c*d)/m`` and
    ``d'' = -mu*d'/m + 4*c*(s - c*d)/m``.

    Args:
        w: current state ``[c, c', d, d']``.
        t: time; required by ``odeint`` but not used here.
        p: triple ``[alpha, beta, s]``. ``beta = 0`` makes ``m`` zero and the
            divisions below fail.

    Returns:
        ``[c', c'', d', d'']`` as a list.
    """
    alpha, beta, s = p
    c, c_rate, d, d_rate = w
    inertia = beta/(alpha)
    friction = (1-beta)/(alpha)
    residual = s - c*d
    c_accel = -friction*c_rate/inertia + 4*d*residual/inertia
    d_accel = -friction*d_rate/inertia + 4*c*residual/inertia
    return [c_rate, c_accel, d_rate, d_accel]

# Initial state handed to momentum_ode by find_trajectory, laid out as [c, u, d, v]:
# c and d are small Gaussian draws (std 0.01); u and v start at 0.
# The order of the random draws here and below fixes the values under the seed above.
w0 = [np.random.normal(0,0.01,1)[0], 0, np.random.normal(0,0.01,1)[0], 0]
# Initial state [c, d] handed to original_ode by original_sol.
i0 = [np.random.normal(0,0.01,1)[0], np.random.normal(0,0.01,1)[0]]

# Integration grid used by original_sol: 800 samples on [0, 16].
t = np.linspace(0, 16, num = 800)
# Second grid: 300 samples on [0, 44].
t2 = np.linspace(0, 44, num = 300)

def find_trajectory(beta, time, svec = s, alpha = 1/(NUM_ITEMS)):
    """Integrate ``momentum_ode`` once per entry of ``svec``.

    Each integration starts from the module-level state ``w0``.

    Args:
        beta: coefficient passed to ``momentum_ode`` as the second entry of ``p``.
        time: array of time points handed to ``odeint``.
        svec: iterable of values, one integration per value.
        alpha: coefficient passed to ``momentum_ode`` as the first entry of ``p``.

    Returns:
        A list with one array per entry of ``svec``: the product of state
        components 0 and 2 (``c*d``) at every time point.
    """
    def _mode_curve(strength):
        solution = odeint(momentum_ode, w0, time, args=([alpha, beta, strength],))
        return solution[:,0]*solution[:,2]

    return [_mode_curve(strength) for strength in svec]

def original_sol(svec = s, alpha = 1/NUM_ITEMS):
    """Integrate ``original_ode`` once per entry of ``svec``.

    Each integration starts from the module-level state ``i0`` and uses the
    module-level time grid ``t``.

    Args:
        svec: iterable of values, one integration per value.
        alpha: passed to ``original_ode`` as the first entry of ``p``.

    Returns:
        A list with one array per entry of ``svec``: the product of the two
        state components (``c*d``) at every point of ``t``.
    """
    def _mode_curve(strength):
        solution = odeint(original_ode, i0, t, args = ([alpha, strength],))
        return solution[:,0]*solution[:,1]

    return [_mode_curve(strength) for strength in svec]

In [None]:
# x axes for the two panels: 800 recorded steps (200 epochs x NUM_ITEMS) on the
# left, 300 recorded steps (75 epochs x NUM_ITEMS) on the right.
new_t = np.linspace(0, 10, 800)
new_t2 = np.linspace(0, 3.75, 300)
fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (10,4))
for i in range(30):
    deep_linear_3_net = FeedForwardNet(num_hidden_layers = 1)  ## Inefficient, but resets the architecture of the neural network everytime
    deep_linear_3_net_momentum = FeedForwardNet(num_hidden_layers = 1)
    learn_properties(model = deep_linear_3_net, max_epochs = 200, optimiser = 'SGD', step_size = 1/(NUM_ITEMS), momentum = 0)
    learn_properties(model = deep_linear_3_net_momentum, max_epochs = 75,  optimiser = 'SGD', step_size = 1/(NUM_ITEMS), momentum = 0.85)
    # One row per training step, one column per mode.
    e = np.reshape(deep_linear_3_net.modes, (len(deep_linear_3_net.modes), NUM_ITEMS))
    r = np.reshape(deep_linear_3_net_momentum.modes, (len(deep_linear_3_net_momentum.modes), NUM_ITEMS))
    for k in range(NUM_ITEMS):
        # Only the first curve of the first run gets a legend entry.
        sim_label = 'Simulated' if (k == 0 and i == 0) else None
        axes[0].plot(new_t, e[:,k], color = 'red', alpha = 0.4, label = sim_label)
        axes[1].plot(new_t2, r[:,k], color = 'red', alpha = 0.4, label = sim_label)

#Plot the analytical curves#

# Integrate each system once rather than once per plotted mode.
exact_plain = original_sol(s)
# find_trajectory requires a time grid; the old call omitted it and raised TypeError.
# t2 has 300 samples, matching new_t2.
# NOTE(review): confirm that t2's horizon (0..44) is the intended one for beta = 0.85.
exact_momentum = find_trajectory(beta = 0.85, time = t2, svec = s)
for k in range(NUM_ITEMS):
    exact_label = 'Exact' if k == 0 else None
    axes[0].plot(new_t, exact_plain[k], color = 'blue', alpha = 1, label = exact_label)
    axes[1].plot(new_t2, exact_momentum[k], color = 'blue', alpha = 0.5, label = exact_label)
axes[0].legend(loc = 'best')
axes[1].legend(loc = 'best')
for (m), subplot in np.ndenumerate(axes):
    subplot.set_xlabel('Time (Epochs)', weight = 'bold')
    subplot.set_ylabel('Effective Singular Value', weight = 'bold')

fig.tight_layout()
plt.savefig('momentum85.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
# Integration grids for the two analytic curves (300 samples each) and the
# shared x axis for the 300 recorded training steps (75 epochs x NUM_ITEMS).
t3 = np.linspace(0, 16, num = 300)
t4 = np.linspace(0, 22, num = 300)
new_t2 = np.linspace(0, 3.75, 300)
fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (10,4))
for i in range(30):
    deep_linear_3_net_momentum2 = FeedForwardNet(num_hidden_layers = 1)  ## Inefficient, but resets the architecture of the neural network everytime
    deep_linear_3_net_momentum3 = FeedForwardNet(num_hidden_layers = 1)
    learn_properties(model = deep_linear_3_net_momentum2, max_epochs = 75, optimiser = 'SGD', step_size = 1/(NUM_ITEMS), momentum = 0.5)
    learn_properties(model = deep_linear_3_net_momentum3, max_epochs = 75,  optimiser = 'SGD', step_size = 1/(NUM_ITEMS), momentum = 0.65)
    # One row per training step, one column per mode.
    e = np.reshape(deep_linear_3_net_momentum2.modes, (len(deep_linear_3_net_momentum2.modes), NUM_ITEMS))
    r = np.reshape(deep_linear_3_net_momentum3.modes, (len(deep_linear_3_net_momentum3.modes), NUM_ITEMS))
    for k in range(NUM_ITEMS):
        # Only the first curve of the first run gets a legend entry.
        sim_label = 'Simulated' if (k == 0 and i == 0) else None
        axes[0].plot(new_t2, e[:,k], color = 'red', alpha = 0.4, label = sim_label)
        axes[1].plot(new_t2, r[:,k], color = 'red', alpha = 0.4, label = sim_label)

#Plot the analytical curves#

# Integrate each system once rather than once per plotted mode.
# NOTE(review): the simulations above use momentum 0.5 and 0.65 while the
# analytic curves use beta 0.05 and 0.10 - confirm this mapping is intended.
exact_left = find_trajectory(beta = 0.05, time = t3)
exact_right = find_trajectory(beta = 0.10, time = t4)
for k in range(NUM_ITEMS):
    exact_label = 'Exact' if k == 0 else None
    axes[0].plot(new_t2, exact_left[k], color = 'blue', alpha = 1, label = exact_label)
    axes[1].plot(new_t2, exact_right[k], color = 'blue', alpha = 1, label = exact_label)
axes[0].legend(loc = 'best')
axes[1].legend(loc = 'best')
for (m), subplot in np.ndenumerate(axes):
    subplot.set_xlabel('Time (Epochs)', weight = 'bold')
    subplot.set_ylabel('Effective Singular Value', weight = 'bold')
fig.tight_layout()
plt.savefig('momcritover.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
stat1 = []
stat2 = []
fig, axes = plt.subplots(nrows = 1, ncols = 2, figsize = (10,4))

for m, subplot in np.ndenumerate(axes):
    subplot.set_xlabel('Time (Examples)', weight = 'bold')
    subplot.set_ylabel('Activation', weight = 'bold')

for i in range(1):
    deep_linear_3_net = FeedForwardNet(num_hidden_layers = 1)
    learn_properties(model = deep_linear_3_net, max_epochs = 150,  optimiser = 'SGD', step_size = 1/(NUM_ITEMS), momentum = 0)

    # NOTE(review): this momentum network is trained but nothing below plots it;
    # both panels show deep_linear_3_net. Confirm whether panel 1 was meant to use it.
    deep_linear_3_net_momentum = FeedForwardNet(num_hidden_layers = 1)
    learn_properties(model = deep_linear_3_net_momentum, max_epochs = 150,  optimiser = 'SGD', step_size = 1/(NUM_ITEMS), momentum = 0.85, x= train_input[0]+np.random.normal(loc = 0, scale = 0.1, size =4))

    # One row per training step, one column per output feature.
    deep_linear_3_net.illusory = np.reshape(deep_linear_3_net.illusory, (len(deep_linear_3_net.illusory), NUM_FEATURES))
    # NOTE(review): shallow_net comes from the "3LLN vs Shallow" cell and is not
    # plotted here; this line only works if that cell has been run.
    shallow_net.illusory = np.reshape(shallow_net.illusory, (len(shallow_net.illusory), NUM_FEATURES))

    # Contribution of mode k to output feature f for item 0 is
    # modes[:, k] * u[f, k] * v_T[k, 0], because W @ e_0 = sum_k a_k * u[:, k] * v_T[k, 0]
    # when W = u @ diag(a) @ v_T. The old code indexed v_T[0, k] (transposed),
    # so the plotted contributions did not add up to the predicted activation.
    linm = np.reshape(deep_linear_3_net.modes, (len(deep_linear_3_net.modes), NUM_ITEMS))
    for k in range(NUM_ITEMS):
        axes[0].plot(linm[:,k] * u[0,k] * v_T[k,0], color = 'red', alpha = 1, label = 'Modes' if k == 0 else None)

    momm = np.reshape(deep_linear_3_net.modes, (len(deep_linear_3_net.modes), NUM_ITEMS))
    for k in range(NUM_ITEMS):
        axes[1].plot(momm[:,k] * u[6,k] * v_T[k,0], color = 'red', alpha = 1, label = 'Modes' if k == 0 else None)

    # Network output for the probe item: feature 6 on the right, feature 0 on the left.
    axes[1].plot(deep_linear_3_net.illusory[:,6], color = 'blue', alpha = 1, label = 'Predicted')
    axes[0].plot(deep_linear_3_net.illusory[:,0], color = 'blue', alpha = 1, label = 'Predicted')

axes[0].legend()
axes[1].legend()

fig.tight_layout()
plt.savefig('illusory22.pdf', bbox_inches = 'tight', pad_inches = 0)

In [None]:
# Plot the ELU non-linearity on [-5, 5].
putting = torch.Tensor(np.linspace(-5,5,100))
instance = nn.ELU()
# Bind the new figure to `fig`; otherwise fig.tight_layout() below would act on
# the figure left over from the previous cell.
fig = plt.figure(figsize = (10,4))
plt.plot(putting,instance(putting))
plt.xlabel('$x$', weight = 'bold')
plt.ylabel('$ELU(x)$', weight = 'bold')
fig.tight_layout()
plt.savefig('elu.pdf', bbox_inches = 'tight', pad_inches = 0)