In [1]:
# plotting libraries
import matplotlib
import matplotlib.pyplot as plt 
# numpy (math) libary
import numpy as np

from os import walk

path1 = '../../../data/'

# torch library and sublibraries
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F

In [2]:
f = [] # empty list of files
for (dirpath, dirnames, filenames) in walk(path1):
    f.extend(filenames)

# sort list alphabetically
f.sort()
# remove non .ssv files from list
f = [x for x in f if x[-4:]==".ssv"]

In [3]:
print(f[0])

temp = np.loadtxt(path1+f[0])
data = {}

data['x'] = np.copy(temp)[:,3:-1]
data['y_int'] = np.copy(temp)[:,-1]
y = np.zeros((temp.shape[0], 11))
for jj in range(temp.shape[0]):
    y[jj, int(temp[jj,-1])] += 1
data['y'] = np.copy(y)
del y

print('input data x has shape: ',  data['x'].shape)
print('output data y has shape: ', data['y'].shape)
print()

if 0:
    jj = 6
    print(data['x'][jj,:])
    print(data['y'][jj,:])
    print(temp[jj,-1])

full_speach_data.ssv
input data x has shape:  (990, 10)
output data y has shape:  (990, 11)



In [4]:
# normalize data in the range [0,1]
for jj in range(data['x'].shape[1]):
    data['x'][:,jj] = (data['x'][:,jj]-min(data['x'][:,jj]))/(max(data['x'][:,jj])+min(data['x'][:,jj]))

In [5]:
# divide training and test sets
train = {}
test = {}

# The problem is to train the network as well as possible using only
# on data from "speakers" 0-47, and then to test the network on
# speakers 48-89, reporting the number of correct classifications
# in the test set.

# setting '0'
default    = [48*11, 42*11]
# setting '1'

setting = 0
if setting == 0:
    train['x'] = np.copy(data['x'][:default[0],:])
    train['y'] = np.copy(data['y'][:default[0],:])
    train['y_int'] = np.copy(data['y_int'][:default[0]])
    
    test['x']  = np.copy(data['x'][-default[1]:,:])
    test['y']  = np.copy(data['y'][-default[1]:,:])
    test['y_int']  = np.copy(data['y_int'][-default[1]:])
#elif setting == 1:
    
print(train['x'].shape)
print(train['y'].shape)
print()
print(test['x'].shape)
print(test['y'].shape)

(528, 10)
(528, 11)

(462, 10)
(462, 11)


In [6]:
# create x & y torch Variables
x = Variable( torch.from_numpy(train['x']).float() )
y = Variable( torch.from_numpy(train['y']).float() )
y_int = Variable( torch.from_numpy(train['y_int']).long() )
# either input data is float or model becomes doublefloat
# https://stackoverflow.com/questions/44717100/pytorch-convert-floattensor-into-doubletensor?rq=1
# create 
x_test = Variable( torch.from_numpy(test['x']).float() )
y_test = Variable( torch.from_numpy(test['y']).float() )
y_int_test = Variable( torch.from_numpy(test['y_int']).long() )

In [7]:
# define settings for plots
def show_graph(string):
    fig = plt.figure(figsize=(3*6.4, 2*4.8)) # default = 6.4, 4.8
    ax1 = fig.add_subplot(111)

    error = []
    valid = []
    if string=='loglog':
        for ii, ee in models.items():
            error.append( ax1.loglog(ee[-2][:], label='%s, lr=%1.0e'%(ee[0][0],ee[5][0])) )
            valid.append( ax1.loglog(ee[-1][:], ls='--', c=error[-1][0].get_color(), label='validation error') )
    elif string=='logy':
        for ii, ee in models.items():
            error.append( ax1.semilogy(ee[-2][:], label='%s, lr=%1.0e'%(ee[0][0],ee[5][0])) )
            valid.append( ax1.semilogy(ee[-1][:], ls='--', c=error[-1][0].get_color(), label='validation error') )
    elif string=='logx':
        for ii, ee in models.items():
            error.append( ax1.semilogx(ee[-2][:], label='%s, lr=%1.0e'%(ee[0][0],ee[5][0])) )
            valid.append( ax1.semilogx(ee[-1][:], ls='--', c=error[-1][0].get_color(), label='validation error') )
    else:
        for ii, ee in models.items():
            error.append( ax1.plot(ee[-2][:], label='%s, lr=%1.0e'%(ee[0][0],ee[5][0])) )
            valid.append( ax1.plot(ee[-1][:], ls='--', c=error[-1][0].get_color(), label='validation error') )

    ax1.set_xlabel('iteration number', fontsize = 16)
    ax1.set_ylabel('loss', fontsize = 16)
    title_string = '%d epochs, hidden layers [%d, %d] are width and depth'%(epochs, H, n)
    ax1.set_title(title_string, fontsize = 16)
    legend = ax1.legend(loc='best', fontsize=16)

    plt.show()
    plt.close()

# prints errors in semi-log axis
show_type = 'logy'

# some colors
colors = (('xkcd:orange', 'xkcd:red'),#
          ('xkcd:blue', 'xkcd:purple'),#
          ('xkcd:green', 'xkcd:lime'),#
         )

In [8]:
# define baseline network with relu (clamp)
class Baseline(torch.nn.Module):
    def __init__(self, D_in, H, D_out, n):
        super(Baseline, self).__init__()
        self.linear0 = torch.nn.Linear(D_in, H)
        for jj in range(n):
            exec("self.linear%d = torch.nn.Linear(H, H)" %(jj+1) );
        self.linearOut = torch.nn.Linear(H, D_out)

    def forward(self, x, n, NL_out=False):
        
        h = []
        # sum (linear1) and then relu (clamp)
        h.append( self.linear0(x).clamp(min=0) )
        # sum (linear2) and then relu (clamp)
        for jj in range(n):
            exec("h.append( self.linear%d(h[-1]).clamp(min=0) )" %(jj+1) );
        # sum (out) and the output
        if not NL_out:
            y_pred = self.linearOut(h[-1])
        elif NL_out:
            y_pred = self.linearOut(h[-1]).Softmax()
        else:
            raise ValueError('NL_out must be either True or False, but it isn\'t')
        
        return y_pred # output = y_pred(icted)

In [9]:
# define different network with sigmoid
class BaseSigmoid(torch.nn.Module):
    def __init__(self, D_in, H, D_out, n):
        super(BaseSigmoid, self).__init__()
        self.linear0 = torch.nn.Linear(D_in, H)
        for jj in range(n):
            exec("self.linear%d = torch.nn.Linear(H, H)" %(jj+1) );
        self.linearOut = torch.nn.Linear(H, D_out)

    def forward(self, x, n, NL_out=False):
        h = []
        # sum (linear1) and then sigmoid (F.sigmoid)
        h.append( F.sigmoid(self.linear0(x)) )
        # sum (linear2) and then sigmoid (F.sigmoid)
        for jj in range(n):
            exec("h.append( F.sigmoid(self.linear%d(h[-1]) ) )" %(jj+1) );
        # sum (out) and the output
        if not NL_out :
            y_pred = self.linearOut(h[-1])
        elif NL_out :
            y_pred = F.sigmoid( self.linearOut(h[-1]) )
        else:
            raise ValueError('NL_out must be either True or False, but it isn\'t')
        
        return y_pred # output = y_pred(icted)

In [10]:
# define best fit class 
class BestFitSigmoid(torch.nn.Module):
    def __init__(self, D_in, H, D_out, n):
        super(BestFitSigmoid, self).__init__()
        self.linear0 = torch.nn.Linear(D_in, H)
        for jj in range(n):
            exec("self.linear%d = torch.nn.Linear(H, H)" %(jj+1) );
        self.linearOut = torch.nn.Linear(H, D_out)

    def forward(self, x, n, NL_out=False):
        
        # parameters
        p = [ 689.651615, 0.433819208, 1.31042204, -0.975437185, -0.518489780, 0.403015568, 0.502495627]
        
        #p[0], p[1], p[2], p[3], p[4], p[5], p[6]
        #  a,    b,    c,    d,    g,   x0,   x1)
        #b*sigmoid(a*(arg-x0))+c*relu(arg)+d*relu(arg-x0)+g*relu(arg-x1)
        
        h = []
        h.append( torch.mul( torch.sigmoid( self.linear0(x).add_(-p[5]).mul_(p[0]) ), p[1] ) )
        h[0].add_( self.linear0(x).clamp(min=0).mul_(p[2]) )
        h[0].add_( self.linear0(x).clamp(min=p[5]).mul_(p[3]) )
        h[0].add_( self.linear0(x).clamp(min=p[6]).mul_(p[4]) )
        
        # sum (linear2) and then nonlinear function
        for jj in range(n):
            exec("h.append( torch.mul( torch.sigmoid( self.linear%d(h[-1]).add_(-p[5]).mul_(p[0]) ), p[1] ) )" %(jj+1) );
            exec("h[jj+1].add_( self.linear%d(h[-2]).clamp(min=0).mul_(p[2]) )" %(jj+1) );
            exec("h[jj+1].add_( self.linear%d(h[-2]).clamp(min=p[5]).mul_(p[3]) )" %(jj+1) );
            exec("h[jj+1].add_( self.linear%d(h[-2]).clamp(min=p[6]).mul_(p[4]) )" %(jj+1) );
                     
        # sum (out) and the output
        if not NL_out :
            y_pred = self.linearOut(h[-1])
        elif NL_out:
            y_pred = torch.mul( torch.sigmoid( self.linearOut(h[-1]).add_(-p[5]).mul_(p[0]) ), p[1] )
            y_pred.add_( self.linearOut(h[-1]).clamp(min=0).mul_(p[2]) )
            y_pred.add_( self.linearOut(h[-1]).clamp(min=p[5]).mul_(p[3]) )
            y_pred.add_( self.linearOut(h[-1]).clamp(min=p[6]).mul_(p[4]) )
        else:
            raise ValueError('NL_out must be either True or False, but it isn\'t')
                                
        return y_pred # output = y_pred(icted)

In [11]:
def generate_entry(ii, verbose = False):
    global models
    global LL
                     
    models[ii] = [LL[ii][0],]
    # append model
    if LL[ii][1]=='base':
        models[ii].append( Baseline(D_in, models[ii][0][1], D_out, models[ii][0][2]) )
    elif LL[ii][1]=='sigmoid':
        models[ii].append( BaseSigmoid(D_in, models[ii][0][1], D_out, models[ii][0][2]) )
    elif LL[ii][1]=='bestfit':
        models[ii].append( BestFitSigmoid(D_in, models[ii][0][1], D_out, models[ii][0][2]) )
    else:
        raise ValueError('model type not recognised')
    if verbose:
        print('model:', models[ii][1])
        print('model settings:',D_in, D_out, models[ii][0][1], models[ii][0][2], models[ii][0][3])
    
    # append criterion
    if LL[ii][2]=='MSE':
        models[ii].append( torch.nn.MSELoss(size_average=LL[ii][3][0] ) )
        models[ii].append( LL[ii][3] )
    elif LL[ii][2]=='CEL':
        models[ii].append( torch.nn.CrossEntropyLoss(size_average=LL[ii][3][0] ) )
        models[ii].append( LL[ii][3] )
    else:
        raise ValueError('criterion type not recognised')
        
    if verbose:
        print('criterion:', models[ii][2], '\ncriterion settings:', models[ii][3])
    
    # append optimizer
    if LL[ii][4]=='SGD':
        models[ii].append( torch.optim.SGD(models[ii][1].parameters(), lr=LL[ii][5][0]) )
        models[ii].append( LL[ii][5] )
    #elif ll[4]=='':
    else:
        raise ValueError('optimizer type not recognised')
    
    if verbose:
        print('optimizer:', models[ii][4], '\noptimizer settings:\n', models[ii][5])
    
    # append empty errors list
    models[ii].append( [] )
    models[ii].append( [] )
    print()

def run_training(ii):
    global models
    global x, y, x_test, y_test

    print('epochs\terror\t\tvalidation')
    for t in range(models[ii][0][4]):
        # Forward pass: Compute predicted y by passing x to the model
        y_pred = models[ii][1](x, models[ii][0][2], models[ii][0][3])

        if LL[ii][2]=='MSE':
            # Compute and print loss
            loss = models[ii][2](y_pred, y)
            models[ii][-2].append(loss.data[0])
        
            if (t+1)%(epochs//20) == 0:
                # validation test
                y_pred_valid = models[ii][1](x_test, models[ii][0][2], models[ii][0][3])
                loss_valid = models[ii][2](y_pred_valid, y_test)
                models[ii][-1].append(loss.data[0])
                
                print(t+1, '\t', loss.data[0], '\t', loss_valid.data[0])
                
        elif LL[ii][2]=='CEL':
            # Compute and print loss
            loss = models[ii][2](y_pred, y_int)
            models[ii][-2].append(loss.data[0])
        
            if (t+1)%(epochs//20) == 0:
                # validation test
                y_pred_valid = models[ii][1](x_test, models[ii][0][2], models[ii][0][3])
                loss_valid = models[ii][2](y_pred_valid, y_int_test) #np.argmax( 
                models[ii][-1].append(loss.data[0])
                
                print(t+1, '\t', loss.data[0], '\t', loss_valid.data[0])

        # Zero gradients, perform a backward pass, and update the weights.
        models[ii][4].zero_grad()
        loss.backward()
        models[ii][4].step()

def run_test(ii, verbose=True):
    global models
    global x_test, y_test
                     
    y_pred = models[ii][1](x_test, models[ii][0][2], models[ii][0][3])
    
    correctness = 0
    for jj in range(x_test.shape[0]):
        if y_test.data[jj, np.argmax(y_pred.data[jj,:]) ]:
            correctness += 1
    correctness = correctness/x_test.shape[0]*100
    
    if verbose:
        print('%3.2f'%correctness)

In [12]:
# N is batch size; D_in is input dimension;
# H is hidden dimension; D_out is output dimension.
N     = train['x'].shape[0]
D_in  = train['x'].shape[1]
H     = 64
D_out = train['y'].shape[1]
n     = 1

epochs = int(10e2)

In [13]:
##### model settings
k = 5e2
base_lr = 1e-4*k
sigm_lr = 1e-6*k
best_lr = 1e-5*k

In [14]:
# settings: '' name, H = width of hidden layers, n = number of hidden H-layers,
#           NL_out = nonlinear output layer/sum-only output layer,
#           'model'
#           'criterion' and parameters
#           'optimizer' and parameters
LL = [[['relu baseline', H, 1, False, epochs],
       'base',
       'MSE', [True],
       'SGD', [base_lr]
      ],
      [['base sigmoid', H, 1, False, epochs],
       'sigmoid',
       'MSE', [True],
       'SGD', [sigm_lr]
      ],
      [['bestfit', H, 1, False, epochs],
       'bestfit',
       'MSE', [True],
       'SGD', [best_lr]
      ],
      [['relu CEL baseline', H, 1, False, epochs],
       'base',
       'CEL', [True],
       'SGD', [base_lr]
      ],
      [['base CEL sigmoid', H, 1, False, epochs],
       'sigmoid',
       'CEL', [True],
       'SGD', [sigm_lr]
      ],
      [['Cel bestfit', H, 1, False, epochs],
       'bestfit',
       'CEL', [True],
       'SGD', [best_lr]
      ],
      [['relu2 baseline', H, 2, False, epochs],
       'base',
       'MSE', [True],
       'SGD', [base_lr]
      ],
      [['base2 sigmoid', H, 2, False, epochs],
       'sigmoid',
       'MSE', [True],
       'SGD', [sigm_lr]
      ],
      [['bestfit2', H, 2, False, epochs],
       'bestfit',
       'MSE', [True],
       'SGD', [best_lr]
      ],
      [['relu CEL baseline2', H, 2, False, epochs],
       'base',
       'CEL', [True],
       'SGD', [base_lr]
      ],
      [['base CEL sigmoid2', H, 2, False, epochs],
       'sigmoid',
       'CEL', [True],
       'SGD', [sigm_lr]
      ],
      [['Cel bestfit2', H, 2, False, epochs],
       'bestfit',
       'CEL', [True],
       'SGD', [best_lr]
      ],
     ]
print(len(LL))

models = {}

12


In [15]:
generate_entry(0, True)
run_training(0)
#show_graph(show_type)
run_test(0)

model: Baseline(
  (linear0): Linear(in_features=10, out_features=64)
  (linear1): Linear(in_features=64, out_features=64)
  (linearOut): Linear(in_features=64, out_features=11)
)
model settings: 10 11 64 1 False
criterion: MSELoss(
) 
criterion settings: [True]
optimizer: <torch.optim.sgd.SGD object at 0x7ff05a6b6588> 
optimizer settings:
 [0.05]

epochs	error		validation
50 	 0.0788329467177391 	 0.08258912712335587
100 	 0.07606469839811325 	 0.08098110556602478
150 	 0.07450424879789352 	 0.08036776632070541
200 	 0.07337924093008041 	 0.08003849536180496
250 	 0.07246287912130356 	 0.07973392307758331
300 	 0.07163048535585403 	 0.07941882312297821
350 	 0.07084082812070847 	 0.07921252399682999
400 	 0.07010692358016968 	 0.07900343090295792
450 	 0.06942302733659744 	 0.07881030440330505
500 	 0.068779356777668 	 0.0786455050110817
550 	 0.06816563755273819 	 0.0784861221909523
600 	 0.06757953017950058 	 0.07835321128368378
650 	 0.06701300293207169 	 0.07824403047561646
700 	 

In [16]:
generate_entry(1, True)
run_training(1)
#show_graph(show_type)
run_test(1)

model: BaseSigmoid(
  (linear0): Linear(in_features=10, out_features=64)
  (linear1): Linear(in_features=64, out_features=64)
  (linearOut): Linear(in_features=64, out_features=11)
)
model settings: 10 11 64 1 False
criterion: MSELoss(
) 
criterion settings: [True]
optimizer: <torch.optim.sgd.SGD object at 0x7ff05a6b6a90> 
optimizer settings:
 [0.0005]

epochs	error		validation
50 	 0.1748177707195282 	 0.17534221708774567
100 	 0.16072237491607666 	 0.16118869185447693
150 	 0.14879192411899567 	 0.14920452237129211
200 	 0.13869021832942963 	 0.13905426859855652
250 	 0.1301342248916626 	 0.1304537057876587
300 	 0.12288635224103928 	 0.12316490709781647
350 	 0.11674489080905914 	 0.11698564887046814
400 	 0.11154008656740189 	 0.11174646019935608
450 	 0.1071283146739006 	 0.10730337351560593
500 	 0.1033884808421135 	 0.10353456437587738
550 	 0.1002177894115448 	 0.10033747553825378
600 	 0.09752936661243439 	 0.09762495756149292
650 	 0.09524979442358017 	 0.0953230932354927
700

In [17]:
generate_entry(2, True)
run_training(2)
show_graph(show_type)
run_test(2)

model: BestFitSigmoid(
  (linear0): Linear(in_features=10, out_features=64)
  (linear1): Linear(in_features=64, out_features=64)
  (linearOut): Linear(in_features=64, out_features=11)
)
model settings: 10 11 64 1 False
criterion: MSELoss(
) 
criterion settings: [True]
optimizer: <torch.optim.sgd.SGD object at 0x7ff05a6b6860> 
optimizer settings:
 [0.005]

epochs	error		validation
50 	 0.10552364587783813 	 0.10407410562038422
100 	 0.09209729731082916 	 0.09075900912284851
150 	 0.08958316594362259 	 0.08862538635730743
200 	 0.08868959546089172 	 0.08797336369752884
250 	 0.08799739927053452 	 0.08760997653007507
300 	 0.08726722747087479 	 0.087273508310318
350 	 0.0868014395236969 	 0.08703386038541794
400 	 0.08625876158475876 	 0.08669880777597427
450 	 0.08581957966089249 	 0.08642252534627914
500 	 0.0854182317852974 	 0.0861688107252121
550 	 0.08500165492296219 	 0.08594093471765518


KeyboardInterrupt: 

In [None]:
generate_entry(3, True)
run_training(3)
#show_graph(show_type)
run_test(3)

In [None]:
generate_entry(4, True)
run_training(4)
#show_graph(show_type)
run_test(4)

In [None]:
generate_entry(5, True)
run_training(5)
#show_graph(show_type)
run_test(5)

In [None]:
generate_entry(6, True)
run_training(6)
run_test(6)

In [None]:
generate_entry(7, True)
run_training(7)
run_test(7)

In [None]:
generate_entry(8, True)
run_training(8)
run_test(8)

In [None]:
generate_entry(9, True)
run_training(9)
run_test(9)

In [None]:
generate_entry(10, True)
run_training(10)
run_test(10)

In [None]:
generate_entry(11, True)
run_training(11)
run_test(11)

In [None]:
show_graph(show_type)