In [None]:
# plotting libraries
import matplotlib
import matplotlib.pyplot as plt 
# numpy (math) library
import numpy as np

from os import walk

path1 = '../../../data/'

# torch library and sublibraries
import torch
from torch.autograd import Variable

In [None]:
# Build the list of candidate input files.
# Only the top level of `path1` is scanned: later cells open files as
# `path1 + name`, so a name collected from a subdirectory could never be
# loaded (and, if it sorted first, would break the load of f[0]).
# The default passed to next() keeps `f` empty when `path1` does not exist.
dirpath, dirnames, filenames = next(walk(path1), (path1, [], []))

# keep only the .ssv files, sorted alphabetically
f = sorted(name for name in filenames if name.endswith(".ssv"))

In [None]:
print('list of input files:\n* ',f)

# Load the first listed file as a 2-D numeric table.
temp = np.loadtxt(path1+f[0])

# Split the table into independent copies:
#   'x' -> every column from index 3 up to (not including) the last one
#   'c' -> the last column, used as the target
data = {
    'x': temp[:, 3:-1].copy(),
    'c': temp[:, -1].copy(),
}

print('\ninput data has shape:\tx ', data['x'].shape)
print('target data has shape:\tc ', data['c'].shape)
print()

In [None]:
# Min-max normalise every feature column of data['x'] into the range [0, 1].
col_min = np.amin(data['x'], axis=0)
col_range = np.amax(data['x'], axis=0) - col_min
# A constant column has zero range and would yield 0/0 = NaN, which then
# propagates silently into training; dividing by 1 instead maps it to 0.
col_range = np.where(col_range == 0, 1.0, col_range)
data['nx'] = (data['x'] - col_min) / col_range

# Optional visual check of the normalised features (disabled; set to 1 to plot).
if 0:
    fig = plt.figure(figsize=(3*6.4, 2*4.8)) # default = 6.4, 4.8
    ax1 = fig.add_subplot(111)

    ax1.plot(data['nx'])

    plt.show()
    plt.close()

In [None]:
# Split the normalised data into a training set and a test set.
#
# Task statement: train the network as well as possible using only the data
# from "speakers" 0-47, then test it on speakers 48-89 and report the number
# of correct classifications on the test set.
train = {}
test = {}

# setting '0': [number of leading rows for training, number of trailing rows for testing]
# (48 and 42 speakers; the factor 11 is presumably the rows per speaker - confirm)
default    = [48*11, 42*11]
# setting '1': placeholder, not implemented yet

setting = 0
if setting == 0:
    # leading rows -> training set
    train.update(x=data['nx'][:default[0]].copy(),
                 c=data['c'][:default[0]].copy())
    # trailing rows -> test set
    test.update(x=data['nx'][-default[1]:].copy(),
                c=data['c'][-default[1]:].copy())
#elif setting == 1:

# report the shapes of both splits
print('\ntrain set', train['x'].shape, train['c'].shape, sep='\n')
print('\ntest set', test['x'].shape, test['c'].shape, sep='\n')

In [None]:
# Wrap the numpy splits as torch Variables (training pair first, test pair second).
# Inputs are cast to float: either the input data is float or the model has to
# become double precision, see
# https://stackoverflow.com/questions/44717100/pytorch-convert-floattensor-into-doubletensor?rq=1
x, x_test = (Variable(torch.from_numpy(split['x']).float()) for split in (train, test))
# Targets are cast to long (integer class indices).
c, c_test = (Variable(torch.from_numpy(split['c']).long()) for split in (train, test))

In [None]:
### import UTILITY functions
from modules.graph_utils import *
    #
    # def show_graph(obj, string='loglog'):
    # def print_spec(obj):

### import NETWORK LOOPS functions
from modules.network_loops import *
    #
    # def run_training(obj, train_x, train_y, valid_x, valid_y):
    # def run_test(obj, test_x, test_y, verbose=True):

from modules.models import *
    #
    # class Baseline(torch.nn.Module):
    #     def __init__(self, D_in, H, D_out, n):
    #     def forward(self, x, n, NL_out=False):
    # class BaseSigmoid(torch.nn.Module):
    #     def __init__(self, D_in, H, D_out, n):
    #     def forward(self, x, n, NL_out=False):
    # class BestFitSigmoid(torch.nn.Module):
    #     def __init__(self, D_in, H, D_out, n):
    #     def forward(self, x, n, NL_out=False):
    #
    # def generate_entry(obj, verbose = False):

# Axis mode handed to show_graph later on.
# NOTE(review): 'lin' presumably selects linear axes (the helper's default is
# 'loglog') - confirm in modules.graph_utils.
show_type = 'lin'

# colour pairs (xkcd palette names), one pair per row
colors = (
    ('xkcd:orange', 'xkcd:red'),
    ('xkcd:blue', 'xkcd:purple'),
    ('xkcd:green', 'xkcd:lime'),
)

In [None]:
### network and training dimensions
# N    : batch size (all rows of the training set)
# D_in : input dimension (number of feature columns)
N, D_in = train['x'].shape
# H*n is the hidden layer dimension
H, n = 11, 8
# D_out: described by the original note as "the last hidden layer size"
# NOTE(review): presumably the network output size - confirm in modules.models
D_out = 11

# number of epochs
epochs = 1000

In [None]:
##### model settings
# learning rates, all expressed as multiples of the common scale k
k = 1e-2
base_lr, sigm_lr, best_lr = 5e0*k, 12*k, .65e-1*k

# momentum values, in the same base / sigm / best order
base_mom, sigm_mom, best_mom = 0.25, 0.25, 0.5

In [None]:
# LL holds one entry per training run. Layout of an entry:
#   [label, N, D_in, H, n, D_out, NL_out, epochs]
#       H = width of hidden layers, n = number of hidden H-layers,
#       NL_out = nonlinear output layer / sum-only output layer
#   model name
#   criterion name, criterion parameters
#   optimizer name, optimizer parameters ([learning rate, momentum])
#   'StepLR', [100, 0.99]  (presumably scheduler name and parameters - confirm)
#
# Disabled variants kept for reference:
#   * grid over size in [[11,2], [11,3], [22,2], [22,3]] crossed with
#     ['base', base_lr, base_mom, False], ['sigmoid', sigm_lr, sigm_mom, True],
#     ['bestfit', best_lr, best_mom, True], labelled '%s %dx%d' and using
#     'SGD', [NL_type[1], NL_type[2]]
#   * learning-rate sweep: for lrr in np.logspace(-1,-0.7,par_size) with
#     'SGD', [lrr, NL_type[2]]
LL = []

size     = [11, 2]
NL_type  = ['sigmoid', sigm_lr, sigm_mom, False]
rep      = 30   # repetitions per parameter value
par_size = 5    # number of parameter values in the sweep

# Active experiment: momentum sweep, `rep` repeated runs for each momentum value.
for mom in np.linspace(0, 0.5, par_size):
    for jj in range(rep):
        LL.append([
            ['%d mom %.1f' % (jj, mom), N, D_in, size[0], size[1], D_out, NL_type[3], epochs],
            NL_type[0],
            'CEL', [True],
            'SGD', [NL_type[1], mom],
            'StepLR', [100, 0.99],
        ])

print(len(LL))

# containers filled by the training loop
models  = {}
results = []

verb = False

In [None]:
# Build and train one model per entry of LL, recording the best score of each.
for ii in range(len(LL)):
    models[ii] = generate_entry(LL[ii], verbose=verb)
    run_training(models[ii], x, c, x_test, c_test)
    # Read the score history of the model just trained (index ii); indexing
    # with a loop variable left over from another cell raises KeyError or
    # picks up a different model's scores.
    # NOTE(review): models[ii][-3] is presumably the per-epoch accuracy in % - confirm.
    results.append( max(models[ii][-3]) )
    # results[-1] is the value appended above, so the report stays correct even
    # if `results` was not empty when this cell started.
    print('%s\t%.2f%%'%(models[ii][0][0],results[-1]) )

In [None]:
# Plot every trained model with the project helper; show_type selects the axis mode.
show_graph(models, show_type)

In [None]:
# Per-run report: label of each trained model followed by its recorded best score.
for ii,mm in models.items():
    print('%s\t%3.2f %%' % (mm[0][0], results[ii]))

In [None]:
# Summary per swept parameter value: mean ± standard deviation of the recorded
# scores over its `rep` repeated runs (runs are stored in contiguous groups).
# NOTE(review): models[...][5][1] is presumably the momentum, mirroring the
# optimizer parameters of the LL entry - confirm against generate_entry.
for jj in range(par_size):
    group = results[rep*jj:rep*(jj+1)]
    print('{:.2f}: ({:.2f} ± {:.2f})%'.format(models[rep*jj][5][1], np.average(group), np.std(group)))

In [None]:
#show_graph(models, show_type)
#
#for ii,mm in models.items():
#    print('%s\t%3.2f %%'%(models[ii][0][0], results[ii]))
#
#print('({:.2f} ± {:.2f})%'.format(np.average(results[:]),np.std(results[:])))