In [1]:
# imports
from tqdm import tnrange
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt
import numpy as np
import gc
import os
%matplotlib inline  

# alphabet
import string

# Run on the first CUDA device when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cpu


In [2]:
def get_accuracy(logit, target):
    """Return the percentage of entries in `target` matched by the arg-max of `logit`.

    logit  -- tensor of scores; the maximum is taken along dimension 1.
    target -- tensor of class indices; the arg-max result is reshaped to its size.

    The value is returned as a plain Python number via `.item()`.
    """
    n_examples = len(target)
    # torch.max along a dimension yields (values, indices); only the indices matter here.
    _, predicted = torch.max(logit, 1)
    predicted = predicted.view(target.size())
    n_correct = (predicted.data == target.data).sum()
    return (100.0 * n_correct / n_examples).item()

def nparam(ninputs,nhidden,noutputs):
    """Evaluate ninputs*(nhidden+1) + nhidden*(nhidden+1) + nhidden*(noutputs+1).

    NOTE(review): this differs from nparam_MLP by (ninputs - noutputs); it is not
    clear from this file which architecture the formula is meant to describe.
    """
    input_term = ninputs * (nhidden + 1)
    hidden_term = nhidden * (nhidden + 1)
    output_term = nhidden * (noutputs + 1)
    return input_term + hidden_term + output_term

# define the number of parameters we need
def nparam_MLP(N_INPUTS,N_HIDDEN,N_OUTPUTS):
    """Count weights and biases of input->hidden, hidden->hidden and hidden->output layers.

    Each layer contributes (fan_in + 1) * fan_out values: one weight per input plus
    one bias for every output unit.
    """
    layer_shapes = [
        (N_INPUTS, N_HIDDEN),   # input -> first hidden layer
        (N_HIDDEN, N_HIDDEN),   # first -> second hidden layer
        (N_HIDDEN, N_OUTPUTS),  # second hidden layer -> output
    ]
    total = 0
    for fan_in, fan_out in layer_shapes:
        total += (fan_in + 1) * fan_out
    return total


In [None]:
# a prototype MLP
class MLP(nn.Module):
    """Three linear layers with tanh after the first two: encoder -> recurrent -> decoder.

    n_inputs         -- width of the input layer
    n_hidden_neurons -- width of each of the two hidden layers
    n_output         -- width of the output layer
    device           -- accepted for the caller's convenience; not used inside the class

    After every forward pass the intermediate activations remain available as
    `hidden1`, `hidden2` and `output`.
    """

    def __init__(self, n_inputs, n_hidden_neurons, n_output,  device):
        super(MLP, self).__init__()
        # layer widths, kept for later inspection
        self.n_inputs = n_inputs
        self.n_hidden_neurons = n_hidden_neurons
        self.n_output = n_output
        self.n_hidden = n_hidden_neurons  # second name for the hidden width

        # activation modules; forward() only applies tanh
        self.sig = nn.Sigmoid()
        self.tanh = nn.Tanh()

        # affine maps, created in the order input -> hidden -> hidden -> output
        self.encoder = nn.Linear(n_inputs, n_hidden_neurons)
        self.recurrent = nn.Linear(n_hidden_neurons, n_hidden_neurons)
        self.decoder = nn.Linear(n_hidden_neurons, n_output)

    def forward(self, x):
        """Map `x` to un-normalised output scores and cache the layer activations."""
        encoded = self.encoder(x)
        self.hidden1 = self.tanh(encoded)
        mixed = self.recurrent(self.hidden1)
        self.hidden2 = self.tanh(mixed)
        self.output = self.decoder(self.hidden2)
        return self.output
    

In [None]:
# Test MLP on Anna Karenina
# Load Anna Karenina
from torch.utils.data import DataLoader # dataloader
import sys
sys.path.insert(0,'../final_project/Data/') # make the project-local dataset module importable
from AnnaDataset_MLP import AnnaDataset, InvertAnna # import AK dataset
import torchvision
import torchvision.transforms as transforms

# params
BATCH_SIZE = 500 # number of samples in each mini-batch
N_STEPS = 10 # How many characters are we inputting into the list at a time
N_HIDDEN_NEURONS = 512 # how many neurons per hidden layer
N_INPUTS = 77*N_STEPS # presumably 77 character categories per step, flattened -- TODO confirm against AnnaDataset
N_OUTPUTS = 77
N_LAYERS = 2 # 2 hidden layers
N_EPOCHS = 11 # how many training epochs
learning_rates = np.asarray([2]) # learning rates
N_REPS = 3 # len(learning_rates) # the number of learning repetitions
N_PARAMS = nparam_MLP(N_INPUTS,N_HIDDEN_NEURONS,N_OUTPUTS)
gidx = N_HIDDEN_NEURONS // 2 # index that splits the hidden units into two equal groups

# regularization parameters
# lambdas = np.arange(0,1e-2,3e-3,dtype=float)
# `np.float` was only an alias of the builtin `float` and no longer exists in
# NumPy >= 1.24, so the builtin is used directly (same resulting dtype).
lambdas = np.arange(0,1e-1,1e-2,dtype=float) # full sweep
N_LAMBDA = len(lambdas)

# load data
# list all transformations
# NOTE(review): `transform` is built here but is not passed to the dataset or loaders below.
transform = transforms.Compose(
    [transforms.Normalize((0,), (0.3,))])

dataset = AnnaDataset(N_STEPS) # load the dataset
# NOTE(review): both loaders wrap the same `dataset`, so anything measured through
# `testloader` is measured on the training data. Use a held-out split if a real
# generalisation estimate is needed.
trainloader = DataLoader(dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=4) # batches of BATCH_SIZE entries, in dataset order
testloader = DataLoader(dataset, batch_size=BATCH_SIZE,
                        shuffle=False, num_workers=4) # batches of BATCH_SIZE entries, in dataset order

In [None]:
# Regularizing diagonal blocks of the partitioned network.
# For every repetition and every lambda a fresh MLP is trained with cross-entropy
# plus lambda-weighted penalty on selected weight sub-blocks, evaluated after each
# epoch, saved to disk and then released.


def _flat_parameters(model):
    """Return every trainable parameter of `model` as one 1-D NumPy array (parameters() order)."""
    pieces = [p.detach().reshape(-1) for p in model.parameters() if p.requires_grad]
    return torch.cat(pieces).cpu().numpy()


def _partition_penalty(model, n_hidden, split, device):
    """Sum `norm(p=1)` over the penalised sub-blocks of the 2-D weights of `model`.

    * (n_hidden x n_hidden) weights: the two diagonal blocks [:split,:split] and [split:,split:]
    * other weights with n_hidden columns: columns split: onwards
    * other weights with n_hidden rows:    rows up to split

    NOTE: despite the historical name `l2_reg`, `norm(p=1)` is a sum of absolute
    values (an L1 penalty), not a squared L2 penalty.
    """
    penalty = torch.tensor(0.0, device=device)
    for param in model.parameters():
        if not param.requires_grad or len(param.shape) != 2:
            continue
        n_rows, n_cols = param.shape
        if n_rows == n_hidden and n_cols == n_hidden:
            penalty = penalty + param[:split, :split].norm(p=1)
            penalty = penalty + param[split:, split:].norm(p=1)
        elif n_cols == n_hidden:
            penalty = penalty + param[:, split:].norm(p=1)
        elif n_rows == n_hidden:
            penalty = penalty + param[:split, :].norm(p=1)
    return penalty


# Metric arrays are indexed [epoch, lambda index, repetition]; Phist_P has an extra
# leading axis over the flattened model parameters.
train_loss_P = np.zeros((N_EPOCHS,N_LAMBDA,N_REPS))
train_acc_P = np.zeros((N_EPOCHS,N_LAMBDA,N_REPS))
test_loss_P = np.zeros((N_EPOCHS,N_LAMBDA,N_REPS))
test_acc_P = np.zeros((N_EPOCHS,N_LAMBDA,N_REPS))
Phist_P = np.zeros((N_PARAMS,N_EPOCHS,N_LAMBDA,N_REPS))

model_P = [None]*N_LAMBDA*N_REPS # one slot per (repetition, lambda) pair
regval_P = [] # penalty value of every training batch, in execution order
for r in tnrange(N_REPS): # loop over the number of reps
    for k in tnrange(N_LAMBDA): # loop over the number of different lambda values
        reg_lambda = lambdas[k] # regularization strength for this run
        slot = k + r*N_LAMBDA
        # The file name uses the lambda *index*: the previous int(reg_lambda*10) is 0 for
        # every lambda below 0.1, so all runs of a repetition overwrote the same file.
        model_path = './model_P_rep_{}_lambdaidx_{:d}.pt'.format(r, k)
        model = MLP(N_INPUTS,N_HIDDEN_NEURONS,N_OUTPUTS,device).to(device) # create the model
        model_P[slot] = model
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9) # SGD with momentum
        criterion = nn.CrossEntropyLoss() # set the loss function
        loss = l2_reg = None # defined up front so the cleanup below never hits an unbound name

        # note that cross-entropy loss expects the indices of the class, not the one-hot. So, for A = [1,0,0,...] and B = [0,1,0,...], A is 0 and B is 1

        for epoch in range(N_EPOCHS): # for each training epoch
            # Record the parameter values the model has at the start of this epoch.
            # (The earlier element-wise loop wrote only the first parameter value into
            # every row because of an inner `while`.)
            snapshot = _flat_parameters(model)
            n_saved = min(snapshot.size, Phist_P.shape[0])
            Phist_P[:n_saved,epoch,k,r] = snapshot[:n_saved]

            running_train_loss = 0
            running_train_acc = 0
            model.train()
            for x, y_tar in trainloader:
                x, y_tar = x.to(device), y_tar.to(device) # x is the input batch, y_tar the class indices
                x = x-0.3
                optimizer.zero_grad() # set gradients to 0
                y_pred = model(x.view(x.shape[0],x.shape[1]*x.shape[2])) # flatten dims 1 and 2, then predict
                loss = criterion(y_pred,y_tar)
                l2_reg = _partition_penalty(model, N_HIDDEN_NEURONS, gidx, device)
                regval_P.append(l2_reg.item()) # add the penalty to the running list
                loss = loss + l2_reg*reg_lambda/BATCH_SIZE # total loss = data term + scaled penalty
                loss.backward() # backpropagate the loss
                optimizer.step() # run SGD
                running_train_loss += loss.item()
                running_train_acc += get_accuracy(y_pred, y_tar) # compute accuracy

            running_test_acc = 0
            running_test_loss = 0
            model.eval()
            with torch.no_grad(): # evaluation needs no autograd graph
                for x_test, y_test_tar in testloader:
                    x_test, y_test_tar = x_test.to(device), y_test_tar.to(device)
                    x_test = x_test - 0.3
                    y_test_pred = model(x_test.view(x_test.shape[0],x_test.shape[1]*x_test.shape[2]))
                    test_loss = criterion(y_test_pred,y_test_tar) # unregularized loss

                    running_test_loss += test_loss.item()
                    running_test_acc += get_accuracy(y_test_pred, y_test_tar)

            train_loss_P[epoch,k,r] = running_train_loss/len(trainloader)
            train_acc_P[epoch,k,r] = running_train_acc/len(trainloader)
            test_loss_P[epoch,k,r] = running_test_loss/len(testloader)
            test_acc_P[epoch,k,r] = running_test_acc/len(testloader)
            print(train_acc_P[epoch,k,r])

        # save the model and free the memory
        torch.save(model.state_dict(), model_path)
        model_P[slot] = None # release the slot (same empty marker the list was created with)
        del model, l2_reg, loss, optimizer, criterion

HBox(children=(IntProgress(value=0, max=3), HTML(value='')))

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

31.7150880776
38.2129180495
41.4278784784
44.1845800357
46.4434516211
48.3740107225
49.9167730406
51.2491702834
52.38218024
53.3403114629
54.1628797549
31.4472810825
37.6372223641
40.3474597907
43.019402604
45.1414347715
46.952769977
48.4975746745
49.7978044422
50.9106459025
51.8710748021
52.7020679091
31.1935154455
37.3290783763
40.1861118203
42.9657901455
45.147817207
47.1444983406
48.819504723
50.1840694409
51.3645647179
52.3752872096
53.237426602
30.9371968343
37.1046719428
40.2923155476
43.3712024509
45.5501659433
47.4929793209
49.1748787337
50.5836099055
51.7727852949
52.7556803676
53.5756956855
30.752872096
37.1087567016
40.5958641818
43.6816441154
45.9124329844
47.8787337248
49.4493234618
50.7707429155
51.9114117947
52.8432473832
53.6482001532
30.6951748787
37.2131733469
40.7829971917
43.8789890222
46.0656114373
48.0237426602
49.6270104672
50.8994128159
51.9609394945
52.8800102119
53.6433495022
30.5322951238
37.266530508
40.8172070462
43.7949961705
46.0449323462
47.9200919071
4

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

31.6466683686
38.2167475109
41.3418432474
43.9369415369
46.104161348
48.066632627
49.6979831504
51.0988001021
52.2994638754
53.3178452898
54.1935154455
31.4646413071
37.6977278529
40.5248914986
43.1942813378
45.2060250191
47.0030635691
48.5274444728
49.8149093694
50.8902221088
51.8192494256
52.6461577738
31.2869543018
37.3298442686
40.2527444473
43.1092672964
45.2269594077
47.1506254787
48.7347459791
50.0822057697
51.2941026296
52.292570845
53.1585396987
30.9762573398
37.1143732448
40.3507786571
43.3505233597
45.5874393669
47.5353586929
49.1753893286
50.50395711
51.6165432729
52.5805463365
53.4454940005
30.9126882818
37.1370947153
40.6310952259
43.6841970896
45.9280061271
47.8567781465
49.4434516211
50.76436048
51.8777125351
52.7829971917
53.6035231044
30.648710748
37.2900178708
40.9568547358
43.9193260148
46.1085014041
47.9971917284
49.5014041358
50.7689558335
51.8856267552
52.8072504468
53.6065866735
30.5070206791
37.2767424049
40.9971917284
43.9640030636
46.1909624713
48.139902987
4

HBox(children=(IntProgress(value=0, max=10), HTML(value='')))

31.6637732959
38.3627776359
41.5808016339
44.0275721215
46.2147051315
48.087822313
49.7046208833
51.1085014041
52.3009956599
53.2953791167
54.165943324
31.5310186367
37.7485320398
40.6448812867
43.1427112586
45.2698493745
47.0783763084
48.5427623181
49.767424049
50.778912433
51.720194026
52.5769721726
31.2706152668
37.3073780955
40.2496808782
43.2879754914
45.3678835844
47.0842481491
48.5825887159
49.9177942303
51.0528465662
52.0962471279
52.9793209089
31.0500382946
37.2014296656
40.5930559101
43.6193515445
45.7847842737
47.7309165177
49.3632882308
50.6954301762
51.8151646668
52.7638498851
53.5634414092
30.7653816696
37.0334439622
40.4212407455
43.5629308144
45.8782231299
47.9047740618
49.5297421496
50.8335460812
51.933367373
52.9050293592
53.7452131733
30.6425836099
37.165943324
40.781976002
43.7181516467
45.9257084503
47.9009446005
49.4863415879
50.8246106714
51.9795762063
52.9499617054
53.73168241
30.4572376819
37.2550421241
41.04059229
43.8968598417
46.1935154455
48.1600714833
49.7

In [None]:
# Test accuracy per epoch, averaged over the lambda sweep (axis 1 of test_acc_P);
# the remaining last axis gives one curve per repetition.
fig, ax = plt.subplots()
ax.plot(np.mean(test_acc_P,1))
ax.set_xlabel('epoch')
ax.set_ylabel('test accuracy (%)')
ax.set_title('Test accuracy, mean over lambdas')
ax.legend(['rep {}'.format(rep_idx) for rep_idx in range(test_acc_P.shape[2])])
plt.show()

In [None]:
def readtxt(txt_name = 'anna.txt'):
    """Read a text file and integer-encode it character by character.

    txt_name -- file name, resolved relative to the directory of this file when
                `__file__` exists and to the current working directory otherwise
                (a notebook kernel does not define `__file__`). An absolute path
                is used as given.

    Returns `(integer_encoded, categories)`:
      categories      -- sorted list of the distinct characters in the processed text
      integer_encoded -- torch.LongTensor holding, for each character of the
                         processed text, its index in `categories`

    Processing: blank lines (two consecutive newlines) become a single newline and
    every remaining single newline becomes a space. '&' is used as the temporary
    placeholder, so any '&' already present in the text also ends up as a newline.
    """
    try:
        dir_path = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        dir_path = os.getcwd()
    txt_file = os.path.join(dir_path,txt_name)
    # load the whole book; the context manager guarantees the handle is closed
    with open(txt_file) as handle:
        alltxt = handle.read()
    # remove newline formatting
    alltxt = alltxt.replace("\n\n", "&").replace("\n", " ").replace("&", "\n")
    # define categories
    categories = sorted(set(alltxt))
    # integer encode: each character maps to its position in the sorted category list
    index_of = {ch: idx for idx, ch in enumerate(categories)}
    integer_encoded = torch.LongTensor([index_of[ch] for ch in alltxt])
    return integer_encoded, categories

# def onehotencode(integer_encoded_batch,n_cat):
    
def get_next_batch(dat,batch_size):
    """Unfinished stub: meant to return `(x_hot, y_int)` for the next batch of `dat`.

    The original body had empty assignments and depended on an `onehotencode`
    helper that is only sketched in a comment, so the whole cell failed to parse.
    Until the batching logic is written, calling this raises NotImplementedError.
    """
    # TODO: select x_int / y_int from `dat`, one-hot encode x_int, return (x_hot, y_int)
    raise NotImplementedError(
        "get_next_batch is an unfinished stub; the batching and one-hot encoding logic has not been written")
    
    