# TRAINING RECURRENT NEURAL NETWORKS

In [None]:
# !pip install import_ipynb --quiet
# !git clone https://github.com/gmshroff/aml-public.git
# %cd aml-public

In [None]:
import import_ipynb
import utils
import models
from IPython import display
import torch
from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
import pickle
import numpy as np
import pandas as pd

In [None]:
from course_data import MyDS, TsDS

Financial Time Series

In [None]:
# Unpickle the training dataset from disk. pickle can execute arbitrary code on load,
# so only use files from a trusted source.
with open('../data/feed_train_ds.pickle','rb') as train_file:
    train_ds=pickle.load(train_file)

In [None]:
# Unpickle the test dataset from disk. pickle can execute arbitrary code on load,
# so only use files from a trusted source.
with open('../data/feed_test_ds.pickle','rb') as test_file:
    test_ds=pickle.load(test_file)

In [None]:
# Inspect the shape of one training sample (index 5); the value is shown as the cell output.
train_ds.samples[5].shape

Recurrent network: LSTM

In [None]:
# Build the LSTM model: 149 input features, 64 hidden units, 3 outputs, learning rate 1e-3.
net = models.SimpleLSTM(
    input_size=149,
    hidden_size=64,
    output_size=3,
    lr=1e-3,
)

In [None]:
# Train for 10 epochs on train_ds; Train returns three values, bound here as net, losses and accs.
net, losses, accs = models.Train(
    net,
    train_ds,
    epochs=10,
    verbose=True,
)

In [None]:
# Evaluate the trained network on the training dataset; the result is shown as the cell output.
models.accuracy_variable(net,train_ds)

In [None]:
# Evaluate the trained network on the test dataset; the result is shown as the cell output.
models.accuracy_variable(net,test_ds)

In [None]:
# Inspect the shape of the first training sample.
train_ds.samples[0].shape

Sinusoidal time series data

In [None]:
# Generate the sinusoidal-mix datasets plus a loader.
# NOTE: this rebinds train_ds and test_ds, replacing the datasets loaded from the pickles above.
train_ds, test_ds, dloader = utils.sinDataset(
    n_samples=100,
    length=20,
    n_classes=5,
    batch_size=1,
)

Recurrent network: RNN

In [None]:
# RNN that processes a sequence. With the default 1-D setting the input may be given as
# (batch, seq_vals); otherwise the input needs an extra dimension, i.e. (batch, seq, features).
net = models.SimpleRNN(
    input_size=1,
    hidden_size=32,
    output_size=5,
    lr=1e-3,
)

In [None]:
# Train for 50 epochs from the data loader. The network is trained in place,
# so running this cell again continues training the same model.
net, losses, accs = models.Train(
    net,
    dloader,
    epochs=50,
    verbose=True,
)

Training/Test Accuracies

In [None]:
# Give every training sample and label two leading singleton axes, then join them along dim 0.
s_tr=torch.cat([sample[None,None] for sample in train_ds.samples],dim=0)
l_tr=torch.cat([label[None,None] for label in train_ds.labels],dim=0)

In [None]:
# Wrap the stacked training samples and labels in a MyDS dataset for evaluation.
newtr=MyDS(s_tr,l_tr)

In [None]:
# Evaluate the trained RNN on the re-wrapped training set; the result is shown as the cell output.
models.accuracy_variable(net,newtr)

In [None]:
# Give every test sample and label two leading singleton axes, then join them along dim 0.
s_te=torch.cat([sample[None,None] for sample in test_ds.samples],dim=0)
l_te=torch.cat([label[None,None] for label in test_ds.labels],dim=0)

In [None]:
# Wrap the stacked test samples and labels in a MyDS dataset for evaluation.
newte=MyDS(s_te,l_te)

In [None]:
# Evaluate the trained RNN on the re-wrapped test set; the result is shown as the cell output.
models.accuracy_variable(net,newte)

Recurrent network: LSTM

In [None]:
# Replace net with an LSTM using the same sizes as the RNN above: 1 input, 32 hidden units, 5 outputs.
net = models.SimpleLSTM(
    input_size=1,
    hidden_size=32,
    output_size=5,
    lr=1e-3,
)

In [None]:
# Train for 50 epochs from the data loader. The network is trained in place,
# so running this cell again continues training the same model.
net, losses, accs = models.Train(
    net,
    dloader,
    epochs=50,
    verbose=True,
)

In [None]:
# Evaluate the trained LSTM on newtr (the wrapped training set built earlier).
models.accuracy_variable(net,newtr)

In [None]:
# Evaluate the trained LSTM on newte (the wrapped test set built earlier).
models.accuracy_variable(net,newte)

Transformer (Encoder) on Financial Time Series

In [None]:
# Transformer model sized for the 149-feature series: hidden size 128, 3 outputs, 3 layers.
net = models.Transformer(
    input_size=149,
    hidden_size=128,
    output_size=3,
    lr=1e-3,
    num_layers=3,
)

In [None]:
net,losses,accs=models.Train(net,train_ds,epochs=10,verbose=True)

In [None]:
models.accuracy_variable(net,train_ds)

In [None]:
models.accuracy_variable(net,test_ds)

Transformer (Encoder) on Sinusoidal Time Series

In [None]:
# Transformer model for the sinusoidal series: 1 input, hidden size 32, 5 outputs, 3 layers.
net = models.Transformer(
    input_size=1,
    hidden_size=32,
    output_size=5,
    lr=1e-3,
    num_layers=3,
)

In [None]:
# Give every training sample and label two leading singleton axes, then join them along dim 0.
s_tr=torch.cat([sample[None,None] for sample in train_ds.samples],dim=0)
l_tr=torch.cat([label[None,None] for label in train_ds.labels],dim=0)

In [None]:
# Wrap the stacked training samples and labels in a MyDS dataset (used for training and evaluation below).
newtr=MyDS(s_tr,l_tr)

In [None]:
# Train the Transformer for 20 epochs directly on the MyDS-wrapped training set.
net, losses, accs = models.Train(
    net,
    newtr,
    epochs=20,
    verbose=True,
)

In [None]:
# Evaluate the trained Transformer on the wrapped training set.
models.accuracy_variable(net,newtr)

In [None]:
# Give every test sample and label two leading singleton axes, then join them along dim 0.
s_te=torch.cat([sample[None,None] for sample in test_ds.samples],dim=0)
l_te=torch.cat([label[None,None] for label in test_ds.labels],dim=0)

In [None]:
# Wrap the stacked test samples and labels in a MyDS dataset for evaluation.
newte=MyDS(s_te,l_te)

In [None]:
# Evaluate the trained Transformer on the wrapped test set.
models.accuracy_variable(net,newte)

Sanity check

In [None]:
# Recount test accuracy by hand, printing (label, predicted class) for every sample.
# The network is evaluated once per sample, so the printed prediction is exactly the
# one that gets counted (two separate forward passes could disagree if the model is
# stochastic, e.g. dropout left in training mode).
correct=0
with torch.no_grad():  # inference only; no autograd graph is needed
    for s in newte:
        label=s[1].item()
        pred=torch.argmax(net(s[0])).item()
        print(label, pred)
        if label==pred: correct+=1

In [None]:
# Fraction of newte items counted as correct by the loop above.
correct/len(newte)

Plots

In [None]:
# Overlay at most N training curves whose label is 0, plotting the [0,:,1] slice of each sample.
# NOTE(review): the three-index access needs 3-D samples; train_ds is rebound by the
# sinDataset cell earlier in the notebook, so confirm which dataset this cell expects.
i,N=0,20
for s,l in zip(train_ds.samples,train_ds.labels):
    if i>=N: break  # stop once N curves are drawn (the previous `i>N` test drew N+1)
    if l==0:
        plt.plot(s[0,:,1])
        i+=1
plt.show()

In [None]:
# Inspect the shape of training sample 20.
train_ds.samples[20].shape

In [None]:
# Draw every training sample on one shared figure; the paired label is not used.
for sample,_label in zip(train_ds.samples,train_ds.labels):
    plt.plot(sample)
plt.show()

In-context learning (ICL) as gradient descent (GD)

In [None]:
# Problem sizes: W is r x m, X is m x n, Y is r x n.
m=3
n=4
r=1
# Random leaf tensors that track gradients (drawn in the order W, X, Y).
W=torch.randn((r,m),requires_grad=True)
X=torch.randn((m,n),requires_grad=True)
Y=torch.randn((r,n),requires_grad=True)

In [None]:
# Squared-error loss (1/(2n)) * (WX - Y)(WX - Y)^T, kept as a tensor so autograd can differentiate it.
half_inv_n=1/(2*n)
L=half_inv_n*(W@X-Y)@(W@X-Y).T

In [None]:
# Display the loss at the initial W.
L

In [None]:
# Gradient of the loss with respect to W; autograd.grad returns a one-element tuple here.
(DW,)=torch.autograd.grad(outputs=L,inputs=W)

In [None]:
# Display the gradient of L with respect to W.
DW

In [None]:
# Stack X on top of Y along the row axis, giving an (m+r) x n matrix.
Z=torch.cat((X,Y),dim=0)

In [None]:
def LSA(Z,WK,WQ,WV,P):
    """Softmax-free (linear) self-attention style update with a residual connection.

    Returns Z + P @ WV @ Z @ (WK @ Z).T @ (WQ @ Z), with the matrix products
    evaluated left to right.
    """
    keys=WK@Z
    queries=WQ@Z
    values=P@WV@Z
    return Z+values@keys.T@queries

In [None]:
# (m+r) x (m+r) matrix with an m x m identity in the top-left corner and zeros elsewhere.
U=torch.zeros(m+r,m+r)
U[:m,:m]=torch.eye(m)

In [None]:
# Key and query weights both refer to the same matrix U.
WK=U
WQ=U

In [None]:
# Value weights: the first m rows are zero and the last r rows are [W, -I],
# so WV @ Z has W@X - Y in its last r rows.
top_rows=torch.zeros(m,m+r)
bottom_rows=torch.cat((W,-torch.eye(r)),dim=1)
WV=torch.cat((top_rows,bottom_rows),dim=0)

In [None]:
# Output projection: the (m+r) x (m+r) identity scaled by 1/n.
inv_n=1/n
P=inv_n*torch.eye(m+r)

In [None]:
# Apply one LSA update to Z with the hand-constructed weights.
Z_new=LSA(Z=Z,WK=WK,WQ=WQ,WV=WV,P=P)

In [None]:
# Display the updated matrix returned by LSA.
Z_new

In [None]:
# Y shifted by DW@X; compare with the last row of Z_new displayed above.
Y+DW@X

In [None]:
# One gradient-descent step on W with step size 1.
W_new=W-DW

In [None]:
# Display the weights after the gradient step.
W_new

In [None]:
# Display the original weights for comparison with W_new.
W

In [None]:
L_new=L=(1/2*n)*(W_new@X-Y)@(W_new@X-Y).T

In [None]:
L_new

In [None]:
Y_new=Z_new[-1,:]

In [None]:
L_new_hat=(1/2*n)*(W@X-Y_new)@(W@X-Y_new).T

In [None]:
# Display the loss computed from the LSA-updated targets; compare with L_new.
L_new_hat