In [None]:
%matplotlib inline

In [None]:
# Author: Robert Guthrie

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import preprocessing
import os
import pathlib

In [None]:
# Resolve the current user's home directory once; it is used below to build the data path.
# pathlib.Path.home() is platform independent (macOS, Linux, Windows).
SYS_HOME = str(pathlib.Path.home())

In [None]:
def make_batches(m, ss=20, row_len=None):
    """Cut a 2-D array into fixed-length windows of ``ss`` consecutive rows.

    For every start offset ``i`` in ``range(len(m) - ss)`` the rows ``m[i:]``
    are chopped into as many back-to-back, non-overlapping windows of ``ss``
    rows as fit. The windows of all offsets are stacked in offset order.

    Parameters
    ----------
    m : numpy.ndarray
        2-D array with one row per step.
    ss : int, default 20
        Number of rows per window (sequence size).
    row_len : int, optional
        Size of the last axis of the result; defaults to ``m.shape[1]``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_windows, ss, row_len)``. When ``len(m) <= ss`` no
        offset is visited and an empty ``(0, ss, row_len)`` array is returned.

    Notes
    -----
    NOTE(review): the offsets run over ``range(len(m) - ss)`` rather than
    ``range(ss)``, so a window starting at row ``s`` is emitted once for every
    visited offset ``i <= s`` with ``i % ss == s % ss`` - later windows occur
    several times in the result. This is kept unchanged for backward
    compatibility; confirm whether the duplication is intended.
    """
    rl = m.shape[1] if row_len is None else row_len
    n_rows = len(m)
    # One (k, ss, rl) chunk per offset. Concatenating the chunks directly avoids
    # round-tripping every scalar through a Python list.
    chunks = [
        m[start:start + ((n_rows - start) // ss) * ss].reshape(-1, ss, rl)
        for start in range(n_rows - ss)
    ]
    if not chunks:
        # Same result as reshaping an empty array: no windows at all.
        return np.zeros((0, ss, rl))
    return np.concatenate(chunks, axis=0)
    

In [None]:
# Here we define our model as a class
class LSTM(nn.Module):
    """LSTM regressor: an ``nn.LSTM`` stack followed by a linear read-out.

    Inputs are time-major, ``(seq_len, batch, input_dim)``, because the
    wrapped ``nn.LSTM`` is created with its default ``batch_first=False``.
    The prediction is computed from the LSTM output of the last time step.

    Parameters
    ----------
    input_dim : int
        Number of features per time step (input size of the LSTM).
    hidden_dim : int
        Size of the LSTM hidden state.
    hidden : tuple of (h_0, c_0), optional
        Initial state; when ``None`` a zero state is created on first use.
    output_dim : int, default 1
        Output size of the linear layer.
    num_layers : int, default 2
        Number of stacked LSTM layers.

    Notes
    -----
    The ``(h, c)`` state returned by a forward pass is stored on
    ``self.hidden`` and, detached from the graph, used as the initial state
    of the next call. Assign ``model.hidden = None`` to restart from zeros.
    A stored state whose batch size differs from the incoming batch is
    discarded and re-initialised instead of raising a shape error.
    """

    def __init__(self, input_dim, hidden_dim,hidden = None, output_dim=1,
                    num_layers=2):
        super(LSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.hidden = hidden
        # Define the LSTM layer
        self.lstm = nn.LSTM(self.input_dim, self.hidden_dim, self.num_layers)

        # Define the output layer
        self.linear = nn.Linear(self.hidden_dim, output_dim)

    def init_hidden(self, batch_size):
        """Return a zero ``(h_0, c_0)`` pair for ``batch_size`` sequences.

        The tensors use the dtype and device of the model parameters so the
        state stays compatible after ``model.to(...)`` / ``model.double()``.
        """
        ref = self.linear.weight
        shape = (self.num_layers, batch_size, self.hidden_dim)
        return (torch.zeros(shape, dtype=ref.dtype, device=ref.device),
                torch.zeros(shape, dtype=ref.dtype, device=ref.device))

    def forward(self, input):
        """Predict one value per sequence from its last time step.

        ``input`` is array-like of shape ``(seq_len, batch, input_dim)``
        (NumPy array or tensor). Returns a flattened 1-D tensor.
        """
        ref = self.linear.weight
        in_var = torch.as_tensor(input, dtype=ref.dtype, device=ref.device)
        batch_size = in_var.size(1)
        # (Re)create the state when there is none yet or when the stored one was
        # produced by a batch of a different size (e.g. train set, then test set).
        if self.hidden is None or self.hidden[0].size(1) != batch_size:
            self.hidden = self.init_hidden(batch_size)
        # Detach so gradients do not flow back into earlier forward passes.
        state = (self.hidden[0].detach(), self.hidden[1].detach())
        lstm_out, self.hidden = self.lstm(in_var, state)
        # Only take the output from the final timestep.
        # Can pass on the entirety of lstm_out to the next layer if it is a seq2seq prediction
        y_pred = self.linear(lstm_out[-1].view(batch_size, -1))
        return y_pred.view(-1)



In [None]:
# Synthetic data: a sine wave sampled every 4 degrees over `cycles` periods, plus Gaussian noise.
noise_level = .5
cycles = 300
# The sample count must be an int: np.linspace rejects a float `num`, hence floor division.
x_vals = np.linspace(0, 360*cycles, 360*cycles//4 + 1)
y_vals = np.sin(x_vals*np.pi/180)
y_vals = y_vals + np.random.randn(len(y_vals)) * noise_level
plt.plot(y_vals[:1000])

In [None]:
# Chop the noisy series into back-to-back windows of seq_len + 1 samples: the first
# seq_len samples of a window are the model input, the last sample is the target.
seq_len = 19
total_batches = int(len(y_vals) / (seq_len + 1))
y_vals_2d = y_vals[:total_batches * (seq_len + 1)].reshape(total_batches, seq_len + 1)

# (window, step, 1) -> (step, window, 1), then keep only the first seq_len steps.
x_in = y_vals_2d.reshape(total_batches, seq_len + 1, 1).transpose(1, 0, 2)[:seq_len, :, :]
y_in = y_vals_2d[:, -1].reshape(-1)

# Hold out the last `tests` windows for evaluation.
tests = 50
train = total_batches - tests
X_train, X_test = x_in[:, :train, :], x_in[:, train:, :]
y_train, y_test = y_in[:train], y_in[train:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape


In [None]:
# Sanity check: size of the training input once converted to a torch tensor.
torch.Tensor(X_train).size()

In [None]:
lstm_input_size = X_train.shape[2]
h1 = 64
output_dim = 1
num_layers = 1
model = LSTM(lstm_input_size, h1,  output_dim=output_dim, num_layers=num_layers)

num_epochs = 500
learning_rate  =.02
# reduction='sum' is the supported spelling of the deprecated size_average=False.
# The loss is therefore the SUM of squared errors over the batch, not the mean,
# even though the progress line below labels it "MSE".
loss_fn = torch.nn.MSELoss(reduction='sum')

optimiser = torch.optim.Adam(model.parameters(), lr=learning_rate)

#####################
# Train model
#####################

hist = np.zeros(num_epochs)  # loss value recorded for every epoch
yt = torch.tensor(y_train,dtype=torch.float32)
for t in range(num_epochs):
    # Forward pass over the whole training set (a single batch per epoch)
    y_pred = model(X_train)
    loss = loss_fn(y_pred, yt)
    loss_value = loss.item()
    if t % 20 == 0:
        print("Epoch ", t, "MSE: ", loss_value)
    hist[t] = loss_value

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()


In [None]:
# Evaluate on the held-out windows. With a good fit the two curves overlap so closely
# that the plot looks like a single line.
# Drop the state kept from the previous forward call: it has that call's batch size.
model.hidden = None
with torch.no_grad():  # inference only, no autograd graph needed
    y_t = model(X_test).numpy()
plt.plot(y_t.round(5))
plt.plot(y_test.round(5))



In [None]:

# Read the USO bars from the market-data folder under the user's home directory.
md_folder = SYS_HOME + '/Dropbox/market_data'
uso_path = f'{md_folder}/stocks/uso.csv'
df_uso = pd.read_csv(uso_path)

# Split the `timestamp` string into integer date/time columns by fixed character positions
# (assumes a 'YYYY-MM-DD HH:MM...' layout - TODO confirm against the CSV).
df_uso = df_uso.assign(**{
    part: df_uso.timestamp.str.slice(lo, hi).astype(int)
    for part, (lo, hi) in (
        ('year', (0, 4)),
        ('month', (5, 7)),
        ('day', (8, 10)),
        ('hour', (11, 13)),
        ('minute', (14, 16)),
    )
})


In [None]:
# List the column names available in the raw USO frame.
df_uso.columns.values

In [None]:
# Keep the rows whose `tradingDay` string contains '2018' ...
df_uso_2018 = df_uso.loc[df_uso.tradingDay.str.contains('2018')]
# ... and, of those, the rows with 7 <= hour <= 17 (both bounds inclusive).
df_uso_2018 = df_uso_2018.loc[df_uso_2018.hour.between(7, 17)]
# Disabled alternative: model percentage changes instead of raw values.
# for c in ['close','open','high','low','volume']:
#     df_uso_2018[c] = df_uso_2018[c].pct_change()
# df_uso_2018 = df_uso_2018.iloc[1:]

In [None]:
def getm(dfgb):
    """Batch the rows of one group with ``make_batches``.

    Prints the ``year``/``month``/``day`` values of the group's first row as a
    progress marker, then returns ``make_batches`` applied to the group's
    values as a NumPy array.

    ``DataFrame.as_matrix()`` no longer exists in current pandas, so the
    conversion uses ``to_numpy()``.
    """
    print(dfgb[['year','month','day']].iloc[0].to_numpy())
    return make_batches(dfgb.to_numpy())
# Columns handed to the batching step; year/month/day double as the grouping keys.
df2018 = df_uso_2018.loc[:, ['close', 'open', 'high', 'low', 'volume',
                             'year', 'month', 'day', 'hour', 'minute']]
# One entry per (year, month, day) group, each holding that group's batches from getm.
df_matrices = df2018.groupby(['year', 'month', 'day']).apply(getm)

In [None]:
# Spot check one group: its raw shape next to the shape of its batches.
# (to_numpy() replaces DataFrame.as_matrix(), which current pandas no longer provides.)
g = df2018.groupby(['year','month','day']).get_group((2018, 1, 2))
g.shape,make_batches(g.to_numpy()).shape

In [None]:
# Flatten the per-group batch arrays into one array holding every window.
mm = np.array([window for group_batches in df_matrices.values for window in group_batches])
mm.shape

In [None]:
# Quick visual check of the `close` column of the filtered 2018 frame.
plt.plot(df_uso_2018.close)

In [None]:
# Feature matrix built from the filtered 2018 frame (note: no `year` column here).
# to_numpy() replaces DataFrame.as_matrix(), which current pandas no longer provides.
m = df_uso_2018[['close','open','high','low','volume','month','day','hour','minute']].to_numpy()


In [None]:
# Sequence size: number of consecutive rows per window. Later cells use all but the
# last row of a window as model input and the last row as the target.
ss = 20

In [None]:

# Build windows of `ss` rows over the matrix `m`, using the same slicing expression
# as make_batches.
inp_len = m.shape[1]
x = [m[i:(int((len(m)-i)/ss)*ss)+i].reshape(-1,ss,inp_len) for i in range(len(m)-ss)]
xx = []
for i in range(len(x)):
    if i % 5000 == 0:
        print(i)
        # NOTE(review): this extend sits inside the `if`, so only every 5000th offset
        # contributes windows. make_batches extends for every offset - confirm whether
        # this subsampling is intentional or an indentation slip.
        xx.extend(x[i].reshape(-1))
input_batches = np.array(xx).reshape(-1,ss,inp_len)
# Optional transform (off by default): replace the first five columns by step-over-step
# relative changes; this drops the first step of every window.
use_pct_change = False
if use_pct_change:
    a = input_batches[:,1:,:5]/input_batches[:,0:-1,:5] - 1
    input_batches = np.concatenate((a,input_batches[:,1:,5:]),axis=2)
input_batches.shape

In [None]:
# Work on a copy of `mm` from here on; this rebinds `input_batches`, replacing whatever
# an earlier cell assigned to that name.
input_batches = mm.copy()

In [None]:
# Column of the last step that the model should predict.
feature_to_predict = 0
# True: feed every column to the model; False: feed only the first column (like the close).
use_all_features=True
# Inputs are all steps except the last, rearranged from (window, step, feature)
# to (step, window, feature).
X_vals = (
    input_batches[:, :-1, :] if use_all_features else input_batches[:, :-1, :1]
).transpose(1, 0, 2)
# Target: the chosen column at the last step of every window.
y_vals = input_batches[:, -1, feature_to_predict]

In [None]:
# Shapes of the model inputs and targets.
X_vals.shape,y_vals.shape

In [None]:
# Contiguous split along the window axis: the first `train_size` windows train the
# model, the following `test_size` windows are held out.
train_size = 100000
test_size = 100
X_train, y_train = X_vals[:, :train_size, :], y_vals[:train_size]
X_test = X_vals[:, train_size:train_size + test_size, :]
y_test = y_vals[train_size:train_size + test_size]
X_train.shape, y_train.shape, X_test.shape, y_test.shape

In [None]:

lstm_input_size = X_train.shape[2]
h1 = 64
output_dim = 1
num_layers = 1
model = LSTM(lstm_input_size, h1,  output_dim=output_dim, num_layers=num_layers)

num_epochs = 150
init_lr  = .01
min_lr = .01  # only referenced by the disabled learning-rate decay at the end of the loop
# reduction='sum' is the supported spelling of the deprecated size_average=False.
# The loss is therefore the SUM of squared errors over the batch, not the mean,
# even though the progress line below labels it "MSE".
loss_fn = torch.nn.MSELoss(reduction='sum')

optimiser = torch.optim.Adam(model.parameters(), lr=init_lr)

#####################
# Train model
#####################

hist = np.zeros(num_epochs)  # loss value recorded for every epoch
# Plain float32 tensor; the deprecated Variable wrapper is not needed.
yt = torch.tensor(y_train, dtype=torch.float32)

lowest_loss = np.finfo('d').max

for t in range(num_epochs):
    # Forward pass over the whole training set (a single batch per epoch)
    y_pred = model(X_train)

    loss = loss_fn(y_pred, yt)
    loss_value = loss.item()
    print("Epoch ", t, "MSE: ", loss_value)
    hist[t] = loss_value

    # Zero out gradient, else they will accumulate between epochs
    optimiser.zero_grad()

    # Backward pass
    loss.backward()

    # Update parameters
    optimiser.step()

    # Track the best loss seen so far
    if loss_value < lowest_loss:
        lowest_loss = loss_value
    # Disabled learning-rate decay:
#     if t % 5 == 0 and t > 0:
#         for param_group in optimiser.param_groups:
#             if param_group['lr'] > min_lr:
#                 param_group['lr'] = param_group['lr'] * 0.9
    


In [None]:
# Size of the training target tensor; swap in the line below to inspect the predictions.
# y_pred.size()
yt.size()

In [None]:
# Toy data set: consecutive integers starting at `beg_seq`, cut into `num_seq` rows of
# `seq_len` values. All but the last value of a row are the input, the last is the target.
beg_seq = 1
seq_len = 4
num_seq = 200000

# arange(beg, beg + n) yields exactly n = seq_len * num_seq values for any beg_seq.
seqs = np.arange(beg_seq, beg_seq + seq_len*num_seq).reshape(-1, seq_len)
x_in = seqs[:,:-1]
y_in = seqs[:,-1:]

In [None]:
# Shapes of the inputs and targets.
x_in.shape,y_in.shape

In [None]:
# Simulated price path: start at y_init and compound random per-step percentage changes.
yearly_std = .2
# Scale the yearly figure down by sqrt(252) (presumably trading days per year - confirm).
daily_std = yearly_std / 252**.5
y_init = 100
steps = 100
pct_changes = np.random.choice([-1,1],size=(steps)) * np.random.randn(steps)* daily_std
# Compounded path:
#   vals[0] = y_init
#   vals[k] = vals[k-1] * (1 + c_k) = y_init * (1+c_1) * ... * (1+c_k)
vals = y_init * np.concatenate(([1.0], np.cumprod(1 + pct_changes[:steps-1])))
plt.plot(vals)


In [None]:
# Single-feature variant: only the first column is used, both as input (all steps but
# the last, rearranged to step-major order) and as target (last step).
X_vals = np.transpose(input_batches[:, :-1, :1], (1, 0, 2))
y_vals = input_batches[:, -1, 0]

In [None]:
# Batch the matrix `m` with make_batches' default window length and inspect the shape.
x2 = make_batches(m)
x2.shape

In [None]:
# Scratch arithmetic: both tuple entries are the same quantity with the division by 36
# grouped differently, i.e. 3012.79 / 36 / (33138.65 + 24180) * 2400.
(3012.79/36)/((33138.65+24180)) * 2400,(3012.79)/((33138.65+24180)*36) * 2400

In [None]:
# Scratch: the terms a * r**i for i = 1..12.
# NOTE(review): `a` is also assigned inside the `use_pct_change` branch of an earlier
# cell; this cell rebinds the name to a scalar.
r = .12
a = 1
[a*r**i for i in range(1,13)]

In [None]:
# Scratch: 2/12 as a decimal.
2/12

In [None]:
# Scratch: the first terms of the sequence 1 - i/12 ...
[1-1/12*0,1-1/12,1-1/12*2,1-1/12*3]
# ... and two related figures, shown as a tuple. The original line put two expressions
# side by side without a separator (a syntax error) and wrote `1(...)` for a product.
# First entry: 12 - 11/2, which equals the sum of 1 - i/12 for i = 0..11.
# Second entry: (12*11/2)/12 = 11/2.
12*1 - 1*11/2, 1*(12*11/2)/12

In [None]:
# Scratch: linearly decreasing weights (n - i) / n for i = 0..n.
n = 5
s = list(map(lambda i: (n - i) / n, range(n + 1)))
# Mean weight, number of weights, the same weights computed with NumPy, and the list itself.
sum(s) / len(s), len(s), 1/n * np.arange(n, -1, -1), s