In [1]:
import warnings
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 

In [2]:
def create_time_stepped_data(df, time_steps,num_features):
    d = []
    for i in range(time_steps):
        d.append(df.shift(-i).values[:-time_steps].reshape(-1,num_features+1))
    return np.transpose(np.array(d),(1,0,2))

In [3]:
def create_formatted_data(df, time_steps, num_features, fut_type=1):
    assert (time_steps%2 !=0), "Time steps should be odd!"
    d = create_time_stepped_data(df, time_steps, num_features)
    
    past = d[:,:int(time_steps/2),:]
    if(fut_type == 1):
        fut = np.flip(d[:,int(time_steps/2)+1:,:],1)
    else:
        fut = np.flip(d[:,:int(time_steps/2),:],1)
    y = d[:,int(time_steps/2),-1]
    if(num_features == 0):
        cur = np.zeros((d.shape[0],1,256))
    else:
        cur = d[:,int(time_steps/2),:-1]
        cur = np.dstack((cur.reshape(-1,1,num_features),np.zeros(shape=(d.shape[0],1,256-num_features))))
#     for i in range(past.shape[0]):
#         for j in range(past.shape[1]):
#             past[i][j] = (past[i][j] - df.mean())/df.var()
#             fut[i][j] = (fut[i][j] - df.mean())/df.var()
    return past,fut,y

In [4]:
def train_test_split(past,fut,y,df):
    train_split = int(0.8*y.shape[0])
    
    #'df' is a pd series.
#     mx = df.iloc[:train_split].max()
#     mn = df.iloc[:train_split].min()
    
#     return (past[:train_split]-mn)/(mx-mn), (past[train_split:]-mn)/(mx-mn), (fut[:train_split]-mn)/(mx-mn), np.zeros_like(past[train_split:]), y[:train_split], y[train_split:]
    return past[:train_split], past[train_split:], fut[:train_split], np.zeros_like(past[train_split:]), y[:train_split], y[train_split:]

In [5]:
class Net(nn.Module):
    
    def __init__(self, past_shape, fut_shape, hidden_dim):
        super(Net,self).__init__()
        self.hidden_dim = hidden_dim
        self.conv1_p = nn.Conv1d(past_shape[1],32,5)
        self.conv2_p = nn.Conv1d(32,32,3)
        self.lin1_p = nn.Linear(32,16)
        
        self.conv1_f = nn.Conv1d(fut_shape[1],32,5)
        self.conv2_f = nn.Conv1d(32,32,3)
        self.lin1_f = nn.Linear(32,16)
                
        self.lstm1 = nn.LSTM(16,self.hidden_dim,dropout=0.2)
        self.lin1 = nn.Linear(self.hidden_dim,16)
        self.lstm2 = nn.LSTM(16,self.hidden_dim,dropout=0.2)
        self.lin2 = nn.Linear(self.hidden_dim,16)
        self.lstm3 = nn.LSTM(16,self.hidden_dim,dropout=0.2)
        self.lin3 = nn.Linear(self.hidden_dim,16)
        
        self.fc1 = nn.Linear(256,64)
        self.fc2 = nn.Linear(64,64)
        self.fc3 = nn.Linear(64,64)
        self.fc4 = nn.Linear(64,64)
        self.fc5 = nn.Linear(64,64)
        self.fc6 = nn.Linear(64,64)
        self.fc7 = nn.Linear(64,1)
        self.hidden = self.init_hidden()
        
    def init_hidden(self):
        return (torch.autograd.Variable(torch.zeros(1, 8, self.hidden_dim).cuda()),
                torch.autograd.Variable(torch.zeros(1, 8, self.hidden_dim).cuda()))

    def forward(self, past, fut):
        conv_p = F.relu6(self.conv1_p(past))
        conv_p = F.relu6(self.conv2_p(conv_p))
        conv_p = conv_p.view(-1,conv_p.size()[2],conv_p.size()[1])
        conv_p = F.relu6(self.lin1_p(conv_p))
        
        conv_f = F.relu6(self.conv1_f(fut))
        conv_f = F.relu6(self.conv2_f(conv_f))
        conv_f = conv_f.view(-1,conv_f.size()[2],conv_f.size()[1])
        conv_f = F.relu6(self.lin1_f(conv_f))
        
        lstm_inp = torch.cat([conv_p,conv_f],dim=1)        
        
        lstm_out1, self.hidden = self.lstm1(lstm_inp,self.hidden)
        lstm_out1 = F.relu6(self.lin1(lstm_out1))
        lstm_out2, self.hidden = self.lstm1(lstm_out1,self.hidden)
        lstm_out2 = F.relu6(self.lin2(lstm_out2))
        lstm_out3, self.hidden = self.lstm1(lstm_out2,self.hidden)
        lstm_out3 = F.relu6(self.lin3(lstm_out3))
        
        dnn_inp = torch.cat([lstm_inp,lstm_out3],dim=1)
        dnn_inp = dnn_inp.view(-1,dnn_inp.size()[1]*dnn_inp.size()[2])
        
        out = F.relu6(self.fc1(dnn_inp))
        out = F.relu6(self.fc2(out))
        out = F.relu6(self.fc3(out))
        out = F.relu6(self.fc4(out))
        out = F.relu6(self.fc5(out))
        out = F.relu6(self.fc6(out))
        out = F.relu6(self.fc7(out))
                
        return out

In [6]:
stocks = ['AMZN','FB','WMT']
data = pd.read_csv("all_stocks_5yr.csv")
data = data[data['Name'] == stocks[2]][['close']].reset_index(drop=True)

In [7]:
time_steps = 21
num_features = 0
past,fut,y = create_formatted_data(data, time_steps, num_features)
past_train, past_test, fut_train, fut_test, y_train, y_test = train_test_split(past,fut,y,data)

In [8]:
past_train = np.transpose(past_train,(0,2,1))
fut_train = np.transpose(fut_train,(0,2,1))

past_test = np.transpose(past_test,(0,2,1))
fut_test = np.transpose(fut_test,(0,2,1))

In [9]:
past_train1 = torch.autograd.Variable(torch.from_numpy(past_train.copy())).float()
fut_train1 = torch.autograd.Variable(torch.from_numpy(fut_train.copy())).float()
y_train1 = torch.autograd.Variable(torch.from_numpy(y_train.copy())).float()

past_test1 = torch.autograd.Variable(torch.from_numpy(past_test.copy())).float()
fut_test1 = torch.autograd.Variable(torch.from_numpy(fut_test.copy())).float()
y_test1 = torch.autograd.Variable(torch.from_numpy(y_test.copy())).float()

In [13]:
model = Net(past_train1.size(),fut_train1.size(),32)
model.cuda()
criterion = nn.L1Loss()
optimizer = torch.optim.(model.parameters(), lr=0.1)
batch_size = 32

In [14]:
model.parameters

<bound method Module.parameters of Net(
  (conv1_p): Conv1d (1, 32, kernel_size=(5,), stride=(1,))
  (conv2_p): Conv1d (32, 32, kernel_size=(3,), stride=(1,))
  (lin1_p): Linear(in_features=32, out_features=16)
  (conv1_f): Conv1d (1, 32, kernel_size=(5,), stride=(1,))
  (conv2_f): Conv1d (32, 32, kernel_size=(3,), stride=(1,))
  (lin1_f): Linear(in_features=32, out_features=16)
  (lstm1): LSTM(16, 32, dropout=0.2)
  (lin1): Linear(in_features=32, out_features=16)
  (lstm2): LSTM(16, 32, dropout=0.2)
  (lin2): Linear(in_features=32, out_features=16)
  (lstm3): LSTM(16, 32, dropout=0.2)
  (lin3): Linear(in_features=32, out_features=16)
  (fc1): Linear(in_features=256, out_features=64)
  (fc2): Linear(in_features=64, out_features=64)
  (fc3): Linear(in_features=64, out_features=64)
  (fc4): Linear(in_features=64, out_features=64)
  (fc5): Linear(in_features=64, out_features=64)
  (fc6): Linear(in_features=64, out_features=64)
  (fc7): Linear(in_features=64, out_features=1)
)>

In [15]:
overall_loss = 0.0
model.hidden = model.init_hidden()

for e in range(10):
    running_loss = 0.0

    print("-----------------------------------------------------------------------------------------------")    
    for i in range(0,past_train1.size(0),batch_size):
        # get the inputs
        past, fut, labels = past_train1[i:i+batch_size].cuda(), fut_train1[i:i+batch_size].cuda(), y_train1[i:i+batch_size].cuda()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(past,fut)
        loss = criterion(outputs, labels)
        
        loss.backward(retain_graph=True)
        optimizer.step()
        
        overall_loss += loss.data
        batch_loss = loss.data
        # print statistics
        print('batch loss [%d %5d] loss: %.3f' % (e+1, i+1, batch_loss / batch_size))
        
#         plt.figure(figsize=(10,10))
#         plt.plot(outputs.data.cpu().numpy(),label='pred')
#         plt.plot(labels.data.cpu().numpy(),label='true')
#         plt.legend()
#         plt.show()
        
    print('Epochs %d loss: %.5f' % (e + 1, overall_loss / ((e+1)*past_train1.size(0))))
    print("-----------------------------------------------------------------------------------------------")

print('Finished Training')

-----------------------------------------------------------------------------------------------
batch loss [1     1] loss: 2.310
batch loss [1    33] loss: 2.441
batch loss [1    65] loss: 2.345
batch loss [1    97] loss: 2.382
batch loss [1   129] loss: 2.291
batch loss [1   161] loss: 2.393
batch loss [1   193] loss: 2.433
batch loss [1   225] loss: 2.296
batch loss [1   257] loss: 2.331
batch loss [1   289] loss: 2.376
batch loss [1   321] loss: 2.319
batch loss [1   353] loss: 2.274
batch loss [1   385] loss: 2.305
batch loss [1   417] loss: 2.396
batch loss [1   449] loss: 2.551
batch loss [1   481] loss: 2.489
batch loss [1   513] loss: 2.347
batch loss [1   545] loss: 2.219
batch loss [1   577] loss: 2.085
batch loss [1   609] loss: 1.971
batch loss [1   641] loss: 1.799
batch loss [1   673] loss: 1.649
batch loss [1   705] loss: 1.737
batch loss [1   737] loss: 1.886
batch loss [1   769] loss: 1.953
batch loss [1   801] loss: 1.964
batch loss [1   833] loss: 2.087
batch loss [1

KeyboardInterrupt: 