In [None]:
# Clear and reset the workspace
%reset -f

### Import necessary libraries 
import pandas as pd
import random
from collections import deque
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import time
import xlwt 
from xlwt import Workbook 

In [None]:
# Putting the labels
def classify(FREQUENCY, classes, all_available_data_df, future_period=None):
    """Label every row with the class of its future percentage price change.

    The change is measured between a row's 'Close' and the 'Close' found
    ``future_period`` rows later, expressed in percent, and bucketed into
    half-open intervals ``(low, high]``. Bucket ``k`` is labelled
    ``classes[k]``.

    Parameters
    ----------
    FREQUENCY : str
        Selects the bucket boundaries. Only '1Min' is defined.
    classes : sequence
        One label per bucket, ordered from the most negative change to the
        most positive one (ten buckets for '1Min').
    all_available_data_df : pandas.DataFrame
        Must contain a 'Close' column. It is modified in place: a 'Target'
        column is added and every row containing a NaN is dropped.
    future_period : int, optional
        Look-ahead distance in rows. When omitted, the module-level
        ``FUTURE_PERIOD_PREDICT`` is used (the original behaviour).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    ValueError
        If no boundaries are defined for ``FREQUENCY``.
    """
    if future_period is None:
        future_period = FUTURE_PERIOD_PREDICT  # notebook-level default

    inner_boundaries = {
        '1Min': [-0.60, -0.45, -0.30, -0.15, 0.00, 0.15, 0.30, 0.45, 0.60],
    }
    if FREQUENCY not in inner_boundaries:
        raise ValueError(
            f"No class boundaries defined for FREQUENCY={FREQUENCY!r}; "
            f"known frequencies: {sorted(inner_boundaries)}"
        )
    # Outer edges of -100 % / +100 % close the first and the last bucket.
    bin_edges = [-100] + inner_boundaries[FREQUENCY] + [100]

    close = all_available_data_df['Close']
    perc_difference = (close.shift(-future_period) - close) / close * 100

    # pd.cut keeps the row index, so a value that falls in no bucket (NaN from
    # the look-ahead shift, or a move outside (-100, 100]) becomes NaN on its
    # own row instead of shifting every later label onto the wrong row.
    bucket = pd.cut(perc_difference, bins=bin_edges, right=True, labels=False)
    target = bucket.map(lambda k: classes[int(k)], na_action='ignore')

    all_available_data_df['Target'] = target
    all_available_data_df.dropna(inplace=True)

    return all_available_data_df

In [None]:
# Prepare the data for the learning
def preprocess_df(df, SEQ_LEN, classes):
    """Turn a labelled frame into class-balanced, shuffled training sequences.

    Steps:
      1. z-score every column except 'Target' (on a copy; the caller's frame
         is left untouched),
      2. slide a window of ``SEQ_LEN`` consecutive rows over the frame and
         pair each full window with the one-hot label of its last row,
      3. keep the same number of windows for every class that produced at
         least one window, then shuffle.

    Parameters
    ----------
    df : pandas.DataFrame
        Feature columns plus a 'Target' column. 'Target' must be the LAST
        column and hold integer class indices in ``range(len(classes))``.
    SEQ_LEN : int
        Number of consecutive rows per sequence.
    classes : sequence
        Class labels; only its length is used (width of the one-hot vector).

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``X`` with shape (n, SEQ_LEN, n_features) and one-hot ``y`` with shape
        (n, len(classes)). Both are empty arrays when no window can be built.
    """
    # Work on a private, NaN-free copy so re-running the cell is idempotent.
    df = df.dropna().copy()

    for col in df.columns:
        if col == "Target":  # the label itself is never normalised
            continue
        spread = df[col].std()
        if spread > 0:
            df[col] = (df[col] - df[col].mean()) / spread
        else:
            # Constant (or single-row) column: 0/0 would turn every row into
            # NaN and wipe the data set, so map it to zeros instead.
            df[col] = 0.0
    df.dropna(inplace=True)

    n_classes = len(classes)
    one_hot = np.eye(n_classes)  # built once; row k is the encoding of class k
    per_class = [[] for _ in range(n_classes)]

    # deque(maxlen=...) discards the oldest row automatically, giving a sliding window.
    window = deque(maxlen=SEQ_LEN)
    for row in df.values:
        window.append(list(row[:-1]))  # every column except the trailing target
        if len(window) == SEQ_LEN:
            label = int(row[-1])
            per_class[label].append([np.array(window), one_hot[label]])

    # Balance on the number of windows actually built per class. Row counts
    # (value_counts) overstate it, because the first SEQ_LEN-1 rows yield none.
    populated = [len(bucket) for bucket in per_class if bucket]
    lower = min(populated) if populated else 0

    balanced = []
    for bucket in per_class:
        random.shuffle(bucket)
        balanced.extend(bucket[:lower])
    random.shuffle(balanced)

    X = [seq for seq, _ in balanced]
    y = [target for _, target in balanced]

    return np.array(X), np.array(y)

In [None]:
### Set the Neural Net
class Net(nn.Module):
    """CNN classifier: three conv -> ReLU -> 2x2 max-pool stages feeding six
    fully connected layers, with a softmax over the class dimension."""

    def __init__(self, IMG_HEIGHT, IMG_WIDTH, output_size):
        '''
        Build the layers.

        IMG_HEIGHT, IMG_WIDTH : spatial size of one single-channel input sample.
        output_size           : number of classes produced by the last layer.
        '''
        super().__init__()  # initialise nn.Module before registering any layer

        self.IMG_HEIGHT, self.IMG_WIDTH = IMG_HEIGHT, IMG_WIDTH
        self.output_size = output_size

        # Convolution stack: 1 -> 16 -> 32 -> 64 channels, all with 3x3 kernels.
        # The first layer zero-pads both dimensions by IMG_HEIGHT - IMG_WIDTH.
        # NOTE(review): presumably there so a narrow input survives the three
        # pooling stages; it requires IMG_HEIGHT >= IMG_WIDTH - confirm.
        self.conv1 = nn.Conv2d(1, 16, 3, padding=IMG_HEIGHT - IMG_WIDTH)
        self.conv2 = nn.Conv2d(16, 32, 3)
        self.conv3 = nn.Conv2d(32, 64, 3)

        # Push one random sample through the conv stack so that convs() can
        # record the flattened feature size needed by the first linear layer.
        self._to_linear = None
        probe = torch.randn(self.IMG_HEIGHT, self.IMG_WIDTH).view(-1, 1, self.IMG_HEIGHT, self.IMG_WIDTH)
        self.convs(probe)

        hidden = 512
        self.fc1 = nn.Linear(self._to_linear, hidden)  # consumes the flattened conv features
        self.fc2 = nn.Linear(hidden, hidden)
        self.fc3 = nn.Linear(hidden, hidden)
        self.fc4 = nn.Linear(hidden, hidden)
        self.fc5 = nn.Linear(hidden, hidden)
        self.fc6 = nn.Linear(hidden, self.output_size)  # one output per class

    def convs(self, x):
        '''
        Run the three conv -> ReLU -> 2x2 max-pool stages and return the feature maps.
        On the first call the per-sample feature count is stored in self._to_linear.
        '''
        for conv in (self.conv1, self.conv2, self.conv3):
            x = F.max_pool2d(F.relu(conv(x)), (2, 2))

        if self._to_linear is None:
            channels, height, width = x[0].shape
            self._to_linear = channels * height * width
        return x

    def forward(self, x):
        '''
        Conv features -> flatten -> five ReLU dense layers -> output layer -> softmax.
        Returns one probability row per input sample.
        '''
        x = self.convs(x).view(-1, self._to_linear)  # flatten every sample to a vector
        for dense in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            x = F.relu(dense(x))
        # The output layer has no ReLU; softmax normalises across classes.
        return F.softmax(self.fc6(x), dim=1)

In [None]:
class Model:
    """Training / evaluation harness around a network.

    By default the loss function, optimizer and device are the notebook-level
    globals ``loss_function``, ``optimizer`` and ``device`` (the original
    behaviour). Each can be overridden through the optional keyword
    arguments of the constructor.
    """

    def __init__(self, MODEL_NAME,  BATCH_SIZE, EPOCHS, test_X, test_y, testing_size, IMG_HEIGHT, IMG_WIDTH,
                 loss_function=None, optimizer=None, device=None):
        """
        MODEL_NAME   : used as the sheet name and as the '<MODEL_NAME>.xls' file name.
        BATCH_SIZE   : number of samples per training batch.
        EPOCHS       : number of passes over the training set.
        test_X/test_y: held-out samples and their one-hot labels.
        testing_size : number of held-out samples scored at each report.
        IMG_HEIGHT, IMG_WIDTH : spatial size every sample is reshaped to.
        loss_function, optimizer, device : optional overrides of the globals
            of the same name.
        """
        self.BATCH_SIZE = BATCH_SIZE
        self.EPOCHS = EPOCHS
        self.test_X = test_X
        self.test_y = test_y
        self.IMG_HEIGHT = IMG_HEIGHT
        self.IMG_WIDTH  = IMG_WIDTH
        self.size = testing_size
        self.MODEL_NAME = MODEL_NAME
        self.loss_function = loss_function
        self.optimizer = optimizer
        self.device = device

    # --- resolution of the collaborators (explicit override, else notebook global) ---

    def _loss_fn(self):
        """Loss function given to the constructor, else the global `loss_function`."""
        return self.loss_function if self.loss_function is not None else loss_function

    def _optim(self):
        """Optimizer given to the constructor, else the global `optimizer`."""
        return self.optimizer if self.optimizer is not None else optimizer

    def _dev(self):
        """Device given to the constructor, else the global `device`."""
        return self.device if self.device is not None else device

    @staticmethod
    def _report_batch_start(n_samples, batch_size):
        """Start index of the batch after which an epoch is reported.

        It is the middle full batch. Integer arithmetic guarantees the result
        is a real batch start even when the number of full batches is odd
        (the former float comparison never matched in that case, so nothing
        was ever logged).
        """
        return (n_samples // batch_size // 2) * batch_size

    def fwd_pass(self, X, y, train=False, model=None):
        '''
        Run one batch through the network; used for both training and testing.

        X, y  : input batch and its one-hot labels (already on the right device).
        train : when True, gradients are reset, back-propagated and the
                optimizer takes one step.
        model : network to use; defaults to the notebook-level `net`.

        Returns (accuracy as a float in [0, 1], loss tensor).
        '''
        if model is None:
            model = net  # notebook-level network
        if train:
            model.zero_grad()
        outputs = model(X)
        # Vectorised argmax comparison instead of a Python loop over samples.
        hits = (outputs.argmax(dim=1) == y.argmax(dim=1)).sum().item()
        acc = hits / len(outputs)
        loss = self._loss_fn()(outputs, y)

        if train:
            loss.backward()
            self._optim().step()

        return acc, loss

    def _validate(self, net):
        """Score `net` on a random contiguous window of `self.size` test samples."""
        # randint needs a positive upper bound; when the test set is not larger
        # than the window, use the test set from its start (the slice is
        # clamped to whatever is available).
        max_start = len(self.test_X) - self.size
        start = np.random.randint(max_start) if max_start > 0 else 0
        X = self.test_X[start:start + self.size]
        y = self.test_y[start:start + self.size]
        target_device = self._dev()
        with torch.no_grad():
            return self.fwd_pass(
                X.view(-1, 1, self.IMG_HEIGHT, self.IMG_WIDTH).to(target_device),
                y.to(target_device),
                model=net,
            )

    def fit(self, net, train_X, train_y):
        '''
        Train `net` for self.EPOCHS epochs in batches of self.BATCH_SIZE.

        Once per epoch, after the middle batch, the training metrics of that
        batch and the metrics of a random test window are printed and written
        to one row of '<MODEL_NAME>.xls'.

        NOTE: the optimizer must have been built from the parameters of the
        `net` passed here, otherwise its steps update another network.
        '''
        target_device = self._dev()

        wb = Workbook()
        # Excel sheet names are limited to 31 characters.
        s1 = wb.add_sheet(self.MODEL_NAME[:31])
        for col, title in enumerate(('Training Accuracy', 'Test Accuracy', 'Training Loss', 'Test Loss')):
            s1.write(0, col, title)

        report_at = self._report_batch_start(len(train_X), self.BATCH_SIZE)

        for epoch in range(self.EPOCHS):
            for i in tqdm(range(0, len(train_X), self.BATCH_SIZE)):
                batch_X = train_X[i:i+self.BATCH_SIZE].view(-1, 1, self.IMG_HEIGHT, self.IMG_WIDTH)
                batch_y = train_y[i:i+self.BATCH_SIZE]

                batch_X, batch_y = batch_X.to(target_device), batch_y.to(target_device)

                acc, loss = self.fwd_pass(batch_X, batch_y, train=True, model=net)

                if i == report_at:
                    val_acc, val_loss = self._validate(net)
                    print(f"Epoch  {epoch+1} :\n")
                    print(' Training Accuracy :', acc, '\n', 'Test Accuracy :', val_acc, '\n', 'Training Loss :', loss.item(), '\n', 'Test Loss :', val_loss.item())
                    s1.write(epoch+1,0, round(float(acc),3))
                    s1.write(epoch+1,1, round(float(val_acc),3))
                    s1.write(epoch+1,2, round(loss.item(), 4))
                    s1.write(epoch+1,3, round(val_loss.item(), 4))
        filename = self.MODEL_NAME +'.xls'
        wb.save(filename)

    def predict(self, net, X_pred):
        '''
        Print and return the predicted class of every sample in X_pred.

        X_pred is reshaped to (-1, 1, IMG_HEIGHT, IMG_WIDTH). The value printed
        next to each class is the network output for that class (a probability
        when the network ends with a softmax). Returns a tensor holding one
        class index per sample.
        '''
        X_pred = X_pred.view(-1, 1, self.IMG_HEIGHT, self.IMG_WIDTH).to(self._dev())
        with torch.no_grad():  # inference only, no graph needed
            outputs = net(X_pred)
        # argmax per sample; a global argmax would return a flattened index
        # as soon as more than one sample is passed.
        predictions = outputs.argmax(dim=1)
        for scores, predicted in zip(outputs, predictions):
            predicted = int(predicted)
            print(f'Predicted class {predicted} with probability {float(scores[predicted]):.4f}')
        return predictions

In [None]:
### Main Code ###
# Build the data set (or load the cached copy), create the network, split the data and train.

# Seed every RNG in use so that Restart & Run All reproduces the same
# shuffling, weight initialisation and validation windows.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Set to True once to (re)build and cache the data, then back to False unless
# the preprocessing settings below change.
REBUILD_DATA = True

X_name = "X_all.npy"
y_name = "y_all.npy"

# Set the preprocessing HYPERPARAMETERS
FREQUENCY = '1Min'
SEQ_LEN =   60  # number of preceding rows collected into one sequence
FUTURE_PERIOD_PREDICT = 3  # how far into the future (in rows) are we trying to predict?
classes   = [0,1,2,3,4,5,6,7,8,9]
columns_to_load = [1,2,3,4,5]
IMG_HEIGHT = SEQ_LEN
IMG_WIDTH = len(columns_to_load)

if REBUILD_DATA:
    # Load the data
    file = 'TSLA.USUSD_Candlestick_1_M_BID_24.07.2017-24.07.2020.csv'
    all_available_data_df = pd.read_csv(file, usecols=columns_to_load)

    all_availabl_df = classify(FREQUENCY, classes, all_available_data_df)
    X_all, y_all = preprocess_df(all_availabl_df, SEQ_LEN, classes)

    # Cache under the same names the load branch reads from.
    np.save(X_name, X_all)
    np.save(y_name, y_all)
else:
    # NOTE(security): allow_pickle=True can execute arbitrary code when the
    # file is not trusted. Only load caches written by this notebook.
    X_all = np.load(X_name, allow_pickle=True)
    y_all = np.load(y_name, allow_pickle=True)

### Define a device to start GPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")  # further GPUs would be cuda:1, cuda:2, ...
    print("Running on the GPU")
else:
    device = torch.device("cpu")
    print("Running on the CPU")

### Connecting the Neural Net to the GPU. Choosing the loss function, and optimizer parameters
LEARNING_RATE = 0.001
net = Net(IMG_HEIGHT, IMG_WIDTH, len(classes)).to(device)
loss_function = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE)
print('Neural Net Features : ',net)
print('Optimizer Features : ',optimizer)
print('Loss Function : ', loss_function)

### Splitting the data as training and testing data
VAL_PCT = 0.1  # Test set ratio
val_size = int(len(X_all)*VAL_PCT)

# Convert through a single float32 array; building a tensor from a Python
# list of arrays is far slower.
X_all = torch.as_tensor(np.asarray(X_all, dtype=np.float32)).view(-1,IMG_HEIGHT, IMG_WIDTH)
y_all = torch.as_tensor(np.asarray(y_all, dtype=np.float32))

# Split on an explicit index: X_all[:-val_size] would be EMPTY when val_size == 0.
split_at = len(X_all) - val_size

train_X = X_all[:split_at]
train_y = y_all[:split_at]

test_X = X_all[split_at:]
test_y = y_all[split_at:]

print('Size of the Data Set :', len(X_all))
print('Test Set Ratio [%] :', 100 * VAL_PCT)
print('Size of the Train Set :', len(train_X), ' <---> Size of the Test Set :', len(test_X))

### Training HYPERPARAMETERS
BATCH_SIZE = 128
EPOCHS = 30
testing_size = 300  # number of test samples scored at every report

batch = BATCH_SIZE
MODEL_NAME = f"stocks_v3_model-Batch{int(batch)}"

print('\n HYPERPARAMETERS : ')
print('Batch Size : ', batch)
print('# of Epochs : ', EPOCHS)

model = Model(MODEL_NAME, batch, EPOCHS, test_X, test_y, testing_size, IMG_HEIGHT, IMG_WIDTH)
model.fit(net, train_X, train_y)

In [None]:
##### Data Visualization #####
# Read back the per-epoch metrics sheet written by Model.fit and name its four columns.
columns = [
    'Training Accuracy',
    'Test Accuracy',
    'Training Loss',
    'Test Loss',
]
results_df = pd.read_excel(f"{MODEL_NAME}.xls")
results_df.columns = columns

In [None]:
# One figure for the accuracy curves, one for the loss curves (training vs. test in each).
accuracy_curves = results_df.loc[:, ['Training Accuracy', 'Test Accuracy']]
accuracy_curves.plot(grid=True)
loss_curves = results_df.loc[:, ['Training Loss', 'Test Loss']]
loss_curves.plot(grid=True)

In [None]:
# Inspect the one-hot training labels (shown as this cell's output).
train_y

In [None]:
# Inspect the one-hot label of the first held-out sample.
test_y[0]