In [104]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.autograd import Variable
import pandas as pd
import scipy
from scipy import stats
import numpy as np

In [46]:
# Prefer the GPU when PyTorch reports one is usable; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


Import raw data from Yahoo Finance

In [86]:
# Positional row window kept from each CSV: row 0 is dropped and 1007 rows
# remain. The reason for these bounds is not recorded here - TODO confirm.
ROW_START, ROW_STOP = 1, 1008

# .iloc makes the positional intent explicit, and .copy() gives each frame its
# own data so the in-place column additions in later cells never operate on a
# slice of the frame returned by read_csv.
high_risk = pd.read_csv('SPY.csv').iloc[ROW_START:ROW_STOP].copy()
low_risk = pd.read_csv('O9P.SI.csv').iloc[ROW_START:ROW_STOP].copy()

# NOTE(review): the two frames are later combined row-by-row, but their Date
# columns do not line up (e.g. row 4 is 2017-09-01 in SPY and 2017-09-04 in
# O9P.SI). Consider aligning both frames on Date before building features.
print(high_risk.shape)
print(low_risk.shape)

(1007, 7)
(1007, 7)


In [87]:
# Preview the first rows of the SPY frame as loaded.
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
1,2017-08-29,243.059998,245.149994,242.929993,244.850006,224.497574,51135700
2,2017-08-30,244.830002,246.320007,244.619995,246.009995,225.561142,62030800
3,2017-08-31,246.720001,247.770004,246.050003,247.490005,226.918106,103803900
4,2017-09-01,247.919998,248.330002,247.669998,247.839996,227.239059,62007000
5,2017-09-05,247.259995,247.520004,244.949997,246.059998,225.606995,91398800


In [88]:
# Preview the first rows of the O9P.SI frame as loaded.
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
1,2017-08-29,10.89,10.9,10.89,10.9,10.9,1400
2,2017-08-30,10.94,10.94,10.94,10.94,10.94,5300
3,2017-08-31,10.92,10.96,10.92,10.93,10.93,30700
4,2017-09-04,10.89,10.89,10.8,10.83,10.83,59800
5,2017-09-05,10.84,10.84,10.79,10.79,10.79,17400


Calculate daily returns (close minus open, in price units)

In [89]:
def add_daily_return(market_data):
    """Add a "Daily Return" column to ``market_data`` in place.

    The value is the absolute difference ``Close - Open`` for each row (a
    price change, not a percentage). Nothing is returned.
    """
    closing = market_data['Close']
    opening = market_data['Open']
    market_data["Daily Return"] = closing.sub(opening)

# Add the "Daily Return" column to both frames (high-risk first, as before).
for frame in (high_risk, low_risk):
    add_daily_return(frame)

Calculate the 25-row trailing moving average (MA) of daily returns

In [90]:
def add_moving_average(market_data, window=25):
    """Add an "MA" column holding the trailing mean of "Daily Return".

    For each row, "MA" is the mean of that row's "Daily Return" and the
    ``window - 1`` rows before it. Rows near the top of the frame that have
    fewer than ``window - 1`` predecessors are averaged over the rows that do
    exist, so the first row's "MA" equals its own "Daily Return". Missing
    values inside a window are skipped rather than propagated.

    The frame is modified in place; nothing is returned.

    Parameters
    ----------
    market_data : pandas.DataFrame
        Frame with a numeric "Daily Return" column.
    window : int, default 25
        Number of consecutive rows (current row included) to average.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    KeyError
        If the frame has no "Daily Return" column.
    """
    if window < 1:
        raise ValueError("window must be at least 1")

    # rolling(..., min_periods=1) averages whatever part of the window is
    # available, without writing temporary helper columns onto the caller's
    # frame (which could overwrite and then delete same-named real columns).
    market_data["MA"] = (
        market_data["Daily Return"].rolling(window=window, min_periods=1).mean()
    )

# Add the "MA" column to both frames (high-risk first, as before).
for frame in (high_risk, low_risk):
    add_moving_average(frame)


In [91]:
# Inspect the last rows of the SPY frame, including the new Daily Return and MA columns.
high_risk.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1003,2021-08-23,445.160004,448.230011,443.440002,447.26001,440.990021,54973000,2.100006,0.8804
1004,2021-08-24,447.970001,448.540009,447.420013,447.970001,441.690033,38744700,0.0,0.6652
1005,2021-08-25,448.170013,449.459991,447.769989,448.910004,442.616913,40529700,0.739991,0.6064
1006,2021-08-26,448.609985,448.859985,446.160004,446.26001,440.004089,57829600,-2.349975,0.483601
1007,2021-08-27,447.119995,450.649994,447.059998,450.25,443.93811,77235100,3.130005,0.512001


In [92]:
# Inspect the last rows of the O9P.SI frame, including the new Daily Return and MA columns.
low_risk.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1003,2021-08-24,9.83,9.86,9.81,9.84,9.84,19960,0.01,-0.0316
1004,2021-08-25,9.9,9.9,9.85,9.86,9.86,102205,-0.04,-0.032
1005,2021-08-26,9.89,9.89,9.86,9.86,9.86,66520,-0.03,-0.0296
1006,2021-08-27,9.9,9.9,9.86,9.88,9.88,68015,-0.02,-0.0308
1007,2021-08-30,9.79,9.79,9.77,9.78,9.78,1017465,-0.01,-0.0312


Build feature space (standardize the features, then stack them into one matrix)

In [94]:
def standardize_columns(market_data, columns):
    """Replace each listed column of ``market_data`` with its z-score, in place.

    Each column is standardized independently with ``scipy.stats.zscore``
    using that column's statistics over the whole frame. Nothing is returned.
    """
    for name in columns:
        raw_values = market_data[name]
        market_data[name] = stats.zscore(raw_values)

# Standardize the same three feature columns in both frames (high-risk first).
for frame in (high_risk, low_risk):
    standardize_columns(frame, ['Volume', 'Daily Return', 'MA'])

In [109]:
# Volume, Daily Return and MA now hold z-scores; the price columns are unchanged.
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1,2017-08-29,243.059998,245.149994,242.929993,244.850006,224.497574,-0.693747,0.680202,4.085306
2,2017-08-30,244.830002,246.320007,244.619995,246.009995,225.561142,-0.468644,0.442287,3.371024
3,2017-08-31,246.720001,247.770004,246.050003,247.490005,226.918106,0.394425,0.282385,2.812886
4,2017-09-01,247.919998,248.330002,247.669998,247.839996,227.239059,-0.469136,-0.04913,2.03617
5,2017-09-05,247.259995,247.520004,244.949997,246.059998,225.606995,0.138125,-0.485945,1.045568


In [110]:
# Volume, Daily Return and MA now hold z-scores; the price columns are unchanged.
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1,2017-08-29,10.89,10.9,10.89,10.9,10.9,-0.424098,0.1071,0.557794
2,2017-08-30,10.94,10.94,10.94,10.94,10.94,-0.378056,0.060713,0.43635
3,2017-08-31,10.92,10.96,10.92,10.93,10.93,-0.078194,0.1071,0.476832
4,2017-09-04,10.89,10.89,10.8,10.83,10.83,0.265349,-0.217608,0.072017
5,2017-09-05,10.84,10.84,10.79,10.79,10.79,-0.235208,-0.171221,-0.122293


In [107]:
def to_features(low_risk, high_risk):
    """Stack six feature columns of the two frames into one 2-D array.

    The result has one row per feature and one column per frame row. Row order
    is Daily Return, MA, Volume of ``high_risk`` followed by the same three
    columns of ``low_risk`` (note the argument order is low, then high).
    """
    feature_names = ('Daily Return', 'MA', 'Volume')
    stacked_rows = [
        frame[name]
        for frame in (high_risk, low_risk)
        for name in feature_names
    ]
    return np.vstack(stacked_rows)

In [108]:
# Stack the six feature series into one array: one row per feature (three
# from high_risk, then three from low_risk), one column per frame row.
X = to_features(low_risk, high_risk)
print(X)

[[ 0.68020159  0.44228669  0.28238489 ...  0.27067938 -0.93445321
   1.2028203 ]
 [ 4.08530647  3.37102429  2.81288581 ...  1.31347319  1.02589611
   1.09240393]
 [-0.6937466  -0.46864419  0.39442504 ... -0.91287595 -0.5554447
  -0.15450989]
 [ 0.10709955  0.06071278  0.10709955 ... -0.07844754 -0.03206077
   0.014326  ]
 [ 0.55779444  0.4363502   0.47683161 ... -0.40404393 -0.43319055
  -0.44290609]
 [-0.42409813 -0.37805628 -0.07819396 ...  0.34468274  0.36233212
  11.571162  ]]


In [None]:
# Autograd scratch example on a random 1x6 tensor that tracks gradients.
# requires_grad is set directly on the tensor; the Variable wrapper is not needed.
x = torch.rand(1, 6, requires_grad=True)
b = x**2      # element-wise square of the input defined above
c = b*2
d = c.mean()  # scalar: mean of the six entries of c
e = c.sum()   # scalar: sum of the six entries of c

In [None]:
batch_size = 64

# Create data loaders.
# NOTE(review): assumes X (the array built by to_features) is the intended
# training data - confirm.
# X holds one row per feature, so transpose it to make each dataset item one
# frame row's feature vector, and convert to float32 for use with torch modules.
train_features = torch.as_tensor(X.T, dtype=torch.float32)
train_dataloader = DataLoader(train_features, batch_size=batch_size)
# test_dataloader = DataLoader(test_data, batch_size=batch_size)




# for X in train_dataloader:
#     print(f"Shape of X [N, C, H, W]: {X.shape}")
#     # print(f"Shape of y: {y.shape} {y.dtype}")
#     break

(253, 7)
