In [56]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.autograd import Variable
import pandas as pd
import scipy
from scipy import stats
import numpy as np

In [57]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using {device} device")

Using cpu device


In [58]:
# Window length (in rows) for the moving average of "Daily Return"; the same
# number of leading rows is dropped later, before the feature matrix is built.
MA_DAYS = 25

Import raw data from Yahoo Finance

In [59]:
# Load both price histories and keep only the first 1008 rows of each.
# NOTE(review): the rows are truncated by position, not matched on "Date".
# The previews below show the two calendars diverging (row 4 is 2017-09-01
# for SPY but 2017-09-04 for O9P.SI), so row i of one frame is not always
# the same trading day as row i of the other -- confirm this is acceptable.
high_risk = pd.read_csv('SPY.csv').iloc[:1008]
low_risk = pd.read_csv('O9P.SI.csv').iloc[:1008]
print(high_risk.shape, low_risk.shape, sep="\n")

(1008, 7)
(1008, 7)


In [60]:
# Preview the first rows of the SPY frame to check the columns that were loaded.
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-08-28,245.169998,245.199997,244.089996,244.570007,224.24086,40565600
1,2017-08-29,243.059998,245.149994,242.929993,244.850006,224.497574,51135700
2,2017-08-30,244.830002,246.320007,244.619995,246.009995,225.561142,62030800
3,2017-08-31,246.720001,247.770004,246.050003,247.490005,226.918106,103803900
4,2017-09-01,247.919998,248.330002,247.669998,247.839996,227.239059,62007000


In [61]:
# Preview the first rows of the O9P.SI frame to check the columns that were loaded.
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-08-28,10.9,10.9,10.88,10.88,10.88,7400
1,2017-08-29,10.89,10.9,10.89,10.9,10.9,1400
2,2017-08-30,10.94,10.94,10.94,10.94,10.94,5300
3,2017-08-31,10.92,10.96,10.92,10.93,10.93,30700
4,2017-09-04,10.89,10.89,10.8,10.83,10.83,59800


Calculate daily returns

In [62]:
def add_daily_return(market_data):
    """Add a "Daily Return" column to ``market_data`` in place.

    The value stored is the absolute intraday price change of each row,
    ``Close - Open``; it is not divided by the opening price.

    Args:
        market_data: DataFrame with "Open" and "Close" columns.

    Returns:
        None. ``market_data`` is modified in place.
    """
    opening = market_data['Open']
    closing = market_data['Close']
    market_data["Daily Return"] = closing.sub(opening)

# Add the "Daily Return" column to both price frames (each is modified in place).
for _frame in (high_risk, low_risk):
    add_daily_return(_frame)

Calculate moving average (MA) of daily returns

In [63]:
def add_moving_average(market_data, ma_days):
    """Add an "MA" column holding the trailing mean of "Daily Return".

    For each row the mean is taken over that row and the ``ma_days - 1`` rows
    before it.  Near the top of the frame, where fewer than ``ma_days`` rows
    exist, the mean is taken over the rows that are available
    (``min_periods=1``), and NaN values inside a window are ignored.

    The column is computed with a rolling window rather than by materialising
    ``ma_days`` shifted helper columns, so no temporary ``M_<i>`` columns are
    created and any existing column with such a name is left untouched.

    Args:
        market_data: DataFrame with a "Daily Return" column.
        ma_days: Window length in rows; must be at least 1.

    Returns:
        None. ``market_data`` is modified in place.

    Raises:
        ValueError: If ``ma_days`` is smaller than 1.
    """
    if ma_days < 1:
        raise ValueError(f"ma_days must be >= 1, got {ma_days}")

    market_data["MA"] = (
        market_data["Daily Return"]
        .rolling(window=ma_days, min_periods=1)
        .mean()
    )

# Add the "MA" column to both price frames, using the shared window length.
for _frame in (high_risk, low_risk):
    add_moving_average(_frame, MA_DAYS)


In [64]:
# Inspect the last rows of the SPY frame, now including "Daily Return" and "MA".
high_risk.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1003,2021-08-23,445.160004,448.230011,443.440002,447.26001,440.990021,54973000,2.100006,0.8804
1004,2021-08-24,447.970001,448.540009,447.420013,447.970001,441.690033,38744700,0.0,0.6652
1005,2021-08-25,448.170013,449.459991,447.769989,448.910004,442.616913,40529700,0.739991,0.6064
1006,2021-08-26,448.609985,448.859985,446.160004,446.26001,440.004089,57829600,-2.349975,0.483601
1007,2021-08-27,447.119995,450.649994,447.059998,450.25,443.93811,77235100,3.130005,0.512001


In [65]:
# Inspect the last rows of the O9P.SI frame, now including "Daily Return" and "MA".
low_risk.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1003,2021-08-24,9.83,9.86,9.81,9.84,9.84,19960,0.01,-0.0316
1004,2021-08-25,9.9,9.9,9.85,9.86,9.86,102205,-0.04,-0.032
1005,2021-08-26,9.89,9.89,9.86,9.86,9.86,66520,-0.03,-0.0296
1006,2021-08-27,9.9,9.9,9.86,9.88,9.88,68015,-0.02,-0.0308
1007,2021-08-30,9.79,9.79,9.77,9.78,9.78,1017465,-0.01,-0.0312


Calculate ROE (here: the close-to-close rate of return, relative to the previous close)

In [66]:
def add_roe(market_data):
    """Add "Prev Close" and "ROE" columns to ``market_data`` in place.

    "Prev Close" is the "Close" column shifted down by one row.  "ROE" is the
    relative change of "Close" with respect to it,
    ``(Close - Prev Close) / Prev Close``.  The first row has no predecessor,
    so both new columns are NaN there.

    Args:
        market_data: DataFrame with a "Close" column.

    Returns:
        None. ``market_data`` is modified in place.
    """
    close = market_data["Close"]
    previous_close = close.shift(1)
    market_data["Prev Close"] = previous_close
    market_data["ROE"] = close.sub(previous_close).div(previous_close)

# Add the "Prev Close" and "ROE" columns to both price frames.
for _frame in (high_risk, low_risk):
    add_roe(_frame)

In [67]:
def add_roe_binary(market_data, tau=-0.005):
    """Add a "ROE Binary" label column to ``market_data`` in place.

    A row is labelled ``-1`` when its "ROE" is strictly below ``tau`` and
    ``1`` otherwise.  A NaN "ROE" compares as "not below" and is therefore
    labelled ``1`` as well.

    Args:
        market_data: DataFrame with a "ROE" column.
        tau: Threshold below which a row counts as a drop.

    Returns:
        None. ``market_data`` is modified in place.
    """
    is_below_threshold = market_data["ROE"].values < tau
    market_data["ROE Binary"] = np.where(is_below_threshold, -1, 1)

# Label both price frames with the default threshold.
for _frame in (high_risk, low_risk):
    add_roe_binary(_frame)

In [68]:
# Check the new "Prev Close", "ROE" and "ROE Binary" columns. The first row has
# no previous close, so its "ROE" is NaN and its label falls through to 1.
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
0,2017-08-28,245.169998,245.199997,244.089996,244.570007,224.24086,40565600,-0.599991,-0.599991,,,1
1,2017-08-29,243.059998,245.149994,242.929993,244.850006,224.497574,51135700,1.790008,0.595009,244.570007,0.001145,1
2,2017-08-30,244.830002,246.320007,244.619995,246.009995,225.561142,62030800,1.179993,0.790003,244.850006,0.004738,1
3,2017-08-31,246.720001,247.770004,246.050003,247.490005,226.918106,103803900,0.770004,0.785004,246.009995,0.006016,1
4,2017-09-01,247.919998,248.330002,247.669998,247.839996,227.239059,62007000,-0.080002,0.612002,247.490005,0.001414,1


In [69]:
# Same check for the O9P.SI frame: "Prev Close", "ROE" and "ROE Binary".
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
0,2017-08-28,10.9,10.9,10.88,10.88,10.88,7400,-0.02,-0.02,,,1
1,2017-08-29,10.89,10.9,10.89,10.9,10.9,1400,0.01,-0.005,10.88,0.001838,1
2,2017-08-30,10.94,10.94,10.94,10.94,10.94,5300,0.0,-0.003333,10.9,0.00367,1
3,2017-08-31,10.92,10.96,10.92,10.93,10.93,30700,0.01,0.0,10.94,-0.000914,1
4,2017-09-04,10.89,10.89,10.8,10.83,10.83,59800,-0.06,-0.012,10.93,-0.009149,-1


Build feature space

In [72]:
def remove_for_ma(market_data, ma_days):
  """Return ``market_data`` without its first ``ma_days`` rows.

  Those leading rows have a moving average computed over fewer than
  ``ma_days`` observations.  The rows are selected by position and the result
  is an independent copy, so columns can be assigned on it afterwards without
  writing through to (or warning about) the original frame.  The original
  index labels are kept.

  Args:
    market_data: DataFrame to trim.
    ma_days: Number of leading rows to drop.

  Returns:
    A new DataFrame holding rows ``ma_days`` onwards.
  """
  return market_data.iloc[ma_days:].copy()

# Drop the first MA_DAYS rows of both frames.
# Note: the names are rebound, so re-running this cell trims another MA_DAYS rows.
high_risk, low_risk = (
    remove_for_ma(high_risk, MA_DAYS),
    remove_for_ma(low_risk, MA_DAYS),
)

In [74]:
# Sanity check: the frame should now have MA_DAYS fewer rows than when it was loaded.
print(high_risk.shape)

(983, 12)


In [75]:
def standardize_columns(market_data, columns):
  """Scale the given columns of ``market_data`` by their standard deviation, in place.

  Each value is divided by the ``std()`` of its column, computed over every
  row of the frame that is passed in.  Only the scale changes: the column
  mean is not subtracted.

  Args:
    market_data: DataFrame to modify.
    columns: Iterable of column names to scale.

  Returns:
    None. ``market_data`` is modified in place.
  """
  for name in columns:
    series = market_data[name]
    market_data[name] = series.div(series.std())

# Scale the three feature columns of both frames by their standard deviation.
for _frame in (high_risk, low_risk):
    standardize_columns(_frame, ['Volume', 'Daily Return', 'MA'])

In [76]:
# "Volume", "Daily Return" and "MA" are now expressed in units of their standard deviation.
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
25,2017-10-03,252.320007,252.889999,252.229996,252.860001,232.992355,1.367891,0.208144,0.70871,252.320007,0.00214,1
26,2017-10-04,252.690002,253.440002,252.559998,253.160004,233.268799,1.14561,0.181166,0.583476,252.860001,0.001186,1
27,2017-10-05,253.539993,254.679993,253.199997,254.660004,234.650909,1.300584,0.431716,0.577785,253.160004,0.005925,1
28,2017-10-06,254.149994,254.699997,253.850006,254.369995,234.383713,1.651169,0.084801,0.525604,254.660004,-0.001139,1
29,2017-10-09,254.630005,254.699997,253.649994,253.949997,233.996689,0.733043,-0.262114,0.468679,254.369995,-0.001651,1


In [77]:
# Same check for the O9P.SI frame after scaling.
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
25,2017-10-03,10.86,10.87,10.86,10.86,10.86,0.659605,0.0,-0.383962,10.85,0.000922,1
26,2017-10-04,10.86,10.86,10.86,10.86,10.86,0.0,0.0,-0.393561,10.86,0.0,1
27,2017-10-05,10.89,10.89,10.88,10.88,10.88,0.172782,-0.045821,-0.403161,10.86,0.001842,1
28,2017-10-06,10.89,10.89,10.87,10.88,10.88,0.371247,-0.045821,-0.422359,10.88,0.0,1
29,2017-10-09,10.88,10.88,10.85,10.88,10.88,0.254503,0.0,-0.364764,10.88,0.0,1


In [80]:
def to_X_train_features(low_risk, high_risk, n_train=800):
  """Stack the training features of both assets into one array.

  The first ``n_train`` rows (by position) of each frame are used.  The rows
  of the result are, in order: high-risk "Daily Return", "MA", "Volume",
  then low-risk "Daily Return", "MA", "Volume".  Callers that want one sample
  per row should transpose the result.

  NOTE(review): the two frames are paired by row position, not by "Date".

  Args:
    low_risk: DataFrame of the low-risk asset with the three feature columns.
    high_risk: DataFrame of the high-risk asset with the three feature columns.
    n_train: Number of leading rows to use from each frame (default 800).

  Returns:
    ``numpy.ndarray`` of shape ``(6, m)`` where ``m`` is the number of rows
    actually available within the first ``n_train``.
  """
  feature_columns = ['Daily Return', 'MA', 'Volume']
  high_risk_train = high_risk.iloc[:n_train]
  low_risk_train = low_risk.iloc[:n_train]
  stacked_rows = [high_risk_train[name] for name in feature_columns]
  stacked_rows += [low_risk_train[name] for name in feature_columns]
  return np.vstack(stacked_rows)

# Transpose so that each row is one sample and each column one of the six features.
X = np.transpose(to_X_train_features(low_risk, high_risk))
print(X.shape, X)

(800, 6) [[ 0.20814449  0.7087102   1.36789127  0.         -0.38396238  0.65960516]
 [ 0.18116558  0.58347574  1.14561011  0.         -0.39356144  0.        ]
 [ 0.43171613  0.57778499  1.30058409 -0.04582138 -0.4031605   0.17278153]
 ...
 [ 0.75935115 -0.39561758  0.94032262  0.         -0.27837273  0.03076212]
 [ 0.00385804  0.12144766  1.28746417  0.13746413 -0.27837273  0.62726699]
 [ 0.97520594  0.07021485  1.03906806 -0.41239238 -0.35516521  0.1856234 ]]


In [81]:
def to_Y_train_features(low_risk, high_risk, n_train=800):
  """Return the training labels: "ROE Binary" of the high-risk asset.

  The first ``n_train`` rows (by position) of ``high_risk`` are used.

  NOTE(review): each label comes from the same row as the features built by
  ``to_X_train_features``, i.e. it is not shifted to a later day -- confirm
  that same-day labelling is what is wanted.

  Args:
    low_risk: Unused; kept so the signature mirrors ``to_X_train_features``.
    high_risk: DataFrame with a "ROE Binary" column.
    n_train: Number of leading rows to use (default 800).

  Returns:
    1-D ``numpy.ndarray`` with the labels of the selected rows.
  """
  high_risk_train = high_risk.iloc[:n_train]
  return high_risk_train['ROE Binary'].values

# Labels for the same rows that were used to build X.
Y = to_Y_train_features(low_risk=low_risk, high_risk=high_risk)
print(Y.shape, Y)

(800,) [ 1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
  1  1  1  1  1  1  1  1 -1 -1  1  1 -1 -1  1 -1 -1  1  1  1  1  1  1 -1
  1  1  1  1 -1 -1 -1  1  1  1  1  1  1  1 -1 -1  1  1 -1  1  1 -1 -1  1
 -1  1  1 -1  1  1  1 -1  1  1 -1  1  1  1  1  1 -1 -1  1 -1  1  1  1 -1
  1 -1  1  1  1  1  1  1  1  1 -1  1  1  1  1  1  1  1  1 -1  1 -1  1  1
  1  1  1  1  1  1  1  1 -1  1  1  1 -1  1 -1  1 -1  1  1  1  1  1  1  1
  1 -1  1  1  1  1  1  1  1  1  1  1  1 -1 -1  1  1  1  1  1  1  1  1 -1
  1  1 -1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1
 -1  1  1  1 -1  1  1  1  1  1  1  1  1 -1 -1  1  1 -1 -1  1 -1  1  1 -1
  1  1 -1 -1  1 -1 -1  1  1  1 -1  1  1  1  1 -1 -1  1 -1  1  1 -1 -1  1
 -1  1  1  1  1  1  1 -1  1 -1  1  1  1  1 -1 -1  1 -1 -1 -1 -1  1  1  1
  1  1 -1  1  1  1  1  1  1 -1  1  1  1  1 -

Build graph

In [82]:
# torch.as_tensor accepts a NumPy array as well as an existing tensor, so this
# cell can be re-run without error (torch.from_numpy rejects a tensor).
# float32 is requested so X has the same dtype as the float32 parameters.
X = torch.as_tensor(X, dtype=torch.float32)
print(type(X))

<class 'torch.Tensor'>


In [83]:
# Seed the generator before drawing the initial weights so the starting point
# is the same after every kernel restart.
torch.manual_seed(42)
# One weight per feature column of X (6 columns) for each of the two score vectors.
a = torch.randn(6, requires_grad=True, dtype=torch.float)
b = torch.randn(6, requires_grad=True, dtype=torch.float)
print(a, b)

tensor([-0.7780, -1.6325,  0.3035,  0.7309, -1.0906, -0.8960],
       requires_grad=True) tensor([ 0.6495, -2.1430, -4.3028,  0.2674,  2.0328, -1.5759],
       requires_grad=True)


In [84]:
# Hyper-parameters for the gradient-descent loop: step size and number of passes.
lr = 0.1
n_epochs = 1_000
# NOTE(review): a and b are drawn in an earlier cell, so seeding here only
# affects random numbers generated after this point, not their initial values.
torch.manual_seed(42)

<torch._C.Generator at 0x7f5a56f2aa30>

In [None]:
# Convert the inputs once, outside the loop. as_tensor accepts NumPy arrays
# and tensors alike; float32 matches the dtype of the parameters a and b.
features = torch.as_tensor(X, dtype=torch.float32)
# Y holds labels in {-1, +1} while the model outputs a probability in (0, 1),
# so the labels are mapped to {0, 1} (1 = the "+1" class) before comparing.
targets = (torch.as_tensor(Y, dtype=torch.float32) + 1) / 2

for epoch in range(n_epochs):
    # One score per sample and class: features @ a and features @ b.
    # exp(s_a) / (exp(s_a) + exp(s_b)) equals sigmoid(s_a - s_b); the sigmoid
    # form avoids overflow in exp and stays inside torch so autograd can
    # track it (NumPy functions cannot be applied to tensors that require grad).
    yhat = torch.sigmoid(features @ a - features @ b)

    error = targets - yhat
    loss = (error ** 2).mean()

    # Let autograd populate a.grad and b.grad from the loss.
    loss.backward()

    # The update must stay out of the autograd graph: an in-place change to a
    # leaf tensor that requires grad is only allowed under no_grad.
    with torch.no_grad():
        a -= lr * a.grad
        b -= lr * b.grad

    # Gradients accumulate across backward() calls, so reset them each epoch.
    a.grad.zero_()
    b.grad.zero_()

    # Report progress occasionally instead of printing both gradients every epoch.
    if epoch % 100 == 0 or epoch == n_epochs - 1:
        print(f"epoch {epoch:4d}  loss {loss.item():.6f}")

print(a, b)

In [None]:
# Small autograd sandbox. The intermediate results get their own names so the
# trainable parameters a and b used by the training loop are not overwritten.
# A tensor created with requires_grad=True replaces the deprecated Variable wrapper.
# NOTE(review): the chain previously started from the parameter `a` and left
# `x` unused; it is assumed here that `x` was the intended input -- confirm.
x = torch.rand(1, 6, requires_grad=True)
squared = x ** 2
doubled = squared * 2
doubled_mean = doubled.mean()
doubled_sum = doubled.sum()

In [None]:
batch_size = 64

# Create data loaders.
# NOTE(review): this cell used a name `tsla` that no visible cell defines, so
# it failed on a fresh kernel. The loader is built here from the feature
# matrix X and the labels Y prepared earlier in this notebook instead;
# replace the dataset if a different source was intended.
train_dataset = torch.utils.data.TensorDataset(
    torch.as_tensor(X, dtype=torch.float32),
    torch.as_tensor(Y, dtype=torch.float32),
)
train_dataloader = DataLoader(train_dataset, batch_size=batch_size)

(253, 7)
