In [1]:
# ！pip install functorch
import torch
from torch import nn
from torch import optim
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from torch.autograd import Variable
import pandas as pd
import scipy
from scipy import stats
import numpy as np

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

Using cpu device


In [3]:
MA_DAYS = 25

# Import raw data from yahoo finance

In [4]:
high_risk = pd.read_csv('SPY.csv')
low_risk = pd.read_csv('O9P.SI.csv')
high_risk = high_risk[:1008]
low_risk = low_risk[:1008]
print(high_risk.shape)
print(low_risk.shape)

(1008, 7)
(1008, 7)


In [5]:
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-08-28,245.169998,245.199997,244.089996,244.570007,224.24086,40565600
1,2017-08-29,243.059998,245.149994,242.929993,244.850006,224.497574,51135700
2,2017-08-30,244.830002,246.320007,244.619995,246.009995,225.561142,62030800
3,2017-08-31,246.720001,247.770004,246.050003,247.490005,226.918106,103803900
4,2017-09-01,247.919998,248.330002,247.669998,247.839996,227.239059,62007000


In [6]:
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-08-28,10.9,10.9,10.88,10.88,10.88,7400
1,2017-08-29,10.89,10.9,10.89,10.9,10.9,1400
2,2017-08-30,10.94,10.94,10.94,10.94,10.94,5300
3,2017-08-31,10.92,10.96,10.92,10.93,10.93,30700
4,2017-09-04,10.89,10.89,10.8,10.83,10.83,59800


## Calculate daily returns

In [7]:
def add_daily_return(market_data):
    market_data["Daily Return"]  = market_data['Close'] - market_data['Open']

add_daily_return(high_risk)
add_daily_return(low_risk)

## Calculate moving average (MA) of daily returns

In [8]:
def add_moving_average(market_data, ma_days):
    temp_vars = []

    # df = market_data
    for i in range(0,ma_days):
        temp_var = "M_{0}".format(i)
        market_data[temp_var] = market_data["Daily Return"].shift(i)
        temp_vars.append(temp_var)

    market_data["MA"] = market_data[temp_vars].mean(axis=1)

    for i in range(0,ma_days):
        temp_var = "M_{0}".format(i)
        market_data.drop(temp_var, axis = 1, inplace = True)

add_moving_average(high_risk, MA_DAYS)
add_moving_average(low_risk, MA_DAYS)


In [9]:
high_risk.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1003,2021-08-23,445.160004,448.230011,443.440002,447.26001,440.990021,54973000,2.100006,0.8804
1004,2021-08-24,447.970001,448.540009,447.420013,447.970001,441.690033,38744700,0.0,0.6652
1005,2021-08-25,448.170013,449.459991,447.769989,448.910004,442.616913,40529700,0.739991,0.6064
1006,2021-08-26,448.609985,448.859985,446.160004,446.26001,440.004089,57829600,-2.349975,0.483601
1007,2021-08-27,447.119995,450.649994,447.059998,450.25,443.93811,77235100,3.130005,0.512001


In [10]:
low_risk.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA
1003,2021-08-24,9.83,9.86,9.81,9.84,9.84,19960,0.01,-0.0316
1004,2021-08-25,9.9,9.9,9.85,9.86,9.86,102205,-0.04,-0.032
1005,2021-08-26,9.89,9.89,9.86,9.86,9.86,66520,-0.03,-0.0296
1006,2021-08-27,9.9,9.9,9.86,9.88,9.88,68015,-0.02,-0.0308
1007,2021-08-30,9.79,9.79,9.77,9.78,9.78,1017465,-0.01,-0.0312


## Calculate ROE

In [11]:
def add_roe(market_data):    
    market_data["Prev Close"] = market_data["Close"].shift(1)
    market_data["ROE"] = (market_data["Close"] - market_data["Prev Close"]) / market_data['Prev Close']

add_roe(high_risk)
add_roe(low_risk)

In [12]:
def add_roe_binary(market_data, tau=-0.005):    
    market_data["ROE Binary"] = np.where(market_data["ROE"].values < tau, 0, 1)

add_roe_binary(high_risk)
add_roe_binary(low_risk)

In [13]:
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
0,2017-08-28,245.169998,245.199997,244.089996,244.570007,224.24086,40565600,-0.599991,-0.599991,,,1
1,2017-08-29,243.059998,245.149994,242.929993,244.850006,224.497574,51135700,1.790008,0.595009,244.570007,0.001145,1
2,2017-08-30,244.830002,246.320007,244.619995,246.009995,225.561142,62030800,1.179993,0.790003,244.850006,0.004738,1
3,2017-08-31,246.720001,247.770004,246.050003,247.490005,226.918106,103803900,0.770004,0.785004,246.009995,0.006016,1
4,2017-09-01,247.919998,248.330002,247.669998,247.839996,227.239059,62007000,-0.080002,0.612002,247.490005,0.001414,1


In [14]:
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
0,2017-08-28,10.9,10.9,10.88,10.88,10.88,7400,-0.02,-0.02,,,1
1,2017-08-29,10.89,10.9,10.89,10.9,10.9,1400,0.01,-0.005,10.88,0.001838,1
2,2017-08-30,10.94,10.94,10.94,10.94,10.94,5300,0.0,-0.003333,10.9,0.00367,1
3,2017-08-31,10.92,10.96,10.92,10.93,10.93,30700,0.01,0.0,10.94,-0.000914,1
4,2017-09-04,10.89,10.89,10.8,10.83,10.83,59800,-0.06,-0.012,10.93,-0.009149,0


# Build feature space

In [15]:
def remove_for_ma(market_data, ma_days):
  return market_data[ma_days:]

high_risk = remove_for_ma(high_risk, MA_DAYS)
low_risk = remove_for_ma(low_risk, MA_DAYS)

In [16]:
print(high_risk.shape)

(983, 12)


In [17]:
def standardize_columns(market_data, columns):
  for column in columns:
    market_data[column] = market_data[column]/market_data[column].std()

standardize_columns(high_risk, ['Volume', 'Daily Return', 'MA'])
standardize_columns(low_risk, ['Volume', 'Daily Return', 'MA'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [18]:
high_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
25,2017-10-03,252.320007,252.889999,252.229996,252.860001,232.992355,1.367891,0.208144,0.70871,252.320007,0.00214,1
26,2017-10-04,252.690002,253.440002,252.559998,253.160004,233.268799,1.14561,0.181166,0.583476,252.860001,0.001186,1
27,2017-10-05,253.539993,254.679993,253.199997,254.660004,234.650909,1.300584,0.431716,0.577785,253.160004,0.005925,1
28,2017-10-06,254.149994,254.699997,253.850006,254.369995,234.383713,1.651169,0.084801,0.525604,254.660004,-0.001139,1
29,2017-10-09,254.630005,254.699997,253.649994,253.949997,233.996689,0.733043,-0.262114,0.468679,254.369995,-0.001651,1


In [19]:
low_risk.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Daily Return,MA,Prev Close,ROE,ROE Binary
25,2017-10-03,10.86,10.87,10.86,10.86,10.86,0.659605,0.0,-0.383962,10.85,0.000922,1
26,2017-10-04,10.86,10.86,10.86,10.86,10.86,0.0,0.0,-0.393561,10.86,0.0,1
27,2017-10-05,10.89,10.89,10.88,10.88,10.88,0.172782,-0.045821,-0.403161,10.86,0.001842,1
28,2017-10-06,10.89,10.89,10.87,10.88,10.88,0.371247,-0.045821,-0.422359,10.88,0.0,1
29,2017-10-09,10.88,10.88,10.85,10.88,10.88,0.254503,0.0,-0.364764,10.88,0.0,1


In [20]:
def to_X_train_features(low_risk, high_risk):
  high_risk_train = high_risk[:1000]
  low_risk_train = low_risk[:1000]
  return np.vstack((high_risk_train['Daily Return'], high_risk_train['MA'], high_risk_train['Volume'], low_risk_train['Daily Return'], low_risk_train['MA'], low_risk_train['Volume']))

X = to_X_train_features(low_risk, high_risk).T
print(X.shape, X)

(983, 6) [[ 0.20814449  0.7087102   1.36789127  0.         -0.38396238  0.65960516]
 [ 0.18116558  0.58347574  1.14561011  0.         -0.39356144  0.        ]
 [ 0.43171613  0.57778499  1.30058409 -0.04582138 -0.4031605   0.17278153]
 ...
 [ 0.28523475  1.43829291  0.82981674 -0.13746413 -0.71033041  0.77658292]
 [-0.90581443  1.1470315   1.18401988 -0.09164275 -0.73912759  0.79403619]
 [ 1.20648249  1.2143914   1.58133367 -0.04582138 -0.74872665 11.87832143]]


In [21]:
def to_Y_train_features(low_risk, high_risk):
  high_risk_train = high_risk[0:1000]
  return high_risk_train['ROE Binary'].values

Y = to_Y_train_features(low_risk, high_risk)
print(Y.shape, Y)

(983,) [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 0 1 1 0 0 1 0 0 1 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 1 1 1 0
 0 1 1 0 1 1 0 0 1 0 1 1 0 1 1 1 0 1 1 0 1 1 1 1 1 0 0 1 0 1 1 1 0 1 0 1 1
 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 0
 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0
 1 0 1 1 0 1 1 0 0 1 0 0 1 1 1 0 1 1 1 1 0 0 1 0 1 1 0 0 1 0 1 1 1 1 1 1 0
 1 0 1 1 1 1 0 0 1 0 0 0 0 1 1 1 1 1 0 1 1 1 1 1 1 0 1 1 1 1 0 1 1 1 0 1 1
 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 0 1
 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 1 0 1 1 1
 0 0 1 1 0 1 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1
 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 0 0 0 1 1 1 0 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1
 0 1 1 1 1 1 1 1 1

# Build graph

In [22]:
X_tensor = torch.from_numpy(X)
print(type(X), X.shape, X_tensor.size())
Y_tensor = torch.from_numpy(Y).double()
print(type(Y), Y.shape, Y_tensor.size())

<class 'numpy.ndarray'> (983, 6) torch.Size([983, 6])
<class 'numpy.ndarray'> (983,) torch.Size([983])


In [23]:
a = torch.randn((6), requires_grad=True, dtype=torch.double)
b = torch.randn((6), requires_grad=True, dtype=torch.double)
print(a, a.size(), b, b.size())

tensor([ 0.6285,  0.7991, -0.2457,  1.9027,  0.4300, -0.6631],
       dtype=torch.float64, requires_grad=True) torch.Size([6]) tensor([-0.0122, -0.1009,  0.2089, -0.6902, -0.1436, -1.2529],
       dtype=torch.float64, requires_grad=True) torch.Size([6])


In [24]:
lr = 1e-1
n_epochs = 500
torch.manual_seed(42)
lambda1 = 1e-3 #0.5
lambda2 = 1e-3 #0.5

loss_fn = nn.BCELoss()
optimizer = optim.SGD([a, b], lr=lr)

In [25]:
for epoch in range(n_epochs):
  
    yhat = torch.exp(torch.matmul(X_tensor, a)) / (torch.exp (torch.matmul(X_tensor, a)) + torch.exp(torch.matmul(X_tensor, b)))

    loss = loss_fn(yhat, Y_tensor)
    loss.backward()   

    if epoch % 10 == 0:
      print(f"Epoch: {epoch}. Loss: {loss}")

    optimizer.step()
    optimizer.zero_grad()
    
print(a, b)

Epoch: 0. Loss: 0.9908778422957564
Epoch: 10. Loss: 0.6128986133779176
Epoch: 20. Loss: 0.5572870832693047
Epoch: 30. Loss: 0.5359187956961771
Epoch: 40. Loss: 0.5227013053099607
Epoch: 50. Loss: 0.513209205138364
Epoch: 60. Loss: 0.505905391170479
Epoch: 70. Loss: 0.5000277632815253
Epoch: 80. Loss: 0.49513968005396947
Epoch: 90. Loss: 0.4909696498177184
Epoch: 100. Loss: 0.4873387892787602
Epoch: 110. Loss: 0.48412386363594917
Epoch: 120. Loss: 0.48123691670692775
Epoch: 130. Loss: 0.4786133459038819
Epoch: 140. Loss: 0.4762045700760429
Epoch: 150. Loss: 0.47397333292395494
Epoch: 160. Loss: 0.4718905882134059
Epoch: 170. Loss: 0.46993337138786956
Epoch: 180. Loss: 0.46808330703172785
Epoch: 190. Loss: 0.4663255383032119
Epoch: 200. Loss: 0.4646479436863768
Epoch: 210. Loss: 0.46304055389654586
Epoch: 220. Loss: 0.461495111085508
Epoch: 230. Loss: 0.4600047310727756
Epoch: 240. Loss: 0.4585636413929452
Epoch: 250. Loss: 0.4571669759556347
Epoch: 260. Loss: 0.4558106125358032
Epoch: 2

# Build efficient frontier

In [31]:
with torch.no_grad():
  y_test = torch.exp(torch.matmul(X_tensor, a)) / (torch.exp (torch.matmul(X_tensor, a)) + torch.exp(torch.matmul(X_tensor, b)))
  print(y_test)

tensor([8.9734e-01, 8.2501e-01, 9.0920e-01, 8.6093e-01, 5.6764e-01, 7.3047e-01,
        8.1278e-01, 7.0853e-01, 7.3952e-01, 7.2666e-01, 9.0630e-01, 7.1277e-01,
        8.8590e-01, 8.7593e-01, 4.5653e-01, 7.9779e-01, 7.3578e-01, 7.1589e-01,
        9.3639e-01, 7.7103e-01, 8.2545e-01, 6.0956e-01, 7.7759e-01, 8.2893e-01,
        8.0743e-01, 7.0435e-01, 8.0796e-01, 8.9453e-01, 7.5351e-01, 8.6420e-01,
        7.8824e-01, 8.0871e-01, 9.1259e-01, 7.6294e-01, 8.6255e-01, 9.0027e-01,
        7.1775e-01, 6.3251e-01, 6.5905e-01, 9.7591e-01, 7.7075e-01, 9.6953e-01,
        9.2160e-01, 4.1970e-01, 8.2353e-01, 7.9180e-01, 7.9691e-01, 8.1848e-01,
        8.1179e-01, 7.1942e-01, 8.1909e-01, 6.3658e-01, 9.5574e-01, 8.1172e-01,
        7.6063e-01, 5.0344e-01, 6.0815e-01, 7.3634e-01, 6.5881e-01, 6.5862e-01,
        3.8312e-01, 3.5506e-01, 8.6312e-01, 9.4027e-01, 7.8660e-01, 9.1756e-01,
        8.0292e-01, 7.4484e-01, 9.0651e-01, 8.8833e-01, 9.4892e-01, 3.7858e-01,
        9.5663e-01, 8.0337e-01, 9.5284e-

## Build MV Portfolio

## Build ML Portfolio

In [33]:
k = 0.5
calculate_ml_portfolio_weights_lambda = lambda x: 0 if x < k else 1
calculate_ml_portfolio_weights = np.vectorize(calculate_ml_portfolio_weights_lambda)
# vfunc(x)
# calculate_ml_portfolio_weights = functorch.vmap(ml_portfolio_weights, out_dims=1)
# forecast = 
# portfolio_weights = calculate_ml_portfolio_weights(y_test.numpy())
# print(portfolio_weights)

portfolio_weights = y_test.apply_(calculate_ml_portfolio_weights_lambda)
print(portfolio_weights)

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 0., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
        1., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 1., 0., 1., 1.,
        1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0.,
        1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        0., 1., 1., 1., 1., 1., 1., 1., 