In [1]:
import torch
import pandas as pd
import numpy as np
import torch.nn as nn


In [2]:
import torch
import torch.nn as nn
import torch.optim as optim

class ModelSharpe(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(ModelSharpe, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.linear = nn.Linear(hidden_size, output_size)

    def _sharpe_loss(self, portfolio_returns):
        mean_returns = portfolio_returns.mean(dim=1)
        std_returns = portfolio_returns.std(dim=1) + 1e-6
        sharpe_ratio = mean_returns / std_returns
        return -sharpe_ratio.mean()

    def forward(self, x, y):
        output_lstm, (hn, cn) = self.lstm(x)
        unnormalized_weights = self.linear(output_lstm)
        weights = torch.softmax(unnormalized_weights, dim=-1)
        portfolio_returns = torch.sum(weights[:, :, :] * y[:, :, :], dim=2)
        loss = self._sharpe_loss(portfolio_returns)
        return loss

    def get_allocations(self, x):
        with torch.no_grad():
            output_lstm, (hn, cn) = self.lstm(x)
            unnormalized_weights = self.linear(output_lstm)
            weights = torch.softmax(unnormalized_weights, dim=-1)
        return weights[:, -1, :]


In [3]:
random_tensor = 2 * torch.rand(1, 50, 5) - 1
y = 2 * torch.rand(1, 50, 5) - 1

In [4]:
model_lstm = ModelSharpe(5, 64, 5)

In [5]:
A = model_lstm(random_tensor, y)
print(A.shape, A)
print(model_lstm.get_allocations(random_tensor))

torch.Size([]) tensor(0.1113, grad_fn=<NegBackward0>)
tensor([[0.2001, 0.1915, 0.2028, 0.2169, 0.1887]])


In [6]:
import numpy as np
import yfinance as yf
import pandas as pd

tickers = ['VTI', 'AGG', 'DBC', '^VIX']
data = yf.download(tickers, start="2006-01-01", end="2020-12-31", interval="1d")['Adj Close']
data_na = data.dropna(axis = 0)
for column in data_na.columns:
    data_na[f'{column}_R'] = data_na[f'{column}'].pct_change()
    data_na[f'{column}_y'] = data_na[f'{column}_R'].shift(-1)
data_na.dropna(axis=0, inplace=True)
data_na.reset_index(inplace=True)
data_na['Date'] = data_na['Date'].dt.date
data_na

[*********************100%***********************]  4 of 4 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_na[f'{column}_R'] = data_na[f'{column}'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_na[f'{column}_y'] = data_na[f'{column}_R'].shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_na[f'{column}_

Ticker,Date,AGG,DBC,VTI,^VIX,AGG_R,AGG_y,DBC_R,DBC_y,VTI_R,VTI_y,^VIX_R,^VIX_y
0,2006-02-07,56.262363,20.285255,44.219494,13.590000,-0.000699,-0.000499,-0.028926,-0.004255,-0.009737,0.007194,0.042178,-0.055923
1,2006-02-08,56.234295,20.198933,44.537624,12.830000,-0.000499,0.000599,-0.004255,0.009402,0.007194,-0.001905,-0.055923,0.022603
2,2006-02-09,56.267998,20.388838,44.452793,13.120000,0.000599,-0.002097,0.009402,-0.018205,-0.001905,0.002068,0.022603,-0.019055
3,2006-02-10,56.149998,20.017660,44.544701,12.870000,-0.002097,0.000701,-0.018205,-0.015524,0.002068,-0.004523,-0.019055,0.037296
4,2006-02-13,56.189342,19.706907,44.343231,13.350000,0.000701,-0.001600,-0.015524,-0.008322,-0.004523,0.009486,0.037296,-0.082397
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3745,2020-12-22,106.425888,13.624198,181.857025,24.230000,0.001443,-0.000678,-0.011004,0.012517,0.000207,0.001710,-0.036963,-0.037969
3746,2020-12-23,106.353706,13.794738,182.168060,23.309999,-0.000678,0.000933,0.012517,0.002747,0.001710,0.001673,-0.037969,-0.076362
3747,2020-12-24,106.452972,13.832635,182.472748,21.530001,0.000933,0.000169,0.002747,-0.006164,0.001673,0.006327,-0.076362,0.007896
3748,2020-12-28,106.470993,13.747366,183.627304,21.700001,0.000169,0.000170,-0.006164,0.002757,0.006327,-0.004174,0.007896,0.063594


In [7]:
def create_batches(dataframe, start_date, end_date, window_size=50):
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    
    filtered_data = dataframe[(dataframe['Date'] >= start_date) & (dataframe['Date'] <= end_date)]
    
    x_batches = []
    y_batches = []
    dates_batches = []

    for i in range(len(filtered_data) - window_size):
        window_returns = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if col.endswith('_R')]].values
        window_prices = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if col in ['AGG', 'DBC', 'VTI', '^VIX']]].values
        
        window_x = np.concatenate([window_returns, window_prices], axis=1)
        window_y = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if col.endswith('_y')]].values
        
        window_dates = filtered_data.iloc[i:i+window_size]['Date'].values

        x_batches.append(window_x)
        y_batches.append(window_y)
        dates_batches.append(window_dates)

    return np.array(x_batches), np.array(y_batches), np.array(dates_batches)

x_batches, y_batches, dates_batches = create_batches(data_na, start_date="2006-01-01", end_date="2010-12-31")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe['Date'] = pd.to_datetime(dataframe['Date'])


In [8]:
print(x_batches.shape, y_batches.shape)
print(x_batches[0, :5, :])
print(y_batches[0,:5,:])

(1185, 50, 8) (1185, 50, 4)
[[-6.98955909e-04 -2.89255625e-02 -9.73666929e-03  4.21779289e-02
   5.62623634e+01  2.02852554e+01  4.42194939e+01  1.35900002e+01]
 [-4.98886658e-04 -4.25544478e-03  7.19434949e-03 -5.59234894e-02
   5.62342949e+01  2.01989326e+01  4.45376244e+01  1.28299999e+01]
 [ 5.99329118e-04  9.40174265e-03 -1.90470954e-03  2.26032707e-02
   5.62679977e+01  2.03888378e+01  4.44527931e+01  1.31199999e+01]
 [-2.09710733e-03 -1.82049451e-02  2.06753040e-03 -1.90548782e-02
   5.61499977e+01  2.00176601e+01  4.45447006e+01  1.28699999e+01]
 [ 7.00708623e-04 -1.55239357e-02 -4.52285948e-03  3.72960762e-02
   5.61893425e+01  1.97069073e+01  4.43432312e+01  1.33500004e+01]]
[[-0.00049889 -0.00425544  0.00719435 -0.05592349]
 [ 0.00059933  0.00940174 -0.00190471  0.02260327]
 [-0.00209711 -0.01820495  0.00206753 -0.01905488]
 [ 0.00070071 -0.01552394 -0.00452286  0.03729608]
 [-0.00159976 -0.00832242  0.00948555 -0.08239703]]


In [9]:
data_na.head(51)

Ticker,Date,AGG,DBC,VTI,^VIX,AGG_R,AGG_y,DBC_R,DBC_y,VTI_R,VTI_y,^VIX_R,^VIX_y
0,2006-02-07,56.262363,20.285255,44.219494,13.59,-0.000699,-0.000499,-0.028926,-0.004255,-0.009737,0.007194,0.042178,-0.055923
1,2006-02-08,56.234295,20.198933,44.537624,12.83,-0.000499,0.000599,-0.004255,0.009402,0.007194,-0.001905,-0.055923,0.022603
2,2006-02-09,56.267998,20.388838,44.452793,13.12,0.000599,-0.002097,0.009402,-0.018205,-0.001905,0.002068,0.022603,-0.019055
3,2006-02-10,56.149998,20.01766,44.544701,12.87,-0.002097,0.000701,-0.018205,-0.015524,0.002068,-0.004523,-0.019055,0.037296
4,2006-02-13,56.189342,19.706907,44.343231,13.35,0.000701,-0.0016,-0.015524,-0.008322,-0.004523,0.009486,0.037296,-0.082397
5,2006-02-14,56.099453,19.542898,44.763851,12.25,-0.0016,0.001101,-0.008322,-0.014576,0.009486,0.00379,-0.082397,0.004898
6,2006-02-15,56.161224,19.258041,44.93351,12.31,0.001101,-0.001,-0.014576,0.012102,0.00379,0.007788,0.004898,-0.067425
7,2006-02-16,56.105064,19.491108,45.283459,11.48,-0.001,0.004107,0.012102,0.014172,0.007788,-0.001171,-0.067425,0.046167
8,2006-02-17,56.335468,19.767332,45.230438,12.01,0.004107,-0.003691,0.014172,0.016594,-0.001171,-0.002814,0.046167,0.033306
9,2006-02-21,56.127522,20.09535,45.10318,12.41,-0.003691,0.004004,0.016594,-0.016323,-0.002814,0.007837,0.033306,-0.042707


In [10]:
mini_batch_size = 64

x_batches_tensor = torch.tensor(x_batches, dtype=torch.float32)
y_batches_tensor = torch.tensor(y_batches, dtype=torch.float32)

num_batches = x_batches_tensor.shape[0]

input_size = 8
hidden_size = 64
output_size = 4
model_lstm = ModelSharpe(input_size=input_size, hidden_size=hidden_size, output_size=output_size)


In [11]:
optimizer = torch.optim.Adam(model_lstm.parameters(), lr=0.001)
num_epochs = 100


for epoch in range(num_epochs):
    model_lstm.train()
    loss_epoch = 0
    
    for i in range(0, num_batches, mini_batch_size):
        x_mini_batch = x_batches_tensor[i:i+mini_batch_size]
        y_mini_batch = y_batches_tensor[i:i+mini_batch_size]
        
        optimizer.zero_grad()
        
        loss = model_lstm(x_mini_batch, y_mini_batch)
        loss_epoch += loss.item()
        loss.backward()
        optimizer.step()
    
    print(f"epoch [{epoch+1}/{num_epochs}], loss: {(loss_epoch/mini_batch_size)}")

#AJOUTER UNE VALIDATION (10%)

epoch [1/100], loss: -0.02079773171863053
epoch [2/100], loss: -0.030923958373023197
epoch [3/100], loss: -0.040932092582806945
epoch [4/100], loss: -0.04879126681043999
epoch [5/100], loss: -0.05226859386311844
epoch [6/100], loss: -0.05354498460656032
epoch [7/100], loss: -0.05142541437817272
epoch [8/100], loss: -0.0578384361906501
epoch [9/100], loss: -0.054337391036824556
epoch [10/100], loss: -0.058879014948615804
epoch [11/100], loss: -0.05960246729955543
epoch [12/100], loss: -0.05539454804966226
epoch [13/100], loss: -0.059767176368040964
epoch [14/100], loss: -0.06278908168314956
epoch [15/100], loss: -0.06323304109537276
epoch [16/100], loss: -0.06322519222158007
epoch [17/100], loss: -0.05862916700425558
epoch [18/100], loss: -0.06247627687116619
epoch [19/100], loss: -0.05918489710893482
epoch [20/100], loss: -0.06560766837537813
epoch [21/100], loss: -0.06514239532407373
epoch [22/100], loss: -0.06299903002218343
epoch [23/100], loss: -0.06844622158678249
epoch [24/100], 

In [12]:
def get_optimal_allocations_and_returns(model_used, dataframe, start_date, end_date, window_size=50):
    filtered_data = dataframe[(dataframe['Date'] >= start_date) & (dataframe['Date'] <= end_date)]

    x_data = []
    for i in range(len(filtered_data) - window_size):
        window_returns = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if col.endswith('_R')]].values
        window_prices = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if not (col.endswith('_R') or col.endswith('_y') or col == 'Date')]].values

        window_x = np.concatenate([window_returns, window_prices], axis=1)
        x_data.append(window_x)

    window_y = filtered_data.iloc[window_size:(len(filtered_data))][[col for col in dataframe.columns if col.endswith('_R')]].values

    x_tensor = torch.tensor(x_data, dtype=torch.float32)

    allocations = model_used.get_allocations(x_tensor)


    return allocations, window_y, filtered_data

#model_lstm = ModelSharpe(input_size, hidden_size, output_size)

allocations, window_y, filtered_data = get_optimal_allocations_and_returns(model_lstm, data_na, start_date="2011-01-01", end_date="2013-01-01")

  x_tensor = torch.tensor(x_data, dtype=torch.float32)


In [13]:
print(allocations.shape, window_y.shape)
rt = allocations*window_y

torch.Size([452, 4]) (452, 4)


  rt = allocations*window_y


In [14]:
rt_2 = torch.sum(rt, dim=-1)

In [15]:
rt_3 = rt_2+1
k=1
for i in range(len(rt_3)//2):
    k*=rt_3[i]
print(f'return cumulé 1 an (2011): {(k-1)*100}%', k)


return cumulé 1 an (2011): 19.404694022165092% tensor(1.1940, dtype=torch.float64)


In [16]:
rt_3 = rt_2+1
k=1
for i in range(len(rt_3)//2, len(rt_3)):
    k*=rt_3[i]
print(f'return cumulé 1 an (2012) : {(k-1)*100}%', k)


return cumulé 1 an (2012) : 27.753478093539673% tensor(1.2775, dtype=torch.float64)
