In [1]:
import torch
import sys
sys.path.append("../../")
from TCN.stock_price_prediction.model import TCN
import pandas as pd
import numpy as np
from torch.autograd import Variable

In [2]:
# Backtest configuration shared by the cells below.
# Number of most-recent rows of the price history reserved for the backtest window.
testing_period = 100
# Forecast horizon, in rows: the target price and the exit date are both this many rows ahead.
prediction_period = 5

In [3]:
# SECURITY: torch.load unpickles the file, and unpickling can execute arbitrary code.
# Only load checkpoints that come from a trusted source.
# NOTE(review): the repr below shows a whole TCN module was saved (not a state_dict), so the
# TCN class imported above must be importable at load time. Newer PyTorch releases default to
# weights_only=True, which may reject such a checkpoint - confirm against the installed version.
model = torch.load("VUSA.L.t_plus_5.pt")
# Put Dropout/BatchNorm layers into inference behaviour; as the cell's last expression this
# also displays the module repr.
model.eval()

TCN(
  (tcn): TemporalConvNet(
    (network): Sequential(
      (0): TemporalBlock(
        (conv1): Conv1d(10, 150, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp1): Chomp1d()
        (batch_norm_1): BatchNorm1d(150, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU()
        (dropout1): Dropout(p=0.3, inplace=False)
        (conv2): Conv1d(150, 150, kernel_size=(2,), stride=(1,), padding=(1,))
        (chomp2): Chomp1d()
        (batch_norm_2): BatchNorm1d(150, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU()
        (dropout2): Dropout(p=0.3, inplace=False)
        (net): Sequential(
          (0): Conv1d(10, 150, kernel_size=(2,), stride=(1,), padding=(1,))
          (1): BatchNorm1d(150, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): Chomp1d()
          (3): ReLU()
          (4): Dropout(p=0.3, inplace=False)
          (5): Conv1d(150, 150, kernel_size=(2,), stri

In [11]:
def get_data(fname, shift=None, verbose=True):
    """Load a price CSV and build the feature matrix and the shifted-price target.

    The CSV must contain "Date", "Close", "Volume" (all dropped) and "Adj Close"
    (renamed to "Price"). Every remaining column is kept as a feature, followed by
    six engineered columns: 5/20/50-span exponentially weighted means of the price,
    "macd" (5-span minus 20-span EWM), "macd_trigger" (3-span EWM of macd) and
    "macd_cd" (macd minus its trigger). With Open/High/Low/Price as the remaining
    columns this gives 10 feature rows.

    Args:
        fname: Path or file-like object accepted by ``pd.read_csv``.
        shift: Forecast horizon in rows. The target for row ``i`` is the price at
            row ``i + shift``. Defaults to the notebook-level ``prediction_period``
            so existing ``get_data(fname)`` calls behave as before.
        verbose: When true (the default, matching the original behaviour), print
            the last 10 rows of the engineered frame before incomplete rows are
            dropped.

    Returns:
        ``(features, targets)``: ``features`` is a 2-D array with one row per
        feature column and one column per retained CSV row; ``targets`` is the
        1-D array of shifted prices for the same rows. Rows without a target
        (the last ``shift`` rows) or with any other NaN are removed.
    """
    if shift is None:
        # Fall back to the notebook-wide horizon only when the caller gave none.
        shift = prediction_period

    df = pd.read_csv(fname)
    df = df.drop(columns=["Date", "Close", "Volume"])
    df = df.rename(columns={"Adj Close": "Price"})

    for span in (5, 20, 50):
        df[f"{span}dayEWM"] = df["Price"].ewm(span=span, adjust=False).mean()
    df["macd"] = df["5dayEWM"] - df["20dayEWM"]
    df["macd_trigger"] = df["macd"].ewm(span=3, adjust=False).mean()
    df["macd_cd"] = df["macd"] - df["macd_trigger"]

    # Name the target after the horizon actually used (still "Price T+5" for shift=5).
    target_col = f"Price T+{shift}"
    df[target_col] = df["Price"].shift(-1 * shift)
    if verbose:
        print(df.tail(10))

    df = df.dropna(axis=0)
    # The target was appended last, so after transposing it is the final row.
    as_array = df.to_numpy().transpose()
    return np.array(as_array[0:-1]), as_array[-1]

In [12]:
# Build the feature matrix and the shifted-price targets from the VUSA.L price history.
# get_data prints the last 10 rows of the engineered frame as a side effect.
features, targets = get_data(fname="VUSA.L.csv")

           Open       High        Low      Price    5dayEWM   20dayEWM  \
2790  65.167503  65.500000  64.962502  65.500000  65.150364  64.312827   
2791  65.792503  65.980003  65.184998  65.787498  65.362742  64.453272   
2792  65.822502  65.977501  65.410004  65.654999  65.460161  64.567722   
2793  65.417503  65.500000  65.007500  65.422501  65.447608  64.649130   
2794  65.452499  65.724998  65.302498  65.368752  65.421322  64.717665   
2795  65.150002  65.331902  65.004997  65.147499  65.330048  64.758602   
2796  65.169998  65.470001  65.000000  65.062500  65.240865  64.787544   
2797  64.967499  65.519997  64.820000  64.977501  65.153077  64.805636   
2798  82.995102  82.995102  82.995102  82.995102  71.100419  66.537966   
2799  65.000000  65.192497  64.599998  64.801247  69.000695  66.372564   

       50dayEWM      macd  macd_trigger   macd_cd  Price T+5  
2790  63.481945  0.837537      0.852590 -0.015054  65.147499  
2791  63.572359  0.909470      0.881030  0.028440  65.06250

In [13]:
features, targets

(array([[ 1.57720000e+01,  1.57700000e+01,  1.59600000e+01, ...,
          6.58225020e+01,  6.54175030e+01,  6.54524990e+01],
        [ 1.57720000e+01,  1.57800000e+01,  1.60700000e+01, ...,
          6.59775010e+01,  6.55000000e+01,  6.57249980e+01],
        [ 1.57720000e+01,  1.57700000e+01,  1.59500000e+01, ...,
          6.54100040e+01,  6.50075000e+01,  6.53024980e+01],
        ...,
        [ 0.00000000e+00, -7.61904762e-03,  4.99319728e-02, ...,
          8.92438648e-01,  7.98477831e-01,  7.03657169e-01],
        [ 0.00000000e+00, -3.80952381e-03,  2.30612245e-02, ...,
          8.86734354e-01,  8.42606092e-01,  7.73131631e-01],
        [ 0.00000000e+00, -3.80952381e-03,  2.68707483e-02, ...,
          5.70429396e-03, -4.41282613e-02, -6.94744615e-02]]),
 array([16.110001, 16.08    , 15.92    , ..., 64.977501, 82.995102,
        64.801247]))

In [14]:
def build_backtest_cases(data_arrs, target_arr, num_test_cases, seq_length=300):
    """Build one fixed-length input window per backtest case from the tail of the data.

    The cases cover the last ``num_test_cases`` positions of the series. For the
    case ending at position ``end_idx`` the input window is
    ``data_arrs[:, end_idx - seq_length + 1 : end_idx + 1]`` (it includes
    ``end_idx`` itself) and the label is ``target_arr[end_idx]``.

    Args:
        data_arrs: 2-D array-like, one row per feature and one column per time step.
            The feature count is taken from the data rather than fixed at 10.
        target_arr: Sequence indexable by time step, aligned with ``data_arrs`` columns.
        num_test_cases: Number of trailing positions to turn into cases.
        seq_length: Number of time steps in each input window.

    Returns:
        ``(X, y)``: float tensors of shape ``(num_test_cases, n_features, seq_length)``
        and ``(num_test_cases, 1)``.

    Raises:
        ValueError: If ``data_arrs`` is not 2-D, a size argument is negative, or there
            is not enough history before the first case to fill a full window.
    """
    data = np.asarray(data_arrs)
    if data.ndim != 2:
        raise ValueError(
            f"data_arrs must be 2-D (features x time steps), got {data.ndim} dimension(s)"
        )
    num_features, data_arr_length = data.shape

    if num_test_cases < 0 or seq_length < 0:
        raise ValueError("num_test_cases and seq_length must be non-negative")

    first_end_idx = data_arr_length - num_test_cases
    first_start_idx = first_end_idx - seq_length + 1
    # Fail early: a negative start would otherwise wrap around and yield a short window.
    if num_test_cases > 0 and (first_end_idx < 0 or first_start_idx < 0):
        raise ValueError(
            f"incorrect seq length, expected {seq_length} but only "
            f"{max(first_end_idx + 1, 0)} time steps are available for the first case"
        )

    X = torch.zeros([num_test_cases, num_features, seq_length])
    y = torch.zeros([num_test_cases, 1])
    for backtest_idx, end_idx in enumerate(range(first_end_idx, data_arr_length)):
        window = data[:, end_idx - seq_length + 1:end_idx + 1]
        # Assignment into the float tensor casts the window to X's dtype.
        X[backtest_idx] = torch.from_numpy(np.ascontiguousarray(window))
        y[backtest_idx] = target_arr[end_idx]
    # torch.autograd.Variable is a deprecated no-op wrapper; plain tensors are returned.
    return X, y

In [15]:
# get_data already dropped the rows that have no target yet, so the backtest uses
# testing_period - prediction_period trailing cases.
X_test, Y_test = build_backtest_cases(
    features,
    targets,
    num_test_cases=testing_period - prediction_period,
)

In [16]:
# Forward pass only: disable gradient tracking so no autograd graph is built for the backtest.
with torch.no_grad():
    preds = model(X_test)

In [17]:
# Concatenate along dim 1: first Y_test (targets), then preds (model outputs).
targets_preds = torch.cat([Y_test, preds], dim=1)

In [18]:
# Tabulate targets vs. predictions; columns get the default integer labels 0 and 1,
# which a later cell renames to "Target" and "Prediction".
df = pd.DataFrame(targets_preds.numpy())

In [19]:
df

Unnamed: 0,0,1
0,65.042503,61.876194
1,64.897499,61.944725
2,64.714996,62.384888
3,64.427498,63.705887
4,64.082497,64.676697
...,...,...
90,65.147499,64.910858
91,65.062500,65.234573
92,64.977501,65.257515
93,82.995102,64.981575


In [20]:
# Reload the untouched price history and attach, for every row, the date found
# prediction_period rows later (the last prediction_period rows get NaN).
raw_df = pd.read_csv("VUSA.L.csv").assign(
    **{"Exit Date": lambda frame: frame["Date"].shift(-prediction_period)}
)

In [21]:
# Keep the backtest window: the last testing_period rows minus the final prediction_period
# rows. NOTE: this overwrites raw_df, so re-running the cell without re-running the load
# cell slices an already-sliced frame.
raw_df = raw_df.tail(testing_period).head(testing_period-prediction_period)

In [22]:
# Renumber rows from 0 so they line up with the prediction frame; the previous row
# labels are kept as an "index" column (removed again in a later cell).
raw_df = raw_df.reset_index(drop=False)

In [23]:
# Side-by-side join on the row labels: raw_df was renumbered from 0 and df carries the
# default 0..n-1 labels, so row i of the prices sits next to case i of the backtest.
combined = pd.concat([raw_df, df], axis=1)
combined

Unnamed: 0,index,Date,Open,High,Low,Close,Adj Close,Volume,Exit Date,0,1
0,2700,2023-01-30,62.032501,62.257500,61.599998,62.007500,62.007500,146758,2023-02-06,65.042503,61.876194
1,2701,2023-01-31,61.799999,62.357498,61.514999,62.264999,62.264999,231521,2023-02-07,64.897499,61.944725
2,2702,2023-02-01,62.605000,62.682499,62.340000,62.619999,62.619999,96815,2023-02-08,64.714996,62.384888
3,2703,2023-02-02,63.314999,64.677498,63.246899,64.677498,64.677498,181949,2023-02-09,64.427498,63.705887
4,2704,2023-02-03,64.500000,65.617500,64.084999,65.617500,65.617500,156031,2023-02-10,64.082497,64.676697
...,...,...,...,...,...,...,...,...,...,...,...
90,2790,2023-06-12,65.167503,65.500000,64.962502,65.500000,65.500000,120638,2023-06-19,65.147499,64.910858
91,2791,2023-06-13,65.792503,65.980003,65.184998,65.787498,65.787498,156849,2023-06-20,65.062500,65.234573
92,2792,2023-06-14,65.822502,65.977501,65.410004,65.654999,65.654999,128148,2023-06-21,64.977501,65.257515
93,2793,2023-06-15,65.417503,65.500000,65.007500,65.422501,65.422501,419414,2023-06-22,82.995102,64.981575


In [24]:
# Remove the helper column left behind by reset_index().
combined = combined.drop("index", axis=1)

In [25]:
# Column 0 came from Y_test and column 1 from preds (the torch.cat order).
combined = combined.rename({0: "Target", 1: "Prediction"}, axis="columns")

In [28]:
combined.head(10)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Exit Date,Target,Prediction
0,2023-01-30,62.032501,62.2575,61.599998,62.0075,62.0075,146758,2023-02-06,65.042503,61.876194
1,2023-01-31,61.799999,62.357498,61.514999,62.264999,62.264999,231521,2023-02-07,64.897499,61.944725
2,2023-02-01,62.605,62.682499,62.34,62.619999,62.619999,96815,2023-02-08,64.714996,62.384888
3,2023-02-02,63.314999,64.677498,63.246899,64.677498,64.677498,181949,2023-02-09,64.427498,63.705887
4,2023-02-03,64.5,65.6175,64.084999,65.6175,65.6175,156031,2023-02-10,64.082497,64.676697
5,2023-02-06,64.845001,65.077499,64.169998,65.042503,65.042503,156588,2023-02-13,64.522499,64.619698
6,2023-02-07,64.919998,65.217499,64.717499,64.897499,64.897499,143520,2023-02-14,64.0,64.660622
7,2023-02-08,65.214996,65.370003,64.690002,64.714996,64.714996,185379,2023-02-15,65.147499,64.620888
8,2023-02-09,64.860001,65.057503,64.322502,64.427498,64.427498,146879,2023-02-16,65.092499,64.343346
9,2023-02-10,63.872501,64.202499,63.5,64.082497,64.082497,155442,2023-02-17,64.1325,63.815964


In [29]:
# Predicted price move relative to the entry-day adjusted close.
combined["Expected Diff"] = combined["Prediction"].sub(combined["Adj Close"])

In [30]:
# Realised price move relative to the entry-day adjusted close.
combined["Actual Diff"] = combined["Target"].sub(combined["Adj Close"])

combined.head()

In [31]:
# True when predicted and realised moves share a sign; a zero move on either side
# gives a zero product and therefore counts as False.
combined["Same Direction"] = (combined["Expected Diff"] * combined["Actual Diff"]) > 0

In [32]:
# Predicted return as a fraction of the entry price (a fraction, not multiplied by 100).
combined["Expected Profit %"] = (
    combined["Prediction"].sub(combined["Adj Close"]).div(combined["Adj Close"])
)

In [33]:
# Realised return as a fraction of the entry price (a fraction, not multiplied by 100).
combined["Actual Profit %"] = (
    combined["Target"].sub(combined["Adj Close"]).div(combined["Adj Close"])
)

In [35]:
combined.head()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Exit Date,Target,Prediction,Expected Diff,Actual Diff,Same Direction,Expected Profit %,Actual Profit %
0,2023-01-30,62.032501,62.2575,61.599998,62.0075,62.0075,146758,2023-02-06,65.042503,61.876194,-0.131306,3.035003,False,-0.002118,0.048946
1,2023-01-31,61.799999,62.357498,61.514999,62.264999,62.264999,231521,2023-02-07,64.897499,61.944725,-0.320274,2.6325,False,-0.005144,0.042279
2,2023-02-01,62.605,62.682499,62.34,62.619999,62.619999,96815,2023-02-08,64.714996,62.384888,-0.235111,2.094997,False,-0.003755,0.033456
3,2023-02-02,63.314999,64.677498,63.246899,64.677498,64.677498,181949,2023-02-09,64.427498,63.705887,-0.971611,-0.25,True,-0.015022,-0.003865
4,2023-02-03,64.5,65.6175,64.084999,65.6175,65.6175,156031,2023-02-10,64.082497,64.676697,-0.940803,-1.535003,True,-0.014338,-0.023393


In [36]:
# Share of backtest rows where the predicted direction matched / missed the realised one.
# Computed from the boolean column directly: value_counts() orders its result by frequency,
# so unpacking it would hand win_prob the False share whenever misses outnumber hits, and
# would raise if only one of True/False occurs.
win_prob = combined["Same Direction"].mean()
loss_prob = (~combined["Same Direction"]).mean()

In [37]:
win_prob

0.5368421052631579

In [38]:
# One plain dict per backtest row, so per-row fields can be added in the loop below.
rows = combined.to_dict(orient="records")

In [41]:
# NOTE(review): hard-coded error estimate, presumably the model's MSE copied by hand -
# confirm its source. Its square root is used as the typical prediction error in price units.
mse = 0.975
mean_error = mse ** 0.5
for row in rows:
    # Only long trades are sized; a predicted fall gets a zero stake.
    predicted_long = row["Prediction"] - row["Adj Close"] > 0
    if predicted_long:
        # Pessimistic outcome: the prediction overshoots by one mean error.
        worst_price_if_wrong = row["Prediction"] - mean_error
        perc_change_if_loss = (worst_price_if_wrong - row["Adj Close"]) / row["Adj Close"]
        row["Expected Loss %"] = perc_change_if_loss
        if perc_change_if_loss >= 0:
            # Even the pessimistic outcome is not a loss: stake everything. Using >= (not >)
            # keeps an exact zero out of the formula below, where it would divide by zero.
            kelly = 1
        else:
            # Kelly fraction for a partial-loss bet: p / loss_fraction - q / gain_fraction.
            # The result is not clamped, so it may be negative or exceed 1.
            kelly = (win_prob / abs(perc_change_if_loss)) - (loss_prob / row["Expected Profit %"])
    else:
        kelly = 0
    row["Kelly Criterion"] = kelly

    

In [42]:
# Back to a frame for display; rows without an "Expected Loss %" entry (non-long rows)
# show NaN in that column.
final_df = pd.DataFrame(rows)

In [45]:
final_df.tail()

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume,Exit Date,Target,Prediction,Expected Diff,Actual Diff,Same Direction,Expected Profit %,Actual Profit %,Kelly Criterion,Expected Loss %
90,2023-06-12,65.167503,65.5,64.962502,65.5,65.5,120638,2023-06-19,65.147499,64.910858,-0.589142,-0.352501,True,-0.008995,-0.005382,0.0,
91,2023-06-13,65.792503,65.980003,65.184998,65.787498,65.787498,156849,2023-06-20,65.0625,65.234573,-0.552925,-0.724998,True,-0.008405,-0.01102,0.0,
92,2023-06-14,65.822502,65.977501,65.410004,65.654999,65.654999,128148,2023-06-21,64.977501,65.257515,-0.397484,-0.677498,True,-0.006054,-0.010319,0.0,
93,2023-06-15,65.417503,65.5,65.0075,65.422501,65.422501,419414,2023-06-22,82.995102,64.981575,-0.440926,17.572601,False,-0.00674,0.268602,0.0,
94,2023-06-16,65.452499,65.724998,65.302498,65.368752,65.368752,119021,2023-06-23,64.801247,65.000061,-0.368691,-0.567505,True,-0.00564,-0.008682,0.0,


In [44]:
def get_capital(cash, purchase_q, row):
    """Return cash plus every open position in purchase_q valued at row["Adj Close"]."""
    return sum([p["quantity"] * row["Adj Close"] for p in purchase_q]) + cash

# Day-by-day simulation over the backtest rows, starting from this cash balance.
cash = 1000
purchase_q = [] # open positions, oldest first; each is a dict with "quantity", "entry", "Exit Date"
for day in rows:
    # Close the oldest position when today is its exit date, at today's adjusted close.
    if purchase_q and purchase_q[0]["Exit Date"] == day["Date"]:
        to_sell = purchase_q.pop(0)
        sale_price = day["Adj Close"]
        cash += to_sell["quantity"] * sale_price
    kelly = day["Kelly Criterion"]
    # NOTE(review): the Kelly value is used only as a buy / no-buy signal; the whole cash
    # balance is invested rather than a Kelly-sized fraction - confirm this is intended.
    if kelly > 0 and cash:
        quantity = cash/day["Adj Close"]
        # "entry" is recorded but not read anywhere in this cell.
        purchase_q.append({
            "quantity": quantity,
            "entry": day["Adj Close"],
            "Exit Date": day["Exit Date"]})
        # All cash is now invested, so no further buy can happen until a position is sold.
        cash =0
    # One log line per day; capital marks open positions to today's adjusted close.
    print(F"End of {day['Date']} - {cash=} {purchase_q=}, curr_price {day['Adj Close']} capital {get_capital(cash, purchase_q, day)}")
    

End of 2023-01-30 - cash=1000 purchase_q=[], curr_price 62.0075 capital 1000
End of 2023-01-31 - cash=1000 purchase_q=[], curr_price 62.264999 capital 1000
End of 2023-02-01 - cash=1000 purchase_q=[], curr_price 62.619999 capital 1000
End of 2023-02-02 - cash=1000 purchase_q=[], curr_price 64.677498 capital 1000
End of 2023-02-03 - cash=1000 purchase_q=[], curr_price 65.6175 capital 1000
End of 2023-02-06 - cash=1000 purchase_q=[], curr_price 65.042503 capital 1000
End of 2023-02-07 - cash=1000 purchase_q=[], curr_price 64.897499 capital 1000
End of 2023-02-08 - cash=1000 purchase_q=[], curr_price 64.714996 capital 1000
End of 2023-02-09 - cash=1000 purchase_q=[], curr_price 64.427498 capital 1000
End of 2023-02-10 - cash=1000 purchase_q=[], curr_price 64.082497 capital 1000
End of 2023-02-13 - cash=1000 purchase_q=[], curr_price 64.522499 capital 1000
End of 2023-02-14 - cash=1000 purchase_q=[], curr_price 64.0 capital 1000
End of 2023-02-15 - cash=1000 purchase_q=[], curr_price 65.14