In [None]:
import sys
import os
sys.path.append(os.path.abspath("D:\\Python\\commodity_futures_price"))

import math

import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

# %load_ext autoreload
# %autoreload 2
%reload_ext autoreload
from model.savvy_sh import SavvySh

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Read the Files of Commodities

In [None]:
# Load one DataFrame per commodity CSV, attach a forward log-return column,
# drop unusable rows, and finally align every commodity on a common date set.
df_dict = {}
valid_dates = pd.date_range(start="2011-01-01", end="2024-12-31", freq="B")
# Read each file in below folder
path = "../data/raw/"
# os.listdir() returns entries in arbitrary order; sorting keeps the insertion
# order of df_dict (and therefore every downstream loop) reproducible.
files = sorted(os.listdir(path))
for file in files:
    if not file.endswith(".csv"):
        continue
    df = pd.read_csv(os.path.join(path, file), index_col=0, parse_dates=True)

    # Forward log return of the first data column (presumably the price --
    # confirm against the CSV schema): Return[t] = log(P[t+1] / P[t]).
    price = df.iloc[:, 0]
    forward_return = np.log(price.shift(-1) / price)
    # A next price of exactly 0 gives log(0) = -inf, which dropna() would keep
    # and the positivity filter below (current row only) would not catch.
    # Convert infinities to NaN so those rows are removed with the rest.
    df["Return"] = forward_return.replace([np.inf, -np.inf], np.nan)
    df = df.dropna()

    # Discard rows whose first column or Volatility is not strictly positive.
    non_positive_LA = df[(df[df.columns[0]] <= 0) | (df["Volatility"] <= 0)]
    df = df.drop(index=non_positive_LA.index)

    df_dict[os.path.splitext(file)[0]] = df
    valid_dates = valid_dates.intersection(df.index)

# Keep only the dates that survived in every commodity.
# NOTE(review): "Return" was computed against the next row of each raw file,
# before this alignment; if alignment removes rows, the stored Return may span
# a different interval than consecutive rows of the aligned frame -- confirm.
for name, df in df_dict.items():
    df_dict[name] = df[df.index.isin(valid_dates)]

In [None]:
# Rolling-window configuration, measured in rows of the date-aligned data.
dates_len = len(valid_dates)
train_size = 3 * 12
test_size = 1

# Each (start, end) pair holds the inclusive row positions of one window:
# train_size training rows followed by test_size test rows. Successive
# windows are shifted forward by test_size rows.
window_span = train_size + test_size
start_end_list = [
    (first_row, first_row + window_span - 1)
    for first_row in range(0, dates_len - window_span, test_size)
]

# print(start_end_list)

[(0, 36), (1, 37), (2, 38), (3, 39), (4, 40), (5, 41), (6, 42), (7, 43), (8, 44), (9, 45), (10, 46), (11, 47), (12, 48), (13, 49), (14, 50), (15, 51), (16, 52), (17, 53), (18, 54), (19, 55), (20, 56), (21, 57), (22, 58), (23, 59), (24, 60), (25, 61), (26, 62), (27, 63), (28, 64), (29, 65), (30, 66), (31, 67), (32, 68), (33, 69), (34, 70), (35, 71), (36, 72), (37, 73), (38, 74), (39, 75), (40, 76), (41, 77), (42, 78), (43, 79), (44, 80), (45, 81), (46, 82), (47, 83), (48, 84), (49, 85), (50, 86), (51, 87), (52, 88), (53, 89), (54, 90), (55, 91), (56, 92), (57, 93), (58, 94), (59, 95), (60, 96), (61, 97), (62, 98), (63, 99), (64, 100), (65, 101), (66, 102), (67, 103), (68, 104), (69, 105), (70, 106), (71, 107), (72, 108), (73, 109), (74, 110), (75, 111), (76, 112), (77, 113), (78, 114), (79, 115), (80, 116), (81, 117), (82, 118), (83, 119), (84, 120), (85, 121), (86, 122), (87, 123), (88, 124), (89, 125), (90, 126), (91, 127), (92, 128), (93, 129), (94, 130), (95, 131), (96, 132), (97, 1

### Long/short portfolio weighted by predicted returns

### OLS

In [154]:
# Walk-forward long/short backtest driven by per-commodity OLS forecasts.
# For every window: fit on the training rows, forecast the "Return" of the
# first test row, weight each commodity by its share of the total positive
# (long) or total negative (short) forecast, and record the realised
# simple return of the first column over the following row.
portfolio_return_list = []
for start, end in start_end_list:
    test_row = end - test_size + 1  # first row after the training slice

    predicted_returns = {}
    for name, df in df_dict.items():
        # "Basis_Momentum" is excluded from the regressors along with the target.
        features = df.drop(columns=["Basis_Momentum", "Return"])
        train_X = features.iloc[start:test_row]
        train_y = df["Return"].iloc[start:test_row]
        test_X = features.iloc[test_row:test_row + 1]

        # Standardise with statistics from the training rows only.
        ols_scaler = StandardScaler()
        train_X = ols_scaler.fit_transform(train_X)
        test_X = ols_scaler.transform(test_X)

        ols = LinearRegression()
        ols.fit(train_X, train_y)
        ols_predict = ols.predict(test_X)
        predicted_returns[name] = ols_predict[0]

    long_total = sum(r for r in predicted_returns.values() if r > 0)
    short_total = sum(abs(r) for r in predicted_returns.values() if r < 0)

    portfolio_return = 0
    for name, returns in predicted_returns.items():
        if returns > 0:
            weight = returns / long_total
        elif returns == 0:
            # A zero forecast takes no position. Previously it fell into the
            # short branch and was divided by short_total, which is 0 when no
            # commodity is shorted, turning the whole period's return into NaN.
            weight = 0.0
        else:
            weight = -abs(returns) / short_total

        prices = df_dict[name].iloc[:, 0]
        realized_return = prices.iloc[test_row + 1] / prices.iloc[test_row] - 1
        portfolio_return += weight * realized_return
    portfolio_return_list.append(portfolio_return)

In [155]:
# Fraction of test periods with a strictly positive portfolio return.
winning_periods = [r for r in portfolio_return_list if r > 0]
winning_rate = len(winning_periods) / len(portfolio_return_list)
print(f"The winning rate is: {winning_rate}")

# Compound the per-period returns; the last element is the final growth
# multiple of one unit of initial capital.
period_growth = 1 + np.array(portfolio_return_list)
reward = np.cumprod(period_growth).tolist()
print(f"The final reward is: {reward[-1]}")

The winning rate is: 0.6637931034482759
The final reward is: 11.224324313880913


In [156]:
# Dispersion and annualised Sharpe ratio of the per-period portfolio returns.
# np.std is called with its defaults, and no risk-free rate is subtracted.
portfolio_return_std = np.std(portfolio_return_list)
if portfolio_return_std != 0:
    # sqrt(12) annualisation -- presumably one period is one month; confirm.
    sharpe_ratio = math.sqrt(12) * np.mean(portfolio_return_list) / portfolio_return_std
else:
    # Constant return series: report 0 instead of dividing by zero.
    sharpe_ratio = 0
print(portfolio_return_std)
print(sharpe_ratio)

0.06510489305879334
1.2279287186831163


### Stein

In [157]:
# Walk-forward long/short backtest driven by the "St" forecast of SavvySh
# (model_class="Multiplicative"). For every window: fit on the training rows,
# forecast the "Return" of the first test row, weight each commodity by its
# share of the total positive (long) or total negative (short) forecast, and
# record the realised simple return of the first column over the following row.
portfolio_return_list = []
for start, end in start_end_list:
    test_row = end - test_size + 1  # first row after the training slice

    predicted_returns = {}
    for name, df in df_dict.items():
        # "Basis_Momentum" is excluded from the regressors along with the target.
        features = df.drop(columns=["Basis_Momentum", "Return"])
        train_X = features.iloc[start:test_row]
        train_y = df["Return"].iloc[start:test_row]
        test_X = features.iloc[test_row:test_row + 1]

        # Standardise with statistics from the training rows only.
        stein_scaler = StandardScaler()
        train_X = stein_scaler.fit_transform(train_X)
        test_X = stein_scaler.transform(test_X)

        model_mult = SavvySh(model_class="Multiplicative")
        model_mult.fit(train_X, train_y)
        # predict() is indexed by estimator name; "St" is presumably the
        # Stein-type estimate -- confirm against model.savvy_sh.
        stein_predict = model_mult.predict(test_X)["St"]
        predicted_returns[name] = stein_predict[0]

    long_total = sum(r for r in predicted_returns.values() if r > 0)
    short_total = sum(abs(r) for r in predicted_returns.values() if r < 0)

    portfolio_return = 0
    for name, returns in predicted_returns.items():
        if returns > 0:
            weight = returns / long_total
        elif returns == 0:
            # A zero forecast takes no position. Previously it fell into the
            # short branch and was divided by short_total, which is 0 when no
            # commodity is shorted, turning the whole period's return into NaN.
            weight = 0.0
        else:
            weight = -abs(returns) / short_total

        prices = df_dict[name].iloc[:, 0]
        realized_return = prices.iloc[test_row + 1] / prices.iloc[test_row] - 1
        portfolio_return += weight * realized_return
    portfolio_return_list.append(portfolio_return)

In [158]:
# Fraction of test periods with a strictly positive portfolio return.
winning_periods = [r for r in portfolio_return_list if r > 0]
winning_rate = len(winning_periods) / len(portfolio_return_list)
print(f"The winning rate is: {winning_rate}")

# Compound the per-period returns; the last element is the final growth
# multiple of one unit of initial capital.
period_growth = 1 + np.array(portfolio_return_list)
reward = np.cumprod(period_growth).tolist()
print(f"The final reward is: {reward[-1]}")

The winning rate is: 0.6379310344827587
The final reward is: 10.170642741008706


In [159]:
# Dispersion and annualised Sharpe ratio of the per-period portfolio returns.
# np.std is called with its defaults, and no risk-free rate is subtracted.
portfolio_return_std = np.std(portfolio_return_list)
if portfolio_return_std != 0:
    # sqrt(12) annualisation -- presumably one period is one month; confirm.
    sharpe_ratio = math.sqrt(12) * np.mean(portfolio_return_list) / portfolio_return_std
else:
    # Constant return series: report 0 instead of dividing by zero.
    sharpe_ratio = 0
print(portfolio_return_std)
print(sharpe_ratio)

0.06694085039979347
1.1549454208147905


### DSh

In [151]:
# Walk-forward long/short backtest driven by the "DSh" forecast of SavvySh
# (model_class="Multiplicative"). For every window: fit on the training rows,
# forecast the "Return" of the first test row, weight each commodity by its
# share of the total positive (long) or total negative (short) forecast, and
# record the realised simple return of the first column over the following row.
portfolio_return_list = []
for start, end in start_end_list:
    test_row = end - test_size + 1  # first row after the training slice

    predicted_returns = {}
    for name, df in df_dict.items():
        # "Basis_Momentum" is excluded from the regressors along with the target.
        features = df.drop(columns=["Basis_Momentum", "Return"])
        train_X = features.iloc[start:test_row]
        train_y = df["Return"].iloc[start:test_row]
        # Only the test features are needed; the unused test target was dropped.
        test_X = features.iloc[test_row:test_row + 1]

        # Standardise with statistics from the training rows only.
        DSh_scaler = StandardScaler()
        train_X = DSh_scaler.fit_transform(train_X)
        test_X = DSh_scaler.transform(test_X)

        model_mult = SavvySh(model_class="Multiplicative")
        model_mult.fit(train_X, train_y)
        # predict() is indexed by estimator name; the meaning of "DSh" is
        # defined in model.savvy_sh -- confirm there.
        DSh_predict = model_mult.predict(test_X)["DSh"]
        predicted_returns[name] = DSh_predict[0]

    long_total = sum(r for r in predicted_returns.values() if r > 0)
    short_total = sum(abs(r) for r in predicted_returns.values() if r < 0)

    portfolio_return = 0
    for name, returns in predicted_returns.items():
        if returns > 0:
            weight = returns / long_total
        elif returns == 0:
            # A zero forecast takes no position. Previously it fell into the
            # short branch and was divided by short_total, which is 0 when no
            # commodity is shorted, turning the whole period's return into NaN.
            weight = 0.0
        else:
            weight = -abs(returns) / short_total

        prices = df_dict[name].iloc[:, 0]
        realized_return = prices.iloc[test_row + 1] / prices.iloc[test_row] - 1
        portfolio_return += weight * realized_return
    portfolio_return_list.append(portfolio_return)

In [147]:
# Fraction of test periods with a strictly positive portfolio return.
winning_periods = [r for r in portfolio_return_list if r > 0]
winning_rate = len(winning_periods) / len(portfolio_return_list)
print(f"The winning rate is: {winning_rate}")

# Compound the per-period returns; the last element is the final growth
# multiple of one unit of initial capital.
period_growth = 1 + np.array(portfolio_return_list)
reward = np.cumprod(period_growth).tolist()
print(f"The final reward is: {reward[-1]}")

The winning rate is: 0.7155172413793104
The final reward is: 16.321033996735345


In [None]:
# Dispersion and annualised Sharpe ratio of the per-period portfolio returns.
# np.std is called with its defaults, and no risk-free rate is subtracted.
portfolio_return_std = np.std(portfolio_return_list)
if portfolio_return_std != 0:
    # sqrt(12) annualisation -- presumably one period is one month; confirm.
    sharpe_ratio = math.sqrt(12) * np.mean(portfolio_return_list) / portfolio_return_std
else:
    # Constant return series: report 0 instead of dividing by zero.
    sharpe_ratio = 0
print(portfolio_return_std)
print(sharpe_ratio)

0.06831533685081274
1.3462922334493437
