In [1]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime, timedelta

# Data

In [2]:
# Tickers to download. The same list (in this order) drives every per-asset loop in the notebook.
assets = [
    "FUTU",
    "NVDA",
    "ASTS",
    "TMF",
    "LABU",
    "GDXU",
    "TSLA",
    "PDD",
]
len(assets)  # displayed as the cell output

8

In [3]:
# Analysis window as 'YYYY-MM-DD' strings.
start_date = "2023-01-01"  # first in-range date; the download itself starts 50 days earlier
end_date = "2024-11-11"  # passed as `end` to yf.download

In [4]:
# Shift the download start 50 calendar days before start_date
# (presumably a warm-up period for the rolling computations - confirm).
lookback_start = datetime.strptime(start_date, '%Y-%m-%d') - timedelta(days=50)
compute_date = lookback_start.strftime('%Y-%m-%d')
compute_date

'2022-11-12'

In [5]:
# Download auto-adjusted price history for every ticker, grouped by ticker so the
# columns are addressed as data[ticker][field].
data = yf.download(assets, start=compute_date, end=end_date, group_by='ticker', auto_adjust=True)

# Depending on the yfinance version, a single-ticker download comes back either with
# flat columns or already with a (ticker, field) MultiIndex. Wrap only when the
# columns are still flat; wrapping an existing MultiIndex would nest the tuples and
# break data[ticker][field] below.
if not isinstance(data.columns, pd.MultiIndex):
    data.columns = pd.MultiIndex.from_tuples([(assets[0], col) for col in data.columns])

[*********************100%***********************]  8 of 8 completed


In [6]:
# Inspect the downloaded frame; columns are a (ticker, field) MultiIndex.
data

Ticker,ASTS,ASTS,ASTS,ASTS,ASTS,PDD,PDD,PDD,PDD,PDD,...,NVDA,NVDA,NVDA,NVDA,NVDA,TMF,TMF,TMF,TMF,TMF
Price,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume,...,Open,High,Low,Close,Volume,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2022-11-14,10.200000,10.210000,7.910000,7.990000,9011400,67.589996,67.949997,63.000000,65.389999,13099500,...,16.202066,16.523750,15.906356,16.278990,527852000,69.298035,69.485324,67.612404,68.736160,683700
2022-11-15,8.120000,8.200000,7.340000,7.505000,3331700,69.940002,72.739998,69.879997,70.970001,15846800,...,16.705572,16.981300,16.365906,16.649628,545876000,70.234496,72.201060,69.766266,72.013771,2060490
2022-11-16,7.460000,7.460000,6.680000,6.700000,2079800,68.300003,69.309998,67.529999,67.559998,8174300,...,16.146122,16.345925,15.857406,15.894369,649547000,73.699384,76.789705,73.137510,76.696060,1989790
2022-11-17,6.550000,7.020000,6.550000,6.760000,1522700,66.410004,72.629997,66.150002,70.550003,13932100,...,15.699562,16.250021,15.519738,15.661599,710894000,73.980325,74.916785,73.043865,74.448555,1402210
2022-11-18,6.830000,7.060000,6.560000,6.600000,1096100,71.180000,71.980003,68.400002,70.150002,8061200,...,15.950313,15.986279,15.105144,15.393861,492072000,74.823139,75.572303,72.575634,72.856575,874000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-04,23.570000,24.299999,23.270000,23.940001,6580900,121.000000,123.290001,120.059998,122.320000,5301300,...,137.200558,138.950437,135.560671,136.040634,187528200,47.529810,47.954448,46.552154,47.450806,14393200
2024-11-05,23.950001,24.360001,23.129999,24.129999,6504200,124.639999,125.900002,121.110001,122.150002,6005100,...,137.440523,140.360320,137.320536,139.900360,160537400,47.154547,48.458086,46.463275,48.191452,14765900
2024-11-06,23.889999,24.299999,22.350000,22.520000,10198300,116.750000,122.129997,115.860001,120.570000,9902400,...,142.950149,146.479905,141.950218,145.599960,242043900,43.362433,45.041234,43.253804,44.330212,29791000
2024-11-07,22.879999,23.840000,20.200001,21.455000,21948000,125.160004,127.269997,123.500000,125.870003,10081500,...,146.379917,148.919735,146.159931,148.869751,207323300,45.051108,46.315148,44.893103,45.860882,14778400


In [7]:
# One Series of day-over-day percentage changes of the close per ticker,
# each named after its ticker, in `assets` order.
returns_list = [
    data[stock]['Close'].pct_change().rename(stock)
    for stock in assets
]

# Side-by-side: one column per ticker, indexed by date.
returns = pd.concat(returns_list, axis=1)

In [8]:
# Round-trip the index through a column so the frame ends up indexed by 'Date'.
returns = returns.reset_index().set_index('Date')

In [9]:
# Date index shared by the frames built below.
dates = returns.index

In [10]:
# Close price of every ticker (second column level == 'Close'),
# with columns put in the same order as `returns`.
adj_close = (
    data
    .xs('Close', level=1, axis=1)
    .reindex(columns=returns.columns)
)

In [11]:
# Empty (ticker, field) frame on the shared date index; RSI is filled in later.
columns = pd.MultiIndex.from_product([assets, ['Close', 'Returns', "RSI"]])
df = pd.DataFrame(index=dates, columns=columns)

# Copy close prices and returns in, ticker by ticker.
for ticker in assets:
    for field, source in (('Close', adj_close), ("Returns", returns)):
        df[(ticker, field)] = source[ticker]

# Turn the date index into an ordinary 'Date' column.
df = df.reset_index()

In [13]:
# Inspect the combined frame; the RSI columns are still empty at this point.
df

Unnamed: 0_level_0,Date,FUTU,FUTU,FUTU,NVDA,NVDA,NVDA,ASTS,ASTS,ASTS,...,LABU,GDXU,GDXU,GDXU,TSLA,TSLA,TSLA,PDD,PDD,PDD
Unnamed: 0_level_1,Unnamed: 1_level_1,Close,Returns,RSI,Close,Returns,RSI,Close,Returns,RSI,...,RSI,Close,Returns,RSI,Close,Returns,RSI,Close,Returns,RSI
0,2022-11-14,49.009998,,,16.278997,,,7.990000,,,...,,41.169998,,,190.949997,,,65.389999,,
1,2022-11-15,53.860001,0.098959,,16.649628,0.022767,,7.505000,-0.060701,,...,,39.570000,-0.038863,,194.419998,0.018172,,70.970001,0.085334,
2,2022-11-16,50.349998,-0.065169,,15.894370,-0.045362,,6.700000,-0.107262,,...,,38.380001,-0.030073,,186.919998,-0.038576,,67.559998,-0.048049,
3,2022-11-17,52.259998,0.037934,,15.661600,-0.014645,,6.760000,0.008955,,...,,36.610001,-0.046118,,183.169998,-0.020062,,70.550003,0.044257,
4,2022-11-18,50.619999,-0.031382,,15.393864,-0.017095,,6.600000,-0.023669,,...,,37.689999,0.029500,,180.190002,-0.016269,,70.150002,-0.005670,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,2024-11-04,96.900002,0.019142,,136.040634,0.004801,,23.940001,0.003774,,...,,46.549999,-0.001073,,242.839996,-0.024661,,122.320000,0.014599,
496,2024-11-05,99.080002,0.022497,,139.900360,0.028372,,24.129999,0.007936,,...,,47.080002,0.011386,,251.440002,0.035414,,122.150002,-0.001390,
497,2024-11-06,96.654999,-0.024475,,145.599960,0.040740,,22.520000,-0.066722,,...,,42.099998,-0.105777,,288.529999,0.147510,,120.570000,-0.012935,
498,2024-11-07,108.660004,0.124205,,148.869751,0.022457,,21.455000,-0.047291,,...,,45.049999,0.070071,,296.910004,0.029044,,125.870003,0.043958,


# Indicators

In [27]:
def EMA(w, price, last):
    """One step of an exponential moving average.

    w     -- window length; the smoothing factor is 2 / (1 + w)
    price -- newest observation
    last  -- previous EMA value
    Returns the updated EMA value.
    """
    smoothing = 2/(1+w)
    keep = 1-smoothing
    return smoothing*price + keep*last
def MA(prices, window=None):
    """Simple moving average of `prices`.

    prices -- sized iterable of numbers (list, ndarray, Series, ...)
    window -- divisor to use; defaults to len(prices). The divisor used to be
              hard-coded to 28, which was only correct for 28-element inputs
              (those still give the same result).
    Returns sum(prices) / window, or NaN for an empty input.
    """
    if window is None:
        window = len(prices)
    if window == 0:
        return float('nan')
    return sum(prices) / window
def MACD(long, short):
    """Return sum(long) - sum(short).

    NOTE(review): conventional MACD is the short-period EMA minus the
    long-period EMA; this helper subtracts plain sums in the opposite
    order - confirm this is intended.
    """
    long_total = sum(long)
    short_total = sum(short)
    return long_total - short_total
def RSI(returns):
    """Relative Strength Index of a window of returns, in [0, 100].

    returns -- window of returns (Series, ndarray or list)

    RS is average gain / average loss, where both averages run over the whole
    window, so the window length cancels and RS = sum(gains) / sum(losses).
    Averaging only over the up (or down) observations would skew the result
    whenever the window holds different numbers of up and down moves.

    Edge cases: a window without gains returns 0, a window without losses
    returns 100 (the no-gain check comes first, so an all-flat window gives 0).
    """
    values = np.asarray(returns, dtype=float)
    gain_total = values[values > 0].sum()
    loss_total = -values[values < 0].sum()
    if gain_total == 0:
        return 0
    if loss_total == 0:
        return 100
    # 100 * (1 - 1 / (1 + RS)) simplifies to 100 * gains / (gains + losses).
    return 100 * gain_total / (gain_total + loss_total)

In [28]:
# Fill each ticker's RSI column from its returns using a rolling 2-row window.
for stock in assets:
    stock_returns = df[(stock, "Returns")]
    df[(stock, "RSI")] = stock_returns.rolling(2).apply(RSI)

# Tensor

In [42]:
# Drop the first row (its return is NaN, so it has no indicator values) and renumber.
close_data = df[1:len(df)].reset_index(drop=True)

# Remove the columns that are not used as features.
to_drop = ["Returns"]
close_data = close_data.drop(columns=[(stock, label) for stock in assets for label in to_drop])

# Positional index of the first trading date inside the requested range.
start_index = close_data[close_data['Date'] >= start_date].index[0]

# Keep one extra day before the first in-range trading date. The start is clamped
# at 0: if the first row is already in range, start_index - 1 would be -1 and the
# slice would silently keep only the last row.
close_data = close_data[max(start_index - 1, 0):].reset_index(drop=True)

In [144]:
from scipy.optimize import minimize

In [153]:
def numpy_rolling_cov(returns, window_size):
    """Population covariance matrices over a sliding window of rows.

    returns     -- 2-D array, one row per day and one column per stock
    window_size -- number of consecutive rows per window
    Returns an array of shape (n_days - window_size + 1, n_stocks, n_stocks);
    entry t is the covariance (ddof=0) of rows t .. t + window_size - 1.
    """
    n_days, n_stocks = returns.shape[0], returns.shape[1]
    n_windows = n_days - window_size + 1
    rolling_cov = np.zeros((n_windows, n_stocks, n_stocks))

    for start in range(n_windows):
        stop = start + window_size
        # np.cov expects variables in rows, hence the transpose.
        rolling_cov[start] = np.cov(returns[start:stop].T, ddof=0)

    return rolling_cov

def calculate_tangent_portfolio(expected_returns, covariance_matrix, risk_free_rate):
    """Long-only, fully invested portfolio with the highest Sharpe ratio.

    expected_returns  -- 1-D array-like of expected returns, one per asset
    covariance_matrix -- 2-D array-like covariance of the same assets
    risk_free_rate    -- risk-free return over the same horizon as the inputs

    Returns a 1-D array of non-negative weights that sum to 1.

    If SLSQP does not report success a RuntimeWarning is issued and the best
    point found is still returned. If that point is unusable (NaN or all-zero
    weights, e.g. because the inputs contain NaN) equal weights are returned.
    """
    import warnings

    expected_returns = np.asarray(expected_returns, dtype=float)
    covariance_matrix = np.asarray(covariance_matrix, dtype=float)
    n = len(expected_returns)

    def objective_function(weights):
        portfolio_return = np.dot(weights, expected_returns)
        portfolio_variance = np.dot(weights, np.dot(covariance_matrix, weights))
        # Floor the variance so a degenerate (zero-risk) portfolio cannot cause a
        # division by zero or the square root of a tiny negative number.
        portfolio_risk = np.sqrt(max(portfolio_variance, 1e-18))
        return -(portfolio_return - risk_free_rate) / portfolio_risk

    # Fully invested: weights sum to 1.
    constraints = [{'type': 'eq', 'fun': lambda x: np.sum(x) - 1}]
    # No short-selling, expressed as box bounds (handled natively by SLSQP).
    bounds = [(0.0, 1.0)] * n

    # Start from equal weights.
    initial_guess = np.ones(n) / n

    result = minimize(objective_function, initial_guess, method='SLSQP',
                      bounds=bounds, constraints=constraints)
    if not result.success:
        warnings.warn(
            f"Tangent portfolio optimisation did not converge: {result.message}",
            RuntimeWarning,
        )

    # Remove tiny negative weights left by floating-point error and renormalise.
    weights = np.clip(result.x, 0.0, None)
    total = weights.sum()
    if not np.isfinite(total) or total <= 0:
        warnings.warn(
            "Optimiser returned unusable weights; falling back to equal weights.",
            RuntimeWarning,
        )
        return initial_guess
    return weights / total

In [154]:
# Every date in close_data as a string, minus the extra leading day.
date_strings = close_data["Date"].dt.date.astype(str).tolist()
trading_dates = date_strings[1:]

In [None]:
# 10-row rolling covariance of the returns; keep the trailing len(trading_dates) + 1 windows.
n_keep = len(trading_dates) + 1
all_windows = numpy_rolling_cov(returns.to_numpy(), 10)
rolling_cov = all_windows[-n_keep:]

468

In [None]:
# 10-row rolling mean of the returns; keep the trailing len(trading_dates) + 1 rows.
n_keep = len(trading_dates) + 1
rolling_exp_returns = returns.rolling(10).mean()[-n_keep:]

In [157]:
def read_treasury_rates(file_path):
    """Load a CSV of dated rates and return a {year: value} dict.

    The file must have a 'date' column and a ' value' column (note the leading
    space in the header). When several rows fall in the same year, the row
    that appears last in the file wins.
    """
    table = pd.read_csv(file_path)
    # Parse the dates and keep only the calendar year of each row.
    years = pd.to_datetime(table['date']).dt.year
    return dict(zip(years, table[' value']))

In [158]:
file_path = './env/30y-treasury-rate.csv'
treasury_rate = read_treasury_rates(file_path)

TRADING_DAYS_PER_YEAR = 252

# One tangent portfolio per rolling window.
tangent_portfolio = []
for i in range(len(rolling_cov)):
    cov = rolling_cov[i]
    window_stats = rolling_exp_returns.iloc[i]
    exp_r = window_stats.values
    year = window_stats.name.year
    # exp_r is a mean of daily returns and cov a covariance of daily returns, so the
    # yearly rate (divided by 100, i.e. treated as a percentage) is converted to a
    # per-day rate. It was previously compounded over 10 days, which compared a
    # 10-day risk-free return against one-day expected returns in the Sharpe ratio.
    rate = (1 + treasury_rate[year]/100.0)**(1/TRADING_DAYS_PER_YEAR) - 1
    tangent_portfolio.append(calculate_tangent_portfolio(exp_r, cov, rate))

In [None]:
from collections import defaultdict

n = len(assets)             # number of assets
m = 10                      # rows (trading days) per period
T = len(close_data) // m    # number of complete m-row periods
F = defaultdict(dict)       # F[t][(stock, indicator)] -> m * n block; 4 * n blocks per period

In [None]:
indicators = ['Close', "MA", "RSI", "MACD"]

# Fail fast with a readable message when an indicator column has not been computed;
# otherwise the loop dies with a bare KeyError deep inside the first period.
missing = [
    (stock, indicator)
    for indicator in indicators
    for stock in assets
    if (stock, indicator) not in close_data.columns
]
if missing:
    raise KeyError(
        f"close_data is missing {len(missing)} indicator column(s), e.g. {missing[:3]}; "
        "compute them before building the tensor."
    )

corr = [{} for _ in range(T)]
for t in range(T):
    V = close_data[t*m:(t+1)*m]            # the m rows of period t
    lag_t = max(0, t - 5)
    COR = close_data[lag_t*m:(t+1)*m]      # up to 6 periods (60 rows) ending with period t

    for indicator in indicators:           # the 4 dimensions
        indicator_columns = [(stock, indicator) for stock in assets]
        # n * n correlation between the assets' series for this indicator.
        corr[t][indicator] = COR.filter(indicator_columns, axis=1).corr()
        for stock in assets:               # n assets
            key = (stock, indicator)
            # Outer product of the stock's m values with its row of the correlation
            # matrix: an m * n block for this indicator and stock.
            F[t][key] = np.outer(V[key].values, corr[t][indicator][key].values)

In [None]:
# Sanity check: each block is built as an (m, 1) by (1, n) product, so this should be (m, n).
F[0][("FUTU", "MACD")].shape

# CONV3D

In [None]:
import torch
import torch.nn as nn

In [None]:
# Stack the blocks as period -> indicator -> stock, then convert each period's
# array into its own torch tensor.
stacked = np.array([
    [
        [F[t][(stock, indicator)] for stock in assets]
        for indicator in indicators
    ]
    for t in range(0, T)
])
f = list(map(torch.Tensor, stacked))
f[0].shape

In [None]:
# Define the 3D Convolutional Neural Network layer
class Conv3DNet(nn.Module):
    """A single 3-D convolution followed by ReLU.

    in_channels  -- number of input channels (default 4)
    out_channels -- number of output channels (default 32)
    kernel_size  -- Conv3d kernel size (default (1, 3, 1))

    The defaults reproduce the previously hard-coded layer. With Conv3d's
    default stride and padding, each spatial axis of length L comes out with
    length L - k + 1 for a kernel of length k along that axis.
    """

    def __init__(self, in_channels=4, out_channels=32, kernel_size=(1, 3, 1)):
        super().__init__()
        self.conv3d = nn.Conv3d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the convolution and then ReLU to `x`; returns the activated feature map."""
        return self.relu(self.conv3d(x))

In [None]:
# Build the network
net = Conv3DNet()

# Run every period's input tensor f[t] through the network
F_prime = [net(f[t]) for t in range(0, T)]

# Observed shape is 32 * n * (m-2) * n, whereas the paper reports 32 * n * (m-4) * n.
# With nn.Conv3d's default stride (1) and padding (0), a kernel of length 3 along the
# m axis gives m - 3 + 1 = m - 2, so (1, 3, 1) cannot produce m - 4. A length-5 kernel
# or two stacked length-3 convolutions would.
# TODO: check which of these the paper actually uses.
'''Kernel size 1,3,1 should be m-2?'''
F_prime[0].shape