In [106]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime, timedelta

# Data

In [107]:
tickers = [
    "FUTU",
    "NVDA",
    "ASTS",
    "TMF",
    "LABU",
    "GDXU",
    "TSLA",
]
len(tickers)

7

In [108]:
start_date = "2024-01-01"
end_date = "2024-11-11"

In [109]:
compute_date = datetime.strptime(start_date, '%Y-%m-%d') - timedelta(days=50)
compute_date = compute_date.strftime('%Y-%m-%d')
compute_date

'2023-11-12'

In [110]:
data = yf.download(tickers, start=compute_date, end=end_date, group_by='ticker', auto_adjust=False)

if len(tickers) == 1:
    # Create a MultiIndex for the columns
    multi_index_columns = pd.MultiIndex.from_tuples([(tickers[0], col) for col in data.columns])

    # Assign the new MultiIndex to the DataFrame
    data.columns = multi_index_columns

[*********************100%***********************]  7 of 7 completed


In [111]:
data

Ticker,ASTS,ASTS,ASTS,ASTS,ASTS,ASTS,FUTU,FUTU,FUTU,FUTU,...,TMF,TMF,TMF,TMF,NVDA,NVDA,NVDA,NVDA,NVDA,NVDA
Price,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2023-11-13,4.350000,4.470000,3.955000,4.000000,4.000000,3638500,59.000000,61.540001,58.619999,60.570000,...,44.599998,45.900002,43.724010,3258880,48.320000,49.116001,48.098999,48.619999,48.597473,384136000
2023-11-14,3.990000,3.990000,3.550000,3.770000,3.770000,4378300,61.680000,61.896000,59.560001,60.730000,...,48.299999,49.099998,46.772308,6318430,49.680000,49.834000,49.040001,49.655998,49.632996,416954000
2023-11-15,4.110000,4.940000,4.100000,4.660000,4.660000,8149700,62.700001,63.770000,60.400002,62.150002,...,46.700001,47.099998,44.867119,6035340,49.935001,49.959999,48.200001,48.888000,48.865349,475497000
2023-11-16,4.650000,4.745000,4.410000,4.570000,4.570000,2118800,59.840000,61.590000,58.465000,59.529999,...,48.099998,48.700001,46.391273,4061280,48.679001,49.525002,48.330002,49.480000,49.457081,339756000
2023-11-17,4.590000,4.890000,4.590000,4.830000,4.830000,2301100,60.000000,60.799999,59.119999,59.570000,...,48.700001,49.400002,47.058083,4248820,49.523998,49.716999,49.007000,49.298000,49.275166,325205000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-04,23.570000,24.299999,23.270000,23.940001,23.940001,6580900,96.290001,101.959999,96.169998,96.900002,...,47.139999,48.049999,47.130688,14393200,137.210007,138.960007,135.570007,136.050003,136.028122,187528200
2024-11-05,23.950001,24.360001,23.129999,24.129999,24.129999,6504200,101.678001,103.599998,98.300003,99.080002,...,47.049999,48.799999,47.866337,14765900,137.449997,140.369995,137.330002,139.910004,139.887497,160537400
2024-11-06,23.889999,24.299999,22.350000,22.520000,22.520000,10198300,95.879997,97.205002,92.367996,96.654999,...,43.799999,44.889999,44.031147,29791000,142.960007,146.490005,141.960007,145.610001,145.586578,242043900
2024-11-07,22.879999,23.840000,20.200001,21.455000,21.455000,21948000,104.000000,111.879997,100.800003,108.660004,...,45.459999,46.439999,45.551491,14778400,146.389999,148.929993,146.169998,148.880005,148.856064,207323300


In [112]:
returns_list = []

# Loop through each stock ticker and calculate returns
for stock in tickers:
    # Access the 'Adj Close' prices using xs method
    adjusted_close = data[stock]['Adj Close']
    
    # Calculate percentage change
    returns_series = adjusted_close.pct_change()
    
    # Append the Series to the list
    returns_list.append(returns_series.rename(stock))  # Rename for clarity

# Concatenate all return Series into a single DataFrame
returns = pd.concat(returns_list, axis=1)

In [113]:
returns.reset_index(inplace=True)
start_index = returns[returns['Date'] >= start_date].index[0]
returns = returns[start_index-27:].set_index('Date')

In [114]:
dates = returns.index

In [115]:
adj_close = data.xs("Adj Close", level=1, axis=1)
adj_close = adj_close.reindex(columns=returns.columns)
volume = data.xs("Volume", level=1, axis=1)
volume = volume.reindex(columns=returns.columns)

In [116]:
columns = pd.MultiIndex.from_product([tickers, ['Adj Close', 'Returns', 'Volume', 'OBV', "MA", "RSI", "RSI2", "MA20", "Std", "DevMidBand", "EMA_12", "EMA_26", "MACD", "signal"]])
df = pd.DataFrame(index=dates, columns=columns)
df.columns = columns
for stock in tickers:
    df[(stock, "Adj Close")] = adj_close[stock]
    df[(stock, "Returns")] = returns[stock]
    df[(stock, "Volume")] = volume[stock]
df = df.reset_index()

# Indicators

In [117]:
def EMA(w, price, last):
    a = 2/(1+w)
    return a*price + (1-a)*last
def MA(prices):
    return sum(prices) / 28
def MACD(long, short):
    return sum(long) - sum(short)
def RSI(returns):
    avg_gain = returns[returns > 0].mean()
    avg_loss = -returns[returns < 0].mean()
    return 100 * (1 - 1/(1+avg_gain/avg_loss)) if avg_loss != 0 else 100
def RSI2(diff):
    avg_gain = diff[diff > 0].mean()
    avg_loss = -diff[diff < 0].mean()
    return 100 * (1 - 1/(1+avg_gain/avg_loss)) if avg_loss != 0 else 100
def min_max_scaling(data):
    min_val = data.min()
    max_val = data.max()
    scaled_data = (data - min_val) / (max_val - min_val)
    return scaled_data

In [118]:
import math


for stock in tickers:
    df[(stock, "MA")] = df[(stock, "Adj Close")].rolling(window=28).apply(MA)
    df[(stock, "Diff")] = df[(stock, "Adj Close")].diff()
    max_vol_scale = math.floor(math.log10(df[(stock, "Volume")].max()))
    df[(stock, "Volume")] = df[(stock, "Volume")].apply(lambda x: x / (10 ** max_vol_scale))
    df[(stock, "OBV")] = (np.sign(df[(stock, "Returns")]) * df[(stock, "Volume")]).cumsum()
    df[(stock, "OBV")] = min_max_scaling(df[(stock, "OBV")])

    df[(stock, "MA20")] = df[(stock, "Adj Close")].rolling(window=20).mean()
    df[(stock, "Std")] = df[(stock, "Adj Close")].rolling(window=20).std()
    df[(stock, "DevMidBand")] = (df[(stock, "Adj Close")] - df[(stock, "MA20")]) / df[(stock, "Std")]

    df.loc[0, (stock, "EMA_12")] = df.loc[0, (stock, "Adj Close")]
    df.loc[0, (stock, "EMA_26")] = df.loc[0, (stock, "Adj Close")]
    for i in range(1, len(df)):
        df.loc[i, (stock, "EMA_12")] = EMA(12, df.loc[i, (stock, "Adj Close")], df.loc[i-1, (stock, "EMA_12")])
        df.loc[i, (stock, "EMA_26")] = EMA(26, df.loc[i, (stock, "Adj Close")], df.loc[i-1, (stock, "EMA_26")])

    df[(stock, "MACD")] = df[(stock, "EMA_12")] - df[(stock, "EMA_26")]
    df.loc[0, (stock, "signal")] = df.loc[0, (stock, "MACD")]
    for i in range(1, len(df)):
        df.loc[i, (stock, "signal")] = EMA(9, df.loc[i, (stock, "MACD")], df.loc[i-1, (stock, "signal")])
    df[(stock, "RSI")] = df[(stock, "Returns")].rolling(14).apply(RSI)
    df[(stock, "RSI2")] = df[(stock, "Diff")].diff().rolling(14).apply(RSI2)

# Tensor

In [119]:
close_data = df[27:len(df) - (len(df)-27)%10]
close_data = close_data.reset_index(drop=True)
to_drop = ["EMA_12", "EMA_26", "Returns", "Diff", "Volume", "RSI2", "MA20"]
close_data = close_data.drop(columns=[(stock, label) for stock in tickers for label in to_drop])
close_data

Unnamed: 0_level_0,Date,FUTU,FUTU,FUTU,FUTU,FUTU,FUTU,FUTU,FUTU,NVDA,...,GDXU,GDXU,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA
Unnamed: 0_level_1,Unnamed: 1_level_1,Adj Close,OBV,MA,RSI,Std,DevMidBand,MACD,signal,Adj Close,...,MACD,signal,Adj Close,OBV,MA,RSI,Std,DevMidBand,MACD,signal
0,2024-01-02,50.303867,0.072144,52.595196,40.371927,1.181169,-0.977238,-1.04785,-1.327622,48.149918,...,1.040794,1.187696,248.419998,0.465957,245.323570,57.344897,7.642147,0.105010,3.077725,3.14828
1,2024-01-03,50.948666,0.079493,52.308387,41.807990,1.186544,-0.405919,-1.02687,-1.267472,47.551144,...,0.608738,1.071905,238.449997,0.415735,245.225356,51.788164,7.428168,-1.253472,1.856546,2.889933
2,2024-01-04,51.163593,0.086861,52.063798,35.673467,1.187066,-0.205342,-0.981585,-1.210294,47.979984,...,0.267285,0.910981,237.929993,0.373168,245.358213,56.941019,7.480690,-1.308904,0.837142,2.479375
3,2024-01-05,50.323406,0.080517,51.811881,34.496737,1.204359,-0.880547,-1.001942,-1.168624,49.078564,...,-0.016843,0.725416,237.490005,0.334852,245.431070,53.729987,7.601983,-1.333533,-0.006174,1.982265
4,2024-01-08,49.756767,0.067435,51.609509,33.351527,1.219088,-1.325497,-1.051676,-1.145234,52.233379,...,-0.263988,0.527535,240.449997,0.370176,245.587141,54.428062,7.692834,-0.918777,-0.430697,1.499672
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,2024-10-24,86.109589,0.679678,91.325531,32.315422,13.113956,-1.125777,2.639651,6.315007,140.387421,...,3.413765,2.401866,260.480011,0.855707,238.148929,71.085204,17.072037,1.447338,-1.78641,-1.891875
206,2024-10-25,90.447311,0.699228,92.449388,41.977421,12.730542,-0.845425,2.253974,5.502801,141.517227,...,3.076133,2.536719,269.190002,0.922739,239.624644,72.108776,17.831237,1.849703,1.492639,-1.214972
207,2024-10-28,94.648254,0.720477,93.753286,50.420609,12.694759,-0.521622,2.261238,4.854488,140.497406,...,2.696779,2.568731,262.510010,0.878088,240.885715,73.852229,17.898231,1.467101,3.511808,-0.269616
208,2024-10-29,95.908539,0.747746,94.812594,45.646266,12.715830,-0.385651,2.341695,4.351929,141.227280,...,2.580228,2.571031,259.519989,0.844690,241.442858,74.160204,17.997119,1.288733,4.815237,0.747355


In [120]:
from collections import defaultdict

F = defaultdict(dict) # 4 * n * (m * n)
n = len(tickers)
m = 10
T = len(close_data) // m

In [121]:
corr = [{} for _ in range(T)]
indicators = ["Adj Close", "MA", "RSI", "MACD"]
for t in range(0, T): # t
    V = close_data[t*m:(t+1)*m] # m days closing data
    lag_t = max(0, t - 5)
    COR = close_data[lag_t*m:(t+1)*m]

    for indicator in indicators: # the 4 dimensions
        corr[t][indicator] = COR.filter([(stock, indicator) for stock in tickers], axis=1).corr() # 60 days correlation matrix
        for stock in tickers: # n assets
            F[t][(stock, indicator)] = V[(stock, indicator)].values.reshape(m,1).dot(corr[t][indicator][(stock, indicator)].values.reshape(1,n)) # m * n tensor for indicator i & stock n

In [122]:
F[0][("FUTU", "MACD")].shape

(10, 7)

# CONV3D

In [123]:
import torch
import torch.nn as nn

In [124]:
f = []
for t in range(0, T):
    f.append([])
    for indicator in indicators:
        a = []
        for stock in tickers:
            a.append(F[t][(stock, indicator)])
        f[-1].append(a)
f = list(map(torch.Tensor, np.array(f)))
f[0].shape

TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, complex64, complex128, int64, int32, int16, int8, uint64, uint32, uint16, uint8, and bool.

In [None]:
# Define the 3D Convolutional Neural Network layer
class Conv3DNet(nn.Module):
    def __init__(self):
        super(Conv3DNet, self).__init__()
        self.conv3d = nn.Conv3d(in_channels=4, out_channels=32, kernel_size=(1, 3, 1))
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv3d(x)
        x = self.relu(x)
        return x

In [None]:
# Instantiate the network
net = Conv3DNet()

# Pass the input tensor f[t] through the network
F_prime = []
for t in range(0, T):
    F_prime.append(net(f[t]))

# Don't know why the shape is 32 * n * (m-2) * n here but the paper says 32 * n * (m-4) * n
'''Kernel size 1,3,1 should be m-2?'''
F_prime[0].shape

torch.Size([32, 7, 8, 7])