In [1]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime, timedelta

# Data

In [2]:
tickers = [
    "FUTU",
    "NVDA",
    "ASTS",
    "TMF",
    "LABU",
    "GDXU",
    "TSLA",
]
len(tickers)

7

In [3]:
start_date = "2024-01-01"
end_date = "2024-11-11"

In [4]:
compute_date = datetime.strptime(start_date, '%Y-%m-%d') - timedelta(days=50)
compute_date = compute_date.strftime('%Y-%m-%d')
compute_date

'2023-11-12'

In [5]:
data = yf.download(tickers, start=compute_date, end=end_date, group_by='ticker')

if len(tickers) == 1:
    # Create a MultiIndex for the columns
    multi_index_columns = pd.MultiIndex.from_tuples([(tickers[0], col) for col in data.columns])

    # Assign the new MultiIndex to the DataFrame
    data.columns = multi_index_columns

[*********************100%***********************]  7 of 7 completed


In [6]:
data

Ticker,GDXU,GDXU,GDXU,GDXU,GDXU,GDXU,FUTU,FUTU,FUTU,FUTU,...,TMF,TMF,TMF,TMF,LABU,LABU,LABU,LABU,LABU,LABU
Price,Open,High,Low,Close,Adj Close,Volume,Open,High,Low,Close,...,Low,Close,Adj Close,Volume,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2023-11-13,23.000000,23.600000,22.620001,22.680000,22.680000,634500,59.000000,61.540001,58.619999,60.570000,...,44.599998,45.900002,43.464039,3258880,53.599998,55.799999,50.000000,55.799999,55.538677,2289135
2023-11-14,24.520000,26.480000,24.340000,26.010000,26.010000,1112100,61.680000,61.896000,59.560001,60.730000,...,48.299999,49.099998,46.494213,6318430,62.000000,65.000000,61.599998,64.599998,64.297470,3337970
2023-11-15,26.049999,26.400000,25.000000,25.520000,25.520000,724600,62.700001,63.770000,60.400002,62.150002,...,46.700001,47.099998,44.600353,6035340,64.800003,71.300003,64.099998,65.400002,65.093727,3272395
2023-11-16,25.900000,27.959999,25.510000,26.809999,26.809999,1804500,59.840000,61.590000,58.465000,59.529999,...,48.099998,48.700001,46.115444,4061280,65.400002,66.400002,60.400002,62.599998,62.306835,2730240
2023-11-17,26.959999,27.426001,26.070000,26.350000,26.350000,725300,60.000000,60.799999,59.119999,59.570000,...,48.700001,49.400002,46.778294,4248820,64.160004,69.800003,63.400002,69.199997,68.875923,2773015
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-11-04,47.939999,48.393002,46.029999,46.549999,46.549999,592900,96.290001,101.959999,96.169998,96.900002,...,47.139999,48.049999,46.850464,14393200,125.620003,130.779999,122.349998,127.550003,127.550003,842600
2024-11-05,48.230000,48.779999,46.349998,47.080002,47.080002,628400,101.678001,103.599998,98.300003,99.080002,...,47.049999,48.799999,47.581741,14765900,125.099998,132.800003,123.519997,132.779999,132.779999,878100
2024-11-06,40.369999,43.305000,38.299999,42.099998,42.099998,1776300,95.879997,97.205002,92.367996,96.654999,...,43.799999,44.889999,43.769352,29791000,140.669998,143.369995,133.300003,140.000000,140.000000,1617900
2024-11-07,43.910000,45.770000,42.171001,45.049999,45.049999,1433100,104.000000,111.879997,100.800003,108.660004,...,45.459999,46.439999,45.280655,14778400,142.000000,145.910004,140.330002,142.369995,142.369995,1046300


In [7]:
returns_list = []

# Loop through each stock ticker and calculate returns
for stock in tickers:
    # Access the 'Adj Close' prices using xs method
    adjusted_close = data[stock]['Adj Close']
    
    # Calculate percentage change
    returns_series = adjusted_close.pct_change()
    
    # Append the Series to the list
    returns_list.append(returns_series.rename(stock))  # Rename for clarity

# Concatenate all return Series into a single DataFrame
returns = pd.concat(returns_list, axis=1)

In [8]:
returns.reset_index(inplace=True)
start_index = returns[returns['Date'] >= start_date].index[0]
returns = returns[start_index-27:].set_index('Date')

In [9]:
dates = returns.index

In [10]:
adj_close = data.xs("Adj Close", level=1, axis=1)
adj_close = adj_close.reindex(columns=returns.columns)

In [11]:
columns = pd.MultiIndex.from_product([tickers, ['Adj Close', 'Returns', "MA", "RSI", "RSI2", "EMA_12", "EMA_26", "MACD"]])
df = pd.DataFrame(index=dates, columns=columns)
df.columns = columns
for stock in tickers:
    df[(stock, "Adj Close")] = adj_close[stock]
    df[(stock, "Returns")] = returns[stock]
df = df.reset_index()

In [12]:
df

Unnamed: 0_level_0,Date,FUTU,FUTU,FUTU,FUTU,FUTU,FUTU,FUTU,FUTU,NVDA,...,GDXU,GDXU,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA,TSLA
Unnamed: 0_level_1,Unnamed: 1_level_1,Adj Close,Returns,MA,RSI,RSI2,EMA_12,EMA_26,MACD,Adj Close,...,EMA_26,MACD,Adj Close,Returns,MA,RSI,RSI2,EMA_12,EMA_26,MACD
0,2023-11-21,60.369999,-0.034852,,,,,,,49.925453,...,,,241.199997,0.023769,,,,,,
1,2023-11-22,59.380001,-0.016399,,,,,,,48.697906,...,,,234.210007,-0.028980,,,,,,
2,2023-11-24,58.730000,-0.010946,,,,,,,47.758259,...,,,235.449997,0.005294,,,,,,
3,2023-11-27,56.730000,-0.034054,,,,,,,48.224083,...,,,236.080002,0.002676,,,,,,
4,2023-11-28,55.860001,-0.015336,,,,,,,47.803238,...,,,246.720001,0.045069,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,2024-11-04,96.900002,0.019142,,,,,,,136.040634,...,,,242.839996,-0.024661,,,,,,
240,2024-11-05,99.080002,0.022497,,,,,,,139.900360,...,,,251.440002,0.035414,,,,,,
241,2024-11-06,96.654999,-0.024475,,,,,,,145.599960,...,,,288.529999,0.147510,,,,,,
242,2024-11-07,108.660004,0.124205,,,,,,,148.869751,...,,,296.910004,0.029044,,,,,,


# Indicators

In [13]:
def EMA(w, price, last):
    a = 2/(1+w)
    return a*price + (1-a)*last
def MA(prices):
    return sum(prices) / 28
def MACD(long, short):
    return sum(long) - sum(short)
def RSI(returns):
    avg_gain = returns[returns > 0].mean()
    avg_loss = -returns[returns < 0].mean()
    return 100 * (1 - 1/(1+avg_gain/avg_loss)) if avg_loss != 0 else 100
def RSI2(diff):
    avg_gain = diff[diff > 0].mean()
    avg_loss = -diff[diff < 0].mean()
    return 100 * (1 - 1/(1+avg_gain/avg_loss)) if avg_loss != 0 else 100

In [14]:
for stock in tickers:
    df[(stock, "MA")] = df[(stock, "Adj Close")].rolling(window=28).apply(MA)
    df[(stock, "Diff")] = df[(stock, "Adj Close")].diff()

    df.loc[0, (stock, "EMA_12")] = df.loc[0, (stock, "Adj Close")]
    df.loc[0, (stock, "EMA_26")] = df.loc[0, (stock, "Adj Close")]
    for i in range(1, len(df)):
        df.loc[i, (stock, "EMA_12")] = EMA(12, df.loc[i, (stock, "Adj Close")], df.loc[i-1, (stock, "EMA_12")])
        df.loc[i, (stock, "EMA_26")] = EMA(26, df.loc[i, (stock, "Adj Close")], df.loc[i-1, (stock, "EMA_26")])

    df[(stock, "MACD")] = df[(stock, "EMA_26")].rolling(window=9).sum() - df[(stock, "EMA_12")].rolling(window=9).sum()
    df[(stock, "RSI")] = df[(stock, "Returns")].rolling(14).apply(RSI)
    df[(stock, "RSI2")] = df[(stock, "Diff")].diff().rolling(14).apply(RSI2)

# Tensor

In [15]:
close_data = df[27:len(df) - (len(df)-27)%10]
close_data = close_data.reset_index(drop=True)
to_drop = ["EMA_12", "EMA_26", "Returns", "Diff"]
close_data = close_data.drop(columns=[(stock, label) for stock in tickers for label in to_drop])
close_data

Unnamed: 0_level_0,Date,FUTU,FUTU,FUTU,FUTU,FUTU,NVDA,NVDA,NVDA,NVDA,...,GDXU,GDXU,GDXU,GDXU,GDXU,TSLA,TSLA,TSLA,TSLA,TSLA
Unnamed: 0_level_1,Unnamed: 1_level_1,Adj Close,MA,RSI,RSI2,MACD,Adj Close,MA,RSI,RSI2,...,Adj Close,MA,RSI,RSI2,MACD,Adj Close,MA,RSI,RSI2,MACD
0,2024-01-02,51.490002,53.835357,40.371929,58.724370,12.754085,48.154346,48.091693,39.815513,62.463439,...,32.020000,32.544286,51.816172,55.987558,-11.171534,248.419998,245.323570,57.344897,37.489402,-31.064168
1,2024-01-03,52.150002,53.541786,41.807990,57.840759,12.222410,47.555515,48.007053,38.387926,57.152034,...,29.280001,32.581071,51.033419,56.401074,-10.796480,238.449997,245.225356,51.788164,39.779834,-29.979870
2,2024-01-04,52.369999,53.291429,35.673466,57.664689,11.555686,47.984398,47.981570,38.380634,57.134235,...,29.330000,32.619286,38.290818,49.655410,-10.077260,237.929993,245.358213,56.941019,41.705069,-28.053996
3,2024-01-05,51.509998,53.033572,34.496732,60.169629,11.021293,49.083080,48.028885,43.017111,53.522436,...,29.160000,32.630714,39.424353,54.010493,-8.935453,237.490005,245.431070,53.729987,37.472975,-24.868494
4,2024-01-08,50.930000,52.826429,33.351526,56.360350,10.522480,52.238186,48.172246,53.636202,59.575388,...,28.850000,32.583929,41.045738,58.535597,-7.392608,240.449997,245.587141,54.428062,35.941240,-21.124884
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,2024-10-24,88.139999,93.478928,32.315429,54.154869,-64.759029,140.400330,128.006538,58.737145,48.966107,...,55.590000,50.161785,54.182648,45.413490,-18.129128,260.480011,238.148929,71.085204,63.092330,23.866945
206,2024-10-25,92.580002,94.629285,41.977427,51.788100,-54.175470,141.530243,128.933260,56.909343,48.573857,...,53.349998,50.406428,54.103346,42.277337,-20.757139,269.190002,239.624644,72.108776,55.532279,22.787640
207,2024-10-28,96.879997,95.963928,50.420608,63.230318,-45.319571,140.510315,129.902836,56.018645,51.865120,...,52.349998,50.679285,52.801542,41.997083,-22.804531,262.510010,240.885715,73.852229,54.038737,18.256929
208,2024-10-29,98.169998,97.048214,45.646263,58.918008,-38.097661,141.240265,130.737778,49.968700,50.887148,...,55.000000,50.961071,49.713152,39.819820,-24.491753,259.519989,241.442858,74.160204,50.142531,11.452676


In [16]:
from collections import defaultdict

F = defaultdict(dict) # 4 * n * (m * n)
n = len(tickers)
m = 10
T = len(close_data) // m

In [18]:
corr = [{} for _ in range(T)]
indicators = ["Adj Close", "MA", "RSI", "MACD"]
for t in range(0, T): # t
    V = close_data[t*m:(t+1)*m] # m days closing data
    for indicator in indicators: # the 4 dimensions
        corr[t][indicator] = V.filter([(stock, indicator) for stock in tickers], axis=1).corr()
        for stock in tickers: # n assets
            F[t][(stock, indicator)] = V[(stock, indicator)].values.reshape(m,1).dot(corr[t][indicator][(stock, indicator)].values.reshape(1,n)) # m * n tensor for indicator i & stock n

In [19]:
F[0][("FUTU", "MACD")].shape

(10, 7)

# CONV3D

In [20]:
import torch
import torch.nn as nn

In [24]:
f = []
for t in range(0, T):
    f.append([])
    for indicator in indicators:
        a = []
        for stock in tickers:
            a.append(F[t][(stock, indicator)])
        f[-1].append(a)
f = list(map(torch.Tensor, np.array(f)))
f[0].shape

torch.Size([4, 7, 10, 7])

In [25]:
# Define the 3D Convolutional Neural Network layer
class Conv3DNet(nn.Module):
    def __init__(self):
        super(Conv3DNet, self).__init__()
        self.conv3d = nn.Conv3d(in_channels=4, out_channels=32, kernel_size=(1, 3, 1))
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.conv3d(x)
        x = self.relu(x)
        return x

In [26]:
# Instantiate the network
net = Conv3DNet()

# Pass the input tensor f[t] through the network
F_prime = []
for t in range(0, T):
    F_prime.append(net(f[t]))

# Don't know why the shape is 32 * n * (m-2) * n here but the paper says 32 * n * (m-4) * n
'''Kernel size 1,3,1 should be m-2?'''
F_prime[0].shape

torch.Size([32, 7, 8, 7])