In [1]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime, timedelta

# Data

In [2]:
# Universe of symbols analysed in this notebook; order fixes the asset axis downstream.
tickers = ["FUTU", "NVDA", "ASTS", "TMF", "LABU", "GDXU", "TSLA"]
len(tickers)

7

In [3]:
# Analysis window as ISO date strings (start of the study period, end passed to the download).
start_date, end_date = "2024-01-01", "2024-11-11"

In [4]:
# Start the download 50 calendar days before start_date so that the rolling
# indicators have warm-up history by the time the analysis window begins.
lookback = timedelta(days=50)
compute_date = (datetime.strptime(start_date, '%Y-%m-%d') - lookback).strftime('%Y-%m-%d')
compute_date

'2023-11-12'

In [5]:
# Download price history for every ticker, grouped so that columns are
# (ticker, field) pairs.
# auto_adjust=False is passed explicitly because the cells below read the
# 'Adj Close' field. NOTE(review): newer yfinance releases are understood to
# default auto_adjust to True, which removes that column — confirm against the
# installed version.
data = yf.download(
    tickers,
    start=compute_date,
    end=end_date,
    group_by='ticker',
    auto_adjust=False,
)

# With a single ticker the result may come back with flat field columns; wrap
# them into (ticker, field) so downstream indexing is uniform. Only do this
# when the columns are still flat: re-wrapping an existing MultiIndex would
# nest tuples inside tuples.
if len(tickers) == 1 and not isinstance(data.columns, pd.MultiIndex):
    # Create a MultiIndex for the columns
    multi_index_columns = pd.MultiIndex.from_tuples([(tickers[0], col) for col in data.columns])

    # Assign the new MultiIndex to the DataFrame
    data.columns = multi_index_columns

[*********************100%***********************]  7 of 7 completed


In [6]:
# Day-over-day percentage change of each ticker's 'Adj Close' series,
# one Series per ticker, each named after its ticker.
returns_list = [
    data[stock]['Adj Close'].pct_change().rename(stock)
    for stock in tickers
]

# Put the per-ticker Series side by side: one column per ticker.
returns = pd.concat(returns_list, axis=1)

In [7]:
# Trim the returns so that exactly 27 rows precede the first row on or after
# start_date. Those 27 rows are the warm-up for the 28-row rolling window used
# by the indicators, and are dropped again before the tensors are built.
returns = returns.reset_index()
on_or_after_start = returns['Date'] >= start_date
if not on_or_after_start.any():
    raise ValueError(f"no rows on or after start_date={start_date!r}")
start_index = returns[on_or_after_start].index[0]
if start_index < 27:
    # A negative slice start would silently wrap around to the end of the frame.
    raise ValueError(
        f"only {start_index} rows precede start_date={start_date!r}; "
        "27 warm-up rows are required"
    )
returns = returns[start_index-27:].set_index('Date')

In [8]:
# Row labels of the trimmed returns frame; reused below as the index of the feature frame.
dates = returns.index

In [9]:
# Cross-section of the 'Adj Close' field for all tickers, with columns put in
# the same order as the columns of `returns`.
adj_close = (
    data
    .xs("Adj Close", level=1, axis=1)
    .reindex(columns=returns.columns)
)

In [10]:
# Feature frame: one (ticker, field) column per ticker and indicator, indexed by date.
columns = pd.MultiIndex.from_product([tickers, ['Adj Close', 'Returns', "MA", "RSI", "EMA_12", "EMA_26", "MACD"]])
# Allocate as float so the not-yet-computed indicator columns are numeric NaN
# rather than object-dtype placeholders.
df = pd.DataFrame(index=dates, columns=columns, dtype=float)
for stock in tickers:
    # Both assignments align on the date index.
    df[(stock, "Adj Close")] = adj_close[stock]
    df[(stock, "Returns")] = returns[stock]
# Move the dates into a column so rows are addressed by integer position from here on.
df = df.reset_index()

# Indicators

In [11]:
def EMA(w, price, last):
    """One step of an exponential moving average.

    w     -- span; the smoothing factor is 2 / (1 + w)
    price -- the new observation
    last  -- the previous EMA value

    Returns the updated EMA value.
    """
    smoothing = 2/(1+w)
    return smoothing*price + (1-smoothing)*last
def MA(prices):
    """Arithmetic mean of a window of prices.

    prices -- a sized iterable of numbers (list, array, Series)

    Returns the mean of the window, or NaN for an empty window. The divisor
    is the actual window length, so the function is correct for any window
    size (a 28-element window gives the same result as dividing by 28).
    """
    count = len(prices)
    if count == 0:
        return float("nan")
    return sum(prices) / count
def MACD(long, short):
    """Difference between the total of `long` and the total of `short`.

    long, short -- iterables of numbers

    NOTE(review): this is long minus short; the conventional MACD line is the
    short EMA minus the long EMA — confirm the intended sign against the
    reference being followed.
    """
    long_total = sum(long)
    short_total = sum(short)
    return long_total - short_total
def RSI(returns):
    """Relative Strength Index of a window of returns, on a 0-100 scale.

    returns -- array-like supporting boolean-mask indexing (NumPy array or
               pandas Series); NaN entries count as neither gain nor loss

    The average gain is the mean of the strictly positive returns and the
    average loss is the magnitude of the mean of the strictly negative ones.

    Returns 100 * (1 - 1 / (1 + avg_gain / avg_loss)), with the degenerate
    windows handled explicitly instead of producing NaN from an empty mean:
      * gains but no losses -> 100.0
      * losses but no gains -> 0.0
      * neither             -> NaN (no movement, RSI undefined)
    """
    gains = returns[returns > 0]
    losses = returns[returns < 0]

    if len(losses) == 0:
        # Relative strength is unbounded when nothing was lost.
        return 100.0 if len(gains) > 0 else float("nan")
    if len(gains) == 0:
        # Relative strength is zero when nothing was gained.
        return 0.0

    avg_gain = gains.mean()
    avg_loss = -losses.mean()
    return 100 * (1 - 1/(1+avg_gain/avg_loss))

In [12]:
# Fill in the indicator columns of `df` for every ticker.
for stock in tickers:
    price = df[(stock, "Adj Close")]

    # 28-row moving average of the adjusted close.
    df[(stock, "MA")] = price.rolling(window=28).apply(MA)

    # Exponential moving averages with spans 12 and 26. With adjust=False,
    # ewm applies the recursion y[0] = x[0], y[t] = a*x[t] + (1-a)*y[t-1]
    # with a = 2 / (1 + span), i.e. the same update as EMA(), without a
    # per-row Python loop over df.loc.
    df[(stock, "EMA_12")] = price.ewm(span=12, adjust=False).mean()
    df[(stock, "EMA_26")] = price.ewm(span=26, adjust=False).mean()

    # 9-row rolling sum of EMA_26 minus 9-row rolling sum of EMA_12.
    # NOTE(review): the conventional MACD line is EMA_12 - EMA_26 (no rolling
    # sum, opposite sign) — confirm this definition against the reference.
    df[(stock, "MACD")] = df[(stock, "EMA_26")].rolling(window=9).sum() - df[(stock, "EMA_12")].rolling(window=9).sum()

    # RSI over a 14-row window of returns.
    df[(stock, "RSI")] = df[(stock, "Returns")].rolling(14).apply(RSI)

# Tensor

In [25]:
# Drop the 27 warm-up rows at the top, trim the tail so the number of
# remaining rows is a multiple of 10, renumber the rows from 0, and remove
# the helper columns that are not used as features.
to_drop = ["EMA_12", "EMA_26", "Returns"]
n_rows = len(df)
tail_remainder = (n_rows - 27) % 10
close_data = (
    df.iloc[27:n_rows - tail_remainder]
    .reset_index(drop=True)
    .drop(columns=[(stock, label) for stock in tickers for label in to_drop])
)
close_data

Unnamed: 0_level_0,Date,FUTU,FUTU,FUTU,FUTU,NVDA,NVDA,NVDA,NVDA,ASTS,...,LABU,LABU,GDXU,GDXU,GDXU,GDXU,TSLA,TSLA,TSLA,TSLA
Unnamed: 0_level_1,Unnamed: 1_level_1,Adj Close,MA,RSI,MACD,Adj Close,MA,RSI,MACD,Adj Close,...,RSI,MACD,Adj Close,MA,RSI,MACD,Adj Close,MA,RSI,MACD
0,2024-01-02,51.490002,53.835357,40.371929,12.754085,48.154346,48.091694,39.815513,-0.910560,4.850000,...,51.950629,-100.503359,32.020000,32.544286,51.816172,-11.171534,248.419998,245.323570,57.344897,-31.064168
1,2024-01-03,52.150002,53.541786,41.807990,12.222410,47.555519,48.007053,38.387926,-1.002662,4.930000,...,52.189045,-102.111305,29.280001,32.581071,51.033419,-10.796480,238.449997,245.225356,51.788164,-29.979870
2,2024-01-04,52.369999,53.291429,35.673466,11.555686,47.984398,47.981571,38.380656,-1.054385,4.820000,...,47.430594,-104.112298,29.330000,32.619286,38.290818,-10.077260,237.929993,245.358213,56.941019,-28.053996
3,2024-01-05,51.509998,53.033572,34.496732,11.021293,49.083080,48.028886,43.017115,-1.091426,5.010000,...,51.730024,-105.809623,29.160000,32.630714,39.424353,-8.935453,237.490005,245.431070,53.729987,-24.868494
4,2024-01-08,50.930000,52.826429,33.351526,10.522480,52.238186,48.172246,53.636200,-1.380990,4.940000,...,58.037337,-107.704821,28.850000,32.583929,41.045738,-7.392608,240.449997,245.587141,54.428062,-21.124884
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205,2024-10-24,88.139999,93.478928,32.315429,-64.759029,140.400330,128.006538,58.737145,-47.571134,25.070000,...,41.307060,10.666325,55.590000,50.161785,54.182648,-18.129128,260.480011,238.148929,71.085204,23.866945
206,2024-10-25,92.580002,94.629285,41.977427,-54.175470,141.530243,128.933260,56.909343,-48.417384,25.620001,...,46.092568,10.071002,53.349998,50.406428,54.103346,-20.757139,269.190002,239.624644,72.108776,22.787640
207,2024-10-28,96.879997,95.963928,50.420608,-45.319571,140.510315,129.902836,56.018645,-49.240153,26.730000,...,48.689021,9.778586,52.349998,50.679285,52.801542,-22.804531,262.510010,240.885715,73.852229,18.256929
208,2024-10-29,98.169998,97.048214,45.646263,-38.097661,141.240265,130.737778,49.968700,-49.764740,26.980000,...,52.636815,10.041125,55.000000,50.961071,49.713152,-24.491753,259.519989,241.442858,74.160204,11.452676


In [26]:
# For each indicator, the correlation matrix across tickers of that
# indicator's columns in close_data.
indicators = ["Adj Close", "MA", "RSI", "MACD"]
corr = {
    indicator: close_data.filter(
        [(stock, indicator) for stock in tickers], axis=1
    ).corr()
    for indicator in indicators
}

In [27]:
from collections import defaultdict

# F[t][(stock, indicator)] is an m x n array: the outer product of the
# stock's m values of the indicator in time block t with that stock's column
# of the indicator's cross-ticker correlation matrix.
F = defaultdict(dict) # 4 * n * (m * n)
n = len(tickers)   # number of assets
m = 10             # rows per time block
T = len(close_data) // m   # number of time blocks

for t in range(T):
    V = close_data.iloc[t*m:(t+1)*m] # the m rows of block t
    for indicator in ["Adj Close", "MA", "RSI", "MACD"]: # the 4 dimensions
        indicator_corr = corr[indicator]
        for stock in tickers: # n assets
            key = (stock, indicator)
            # (m,) outer (n,) -> (m, n)
            F[t][key] = np.outer(V[key].values, indicator_corr[key].values)

# CONV3D

In [28]:
import torch
import torch.nn as nn

In [29]:
# One float32 tensor per time block, laid out as
# (indicator, stock, m, n) = (len(indicators), len(tickers), m, n).
# The per-(stock, indicator) arrays are stacked in NumPy first and converted
# once, rather than handing nested Python lists of arrays to the tensor
# constructor element by element.
f = [
    torch.as_tensor(
        np.stack([
            np.stack([F[t][(stock, indicator)] for stock in tickers])
            for indicator in indicators
        ]),
        dtype=torch.float32,
    )
    for t in range(T)
]
f[0].shape

torch.Size([4, 7, 10, 7])

In [30]:
# 3D convolution block: a single Conv3d layer followed by a ReLU.
class Conv3DNet(nn.Module):
    """Conv3d (4 -> 32 channels, kernel (1, 3, 1), default stride/padding) + ReLU.

    With no padding, the kernel extent of 3 along the second spatial axis
    shortens that axis by 2; the other two spatial axes keep their size.
    """

    def __init__(self):
        super().__init__()
        self.conv3d = nn.Conv3d(in_channels=4, out_channels=32, kernel_size=(1, 3, 1))
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the convolution and then the ReLU to `x`."""
        return self.relu(self.conv3d(x))

In [31]:
# Build the network once and run every time-block tensor through it.
net = Conv3DNet()
F_prime = [net(f[t]) for t in range(T)]

# Open question: the output here is 32 * n * (m-2) * n, whereas the paper
# says 32 * n * (m-4) * n.
'''Kernel size 1,3,1 should be m-2?'''
F_prime[0].shape

torch.Size([32, 7, 8, 7])