In [23]:
import pandas as pd
import yfinance as yf
import numpy as np
from datetime import datetime, timedelta

# Data

In [None]:
# Ticker symbols to download; the trailing expression displays how many there are.
tickers = ["FUTU", "NVDA", "ASTS", "TMF", "LABU", "GDXU", "TSLA"]
len(tickers)

In [25]:
# Analysis window as 'YYYY-MM-DD' strings (start_date is parsed with '%Y-%m-%d' further down).
start_date, end_date = "2024-01-01", "2024-11-11"

In [None]:
# Download start: 50 calendar days before start_date, kept as a 'YYYY-MM-DD' string.
compute_date = (
    datetime.strptime(start_date, '%Y-%m-%d') - timedelta(days=50)
).strftime('%Y-%m-%d')
compute_date

In [None]:
# Download daily bars for every ticker from compute_date up to end_date,
# grouped so that the top column level is the ticker.
# auto_adjust=False is passed explicitly: the cells below read the
# 'Adj Close' column, which yfinance only returns when prices are not
# auto-adjusted (newer releases switched the default to auto_adjust=True).
data = yf.download(
    tickers,
    start=compute_date,
    end=end_date,
    group_by='ticker',
    auto_adjust=False,
)

# Later cells address columns as (ticker, field). If a single-ticker download
# comes back with flat columns, wrap them in a MultiIndex; skip the wrap when
# the columns already are a MultiIndex, otherwise the tuples would be nested.
if len(tickers) == 1 and not isinstance(data.columns, pd.MultiIndex):
    data.columns = pd.MultiIndex.from_tuples(
        [(tickers[0], col) for col in data.columns]
    )

In [None]:
# Preview the first rows only instead of rendering the whole download.
data.head()

In [29]:
# One Series of day-over-day percentage changes per ticker, computed from that
# ticker's 'Adj Close' column and named after the ticker.
returns_list = [
    data[stock]['Adj Close'].pct_change().rename(stock)
    for stock in tickers
]

# Put the per-ticker Series side by side: one column per ticker.
returns = pd.concat(returns_list, axis=1)

In [30]:
# Trim `returns` so it begins 27 rows before the first row dated on or after
# start_date, then restore 'Date' as the index.
# reset_index() is used without inplace=True so the cell rebinds `returns`
# instead of mutating the frame produced by the previous cell.
returns = returns.reset_index()
rows_in_window = returns[returns['Date'] >= start_date].index
if len(rows_in_window) == 0:
    raise ValueError(f"no rows dated on or after start_date={start_date!r}")
start_index = rows_in_window[0]
if start_index < 27:
    # A negative slice start would silently count from the END of the frame.
    raise ValueError(
        f"only {start_index} rows precede start_date={start_date!r}; 27 are required"
    )
returns = returns[start_index-27:].set_index('Date')

In [31]:
# Index of the trimmed returns frame (the 'Date' labels); used as the row index of the feature table built below.
dates = returns.index

In [32]:
# Cross-section of the 'Adj Close' field (column level 1) across all tickers,
# with its columns arranged to match the columns of `returns`.
adj_close = (
    data
    .xs("Adj Close", level=1, axis=1)
    .reindex(columns=returns.columns)
)

In [33]:
# Feature table with (ticker, field) columns and one row per entry of `dates`.
field_names = ['Adj Close', 'Returns', "MA", "RSI", "RSI2", "EMA_12", "EMA_26", "MACD"]
columns = pd.MultiIndex.from_product([tickers, field_names])

# dtype=float makes the not-yet-computed indicator columns numeric NaN rather
# than the object dtype pandas picks for an empty frame. The columns are set
# by the constructor, so no separate `df.columns = columns` is needed.
df = pd.DataFrame(index=dates, columns=columns, dtype=float)
for stock in tickers:
    # Assignments align on the date index.
    df[(stock, "Adj Close")] = adj_close[stock]
    df[(stock, "Returns")] = returns[stock]

# Move the dates into a column so rows are labelled 0..len(df)-1.
df = df.reset_index()

In [None]:
# Preview the first rows only instead of rendering the whole feature table.
df.head()

# Indicators

In [35]:
def EMA(w, price, last):
    """Advance an exponential moving average by one observation.

    w: span of the average; the smoothing factor is 2 / (1 + w).
    price: the newest observation.
    last: the previous EMA value.
    Returns the blend of `price` and `last` weighted by the smoothing factor.
    """
    alpha = 2/(1+w)
    blended = alpha*price + (1-alpha)*last
    return blended
def MA(prices):
    """Arithmetic mean of a window of prices.

    The divisor is the actual window length rather than a fixed 28, so the
    result is a true average for any window size (and is unchanged for
    28-element windows).

    prices: sized iterable of numbers (list, NumPy array, pandas Series).
    Returns the mean, or NaN for an empty window.
    """
    count = len(prices)
    if count == 0:
        return float("nan")
    return sum(prices) / count
def MACD(long, short):
    """Return the total of `long` minus the total of `short`.

    long, short: iterables of numbers.
    NOTE(review): the conventional MACD line is the short EMA minus the long
    EMA; this helper subtracts the other way round and works on sums --
    confirm that is intended.
    """
    long_total = sum(long)
    short_total = sum(short)
    return long_total - short_total
def RSI(returns):
    """Relative Strength Index (0-100) of a window of returns.

    The average gain is the mean of the strictly positive entries and the
    average loss is the mean magnitude of the strictly negative entries.

    returns: pandas Series or NumPy array (must support boolean-mask indexing).
    Returns 100 when the window contains no losses, 0 when it contains losses
    but no gains, otherwise 100 * (1 - 1 / (1 + avg_gain / avg_loss)).
    """
    gains = returns[returns > 0]
    losses = returns[returns < 0]
    # The mean of an empty selection is NaN, and NaN != 0 is True, so a
    # `avg_loss != 0` guard never fires; test for emptiness explicitly.
    if len(losses) == 0:
        return 100.0
    if len(gains) == 0:
        return 0.0
    avg_gain = gains.mean()
    avg_loss = -losses.mean()
    return 100 * (1 - 1/(1+avg_gain/avg_loss))
def RSI2(diff):
    """Relative Strength Index (0-100) of a window of price differences.

    The average gain is the mean of the strictly positive entries and the
    average loss is the mean magnitude of the strictly negative entries.

    diff: pandas Series or NumPy array (must support boolean-mask indexing).
    Returns 100 when the window contains no losses, 0 when it contains losses
    but no gains, otherwise 100 * (1 - 1 / (1 + avg_gain / avg_loss)).
    """
    gains = diff[diff > 0]
    losses = diff[diff < 0]
    # The mean of an empty selection is NaN, and NaN != 0 is True, so a
    # `avg_loss != 0` guard never fires; test for emptiness explicitly.
    if len(losses) == 0:
        return 100.0
    if len(gains) == 0:
        return 0.0
    avg_gain = gains.mean()
    avg_loss = -losses.mean()
    return 100 * (1 - 1/(1+avg_gain/avg_loss))

In [36]:
# Fill in the indicator columns for every ticker.
for stock in tickers:
    close = df[(stock, "Adj Close")]

    # 28-row simple moving average and row-over-row price change.
    df[(stock, "MA")] = close.rolling(window=28).mean()
    df[(stock, "Diff")] = close.diff()

    # ewm(span=w, adjust=False) evaluates the recursion
    #   y[0] = x[0];  y[t] = a*x[t] + (1-a)*y[t-1]  with  a = 2/(1+w),
    # which is what stepping EMA() row by row computes, without writing one
    # cell at a time through .loc.
    df[(stock, "EMA_12")] = close.ewm(span=12, adjust=False).mean()
    df[(stock, "EMA_26")] = close.ewm(span=26, adjust=False).mean()

    # NOTE(review): this is the 9-row sum of EMA_26 minus the 9-row sum of
    # EMA_12; the textbook MACD line is EMA_12 - EMA_26. Left as written --
    # confirm against the intended definition.
    df[(stock, "MACD")] = df[(stock, "EMA_26")].rolling(window=9).sum() - df[(stock, "EMA_12")].rolling(window=9).sum()
    df[(stock, "RSI")] = df[(stock, "Returns")].rolling(14).apply(RSI)
    # "Diff" already holds the first difference of the price; differencing it
    # again would feed RSI2 second differences, so it is passed as-is.
    df[(stock, "RSI2")] = df[(stock, "Diff")].rolling(14).apply(RSI2)

# Tensor

In [None]:
# Skip the first 27 rows, then cut the tail so the number of rows kept is a
# multiple of 10, and renumber the rows from 0.
rows_after_skip = len(df) - 27
tail_cut = rows_after_skip % 10
close_data = df[27:len(df) - tail_cut].reset_index(drop=True)

# Remove the columns listed in to_drop for every ticker.
to_drop = ["EMA_12", "EMA_26", "Returns", "Diff"]
dropped_columns = [(stock, label) for stock in tickers for label in to_drop]
close_data = close_data.drop(columns=dropped_columns)
close_data

In [38]:
from collections import defaultdict

n = len(tickers)          # number of assets
m = 10                    # rows per window
T = len(close_data) // m  # number of complete m-row windows

# Nested mapping filled in later; the original sizing note reads 4 * n * (m * n).
F = defaultdict(dict)

In [39]:
indicators = ["Adj Close", "MA", "RSI", "MACD"]
corr = [{} for _ in range(T)]

for t in range(T):
    # Rows t*m .. (t+1)*m - 1 of close_data: one m-row window.
    V = close_data[t*m:(t+1)*m]
    for indicator in indicators:
        # Correlation matrix of this indicator across all tickers within the window.
        indicator_columns = [(stock, indicator) for stock in tickers]
        window_corr = V.filter(indicator_columns, axis=1).corr()
        corr[t][indicator] = window_corr
        for stock in tickers:
            key = (stock, indicator)
            # (m, 1) column of the stock's values times the (1, n) row of its
            # correlations with every ticker -> an (m, n) matrix.
            values_column = V[key].values.reshape(m, 1)
            corr_row = window_corr[key].values.reshape(1, n)
            F[t][key] = values_column.dot(corr_row)

In [None]:
# Shape check on one entry; index by tickers[0] so the cell keeps working when the ticker list changes.
F[0][(tickers[0], "MACD")].shape

# CONV3D

In [41]:
import torch
import torch.nn as nn

In [None]:
# Arrange the matrices in F as [window][indicator][stock], stack them into one
# NumPy array, then turn each window's slice into a torch tensor.
f = [
    [
        [F[t][(stock, indicator)] for stock in tickers]
        for indicator in indicators
    ]
    for t in range(0, T)
]
f = [torch.Tensor(window) for window in np.array(f)]
f[0].shape

In [43]:
# Define the 3D Convolutional Neural Network layer
class Conv3DNet(nn.Module):
    """A single Conv3d layer followed by ReLU.

    Parameters
    ----------
    in_channels : int, default 4
        Number of input channels.
    out_channels : int, default 32
        Number of output channels.
    kernel_size : int or tuple of 3 ints, default (1, 3, 1)
        Convolution kernel size, forwarded to nn.Conv3d unchanged.

    Calling the constructor with no arguments builds the same layer as the
    previously hard-coded version.
    """

    def __init__(self, in_channels=4, out_channels=32, kernel_size=(1, 3, 1)):
        super().__init__()
        self.conv3d = nn.Conv3d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
        )
        self.relu = nn.ReLU()

    def forward(self, x):
        """Apply the convolution, then ReLU, and return the result."""
        return self.relu(self.conv3d(x))

In [None]:
# Instantiate the network
net = Conv3DNet()

# Pass every window tensor f[t] through the network
F_prime = [net(f[t]) for t in range(0, T)]

# With no padding, stride 1 and dilation 1, Conv3d shrinks each dimension to
# (input - kernel + 1). A (1, 3, 1) kernel therefore only shortens the middle
# dimension, m -> m - 2, giving 32 * n * (m-2) * n.
# TODO: the paper reports 32 * n * (m-4) * n; a kernel of height 5, or two
# stacked height-3 convolutions, would give m - 4 -- check which one it uses.
F_prime[0].shape