## Installing Dependencies

In [None]:
%pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... done
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... done
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=2984a0b1e35b48371739898f2a79658de66aaa3344f877ad0c4797ab3cbeb14b
  Stored in directory: /root/.cache/pip/wheels/5c/a1/5f/c6b85a7d9452057be4ce68a8e45d77ba34234a6d46581777c6
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


## GPU Configuration

In [1]:
# Load the cudf.pandas IPython extension ahead of the notebook's `import pandas` cells.
# NOTE(review): the extension is expected to be loaded before pandas is first imported — confirm against the RAPIDS docs.
%load_ext cudf.pandas

# Loading Datasets

NOTE: Upload `aapl_price.json`, `aapl_fundamentals.csv`, and `vix.csv` to the Colab `/content` directory before running the cell below.

In [None]:
import pandas as pd

# Single-letter source keys -> descriptive column names used in the rest of the notebook.
column_map = {
    'c': 'close',
    'h': 'high',
    'l': 'low',
    'o': 'open',
    't': 'date',
    'v': 'volume',
}

# Price data: keep only the mapped fields, then rename them.
raw_price = pd.read_json('/content/aapl_price.json')
df_price = raw_price[column_map.keys()].rename(columns=column_map)

# Fundamentals and VIX: rename each file's date column to the shared 'date' name.
df_fundamentals = pd.read_csv('/content/aapl_fundamentals.csv').rename(
    columns={'Quarter End Date': 'date'}
)
df_vix = pd.read_csv('/content/vix.csv').rename(columns={'DATE': 'date'})



In [None]:
# Show the first rows of each raw frame, in load order.
for frame in (df_price, df_fundamentals, df_vix):
    display(frame.head())

Unnamed: 0,close,high,low,open,date,volume
0,23.78,23.78,23.02,23.16,2016-01-04T05:00:00Z,287741356
1,23.18,23.89,23.11,23.87,2016-01-05T05:00:00Z,234762144
2,22.73,23.1,22.54,22.69,2016-01-06T05:00:00Z,284319308
3,21.77,22.6,21.76,22.27,2016-01-07T05:00:00Z,343985812
4,21.88,22.37,21.84,22.24,2016-01-08T05:00:00Z,300265168


Unnamed: 0,date,EBITDA (USD millions),EV (USD millions)
0,2025-09-30,35550,3790000
1,2025-06-30,31032,3060000
2,2025-03-31,32250,3340000
3,2024-12-31,45912,3920000
4,2024-09-30,32502,3550000


Unnamed: 0,date,OPEN,HIGH,LOW,CLOSE
0,01/02/1990,17.24,17.24,17.24,17.24
1,01/03/1990,18.19,18.19,18.19,18.19
2,01/04/1990,19.22,19.22,19.22,19.22
3,01/05/1990,20.11,20.11,20.11,20.11
4,01/08/1990,20.26,20.26,20.26,20.26


# Data Curation

### DateTime Conversion

In [None]:
# Price timestamps: parse, remove the timezone, and snap to midnight.
price_dates = pd.to_datetime(df_price['date'])
df_price['date'] = price_dates.dt.tz_localize(None).dt.normalize()

# Fundamentals and VIX dates: parse and snap to midnight.
for frame in (df_fundamentals, df_vix):
    frame['date'] = pd.to_datetime(frame['date']).dt.normalize()

In [None]:
# Print the column dtypes of each frame to verify the date conversion.
for frame in (df_price, df_fundamentals, df_vix):
    print(frame.dtypes)

close            float64
high             float64
low              float64
open             float64
date      datetime64[ns]
volume             int64
dtype: object
date                     datetime64[ns]
EBITDA (USD millions)            object
EV (USD millions)                object
dtype: object
date     datetime64[ns]
OPEN            float64
HIGH            float64
LOW             float64
CLOSE           float64
dtype: object


### Numeric Column Conversion

In [None]:
# The two money columns were read as strings (dtype `object`); strip the
# commas via a regex replace, then cast both columns to float.
numeric_cols = ['EBITDA (USD millions)', 'EV (USD millions)']
without_commas = df_fundamentals.replace(to_replace=',', value='', regex=True)
df_fundamentals = without_commas.astype({col: 'float' for col in numeric_cols})

### Time Series Index

In [None]:
# Index every frame by its 'date' column.
df_price, df_fundamentals, df_vix = (
    frame.set_index('date') for frame in (df_price, df_fundamentals, df_vix)
)

# Feature Engineering

## MACD

In [None]:
from ta.trend import MACD

# MACD of the closing price; no window arguments are passed, so the library defaults apply.
macd = MACD(close=df_price['close'])

# Output column name -> indicator method that produces it.
macd_outputs = {
    'macd_line': macd.macd,
    'macd_diff': macd.macd_diff,
    'macd_signal': macd.macd_signal,
}
for column_name, compute in macd_outputs.items():
    df_price[column_name] = compute()

## RSI

In [None]:
from ta.momentum import RSIIndicator

# RSI of the closing price; no window argument is passed, so the library default applies.
rsi = RSIIndicator(close=df_price['close'])
df_price['rsi'] = rsi.rsi()

## EV/EBITDA

In [None]:
# Valuation multiple: enterprise value divided by EBITDA, row by row.
enterprise_value = df_fundamentals['EV (USD millions)']
ebitda = df_fundamentals['EBITDA (USD millions)']
df_fundamentals['ev_ebidta'] = enterprise_value / ebitda

## VIX Daily Returns

In [None]:
# Row-over-row change of the VIX close, expressed in percent.
vix_close = df_vix['CLOSE']
df_vix['vix_prct_returns'] = vix_close.pct_change() * 100

## Closing price Daily Returns

In [None]:
# Row-over-row change of the closing price, expressed in percent.
closing_price = df_price['close']
df_price['daily_prct_change'] = closing_price.pct_change() * 100

## Data Set Creation

Combining different data sets into one

In [None]:
# Select the columns contributed by each source frame.
price_features = df_price[['close', 'daily_prct_change', 'macd_line', 'macd_diff', 'macd_signal', 'rsi', 'volume']]
fundamental_features = df_fundamentals[['ev_ebidta']]
vix_features = df_vix[['vix_prct_returns']]

# Place the three frames side by side, aligned on their date index.
df = pd.concat([price_features, fundamental_features, vix_features], axis=1)

Forward-fill `ev_ebidta`, since it is quarterly data.

In [None]:
# Carry the last available ratio forward into the rows that follow it.
quarterly_ratio = df['ev_ebidta']
df['ev_ebidta'] = quarterly_ratio.ffill()

Drop the rows that contain NaN values.

In [None]:
# Keep only rows in which every column has a value.
df = df.dropna(axis=0, how='any')

Reorder the columns so that `daily_prct_change` comes last.

In [None]:
# Final column layout; `daily_prct_change` goes last.
column_order = [
    'close',
    'volume',
    'macd_line',
    'macd_diff',
    'macd_signal',
    'rsi',
    'ev_ebidta',
    'vix_prct_returns',
    'daily_prct_change',
]
df = df[column_order]

Write the dataset to a CSV file.

In [None]:
# Persist the dataset, keeping both the date index and the header row.
dataset_path = "/content/aapl_model_dataset.csv"
df.to_csv(dataset_path, index=True, header=True)

# Model Implementation

## Data Ingestion

In [None]:
import pandas as pd
# Reload the saved dataset, using its 'date' column as the index.
model_dataset_path = "/content/aapl_model_dataset.csv"
df = pd.read_csv(model_dataset_path, index_col='date')

In [None]:
# Preview the first rows of the reloaded dataset.
df.head()

Unnamed: 0_level_0,close,volume,macd_line,macd_diff,macd_signal,rsi,ev_ebidta,vix_prct_returns,daily_prct_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-03-31T00:00:00.000000000Z,24.73,114645816,0.559188,0.066089,0.493099,71.00847,38.940719,2.876106,-0.522928
2016-04-01T00:00:00.000000000Z,24.96,113856948,0.584986,0.07351,0.511476,73.167919,38.940719,-6.09319,0.930044
2016-04-04T00:00:00.000000000Z,25.21,157112212,0.618475,0.085599,0.532876,75.319795,38.940719,7.78626,1.001603
2016-04-05T00:00:00.000000000Z,24.92,111266284,0.61453,0.065323,0.549207,68.460985,38.940719,9.206799,-1.150337
2016-04-06T00:00:00.000000000Z,25.18,111477272,0.625177,0.060776,0.564401,71.009868,38.940719,-8.625162,1.043339


## Functions

In [18]:
def test_train_split(df, target_col, test_size=0.1):
  total_length = df.count().max()
  split_len = int(total_length * 0.2)
  split_idx = total_length - split_len
  X, y = df[[x for x in df.columns if x != target_col]], df[[target_col]]
  X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx + 1:]
  y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx + 1:]
  return X_train, X_test, y_train, y_test, split_len

## ARIMA

### Test Train Split

In [None]:
# Positional train/test split with `daily_prct_change` as the target column.
(X_train, X_test,
 y_train, y_test,
 split_len) = test_train_split(df, target_col='daily_prct_change', test_size=0.2)

### Training

In [None]:
import numpy as np
import cudf
from cuml.tsa.arima import ARIMA

# Seasonal ARIMA on the training target, with no intercept term.
arima = ARIMA(
    y_train,
    order=(2, 0, 2),
    seasonal_order=(2, 0, 2, 7),
    fit_intercept=False,
)

# Left as the cell's last expression so the fitted model's repr is displayed.
arima.fit()

ARIMA()

### Prediction

In [None]:
# Predictions up to the end of the training range, followed by a
# `split_len`-step forecast, stacked into one sequence.
in_sample = arima.predict(end=len(y_train))
out_of_sample = arima.forecast(split_len)
y_predict = pd.concat([in_sample, out_of_sample], axis=0)

### Metrics

In [None]:
from sklearn.metrics import r2_score
# NOTE(review): the labels say "accuracy", but the printed value is r2_score * 100.
n_train = len(y_train)

train_score = r2_score(y_train, y_predict.iloc[:n_train]) * 100
print(f'Training accuracy: {train_score}')

# The +1 matches the test slices produced by test_train_split, which start one
# row after the end of the training rows.
test_score = r2_score(y_test, y_predict.iloc[n_train + 1:]) * 100
print(f'Testing accuracy: {test_score}')

Training accuracy: 1.2598714198852723
Testing accuracy: -0.33037051713848964


### Feature Engineering

In [None]:
# Discard whatever index the concatenated predictions carry ...
y_predict = y_predict.reset_index(drop=True)
# ... and relabel them with the dataset's index (this requires one prediction per dataset row).
y_predict.index = df.index
# Store the ARIMA output as an extra column of the dataset.
df['arima_predict'] = y_predict

In [None]:
# Preview the dataset with the new `arima_predict` column.
df.head()

Unnamed: 0_level_0,close,volume,macd_line,macd_diff,macd_signal,rsi,ev_ebidta,vix_prct_returns,daily_prct_change,arima_predict
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2016-03-31T00:00:00.000000000Z,24.73,114645816,0.559188,0.066089,0.493099,71.00847,38.940719,2.876106,-0.522928,0.0
2016-04-01T00:00:00.000000000Z,24.96,113856948,0.584986,0.07351,0.511476,73.167919,38.940719,-6.09319,0.930044,0.032484
2016-04-04T00:00:00.000000000Z,25.21,157112212,0.618475,0.085599,0.532876,75.319795,38.940719,7.78626,1.001603,-0.05151
2016-04-05T00:00:00.000000000Z,24.92,111266284,0.61453,0.065323,0.549207,68.460985,38.940719,9.206799,-1.150337,-0.072322
2016-04-06T00:00:00.000000000Z,25.18,111477272,0.625177,0.060776,0.564401,71.009868,38.940719,-8.625162,1.043339,0.053064


### Data Snapshot

In [None]:
# Snapshot the dataset (including `arima_predict`), keeping the index and header row.
arima_dataset_path = "/content/aapl_model_arima_dataset.csv"
df.to_csv(arima_dataset_path, index=True, header=True)

## TCN

In [22]:
%pip install darts

Collecting darts
  Downloading darts-0.39.0-py3-none-any.whl.metadata (61 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/62.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
Collecting nfoursid>=1.0.0 (from darts)
  Downloading nfoursid-1.0.2-py3-none-any.whl.metadata (1.9 kB)
Collecting pyod>=0.9.5 (from darts)
  Downloading pyod-2.0.5-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.3/46.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
Collecting pytorch-lightning<2.5.3,>=2.0.0 (from darts)
  Downloading pytorch_lightning-2.5.2-py3-none-any.whl.metadata (21 kB)
Collecting tensorboardX>=2.1 (from darts)
  Downloading tensorboardx-2.6.4-py3-none-any.whl.metadata (6.2 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning<2.5.3,>=2.0.0->darts)
  Downloading torchmetrics-1.8.2-py3-none-any.whl.meta

In [79]:
import pandas as pd
# Reload the ARIMA snapshot and rebuild a timezone-free, midnight-aligned date index.
arima_dataset = pd.read_csv("/content/aapl_model_arima_dataset.csv")
parsed_dates = pd.to_datetime(arima_dataset['date'])
arima_dataset['date'] = parsed_dates.dt.tz_localize(None).dt.normalize()
df = arima_dataset.set_index('date')

### Test Train Split

In [80]:
# Only the training features are needed from this split (the scaler is fit on
# them in the next cell); the two middle results are discarded.
(X_train, _, _,
 y_test, split_len) = test_train_split(df, target_col='daily_prct_change', test_size=0.2)

### Scaling

In [81]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training rows only, then apply it to every row.
scaler = StandardScaler().fit(X_train)
scaled_values = scaler.transform(df[X_train.columns])

# Re-wrap the scaled array with the original column names and date index.
X = pd.DataFrame(scaled_values, columns=X_train.columns, index=df.index)

### Training

In [82]:
from darts import TimeSeries
from darts.models import TCNModel
from darts.dataprocessing.transformers import MissingValuesFiller

# Every scaled column is used as a covariate.
feature_cols = X.columns.to_list()

# Build business-day ("B") series. fill_missing_dates inserts the dates that
# are absent from the data; MissingValuesFiller fills them in below.
# NOTE: this rebinds `X` from a DataFrame to a TimeSeries, so re-run the
# scaling cell before re-running this one.
X = TimeSeries.from_dataframe(X.reset_index(), time_col='date', value_cols=feature_cols, fill_missing_dates=True, freq="B")
y = TimeSeries.from_dataframe(df['daily_prct_change'].reset_index(), time_col='date', value_cols=['daily_prct_change'], fill_missing_dates=True, freq="B")

filler = MissingValuesFiller()

y = filler.transform(y)
X = filler.transform(X)

split_point = int(0.8 * len(y))     # 80% train
y_train, y_test = y[:split_point], y[split_point:]
X_train, X_test = X[:split_point], X[split_point:]

tcn = TCNModel(
    input_chunk_length=60,         # lookback window
    output_chunk_length=1,         # 1-day ahead
    kernel_size=3,
    num_filters=32,
    n_epochs=50,
    dropout=0.1,
    random_state=42,               # fixed seed so training is repeatable
    pl_trainer_kwargs={
        "accelerator": "gpu",
        "devices": [0],            # or -1 for all GPUs
    },
)

# Train on the first 80% of the target, with the scaled features as past covariates.
tcn.fit(
    series=y_train,
    past_covariates=X_train,
    verbose=False
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=50` reached.


TCNModel(output_chunk_shift=0, kernel_size=3, num_filters=32, num_layers=None, dilation_base=2, weight_norm=False, dropout=0.1, input_chunk_length=60, output_chunk_length=1, n_epochs=50, pl_trainer_kwargs={'accelerator': 'gpu', 'devices': [0]})

### Prediction

In [83]:
# Forecasting begins at the first timestamp of the held-out series.
start_time = y_test.start_time()

# Rolling one-step-ahead forecasts from the already-fitted model.
forecast_options = dict(
    start=start_time,
    forecast_horizon=1,      # one step ahead per forecast
    stride=1,                # advance one time point between forecasts
    retrain=False,           # reuse the fitted model, no retraining
    last_points_only=True,   # keep only the final point of each forecast
    verbose=False,
)
y_pred = tcn.historical_forecasts(series=y, past_covariates=X, **forecast_options)

  return fn(*args, **kwargs)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


### Metrics

In [84]:
# Display the TCN's rolling one-step-ahead forecasts.
y_pred

Unnamed: 0_level_0,daily_prct_change
date,Unnamed: 1_level_1
2023-12-06,1.024556
2023-12-07,-0.262628
2023-12-08,-0.023962
2023-12-11,0.425906
2023-12-12,0.485322
...,...
2025-10-30,-0.758099
2025-10-31,-1.624808
2025-11-03,0.167058
2025-11-04,-0.371518


# Resources

* [https://www.kaggle.com/code/yasirabdaali/make-pandas-super-fast-with-gpu-acceleration](https://www.kaggle.com/code/yasirabdaali/make-pandas-super-fast-with-gpu-acceleration)
* [https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html](https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html)
* [https://pandas.pydata.org/docs/reference/index.html](https://pandas.pydata.org/docs/reference/index.html)
* [https://medium.com/rapids-ai/arima-forecast-large-time-series-datasets-with-rapids-cuml-18428a00d02e](https://medium.com/rapids-ai/arima-forecast-large-time-series-datasets-with-rapids-cuml-18428a00d02e)
* [https://unit8co.github.io/darts/index.html](https://unit8co.github.io/darts/index.html)