## Installing Dependencies

In [59]:
%pip install ta

Collecting ta
  Downloading ta-0.11.0.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: ta
  Building wheel for ta (setup.py) ... [?25l[?25hdone
  Created wheel for ta: filename=ta-0.11.0-py3-none-any.whl size=29412 sha256=d546ff00ddda01e11d5b9f7801eacae84522b8e1e30644d2368d256134dc44d1
  Stored in directory: /root/.cache/pip/wheels/5c/a1/5f/c6b85a7d9452057be4ce68a8e45d77ba34234a6d46581777c6
Successfully built ta
Installing collected packages: ta
Successfully installed ta-0.11.0


## GPU Configuration

In [60]:
# Load the cudf.pandas IPython extension ahead of the `import pandas` cells below.
%load_ext cudf.pandas

The cudf.pandas extension is already loaded. To reload it, use:
  %reload_ext cudf.pandas


# Loading Datasets

NOTE: Upload the data files (`aapl_price.json`, `aapl_fundamentals.csv`, `vix.csv`) to the Colab runtime under `/content` before running the cell below.

In [101]:
import pandas as pd

# Short keys used in the price JSON mapped to descriptive column names.
column_map = {
    'c': 'close',
    'h': 'high',
    'l': 'low',
    'o': 'open',
    't': 'date',
    'v': 'volume',
}

# Price data: keep only the mapped columns, then give them their long names.
df_price = (
    pd.read_json('/content/aapl_price.json')[column_map.keys()]
    .rename(columns=column_map)
)

# Both CSV sources get their date column renamed to the shared 'date' label.
df_fundamentals = (
    pd.read_csv('/content/aapl_fundamentals.csv')
    .rename(columns={'Quarter End Date': 'date'})
)
df_vix = (
    pd.read_csv('/content/vix.csv')
    .rename(columns={'DATE': 'date'})
)



In [102]:
# Show the first rows of each raw frame, in load order.
display(
    df_price.head(),
    df_fundamentals.head(),
    df_vix.head(),
)

Unnamed: 0,close,high,low,open,date,volume
0,23.78,23.78,23.02,23.16,2016-01-04T05:00:00Z,287741356
1,23.18,23.89,23.11,23.87,2016-01-05T05:00:00Z,234762144
2,22.73,23.1,22.54,22.69,2016-01-06T05:00:00Z,284319308
3,21.77,22.6,21.76,22.27,2016-01-07T05:00:00Z,343985812
4,21.88,22.37,21.84,22.24,2016-01-08T05:00:00Z,300265168


Unnamed: 0,date,EBITDA (USD millions),EV (USD millions)
0,2025-09-30,35550,3790000
1,2025-06-30,31032,3060000
2,2025-03-31,32250,3340000
3,2024-12-31,45912,3920000
4,2024-09-30,32502,3550000


Unnamed: 0,date,OPEN,HIGH,LOW,CLOSE
0,01/02/1990,17.24,17.24,17.24,17.24
1,01/03/1990,18.19,18.19,18.19,18.19
2,01/04/1990,19.22,19.22,19.22,19.22
3,01/05/1990,20.11,20.11,20.11,20.11
4,01/08/1990,20.26,20.26,20.26,20.26


# Data Curation

### DateTime Conversion

In [103]:
# Price timestamps: parse, drop the timezone information, then truncate to midnight.
df_price['date'] = (
    pd.to_datetime(df_price['date'])
    .dt.tz_localize(None)
    .dt.normalize()
)

# Fundamentals and VIX dates: parse and truncate to midnight.
for frame in (df_fundamentals, df_vix):
    frame['date'] = pd.to_datetime(frame['date']).dt.normalize()

In [104]:
# Print the column dtypes of each frame, in load order.
for frame in (df_price, df_fundamentals, df_vix):
    print(frame.dtypes)

close            float64
high             float64
low              float64
open             float64
date      datetime64[ns]
volume             int64
dtype: object
date                     datetime64[ns]
EBITDA (USD millions)            object
EV (USD millions)                object
dtype: object
date     datetime64[ns]
OPEN            float64
HIGH            float64
LOW             float64
CLOSE           float64
dtype: object


### Numeric Column Conversion

In [105]:
# Remove every ',' from string cells, then cast the two value columns to float.
df_fundamentals = (
    df_fundamentals
    .replace(to_replace=',', value='', regex=True)
    .astype({
        'EBITDA (USD millions)': 'float',
        'EV (USD millions)': 'float',
    })
)

### Time Series Index

In [106]:
# Re-index all three frames on their 'date' column.
df_price, df_fundamentals, df_vix = (
    frame.set_index('date')
    for frame in (df_price, df_fundamentals, df_vix)
)

# Feature Engineering

## Normalized Close

In [107]:
from ta.trend import SMAIndicator

# 20-period simple moving average of the close.
sma = SMAIndicator(close=df_price['close'], window=20)

# Close expressed as a ratio to its moving average.
df_price['norm_close'] = df_price['close'].div(sma.sma_indicator())

## MACD

In [108]:
from ta.trend import MACD

# MACD indicator on the close, using the library's default windows.
macd = MACD(close=df_price['close'])

# Columns are added in the order line, diff, signal.
for column, values in (
    ('macd_line', macd.macd()),
    ('macd_diff', macd.macd_diff()),
    ('macd_signal', macd.macd_signal()),
):
    df_price[column] = values

## RSI

In [109]:
from ta.momentum import RSIIndicator

# RSI indicator on the close, using the library's default window.
rsi = RSIIndicator(close=df_price['close'])

df_price['rsi'] = rsi.rsi()

## EV/EBITDA

In [110]:
# Enterprise value divided by EBITDA, per fundamentals row.
df_fundamentals['ev_ebidta'] = (
    df_fundamentals['EV (USD millions)']
    .div(df_fundamentals['EBITDA (USD millions)'])
)

## OBV





In [111]:
from ta.volume import OnBalanceVolumeIndicator
from ta.trend import SMAIndicator

# On-balance volume built from the close and volume columns.
obv = OnBalanceVolumeIndicator(
    close=df_price['close'],
    volume=df_price['volume'],
)
obv_series = obv.on_balance_volume()

# 20-period simple moving average of the OBV series.
sma = SMAIndicator(close=obv_series, window=20)

# One-step OBV change relative to the OBV moving average.
df_price['norm_obv_diff'] = obv_series.diff().div(sma.sma_indicator())

## Normalized VIX Change

In [112]:
from ta.trend import SMAIndicator

# 20-period simple moving average of the VIX close.
vix_sma = SMAIndicator(close=df_vix['CLOSE'], window=20)

# 5-period percentage change of the VIX close (in percent), divided by its moving average.
df_vix['norm_vix_change'] = (
    df_vix['CLOSE']
    .pct_change(5)
    .mul(100)
    .div(vix_sma.sma_indicator())
)

## Closing price 5 Day Returns

In [113]:
# 5-period percentage change of the close, expressed in percent.
df_price['price_change'] = df_price['close'].pct_change(periods=5).mul(100)

## Data Set Creation

Combine the price, fundamentals, and VIX features into a single data set.

In [114]:
# Column-wise concatenation of the engineered features from the three frames.
df = pd.concat(
    [
        df_price[[
            'norm_close',
            'price_change',
            'macd_line',
            'macd_diff',
            'macd_signal',
            'rsi',
            'norm_obv_diff',
        ]],
        df_fundamentals[['ev_ebidta']],
        df_vix[['norm_vix_change']],
    ],
    axis=1,
)

Forward-fill `ev_ebidta`, since it is quarterly data.

In [115]:
# Carry the last known ev_ebidta value forward over the rows that lack one.
df = df.assign(ev_ebidta=df['ev_ebidta'].ffill())

Drop the rows that contain NaN values.

In [116]:
# Drop every row that has at least one missing value.
df = df.dropna(axis=0, how='any')

Reorder the columns so that the target (`price_change`) comes last.

In [117]:
# Fixed column order: features first, target ('price_change') last.
df = df.loc[:, [
    'norm_close',
    'norm_obv_diff',
    'macd_line',
    'macd_diff',
    'macd_signal',
    'rsi',
    'ev_ebidta',
    'norm_vix_change',
    'price_change',
]]

Write the data set to a CSV file.

In [119]:
# Persist the model data set, keeping both the date index and the header row.
df.to_csv(
    "/content/aapl_model_dataset.csv",
    header=True,
    index=True,
)

# Model Implementation

## Data Ingestion

In [139]:
import pandas as pd
# Reload the saved data set, parse 'date' to midnight-normalized timestamps and index on it.
df = (
    pd.read_csv("/content/aapl_model_dataset.csv")
    .assign(date=lambda frame: pd.to_datetime(frame['date']).dt.tz_localize(None).dt.normalize())
    .set_index('date')
)
df.head()

Unnamed: 0_level_0,norm_close,norm_obv_diff,macd_line,macd_diff,macd_signal,rsi,ev_ebidta,norm_vix_change,price_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2016-03-31,1.041724,-0.107127,0.559188,0.066089,0.493099,71.00847,38.940719,-0.422218,2.699336
2016-04-01,1.047156,0.101163,0.584986,0.07351,0.511476,73.167919,38.940719,-0.717148,4.086739
2016-04-04,1.053577,0.13325,0.618475,0.085599,0.532876,75.319795,38.940719,-0.477911,5.613741
2016-04-05,1.037534,-0.090122,0.61453,0.065323,0.549207,68.460985,38.940719,0.757635,2.005731
2016-04-06,1.043449,0.08559,0.625177,0.060776,0.564401,71.009868,38.940719,0.25967,1.287208


## Functions

In [140]:
def test_train_split(df, target_col, test_size=0.1):
  total_length = df.count().max()
  split_len = int(total_length * 0.2)
  split_idx = total_length - split_len
  X, y = df[[x for x in df.columns if x != target_col]], df[[target_col]]
  X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
  y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
  return X_train, X_test, y_train, y_test, split_len

## ARIMA

### Test Train Split

In [141]:
# Hold out the last 20% of rows; 'price_change' is the target.
X_train, X_test, y_train, y_test, split_len = test_train_split(
    df,
    target_col='price_change',
    test_size=0.2,
)

### Training

In [142]:
import numpy as np
import cudf
from cuml.tsa.arima import ARIMA

# Build the ARIMA model on the training target with order (2, 0, 2),
# seasonal order (2, 0, 2, 7) and no fitted intercept.
# NOTE(review): the seasonal period is 7 — confirm this matches the row frequency of the data.
arima = ARIMA(y_train, order=(2,0,2), seasonal_order=(2,0,2,7),
              fit_intercept=False)

# Fit on y_train; as the last expression, the fitted model is also displayed.
arima.fit()

ARIMA()

### Prediction

In [143]:
# In-sample predictions up to len(y_train), followed by split_len forecast steps.
y_predict = pd.concat(
    [
        arima.predict(end=len(y_train)),
        arima.forecast(split_len),
    ],
    axis=0,
)

### Metrics

In [145]:
from sklearn.metrics import r2_score
# R^2 score (multiplied by 100) of the first len(y_train) rows of y_predict
# against the training targets.
print(f'Training accuracy: {r2_score(y_train, y_predict.iloc[:len(y_train)]) * 100}')
# Same score for the remaining rows of y_predict against the test targets.
print(f'Testing accuracy: {r2_score(y_test, y_predict.iloc[len(y_train):]) * 100}')

Training accuracy: 35.67597086390337
Testing accuracy: -1.274612105625672


### Feature Engineering

In [153]:
# Discard y_predict's own index and give it df's index before storing it as a column.
# NOTE(review): assumes y_predict has exactly len(df) rows — confirm.
y_predict = y_predict.reset_index(drop=True)
y_predict.index = df.index
df['arima_pred'] = y_predict
# Actual targets: training rows followed by test rows.
df_act = pd.concat([y_train, y_test], axis=0)
# Residual = actual price_change minus the ARIMA prediction.
df['arima_res'] = df_act['price_change'] - df['arima_pred']
# Move the target column 'price_change' to the last position.
df = df[[*[x for x in df.columns if x != 'price_change'], 'price_change']]
df.head()

Unnamed: 0_level_0,norm_close,norm_obv_diff,macd_line,macd_diff,macd_signal,rsi,ev_ebidta,norm_vix_change,arima_pred,arima_res,price_change
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-03-31,1.041724,-0.107127,0.559188,0.066089,0.493099,71.00847,38.940719,-0.422218,0.0,2.699336,2.699336
2016-04-01,1.047156,0.101163,0.584986,0.07351,0.511476,73.167919,38.940719,-0.717148,0.417989,3.66875,4.086739
2016-04-04,1.053577,0.13325,0.618475,0.085599,0.532876,75.319795,38.940719,-0.477911,1.342803,4.270938,5.613741
2016-04-05,1.037534,-0.090122,0.61453,0.065323,0.549207,68.460985,38.940719,0.757635,2.0727,-0.06697,2.005731
2016-04-06,1.043449,0.08559,0.625177,0.060776,0.564401,71.009868,38.940719,0.25967,2.942193,-1.654985,1.287208


### Data Snapshot

In [155]:
# Snapshot the data set with the ARIMA columns, keeping the date index and header row.
df.to_csv(
    "/content/aapl_model_arima_dataset.csv",
    header=True,
    index=True,
)

## TCN

In [156]:
# NOTE(review): darts is installed without a version pin — consider pinning it for reproducible runs.
%pip install darts



In [157]:
import pandas as pd
# Reload the ARIMA snapshot and parse 'date' to midnight-normalized timestamps.
df = pd.read_csv("/content/aapl_model_arima_dataset.csv")
df = df.assign(
    date=pd.to_datetime(df['date']).dt.tz_localize(None).dt.normalize()
)

### Train Test Split

In [158]:
from darts import TimeSeries

# Index on 'date', conform to a business-day frequency and forward-fill the
# rows that this introduces.
df = df.set_index('date').asfreq('B').ffill()

target_col = 'price_change'
feature_cols = [name for name in df.columns if name != target_col]

# Darts series for the covariates (X) and the target (y), keyed on 'date'.
X = TimeSeries.from_dataframe(
    df[feature_cols].reset_index(),
    time_col='date',
    value_cols=feature_cols,
)
y = TimeSeries.from_dataframe(
    df[[target_col]].reset_index(),
    time_col='date',
    value_cols=target_col,
)

# Chronological split: first 80% of the points train, the rest test.
split_point = int(len(y) * 0.8)
X_train, X_test = X[:split_point], X[split_point:]
y_train, y_test = y[:split_point], y[split_point:]

### Feature Scaling

In [159]:
from darts.dataprocessing.transformers.scaler import Scaler

# Use one scaler per series so that fitting the target does not overwrite the
# parameters fitted on the features. Both are fitted on the training part only.
feature_scaler = Scaler()
X_train = feature_scaler.fit_transform(X_train)
X_test = feature_scaler.transform(X_test)

# `scaler` stays bound to the target fit: later cells call
# scaler.inverse_transform on the target and prediction series.
scaler = Scaler()
y_train = scaler.fit_transform(y_train)
y_test = scaler.transform(y_test)

### Training

In [160]:
from darts.models import TCNModel


# TCN that looks back 60 points to predict 1 point ahead.
tcn = TCNModel(
    input_chunk_length=60,         # lookback window
    output_chunk_length=1,         # 1-day ahead
    kernel_size=3,
    num_filters=16,
    n_epochs=30,
    dropout=0.3,
    random_state=42,               # fixed seed so re-runs train reproducibly
    pl_trainer_kwargs={
        "accelerator": "gpu",
        "devices": [0],            # or -1 for all GPUs
    },
)

# Train on the scaled target with the scaled features as past covariates.
tcn.fit(
    series=y_train,
    past_covariates=X_train,
    verbose=False
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=30` reached.


TCNModel(output_chunk_shift=0, kernel_size=3, num_filters=16, num_layers=None, dilation_base=2, weight_norm=False, dropout=0.3, input_chunk_length=60, output_chunk_length=1, n_epochs=30, pl_trainer_kwargs={'accelerator': 'gpu', 'devices': [0]})

### Prediction

In [161]:
# Timestamp at position input_chunk_length of the training series, used as
# the first forecast point.
train_start = y_train.time_index[tcn.input_chunk_length]

# One-step-ahead forecasts over the training series with the already-fitted model.
y_pred_train = tcn.historical_forecasts(
    series=y_train,
    past_covariates=X_train,
    start=train_start,
    retrain=False,
    forecast_horizon=1,
    stride=1,
    last_points_only=True,
    verbose=False,
)


  return fn(*args, **kwargs)
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [162]:
# One-step-ahead forecasts over the test series with the already-fitted model.
y_pred = tcn.historical_forecasts(
    series=y_test,
    past_covariates=X_test,
    retrain=False,          # reuse the fitted model
    forecast_horizon=1,     # predict 1 step ahead each time
    stride=1,               # every time point
    last_points_only=True,  # keep only the 1-step-ahead point
    verbose=False,
)

INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


### Metrics

In [163]:
from darts.metrics import r2_score
# R^2 score (multiplied by 100) of the training-span forecasts against the training target.
# r2_score here is the darts.metrics import from this cell.
print(f'Training accuracy: {r2_score(y_train, y_pred_train) * 100}')
# Same score for the test-span forecasts against the test target.
print(f'Testing accuracy: {r2_score(y_test, y_pred) * 100}')

Training accuracy: 76.59565325583021
Testing accuracy: 71.14597813719331


### Feature Engineering

#### Unscaled Real Values

In [185]:
# Map the scaled targets and forecasts back through `scaler`.
# The test forecasts (y_pred) are stored under the new name y_pred_test.
y_train, y_test, y_pred_train, y_pred_test = (
    scaler.inverse_transform(scaled_series)
    for scaled_series in (y_train, y_test, y_pred_train, y_pred)
)

#### TCN Prediction + Residual

In [188]:
# Train forecasts followed by test forecasts, with the column renamed to 'tcn_pred'.
df_tcn = pd.concat(
    [y_pred_train.to_dataframe(), y_pred_test.to_dataframe()],
    axis=0,
).rename(columns={'price_change': 'tcn_pred'})

# Actual target values: train rows followed by test rows.
df_tcn['tcn_actual'] = pd.concat(
    [y_train.to_dataframe(), y_test.to_dataframe()],
    axis=0,
)

# Residual = actual minus TCN prediction.
df_tcn['tcn_res'] = df_tcn['tcn_actual'].sub(df_tcn['tcn_pred'])
df_tcn.head()


Unnamed: 0_level_0,tcn_pred,tcn_actual,tcn_res
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2016-06-23,-1.77213,-1.481814,0.290316
2016-06-24,0.214812,-2.022059,-2.236871
2016-06-27,-2.025845,-3.224321,-1.198476
2016-06-28,-3.78205,-2.375514,1.406536
2016-06-29,-2.254864,-1.192114,1.06275


#### Add new features

In [189]:
# Attach the TCN prediction and residual columns, keeping only dates present in both frames.
df = pd.concat(
    [df, df_tcn[['tcn_pred', 'tcn_res']]],
    join='inner',
    axis=1,
)
# Move the target column 'price_change' to the last position.
df = df[[name for name in df.columns if name != 'price_change'] + ['price_change']]
df.head()

#### Data Snapshot

In [190]:
# Snapshot the data set with the TCN columns, keeping the date index and header row.
df.to_csv(
    "/content/aapl_model_tcn_dataset.csv",
    header=True,
    index=True,
)

# Resources

* [https://www.kaggle.com/code/yasirabdaali/make-pandas-super-fast-with-gpu-acceleration](https://www.kaggle.com/code/yasirabdaali/make-pandas-super-fast-with-gpu-acceleration)
* [https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html](https://technical-analysis-library-in-python.readthedocs.io/en/latest/ta.html)
* [https://pandas.pydata.org/docs/reference/index.html](https://pandas.pydata.org/docs/reference/index.html)
* [https://medium.com/rapids-ai/arima-forecast-large-time-series-datasets-with-rapids-cuml-18428a00d02e](https://medium.com/rapids-ai/arima-forecast-large-time-series-datasets-with-rapids-cuml-18428a00d02e)
* [https://unit8co.github.io/darts/index.html](https://unit8co.github.io/darts/index.html)