# The environment

The next three code cells contains commands that will configure your Coogle Colab environment.

When you transfer the strategy to Quantiacs, remove these cells.

They are not relevant for the Quantiacs platform.

At first, setup the toolbox from github using pip:

In [1]:
###DEBUG###

! pip install git+https://github.com/quantiacs/toolbox.git 2>/dev/null

# decrease height
from IPython.display import Javascript
display(Javascript('google.colab.output.setIframeHeight(0, true, {maxHeight: 100})'))

Collecting git+https://github.com/quantiacs/toolbox.git
  Cloning https://github.com/quantiacs/toolbox.git to /tmp/pip-req-build-_f0717bz
  Resolved https://github.com/quantiacs/toolbox.git to commit 272e66e017d3eb6d40517ffa39cd6a92dc5072d8
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting scipy>=1.14.0 (from qnt==0.0.407)
  Downloading scipy-1.15.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.0/62.0 kB[0m [31m1.0 MB/s[0m eta [36m0:00:00[0m
Collecting xarray==2024.6.0 (from qnt==0.0.407)
  Downloading xarray-2024.6.0-py3-none-any.whl.metadata (11 kB)
Collecting progressbar2<4,>=3.55 (from qnt==0.0.407)
  Downloading progressbar2-3.55.0-py2.py3-none-any.whl.metadata (11 kB)
Collecting cftime==1.6.4 (from qnt==0.0.407)
  Downloading cftime-1.6.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Collecting plotly==5.22.0 (from qnt==0.0.407)
  Down

<IPython.core.display.Javascript object>

Then install TA-Lib (indicators library) if you need it.

Instead of TA-Lib you can use qnt.ta or another library. In this case, skip the next cell.

Note that the installation can take several minutes.

In [2]:
"""
###DEBUG###
!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzvf ta-lib-0.4.0-src.tar.gz
%cd ta-lib
!./configure --prefix=/usr
! make
!make install
!pip install Ta-Lib

# test import
import talib

# decrease height
from IPython.display import Javascript
display(Javascript('google.colab.output.setIframeHeight(0, true, {maxHeight: 100})'))
"""

"\n###DEBUG###\n!wget http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz\n!tar -xzvf ta-lib-0.4.0-src.tar.gz\n%cd ta-lib\n!./configure --prefix=/usr\n! make\n!make install\n!pip install Ta-Lib\n\n# test import\nimport talib\n\n# decrease height\nfrom IPython.display import Javascript\ndisplay(Javascript('google.colab.output.setIframeHeight(0, true, {maxHeight: 100})'))\n"

Finally specify the api key and disable interactive charts.

You can find the api key in [your profile](https://quantiacs.com/personalpage/homepage).

We disable interactive charts in the library because interact+plotly do not work correctly in Google Colab.

In [3]:
###DEBUG###
import os

os.environ['API_KEY'] = "aa39740a-02b3-4dbb-a6b5-b871631d4ccc"
os.environ['NONINTERACT'] = 'True'

# The strategy

The next cell contains the strategy code itself.

In [4]:
%%javascript
window.IPython && (IPython.OutputArea.prototype._should_scroll = function(lines) { return false; })
// run this cell for disabling widget scrolling

<IPython.core.display.Javascript object>

In [5]:
import logging

import xarray as xr  # xarray for data manipulation

import qnt.data as qndata     # functions for loading data
import qnt.backtester as qnbt # built-in backtester
import qnt.ta as qnta         # technical analysis library
import qnt.stats as qnstats   # statistical functions

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

np.seterr(divide = "ignore")

from qnt.ta.macd import macd
from qnt.ta.rsi  import rsi
from qnt.ta.stochastic import stochastic_k, stochastic, slow_stochastic

from sklearn import linear_model
from sklearn.metrics import r2_score
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestClassifier

NOTICE: The environment variable DATA_BASE_URL was not specified. The default value is 'https://data-api.quantiacs.io/'
NOTICE: The environment variable CACHE_RETENTION was not specified. The default value is '7'
NOTICE: The environment variable CACHE_DIR was not specified. The default value is 'data-cache'


In [6]:
#stock_data = qndata.stocks.load_spx_data(tail = 365 * 5, assets = ['NAS:AAL','NAS:AAPL','NAS:EA','NAS:CEPH','NAS:AKAM','NAS:DXCM','NAS:CA','NAS:ALTR~1','NAS:TLAB','NAS:FANG','NAS:GEN','NAS:BMC','NAS:SNPS','NAS:SBAC','NAS:TXN','NAS:PTC','NAS:BKR','NAS:EXC','NAS:ALGN','NAS:LKQ','NAS:ENPH','NAS:CCEP','NAS:ALTR','NAS:FOSL','NAS:HST'])
stock_data = qndata.stocks.load_spx_data(tail = 365 * 5, assets = ['NAS:AAL','NAS:AAPL'])

| | #                                             | 15975 Elapsed Time: 0:00:00
| | #                                             | 15975 Elapsed Time: 0:00:00
| |  #                                            | 45926 Elapsed Time: 0:00:00


fetched chunk 1/1 2s
Data loaded 4s


In [7]:
def get_features(data):
    """Enhanced feature construction using multiple technical indicators."""

    # MACD and its components
    macd, macd_signal, macd_histogram = qnta.macd(data.sel(field="close"))

    # Logarithmic transformation of the closing price
    price = data.sel(field="close").ffill("time").bfill("time").fillna(0)
    log_price = np.log(price)

    # Relative Strength Index (RSI)
    rsi = qnta.rsi(data.sel(field="close"))

    # trend:
    trend = qnta.roc(qnta.lwma(data.sel(field="close"), 60), 1)

    # volatility:
    volatility = qnta.tr(data.sel(field="high"), data.sel(field="low"), data.sel(field="close"))
    volatility = volatility / data.sel(field="close")
    volatility = qnta.lwma(volatility, 14)

    # Combine all features
    result = xr.concat(
        [macd_signal, macd_histogram, log_price, rsi, trend, volatility],
        pd.Index(
            ["macd_signal", "macd_histogram", "log_price", "rsi", "trend", "volatility"],
            name="field"
        )
    )

    return result.transpose("time", "field", "asset")


In [8]:
my_features = get_features(stock_data)
display(my_features.sel(field="trend").to_pandas())

asset,NAS:AAL,NAS:AAPL
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-22,,
2020-01-23,,
2020-01-24,,
2020-01-27,,
2020-01-28,,
...,...,...
2025-01-10,0.678094,-0.011361
2025-01-13,0.493014,-0.045787
2025-01-14,0.595151,-0.061104
2025-01-15,0.545674,0.000734


In [9]:
def get_target_classes(data):
    """Clases objetivo para predecir si el precio sube o baja."""

    price_current = data.sel(field="close")
    price_future = qnta.shift(price_current, -1)

    class_positive = 1
    class_negative = 0

    target_price_up = xr.where(
        price_future > price_current, class_positive, class_negative
    )

    return target_price_up

In [10]:
# displaying the target classes:
my_targetclass = get_target_classes(stock_data)
display(my_targetclass.to_pandas())

asset,NAS:AAL,NAS:AAPL
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-01-22,1,1
2020-01-23,0,0
2020-01-24,0,0
2020-01-27,1,1
2020-01-28,0,1
...,...,...
2025-01-10,0,0
2025-01-13,1,0
2025-01-14,0,1
2025-01-15,1,0


In [11]:
def get_model():
    """Modelo de Random Forest."""
    return RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)

In [12]:
# Create and train the models working on an asset-by-asset basis.

asset_name_all = stock_data.coords["asset"].values

models = dict()

for asset_name in asset_name_all:
    target_cur = my_targetclass.sel(asset=asset_name).dropna(dim="time", how="any")
    features_cur = my_features.sel(asset=asset_name).dropna(dim="time", how="any")

    target_for_learn_df, feature_for_learn_df = xr.align(target_cur, features_cur, join="inner")

    if len(features_cur.time) < 10:
        continue  # Not enough data for training

    model = get_model()

    try:
        model.fit(feature_for_learn_df.values, target_for_learn_df)
        models[asset_name] = model
    except ValueError as e:
        logging.exception(f"ValueError occurred while training model for {asset_name}: {e}")
    except Exception as e:
        logging.exception(f"Error occurred while training model for {asset_name}: {e}")

print(models)

{'NAS:AAL': RandomForestClassifier(max_depth=5, random_state=42), 'NAS:AAPL': RandomForestClassifier(max_depth=5, random_state=42)}


In [13]:
# Performs prediction and generates output weights:

asset_name_all = stock_data.coords["asset"].values
weights = xr.zeros_like(stock_data.sel(field="close"))

for asset_name in asset_name_all:
    if asset_name in models:
        model = models[asset_name]
        features_all = my_features
        features_cur = features_all.sel(asset=asset_name).dropna(dim="time", how="any")
        if len(features_cur.time) < 1:
            continue
        try:
            weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = model.predict(features_cur.values)
        except KeyboardInterrupt as e:
            raise e
        except:
            logging.exception("model prediction failed")

print(weights)

<xarray.DataArray 'stocks_s&p500' (time: 1255, asset: 2)> Size: 20kB
array([[0., 0.],
       [0., 0.],
       [0., 0.],
       ...,
       [0., 1.],
       [0., 0.],
       [0., 0.]])
Coordinates:
  * time     (time) datetime64[ns] 10kB 2020-01-22 2020-01-23 ... 2025-01-16
    field    <U5 20B 'close'
  * asset    (asset) <U8 64B 'NAS:AAL' 'NAS:AAPL'


In [14]:
def get_sharpe(stock_data, weights):
    """Calculates the Sharpe ratio"""
    rr = qnstats.calc_relative_return(stock_data, weights)
    sharpe = qnstats.calc_sharpe_ratio_annualized(rr).values[-1]
    return sharpe

sharpe = get_sharpe(stock_data, weights)
sharpe

5.655934173384878

In [15]:
def train_model(data):
    """Entrenar modelos Random Forest por activo."""

    asset_name_all = data.coords["asset"].values
    features_all = get_features(data)
    target_all = get_target_classes(data)

    models = dict()

    for asset_name in asset_name_all:
        target_cur = target_all.sel(asset=asset_name).dropna(dim="time", how="any")
        features_cur = features_all.sel(asset=asset_name).dropna(dim="time", how="any")

        target_for_learn_df, feature_for_learn_df = xr.align(
            target_cur, features_cur, join="inner"
        )

        if len(features_cur.time) < 10:
            continue  # No hay suficientes datos para entrenar

        model = get_model()

        try:
            model.fit(feature_for_learn_df.values, target_for_learn_df)
            models[asset_name] = model
        except ValueError as e:
            logging.exception(f"Error de valor al entrenar el modelo para {asset_name}: {e}")
        except Exception as e:
            logging.exception(f"Error al entrenar el modelo para {asset_name}: {e}")

    return models

In [16]:
def predict_weights(models, data):
    """Predicción de pesos usando los modelos."""

    asset_name_all = data.coords["asset"].values
    weights = xr.zeros_like(data.sel(field="close"))

    for asset_name in asset_name_all:
        if asset_name in models:
            model = models[asset_name]
            features_all = get_features(data)
            features_cur = features_all.sel(asset=asset_name).dropna(dim="time", how="any")

            if len(features_cur.time) < 1:
                continue

            try:
                weights.loc[dict(asset=asset_name, time=features_cur.time.values)] = (
                    model.predict(features_cur.values)
                )
            except KeyboardInterrupt as e:
                raise e
            except Exception as e:
                logging.exception(f"Error al predecir los pesos para {asset_name}: {e}")

    return weights

In [17]:
# Calculate weights using the backtester:
weights = qnbt.backtest_ml(
    train                         = train_model,
    predict                       = predict_weights,
    train_period                  =  2 *365,  # the data length for training in calendar days
    retrain_interval              = 10 *365,  # how often we have to retrain models (calendar days)
    retrain_interval_after_submit = 1,        # how often retrain models after submission during evaluation (calendar days)
    predict_each_day              = False,    # Is it necessary to call prediction for every day during backtesting?
                                              # Set it to True if you suspect that get_features is looking forward.
    competition_type              = "stocks_s&p500",  # competition type
    lookback_period               = 365,                 # how many calendar days are needed by the predict function to generate the output
    start_date                    = "2005-01-01",        # backtest start date
    analyze                       = True,
    build_plots                   = True  # do you need the chart?
)

Run the last iteration...


| |#                                              | 15975 Elapsed Time: 0:00:00
| |            #                                | 3756885 Elapsed Time: 0:00:01


fetched chunk 1/2 9s


| |       #                                      | 985399 Elapsed Time: 0:00:00


fetched chunk 2/2 12s
Data loaded 13s


| |      #                                       | 360907 Elapsed Time: 0:00:00


fetched chunk 1/1 8s
Data loaded 8s
Output cleaning...
fix uniq
ffill if the current price is None...
Check liquidity...
Fix liquidity...
Ok.
Check missed dates...
Ok.
Normalization...
Output cleaning is complete.


NOTICE: The environment variable OUTPUT_PATH was not specified. The default value is 'fractions.nc.gz'


Write output: fractions.nc.gz


NOTICE: The environment variable OUT_STATE_PATH was not specified. The default value is 'state.out.pickle.gz'


State saved.
---
Run First Iteration...


| | #                                             | 15975 Elapsed Time: 0:00:00
| |                    #                        | 3019809 Elapsed Time: 0:00:02


fetched chunk 1/2 8s


| |        #                                     | 924969 Elapsed Time: 0:00:00


fetched chunk 2/2 11s
Data loaded 11s
---
Run all iterations...
Load data...


| | #                                             | 15975 Elapsed Time: 0:00:00
| |            #                                | 3076413 Elapsed Time: 0:00:01


fetched chunk 1/15 3s


| |            #                                | 3086427 Elapsed Time: 0:00:01


fetched chunk 2/15 6s


| |            #                                | 3187772 Elapsed Time: 0:00:01


fetched chunk 3/15 9s


| |         #                                   | 2644762 Elapsed Time: 0:00:00


fetched chunk 4/15 11s


| |          #                                  | 3015779 Elapsed Time: 0:00:01


fetched chunk 5/15 14s


| |             #                               | 3256990 Elapsed Time: 0:00:01


fetched chunk 6/15 17s


| |                   #                         | 3319656 Elapsed Time: 0:00:01


fetched chunk 7/15 21s


| |            #                                | 3327157 Elapsed Time: 0:00:01


fetched chunk 8/15 24s


| |           #                                 | 3307863 Elapsed Time: 0:00:01


fetched chunk 9/15 26s


| |               #                             | 3355042 Elapsed Time: 0:00:01


fetched chunk 10/15 30s


| |           #                                 | 3345749 Elapsed Time: 0:00:01


fetched chunk 11/15 33s


| |           #                                 | 3411475 Elapsed Time: 0:00:01


fetched chunk 12/15 37s


| |          #                                  | 3505391 Elapsed Time: 0:00:01


fetched chunk 13/15 40s


| |           #                                 | 3728140 Elapsed Time: 0:00:01


fetched chunk 14/15 44s


| |          #                                  | 2394314 Elapsed Time: 0:00:01


fetched chunk 15/15 46s
Data loaded 48s


| |          #                                  | 3220427 Elapsed Time: 0:00:01


fetched chunk 1/13 3s


| |          #                                  | 3225538 Elapsed Time: 0:00:01


fetched chunk 2/13 5s


| |           #                                 | 3182992 Elapsed Time: 0:00:01


fetched chunk 3/13 9s


| |           #                                 | 3071597 Elapsed Time: 0:00:01


fetched chunk 4/13 11s


| |          #                                  | 3178706 Elapsed Time: 0:00:01


fetched chunk 5/13 14s


| |           #                                 | 3355064 Elapsed Time: 0:00:01


fetched chunk 6/13 17s


| |          #                                  | 3355841 Elapsed Time: 0:00:01


fetched chunk 7/13 20s


| |           #                                 | 3343032 Elapsed Time: 0:00:01


fetched chunk 8/13 22s


| |           #                                 | 3418604 Elapsed Time: 0:00:01


fetched chunk 9/13 25s


| |           #                                 | 3362407 Elapsed Time: 0:00:01


fetched chunk 10/13 28s


| |          #                                  | 3477916 Elapsed Time: 0:00:01


fetched chunk 11/13 31s


| |           #                                 | 3693847 Elapsed Time: 0:00:01


fetched chunk 12/13 34s


| |           #                                 | 3077057 Elapsed Time: 0:00:01


fetched chunk 13/13 37s
Data loaded 38s
Backtest...


| | #                                             | 15975 Elapsed Time: 0:00:00
| |           #                                 | 3239141 Elapsed Time: 0:00:01


fetched chunk 1/13 3s


| |          #                                  | 3248898 Elapsed Time: 0:00:01


fetched chunk 2/13 5s


| |            #                                | 3201548 Elapsed Time: 0:00:01


fetched chunk 3/13 8s


| |           #                                 | 3089468 Elapsed Time: 0:00:01


fetched chunk 4/13 11s


| |            #                                | 3196148 Elapsed Time: 0:00:01


fetched chunk 5/13 14s


| |           #                                 | 3377628 Elapsed Time: 0:00:01


fetched chunk 6/13 17s


| |          #                                  | 3378740 Elapsed Time: 0:00:01


fetched chunk 7/13 19s


| |           #                                 | 3365702 Elapsed Time: 0:00:01


fetched chunk 8/13 22s


| |          #                                  | 3442332 Elapsed Time: 0:00:01


fetched chunk 9/13 25s


| |           #                                 | 3385522 Elapsed Time: 0:00:01


fetched chunk 10/13 28s


| |              #                              | 3502810 Elapsed Time: 0:00:01


fetched chunk 11/13 31s


| |           #                                 | 3721838 Elapsed Time: 0:00:01


fetched chunk 12/13 34s


| |          #                                  | 3100271 Elapsed Time: 0:00:01


fetched chunk 13/13 36s
Data loaded 38s
Output cleaning...
fix uniq
ffill if the current price is None...
Check liquidity...
Fix liquidity...
Ok.
Check missed dates...
Ok.
Normalization...
Output cleaning is complete.


NOTICE: The environment variable OUTPUT_PATH was not specified. The default value is 'fractions.nc.gz'


Write output: fractions.nc.gz


NOTICE: The environment variable OUT_STATE_PATH was not specified. The default value is 'state.out.pickle.gz'


State saved.
---
Analyze results...
Check...
Check liquidity...
Ok.
Check missed dates...
Ok.
Check the sharpe ratio...
Period: 2006-01-01 - 2025-01-16
Sharpe Ratio = 0.23749444723496077


ERROR! The Sharpe Ratio is too low. 0.23749444723496077 < 0.7
Improve the strategy and make sure that the in-sample Sharpe Ratio more than 0.7.


---
Align...
Calc global stats...
---
Calc stats per asset...
Build plots...
---
Output:


asset,NAS:AAL,NAS:AAPL,NAS:ABNB,NAS:ACGL,NAS:ADBE,NAS:ADI,NAS:ADP,NAS:ADSK,NAS:AEP,NAS:AKAM
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2025-01-02,0.0,0.006329,0.006329,0.0,0.0,0.0,0.0,0.0,0.0,0.006329
2025-01-03,0.0,0.00578,0.00578,0.00578,0.00578,0.0,0.0,0.0,0.0,0.00578
2025-01-06,0.0,0.005025,0.005025,0.0,0.0,0.0,0.005025,0.0,0.005025,0.005025
2025-01-07,0.0,0.004505,0.004505,0.0,0.004505,0.0,0.004505,0.0,0.0,0.004505
2025-01-08,0.0,0.0,0.00495,0.00495,0.0,0.0,0.00495,0.0,0.0,0.00495
2025-01-10,0.0,0.0,0.003968,0.0,0.003968,0.003968,0.003968,0.0,0.003968,0.003968
2025-01-13,0.0,0.0,0.004149,0.0,0.004149,0.0,0.004149,0.0,0.004149,0.004149
2025-01-14,0.0,0.0,0.004444,0.004444,0.004444,0.004444,0.0,0.0,0.004444,0.004444
2025-01-15,0.0,0.0,0.004367,0.004367,0.0,0.0,0.0,0.004367,0.004367,0.004367
2025-01-16,0.0,0.0,0.004464,0.004464,0.0,0.004464,0.0,0.0,0.0,0.004464


Stats:


field,equity,relative_return,volatility,underwater,max_drawdown,sharpe_ratio,mean_return,bias,instruments,avg_turnover,avg_holding_time
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2025-01-02,2.582416,-0.004117,0.213042,-0.074082,-0.619445,0.228306,0.048639,1.0,480.0,0.329041,5.703236
2025-01-03,2.610179,0.010751,0.213033,-0.064127,-0.619445,0.230904,0.04919,1.0,482.0,0.329108,5.701718
2025-01-06,2.605773,-0.001688,0.213013,-0.065707,-0.619445,0.230463,0.049091,1.0,485.0,0.329181,5.700207
2025-01-07,2.597949,-0.003003,0.212993,-0.068512,-0.619445,0.229696,0.048924,1.0,489.0,0.329233,5.698984
2025-01-08,2.600839,0.001112,0.212972,-0.067476,-0.619445,0.229946,0.048972,1.0,489.0,0.329263,5.698201
2025-01-10,2.554431,-0.017843,0.212989,-0.084116,-0.619445,0.225448,0.048018,1.0,492.0,0.329304,5.697161
2025-01-13,2.57651,0.008644,0.212976,-0.076199,-0.619445,0.227534,0.048459,1.0,495.0,0.329359,5.69628
2025-01-14,2.594644,0.007038,0.21296,-0.069697,-0.619445,0.229231,0.048817,1.0,497.0,0.32938,5.695474
2025-01-15,2.616472,0.008413,0.212947,-0.061871,-0.619445,0.231262,0.049246,1.0,498.0,0.329405,5.694677
2025-01-16,2.636021,0.007472,0.212932,-0.054862,-0.619445,0.233064,0.049627,1.0,498.0,0.329431,5.693318


---


100% (5043 of 5043) |####################| Elapsed Time: 0:20:16 Time:  0:20:16
