# 6. 數據分析專案
# Demo4 – TA Systematic Testing

### Import data

In [3]:
import pandas as pd
import numpy as np
import talib
from talib import MA_Type
import math
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

ModuleNotFoundError: No module named 'talib'

In [None]:
df_nvda = yf.download('NVDA', start='2003-01-01',
                      end='2023-01-01')
df_nvda

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2003-01-02,1.000000,1.037500,0.965833,1.025833,0.941203,130254000
2003-01-03,1.029167,1.062500,1.008333,1.025833,0.941203,103749600
2003-01-06,1.050833,1.125833,1.050000,1.109167,1.017663,103342800
2003-01-07,1.130833,1.156667,1.093333,1.105833,1.014604,149827200
2003-01-08,1.077500,1.109167,0.988333,0.995833,0.913678,185252400
...,...,...,...,...,...,...
2022-12-23,151.960007,153.389999,148.830002,152.059998,152.018448,34932600
2022-12-27,150.740005,151.000000,140.559998,141.210007,141.171417,46490200
2022-12-28,139.270004,142.619995,138.839996,140.360001,140.321640,35106600
2022-12-29,144.020004,146.830002,142.270004,146.029999,145.990097,35492300


### Label Target

In [None]:
df_nvda['Log_rtn'] = df_nvda['Close'].pct_change(1)
df_nvda['Target'] = df_nvda['Log_rtn'].shift(-1)

In [None]:
df_nvda

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Log_rtn,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2003-01-02,1.000000,1.037500,0.965833,1.025833,0.941203,130254000,,0.000000
2003-01-03,1.029167,1.062500,1.008333,1.025833,0.941203,103749600,0.000000,0.081235
2003-01-06,1.050833,1.125833,1.050000,1.109167,1.017663,103342800,0.081235,-0.003006
2003-01-07,1.130833,1.156667,1.093333,1.105833,1.014604,149827200,-0.003006,-0.099473
2003-01-08,1.077500,1.109167,0.988333,0.995833,0.913678,185252400,-0.099473,0.017573
...,...,...,...,...,...,...,...,...
2022-12-23,151.960007,153.389999,148.830002,152.059998,152.018448,34932600,-0.008671,-0.071353
2022-12-27,150.740005,151.000000,140.559998,141.210007,141.171417,46490200,-0.071353,-0.006019
2022-12-28,139.270004,142.619995,138.839996,140.360001,140.321640,35106600,-0.006019,0.040396
2022-12-29,144.020004,146.830002,142.270004,146.029999,145.990097,35492300,0.040396,0.000753


### Add indicators
We add indicators to our DF and drop the NaN.

In [None]:
df_nvda['rsi'] = talib.RSI(df_nvda['Close'], timeperiod=14)
df_nvda['k'],df_nvda['d'] = talib.STOCH(df_nvda['High'], df_nvda['Low'], df_nvda['Close'],14)
df_nvda['macd'], df_nvda['macdsignal'], df_nvda['macdhist'] = talib.MACD(df_nvda['Close'],
                                                                         fastperiod=12, slowperiod=26, signalperiod=9)
df_nvda['adosc'] = talib.ADOSC(df_nvda['High'], df_nvda['Low'], df_nvda['Close'],
                               df_nvda['Volume'],fastperiod=3, slowperiod=10)
df_nvda['atr'] = talib.ATR(df_nvda['High'], df_nvda['Low'], df_nvda['Close'],timeperiod=14)
df_nvda['tr'] = talib.TRANGE(df_nvda['High'], df_nvda['Low'], df_nvda['Close'])

In [None]:
df_nvda = df_nvda.dropna()

### Define how the signal raise

There a few ways to to define the signal. Here we use the straightforward
one. For RSI below 30, we define it as oversold and buy signal, over 70 is
sell signal. 1 denoted as buy, -1 denoted as sell, 0 denoted as hold.

In [None]:
# RSI 30 oversold, 70 overbought
df_nvda.loc[df_nvda.index, ['rsi_signal']] = np.where(df_nvda['rsi']<30,1,
                                                      np.where(df_nvda['rsi']>70, -1, 0)).tolist()

For Stochastic, if both k and d below 20, and k is larger than d,
it is buy signal.

In [None]:
# STOCHASTIC k & d below 20, and k>d, buy for oversold
df_nvda.loc[df_nvda.index, ['stoch_signal']] = np.where(
    (df_nvda['k']<20)&(df_nvda['d']<20)&(df_nvda['k']<df_nvda['d']), 1,
    np.where((df_nvda['k']>80)&(df_nvda['d']>80)&(df_nvda['k']<df_nvda['d']),-1,0)).tolist()

For MACD, we denote buy as MACD histogram is larger than 0.
Sell as MACD histogram is smaller than 0.

In [None]:
# MACD-histogram > 0, buy signal
df_nvda.loc[df_nvda.index, ['macd_signal']] = np.where(df_nvda['macdhist']>0, 1,
                                                       np.where(df_nvda['macdhist']<0, -1, 0)).tolist()

ADOSC is Accumulation/Distribution Oscillator.
For ADOSC, the threshold is 0. If its previous value is under 0, and now over 0, it is
buy signal. Vice versa for sell.

In [None]:
# ADOSC: 0 is threshold line, from <0 to >0, buy signal
df_nvda.loc[df_nvda.index, ['adosc_signal']] = np.where(
    (df_nvda['adosc'].shift(1)<0) & (df_nvda['adosc']>0), 1,
    np.where((df_nvda['adosc'].shift(1)>0) & (df_nvda['adosc']<0), -1, 0)).tolist()

ATR is Average True Range of 14-days period. TR is today’s True Range.
For ATR, if TR is larger than ATR, it is a buy signal.

In [None]:
# if current close higher than average range, buy signal
df_nvda.loc[df_nvda.index, ['atr_signal']] = np.where(df_nvda['tr']>df_nvda['atr'], 1,
                                                np.where(df_nvda['tr']<df_nvda['atr'], -1, 0).tolist())
    

### Create features

We gather the signals as features.

In [None]:
feats = df_nvda[['rsi_signal','stoch_signal','macd_signal',
                'adosc_signal','atr_signal','Log_rtn','Target']].copy()

In [None]:
feats.sample(3)

Unnamed: 0_level_0,rsi_signal,stoch_signal,macd_signal,adosc_signal,atr_signal,Log_rtn,Target
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2009-06-05,0.0,-1.0,1.0,0.0,-1.0,-0.00551,-0.00554
2006-12-01,0.0,0.0,-1.0,0.0,1.0,-0.0392,0.040518
2013-10-11,0.0,0.0,-1.0,0.0,-1.0,-0.008447,0.015072


### Check the indicators results

For RSI indicator performance, we may check how its buy/sell signal
compare to actual Target.

In [None]:
# correctly predicted buy
feats['Target'].loc[(feats['rsi_signal']==1)&(feats['Target']>0)].count()

78

In [None]:
# mean of correctly predict buy log_return
feats['Target'].loc[(feats['rsi_signal']==1)&(feats['Target']>0)].mean()

0.03375603065712058

In [None]:
# incorrectly predicted buy
feats['Target'].loc[(feats['rsi_signal']==1)&(feats['Target']<0)].count()

77

In [None]:
# mean of correctly predict buy log_return
feats['Target'].loc[(feats['rsi_signal']==1)&(feats['Target']<0)].mean()

-0.029394032496178

### Define a function to return the result

In [None]:
def indicator_result(indicator, feats):
    buy_correct_time = feats['Target'].loc[(feats[indicator]==1)&(feats['Target']>0)].count()
    buy_correct_mean = feats['Target'].loc[(feats[indicator]==1)&(feats['Target']>0)].mean()
    buy_incorrect_time = feats['Target'].loc[(feats[indicator]==1)&(feats['Target']<0)].count()
    buy_incorrect_mean = feats['Target'].loc[(feats[indicator]==1)&(feats['Target']<0)].mean()
    buy_correct_ratio = buy_correct_time / (buy_correct_time + buy_incorrect_time)
    
    sell_correct_time = feats['Target'].loc[(feats[indicator]==-1)&(feats['Target']<0)].count()
    sell_correct_mean = feats['Target'].loc[(feats[indicator]==-1)&(feats['Target']<0)].mean()
    sell_incorrect_time = feats['Target'].loc[(feats[indicator]==-1)&(feats['Target']>0)].count()
    sell_incorrect_mean = feats['Target'].loc[(feats[indicator]==-1)&(feats['Target']>0)].mean()
    sell_correct_ratio = sell_correct_time / (sell_correct_time + sell_incorrect_time)
    return [ indicator, f"buy_correct_ratio: {buy_correct_ratio:.4f}",
                        f"buy_correct_mean: {buy_correct_mean*100:.4f}",
                        f"buy_incorrect_ratio: {sell_correct_ratio:.4f}",
                        f"buy_incorrect_mean: {sell_correct_mean*100:.4f}%"]
    
    

### Indicator’s result

#### RSI

In [None]:
indicator_result('rsi_signal', feats)

['rsi_signal',
 'buy_correct_ratio: 0.5032',
 'buy_correct_mean: 3.3756',
 'buy_incorrect_ratio: 0.4790',
 'buy_incorrect_mean: -1.5680%']

#### STOCHASTIC

In [None]:
indicator_result('stoch_signal', feats)

['stoch_signal',
 'buy_correct_ratio: 0.4972',
 'buy_correct_mean: 2.4852',
 'buy_incorrect_ratio: 0.4675',
 'buy_incorrect_mean: -1.7943%']

#### MACD

In [None]:
indicator_result('macd_signal', feats)

['macd_signal',
 'buy_correct_ratio: 0.5177',
 'buy_correct_mean: 2.2111',
 'buy_incorrect_ratio: 0.4691',
 'buy_incorrect_mean: -2.2187%']

#### ADOSC

In [None]:
indicator_result('adosc_signal', feats)

['adosc_signal',
 'buy_correct_ratio: 0.5451',
 'buy_correct_mean: 1.8422',
 'buy_incorrect_ratio: 0.4721',
 'buy_incorrect_mean: -2.4982%']

#### ATR/TR

In [None]:
indicator_result('atr_signal', feats)

### Discovery the statistics

It looks like the overall buy signal is slightly overperform which
over 50% a bit. However the sell signal is slightly underperform.
Can you suggest a way to make a better signal?

In [None]:
indicator_result('adosc_signal', feats)

['adosc_signal',
 'buy_correct_ratio: 0.5451',
 'buy_correct_mean: 1.8422',
 'buy_incorrect_ratio: 0.4721',
 'buy_incorrect_mean: -2.4982%']

### Comprehensive Analyse

We perform buy if triggered by only 1 indicator, we may have 713 correct, and 656 wrong.

In [None]:
feats['Comp_signal'] = np.where((np.where(feats['rsi_signal']==1,1,0) +
            np.where(feats['stoch_signal']==1,1,0)+
            np.where(feats['macd_signal']==1,1,0)+
            np.where(feats['adosc_signal']==1,1,0)+
            np.where(feats['atr_signal']==1,1,0)
        )>1 , 1, 0 )

In [None]:
feats['Target'].loc[(feats['Comp_signal']==1) & (feats['Target']>0)].count()

724

In [None]:
feats['Target'].loc[(feats['Comp_signal']==1) & (feats['Target']<0)].count()

655

We perform buy if triggered by at least 2 indicator, we may have 51 correct, and 47
wrong.

In [None]:
feats['Comp_signal'] = np.where((np.where(feats['rsi_signal']==1,1,0) +
            np.where(feats['stoch_signal']==1,1,0)+
            np.where(feats['macd_signal']==1,1,0)+
            np.where(feats['adosc_signal']==1,1,0)+
            np.where(feats['atr_signal']==1,1,0)
        )>2 , 1, 0 )

In [None]:
feats['Target'].loc[(feats['Comp_signal']==1) & (feats['Target']<0)].count()

58

In [None]:
feats['Target'].loc[(feats['Comp_signal']==1) & (feats['Target']<0)].count()

58