In [1]:
import pandas as pd
import nasdaqdatalink as ndl
import json
import numpy as np
from typing import List, Callable, Dict
from realgam.quantlib import general_utils as gu
from realgam.quantlib.engineer.op_engineer_vect import OpEngineerV
from realgam.quantlib.engineer.ta_engineer_vect import TalibEngineerV
from realgam.quantlib.engineer.alpha_engineer_vect import AlphaEngineerV

import talib
from joblib import Parallel, delayed
import datetime
import os
import time
PROJECT_PATH = os.getenv('QuantSystemMVP')
DATA_PATH = f'{PROJECT_PATH}/Data/historical/stock_hist_perma.obj'

In [4]:
stocks_df, stocks_extended_df, available_tickers = gu.load_file(DATA_PATH)
stacked_hist = stocks_df.copy()

In [3]:
stacked_hist

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,open,high,low,close,openadj,highadj,lowadj,closeadj,volume
permaticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
101501,2012-01-03,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0
101501,2012-01-04,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0
101501,2012-01-05,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0
101501,2012-01-06,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0
101501,2012-01-09,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0
...,...,...,...,...,...,...,...,...,...,...,...
639045,2022-09-21,THRD,16.88,17.748,16.510,16.88,16.88,17.748,16.510,16.88,453144.0
639045,2022-09-22,THRD,16.88,18.360,16.350,16.75,16.88,18.360,16.350,16.75,266993.0
639045,2022-09-23,THRD,16.49,17.109,16.065,17.00,16.49,17.109,16.065,17.00,121250.0
639056,2022-09-22,GLSTU,9.97,9.990,9.960,9.98,9.97,9.990,9.960,9.98,606430.0


In [5]:
openg = OpEngineerV(stacked_hist, 'permaticker', 'date')
openg.ts_retn('closeadj', 20, inplace=True)
openg.ts_retn('closeadj', 10, inplace=True)
openg.ts_retn('closeadj', 5, inplace=True)
stacked_hist = openg.df.copy()
stacked_hist['fwd_ret20'] = stacked_hist.sort_values(['permaticker', 'date']).groupby('permaticker')['ts_retn20_closeadj'].shift(-20)
stacked_hist['fwd_ret10'] = stacked_hist.sort_values(['permaticker', 'date']).groupby('permaticker')['ts_retn10_closeadj'].shift(-20)
stacked_hist['fwd_ret5'] = stacked_hist.sort_values(['permaticker', 'date']).groupby('permaticker')['ts_retn5_closeadj'].shift(-20)



In [6]:
stacked_hist = stacked_hist.drop(columns = ['ts_retn20_closeadj', 'ts_retn10_closeadj','ts_retn5_closeadj'])

In [7]:
stacked_hist['ma_50'] = openg.ts_mean('closeadj', 50)
stacked_hist['ma_150'] = openg.ts_mean('closeadj', 150)
stacked_hist['ma_200'] = openg.ts_mean('closeadj', 200)
stacked_hist['w52_high'] = openg.ts_max('closeadj', 250)
stacked_hist['w52_low'] = openg.ts_min('closeadj', 250)

openg.set_df(stacked_hist)
# Calculate pct change for various metrics necessary for trend template
stacked_hist['ma_200_lag1m'] = openg.ts_lag('ma_200', 20)
stacked_hist['ma_200_lag5m'] = openg.ts_lag('ma_200', 110)
stacked_hist['w52_low_pct_diff'] = stacked_hist.closeadj / stacked_hist.w52_low - 1
stacked_hist['w52_high_pct_diff'] = (stacked_hist.closeadj / stacked_hist.w52_high - 1).abs()


In [12]:
stacked_hist[stacked_hist['ticker'] == 'AAPL']

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,open,high,low,close,openadj,highadj,lowadj,closeadj,volume,...,fwd_ret5,ma_50,ma_150,ma_200,w52_high,w52_low,ma_200_lag1m,ma_200_lag5m,w52_low_pct_diff,w52_high_pct_diff
permaticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
199059,2012-01-03,AAPL,14.621,14.732,14.607,14.687,12.482653,12.577419,12.470700,12.539,302220800.0,...,0.021294,,,,,,,,,
199059,2012-01-04,AAPL,14.643,14.810,14.617,14.766,12.500993,12.643564,12.478796,12.606,260022000.0,...,0.023604,,,,,,,,,
199059,2012-01-05,AAPL,14.820,14.948,14.738,14.930,12.652091,12.761367,12.582086,12.746,271269600.0,...,0.027717,,,,,,,,,
199059,2012-01-06,AAPL,14.992,15.098,14.972,15.086,12.798752,12.889244,12.781678,12.879,318292800.0,...,0.024180,,,,,,,,,
199059,2012-01-09,AAPL,15.197,15.277,15.048,15.062,12.974255,13.042554,12.847048,12.859,394024400.0,...,0.027087,,,,,,,,,
199059,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199059,2022-09-19,AAPL,149.310,154.560,149.100,154.480,149.310000,154.560000,149.100000,154.480,79599270.0,...,,159.54238,156.782520,160.465640,181.26,129.879,160.026755,157.163375,0.189415,0.147744
199059,2022-09-20,AAPL,153.400,158.080,153.080,156.900,153.400000,158.080000,153.080000,156.900,107035944.0,...,,159.78700,156.705860,160.434710,181.26,129.879,160.111370,157.330145,0.208047,0.134393
199059,2022-09-21,AAPL,157.340,158.740,153.600,153.720,157.340000,158.740000,153.600000,153.720,100244121.0,...,,159.94826,156.582007,160.397440,181.26,129.879,160.196920,157.502880,0.183563,0.151936
199059,2022-09-22,AAPL,152.380,154.470,150.910,152.740,152.380000,154.470000,150.910000,152.740,86264792.0,...,,160.09730,156.453213,160.337945,181.26,129.879,160.281285,157.647010,0.176018,0.157343


In [20]:
stacked_hist[stacked_hist['ticker'] == 'AAPL']

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,open,high,low,close,openadj,highadj,lowadj,closeadj,volume,...,bf_sepa_ma50>ma150,bf_sepa_ma50>ma200,bf_sepa_w52lowdiff>0.2,bf_sepa_w52lowdiff>0.3,bf_sepa_w52lowdiff>0.4,bf_sepa_w52lowdiff>0.5,bf_sepa_w52highdiff<0.4,bf_sepa_w52highdiff<0.3,bf_sepa_w52highdiff<0.2,bf_sepa_w52highdiff<0.1
permaticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
199059,2012-01-03,AAPL,14.621,14.732,14.607,14.687,12.482653,12.577419,12.470700,12.539,302220800.0,...,0,0,0,0,0,0,0,0,0,0
199059,2012-01-04,AAPL,14.643,14.810,14.617,14.766,12.500993,12.643564,12.478796,12.606,260022000.0,...,0,0,0,0,0,0,0,0,0,0
199059,2012-01-05,AAPL,14.820,14.948,14.738,14.930,12.652091,12.761367,12.582086,12.746,271269600.0,...,0,0,0,0,0,0,0,0,0,0
199059,2012-01-06,AAPL,14.992,15.098,14.972,15.086,12.798752,12.889244,12.781678,12.879,318292800.0,...,0,0,0,0,0,0,0,0,0,0
199059,2012-01-09,AAPL,15.197,15.277,15.048,15.062,12.974255,13.042554,12.847048,12.859,394024400.0,...,0,0,0,0,0,0,0,0,0,0
199059,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199059,2022-09-19,AAPL,149.310,154.560,149.100,154.480,149.310000,154.560000,149.100000,154.480,79599270.0,...,1,0,0,0,0,0,1,1,1,0
199059,2022-09-20,AAPL,153.400,158.080,153.080,156.900,153.400000,158.080000,153.080000,156.900,107035944.0,...,1,0,1,0,0,0,1,1,1,0
199059,2022-09-21,AAPL,157.340,158.740,153.600,153.720,157.340000,158.740000,153.600000,153.720,100244121.0,...,1,0,0,0,0,0,1,1,1,0
199059,2022-09-22,AAPL,152.380,154.470,150.910,152.740,152.380000,154.470000,150.910000,152.740,86264792.0,...,1,0,0,0,0,0,1,1,1,0


In [8]:
stacked_hist['bf_sepa_c>ma200'] = np.where((stacked_hist['closeadj'].isnull()) | (stacked_hist['ma_200'].isnull()) , np.nan, np.where(stacked_hist['closeadj'] > stacked_hist['ma_200'], 1, 0) )

stacked_hist['bf_sepa_c>ma150'] = np.where((stacked_hist['closeadj'].isnull()) | (stacked_hist['ma_150'].isnull()) , np.nan, np.where(stacked_hist['closeadj'] > stacked_hist['ma_150'], 1, 0) )

stacked_hist['bf_sepa_c>ma50'] = np.where((stacked_hist['closeadj'].isnull()) | (stacked_hist['ma_50'].isnull()) , np.nan, np.where(stacked_hist['closeadj'] > stacked_hist['ma_50'], 1, 0) )


stacked_hist['bf_sepa_ma200trend'] = np.where((stacked_hist['ma_200_lag1m'].isnull()) | (stacked_hist['ma_200'].isnull()) , np.nan, np.where(stacked_hist['ma_200'] > stacked_hist['ma_200_lag1m'], 1, 0) )

stacked_hist['bf_sepa_ma150>ma200'] = np.where((stacked_hist['ma_150'].isnull()) | (stacked_hist['ma_200'].isnull()) , np.nan, np.where(stacked_hist['ma_150'] > stacked_hist['ma_200'], 1, 0) )

stacked_hist['bf_sepa_ma50>ma150'] = np.where((stacked_hist['ma_50'].isnull()) | (stacked_hist['ma_150'].isnull()) , np.nan, np.where(stacked_hist['ma_50'] > stacked_hist['ma_150'], 1, 0) )
stacked_hist['bf_sepa_ma50>ma200'] = np.where((stacked_hist['ma_50'].isnull()) | (stacked_hist['ma_200'].isnull()) , np.nan, np.where(stacked_hist['ma_50'] > stacked_hist['ma_200'], 1, 0) )

stacked_hist['bf_sepa_w52lowdiff>0.2'] = np.where((stacked_hist['w52_low_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] >= 0.2, 1, 0) )
stacked_hist['bf_sepa_w52lowdiff>0.3'] = np.where((stacked_hist['w52_low_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] >= 0.3, 1, 0) )
stacked_hist['bf_sepa_w52lowdiff>0.4'] = np.where((stacked_hist['w52_low_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] >= 0.4, 1, 0) )
stacked_hist['bf_sepa_w52lowdiff>0.5'] = np.where((stacked_hist['w52_low_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] >= 0.5, 1, 0) )

stacked_hist['bf_sepa_w52highdiff<0.4'] = np.where((stacked_hist['w52_high_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] <= 0.4, 1, 0) )
stacked_hist['bf_sepa_w52highdiff<0.3'] = np.where((stacked_hist['w52_high_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] <= 0.3, 1, 0) )
stacked_hist['bf_sepa_w52highdiff<0.2'] =  np.where((stacked_hist['w52_high_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] <= 0.2, 1, 0) )
stacked_hist['bf_sepa_w52highdiff<0.1'] = np.where((stacked_hist['w52_high_pct_diff'].isnull()) , np.nan, np.where(stacked_hist['w52_low_pct_diff'] <= 0.1, 1, 0) )


In [9]:
stacked_hist[stacked_hist['ticker'] == 'AAPL'].dropna()

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,open,high,low,close,openadj,highadj,lowadj,closeadj,volume,...,bf_sepa_ma50>ma150,bf_sepa_ma50>ma200,bf_sepa_w52lowdiff>0.2,bf_sepa_w52lowdiff>0.3,bf_sepa_w52lowdiff>0.4,bf_sepa_w52lowdiff>0.5,bf_sepa_w52highdiff<0.4,bf_sepa_w52highdiff<0.3,bf_sepa_w52highdiff<0.2,bf_sepa_w52highdiff<0.1
permaticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
199059,2013-03-28,AAPL,16.065,16.137,15.772,15.809,13.917784,13.980160,13.663945,13.696,442839600.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
199059,2013-04-01,AAPL,15.782,15.846,15.277,15.318,13.672994,13.728441,13.235479,13.271,389732000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
199059,2013-04-02,AAPL,15.271,15.648,15.229,15.350,13.229561,13.556163,13.193175,13.298,529519200.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
199059,2013-04-03,AAPL,15.406,15.617,15.368,15.428,13.346940,13.529740,13.314019,13.366,363216000.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
199059,2013-04-04,AAPL,15.492,15.536,15.188,15.276,13.421126,13.459245,13.157763,13.234,358447600.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0
199059,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199059,2022-08-19,AAPL,173.030,173.740,171.310,171.520,173.030000,173.740000,171.310000,171.520,69409796.0,...,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
199059,2022-08-22,AAPL,169.690,169.860,167.135,167.570,169.690000,169.860000,167.135000,167.570,67755486.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
199059,2022-08-23,AAPL,167.080,168.710,166.650,167.230,167.080000,168.710000,166.650000,167.230,53709564.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
199059,2022-08-24,AAPL,167.320,168.110,166.245,167.530,167.320000,168.110000,166.245000,167.530,53841524.0,...,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
