In [1]:
import pandas as pd
import nasdaqdatalink as ndl
import json
import numpy as np
from typing import List, Callable, Dict
from realgam.quantlib import general_utils as gu
from realgam.quantlib.engineer.op_engineer_vect import OpEngineerV
from realgam.quantlib.engineer.ta_engineer_vect import TalibEngineerV
from realgam.quantlib.engineer.alpha_engineer_vect import AlphaEngineerV

import talib
from joblib import Parallel, delayed
import datetime
import os
import time
PROJECT_PATH = os.getenv('QuantSystemMVP')
DATA_PATH = f'{PROJECT_PATH}/Data/historical/stock_hist_perma.obj'

In [16]:
stocks_df, stocks_extended_df, available_tickers = gu.load_file(DATA_PATH)
stacked_hist = stocks_df.copy()

In [17]:
openg = OpEngineerV(stacked_hist, 'permaticker', 'date')

In [18]:
openg.ts_mean('volume', 20, inplace=True)

In [19]:
stacked_hist = openg.df.copy()

In [10]:
stacked_hist.columns

Index(['ticker', 'open', 'high', 'low', 'close', 'openadj', 'highadj',
       'lowadj', 'closeadj', 'volume', 'ts_mean20_volume'],
      dtype='object')

In [6]:
stacked_hist = stacked_hist[~stacked_hist['ts_mean20_volume'].isnull()]

In [7]:
openg.set_df(stacked_hist)

In [8]:
openg.cs_pctrank('ts_mean20_volume', inplace=True)

In [9]:
stacked_hist = openg.df.copy()

In [10]:
uni_top30cap = stacked_hist[stacked_hist['cs_pctrank_ts_mean20_volume'] >= 0.7]

In [11]:
openg.set_df(uni_top30cap)
openg.ts_ret(inplace=True)
uni_top30cap = openg.df.copy()

openg.ts_retn('closeadj', 20, inplace=True)
uni_top30cap = openg.df.copy()
uni_top30cap['fwd_ret20'] = uni_top30cap.sort_values(['permaticker', 'date']).groupby('permaticker')['ts_retn20_closeadj'].shift(-20)

uni_top30cap.rename(columns={'ts_ret_closeadj': 'returns'}, inplace=True)

In [21]:
stacked_hist[stacked_hist.index == ('105193', pd.Timestamp('2022-06-14'))]

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,open,high,low,close,openadj,highadj,lowadj,closeadj,volume,ts_mean20_volume
permaticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
105193,2022-06-14,APLD,1.26,1.26,0.98,1.03,1.26,1.26,0.98,1.03,2508781.0,727427.85


In [22]:
uni_top30cap = uni_top30cap.drop(columns = 'ts_retn20_closeadj')

In [23]:
save_path = os.path.join(PROJECT_PATH, 'Data/Projects/Alpha_ML/universe_top30.obj')
gu.save_file(save_path, uni_top30cap)

In [24]:
uni_top30cap

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,open,high,low,close,openadj,highadj,lowadj,closeadj,volume,ts_mean20_volume,cs_pctrank_ts_mean20_volume,returns,fwd_ret20
permaticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
105193,2022-06-14,APLD,1.26,1.26,0.98,1.03,1.26,1.26,0.98,1.03,2508781.0,727427.85,0.720450,,-0.128155
105193,2022-06-15,APLD,1.02,1.08,0.95,1.01,1.02,1.08,0.95,1.01,1481018.0,770604.65,0.729135,-0.019417,0.029703
105193,2022-06-16,APLD,1.00,1.05,0.95,1.03,1.00,1.05,0.95,1.03,843827.0,800672.40,0.734084,0.019802,0.029126
105193,2022-06-17,APLD,1.03,1.08,1.02,1.08,1.03,1.08,1.02,1.08,697612.0,823295.70,0.730007,0.048544,0.962963
105193,2022-06-21,APLD,1.14,1.29,1.14,1.21,1.14,1.29,1.14,1.21,1355982.0,775654.05,0.718918,0.120370,1.057851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
638932,2022-09-21,FRZA,2.29,2.35,2.18,2.19,2.29,2.35,2.18,2.19,120086.0,1022149.35,0.786851,-0.039474,
638932,2022-09-22,FRZA,2.23,2.88,2.15,2.53,2.23,2.88,2.15,2.53,2405970.0,1092366.55,0.795149,0.155251,
638932,2022-09-23,FRZA,2.47,2.47,2.10,2.14,2.47,2.47,2.10,2.14,319682.0,1077465.65,0.786451,-0.154150,
638933,2022-09-09,CHG,2.66,2.77,2.57,2.68,2.66,2.77,2.57,2.68,76304.0,785028.30,0.765478,,


In [26]:
import gc

In [28]:
del stacked_hist
del uni_top30cap
gc.collect()

932