Note that the `rank` op-function in AlphaEngineer ranks each asset against all the assets in the universe that was passed in. Although we have defined a topcap30 universe, we cannot compute the alpha factors on it directly, because each stock's time series would then contain gaps (ฟันหลอ, literally "gap-toothed": e.g. the price is NaN on some dates). Instead, we have to use the original OHLCV dataset, because that data is complete. The drawback is that `rank` is then computed over the whole stock universe rather than over the topcap30 universe only.

In [1]:
import pandas as pd
import nasdaqdatalink as ndl
import json
import numpy as np
from typing import List, Callable, Dict
from realgam.quantlib import general_utils as gu
from realgam.quantlib.engineer.op_engineer_vect import OpEngineerV
from realgam.quantlib.engineer.ta_engineer_vect import TalibEngineerV
from realgam.quantlib.engineer.alpha_engineer_vect import AlphaEngineerV
from realgam.quantlib.engineer.alpha_engineer import AlphaEngineer

from sklearn.feature_selection import mutual_info_regression
from scipy.stats import spearmanr
import matplotlib.pyplot as plt

from talib import WMA
import talib
from joblib import Parallel, delayed
import datetime
import os
import time
# Project root comes from the QuantSystemMVP environment variable.
# NOTE: the lookup yields None when the variable is unset, in which case
# DATA_PATH below begins with the literal text "None".
PROJECT_PATH = os.environ.get('QuantSystemMVP')
# Location of the saved stock-history object loaded in the next cell.
DATA_PATH = f'{PROJECT_PATH}/Data/historical/stock_hist_perma.obj'

In [2]:
# uni_path = os.path.join(PROJECT_PATH, 'Data/Projects/Alpha_ML/universe_top30.obj')
# uni_df = gu.load_file(uni_path)

In [2]:
# Load the saved object at DATA_PATH; it unpacks into exactly three items.
# Only stocks_df is used by the cells visible here.
stocks_df, stocks_extended_df, available_tickers = gu.load_file(DATA_PATH)
# Work on a copy from here on, leaving the loaded frame itself untouched.
stacked_hist = stocks_df.copy()

In [3]:
# Wrap the stacked history in the vectorised op-engineer, passing the
# 'permaticker' and 'date' names, then run its ts_ret step in place.
# NOTE(review): ts_ret presumably adds the 'ts_ret_closeadj' column that is
# renamed below -- confirm against OpEngineerV.
openg = OpEngineerV(stacked_hist, 'permaticker', 'date')
openg.ts_ret(inplace=True)
# rename() without inplace returns a new frame, so openg.df keeps its own
# column name while uni_df exposes that column as 'returns'.
uni_df = openg.df.rename(columns={'ts_ret_closeadj': 'returns'})

In [4]:
# Maps single-letter candle keys to column names of uni_df; this mapping is
# passed to AlphaEngineer. The letters presumably stand for
# open / high / low / close / volume / returns.
candle_name_dict = {
    'o': 'openadj',
    'h': 'highadj',
    'l': 'lowadj',
    'c': 'closeadj',
    'v': 'volume',
    'r': 'returns',
}

In [5]:
# Alpha-factor engineer built over uni_df. It receives the 'permaticker' and
# 'date' names plus candle_name_dict (short candle key -> column name).
# NOTE(review): the meaning of each positional argument is inferred from this
# call site only -- confirm against AlphaEngineer's signature.
ae = AlphaEngineer(uni_df, 'permaticker', 'date', candle_name_dict)

In [6]:
def run_eng(alpha_engineer, method_name):
    """Call the zero-argument method named ``method_name`` on ``alpha_engineer``.

    Args:
        alpha_engineer: Object that exposes the method to run.
        method_name: Name of the attribute to look up and call.

    Returns:
        Whatever the looked-up method returns.

    Raises:
        AttributeError: If ``alpha_engineer`` has no attribute ``method_name``.
    """
    method = getattr(alpha_engineer, method_name)
    return method()

# Method names to evaluate on the engineer: 'alpha1' ... 'alpha30'.
# For a quick smoke run, shrink the index list (e.g. to [1, 2]).
alpha_eng_index = list(range(1, 31))
alpha_eng_names = [f'alpha{n}' for n in alpha_eng_index]

# Dispatch one run_eng call per alpha name through joblib (n_jobs=-1) and
# report the wall-clock time of the whole batch.
s_time_chunk = time.time()
eng_values = Parallel(n_jobs=-1)(
    delayed(run_eng)(ae, name) for name in alpha_eng_names
)
e_time_chunk = time.time()
print(f"Total time: {e_time_chunk - s_time_chunk} sec")

Total time: 1290.7024109363556 sec


In [7]:
# Attach each computed alpha to uni_df as a column named after its method.
# zip() pairs names with results positionally, so this relies on eng_values
# being in the same order as alpha_eng_names.
for eng_name, eng_value in zip(alpha_eng_names, eng_values):
    uni_df[eng_name] = eng_value


In [8]:
# Persist uni_df under the project's Alpha_ML data folder.
# The path is built from PROJECT_PATH, so it inherits that value as its prefix.
SAVE_PATH = f'{PROJECT_PATH}/Data/Projects/Alpha_ML/alpha_eng_30.obj'
gu.save_file(SAVE_PATH, uni_df)

In [9]:
# Bare expression: the notebook renders uni_df as this cell's output.
uni_df

Unnamed: 0_level_0,Unnamed: 1_level_0,ticker,open,high,low,close,openadj,highadj,lowadj,closeadj,volume,...,alpha21,alpha22,alpha23,alpha24,alpha25,alpha26,alpha27,alpha28,alpha29,alpha30
permaticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
101501,2012-01-03,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0,...,1,,0.0,,,,-0.0,,,
101501,2012-01-04,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0,...,1,,0.0,,,,-0.0,,,
101501,2012-01-05,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0,...,1,,0.0,,,,-0.0,,,
101501,2012-01-06,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0,...,1,,0.0,-0.00,,,-0.0,,,
101501,2012-01-09,BBUCQ,0.03,0.030,0.030,0.03,0.03,0.030,0.030,0.03,0.0,...,1,,0.0,-0.00,,,-0.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
639045,2022-09-21,THRD,16.88,17.748,16.510,16.88,16.88,17.748,16.510,16.88,453144.0,...,1,,0.0,1.12,,,-0.0,,,
639045,2022-09-22,THRD,16.88,18.360,16.350,16.75,16.88,18.360,16.350,16.75,266993.0,...,1,,0.0,0.25,,,-0.0,,,
639045,2022-09-23,THRD,16.49,17.109,16.065,17.00,16.49,17.109,16.065,17.00,121250.0,...,1,,0.0,-0.20,,,-0.0,,,
639056,2022-09-22,GLSTU,9.97,9.990,9.960,9.98,9.97,9.990,9.960,9.98,606430.0,...,1,,0.0,,,,-0.0,,,
