In [1]:
import pandas as pd
import numpy as np

In [48]:
price = pd.read_pickle('prices.pkl')
price.sort_index(level='date', ascending=True, inplace=True)
price

Unnamed: 0_level_0,Unnamed: 1_level_0,adj_close,adj_open,adj_high,adj_low,volume
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,2000-01-03 00:00:00,46.867298,51.261107,51.384785,43.859980,3343600.0
AAPL,2000-01-03 00:00:00,0.846070,0.792633,0.850303,0.768598,4783900.0
ABT,2000-01-03 00:00:00,10.827467,10.904806,11.136823,10.750128,10635000.0
ADBE,2000-01-03 00:00:00,16.277607,16.697209,16.759281,15.952353,1846100.0
ADI,2000-01-03 00:00:00,28.667898,29.720018,29.840806,27.971782,1827800.0
...,...,...,...,...,...,...
XYL,2024-02-27 00:00:00,125.690000,125.920000,126.415000,124.740000,932171.0
YUM,2024-02-27 00:00:00,137.040000,138.060000,138.170000,136.640000,1824286.0
ZBH,2024-02-27 00:00:00,126.370000,126.620000,127.062400,126.080000,829863.0
ZBRA,2024-02-27 00:00:00,275.780000,279.560000,281.039000,274.480000,194585.0


In [50]:
def factor_construction(df):
    #biasvol
    MAVOL = df['volume'].rolling(20, min_periods=1).mean()
    df['BIASVOL'] = (df['volume'] - MAVOL)/(MAVOL + 0.0001)

    #BullPower=HIGH-EMA(CLOSE,N)
    ema_close = df['adj_close'].ewm(com=0.5).mean(20)
    df['ERBE'] = df['adj_low'] - ema_close

    #BearPower=LOW-EMA(CLOSE,N)
    df['ERBU'] = df['adj_high'] - ema_close
    
    #FI=(CLOSE-REF(CLOSE,1))*VOLUME, Use the change of price to measure the trend of price, and use the volume to measure the strength of the trend.
    df['FI']  = (df['adj_close'] - df['adj_close'].shift(1)) * df['volume']

    #HMA=MA(HIGH,N)
    df['HMA'] = df['adj_high'].rolling(20, min_periods=1).mean()

    #PVO=(EMA(VOLUME,N1)-EMA(VOLUME,N2))/EMA(VOLUME,N2), The exponential moving average of trading volume is used to reflect the change of trading volume
    emav1 = df['volume'].ewm(span=20,min_periods=1).mean(20)
    emav2 = df['volume'].ewm(span=40,min_periods=1).mean(40)
    df['PVO'] = (emav1 - emav2)/(emav2 + 0.0001)

    #DPO=CLOSE-REF(MA(CLOSE,N),N/2+1), DPO is the difference between the current price and the delayed moving average, which reduces the influence of long-term trends on short-term price movements by removing the moving average of the previous period of time.
    MA = df['adj_close'].rolling(20, min_periods=1).mean()
    df['DPO'] = df['adj_close'] - MA.shift(periods=int(20/2)+1)

    #UPPER=MAX(HIGH,N),LOWER=MIN(LOW,N), MIDDLE=(UPPER+LOWER)/2, The DC index uses the n-day high and n-day low to construct the upper and lower tracks of price change.
    upper = df['adj_high'].rolling(20, min_periods=1).max()
    lower = df['adj_low'].rolling(20, min_periods=1).min()
    df['DC'] = (upper + lower) / 2


    #alpha101-6
    df['alpha6'] = df['adj_open'].rolling(10).corr(df['volume'])

    #alpha101-12, (sign(delta(volume, 1)) * (-1 * delta(close, 1)))
    vol_delta = df['volume'].diff()
    vol_sign = vol_delta/abs(vol_delta)
    close_delta = df['adj_close'].diff()
    df['alpha12'] = vol_sign * (-1) * close_delta

    df = df.resample('M',on='date').last()

    return df[['BIASVOL','ERBE','ERBU','FI','HMA','PVO','DPO','DC','alpha6','alpha12']]
     

In [51]:
price.reset_index(inplace=True)
factor = price.groupby('ticker').apply(factor_construction)
factor

Unnamed: 0_level_0,Unnamed: 1_level_0,BIASVOL,ERBE,ERBU,FI,HMA,PVO,DPO,DC,alpha6,alpha12
ticker,date,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
A,2000-01-31,-0.536376,-1.364868,0.509824,-9.058759e+05,45.281946,-0.106449,-0.039779,44.611809,0.583894,-1.217248
A,2000-02-29,-0.418271,-1.463950,1.074695,1.293648e+06,57.812114,-0.011302,20.707209,59.121143,0.019105,1.887711
A,2000-03-31,0.376512,-9.983452,0.431503,-1.738126e+06,85.272445,-0.004093,-9.280701,82.017771,-0.800328,0.650935
A,2000-04-30,-0.285363,-1.602023,0.428894,-1.756441e+06,64.212429,-0.030074,-13.641638,64.462063,-0.868263,-1.379982
A,2000-05-31,-0.134592,-0.217126,3.323959,-3.592037e+06,52.946377,0.215728,-9.458081,49.471037,0.294525,-1.139136
...,...,...,...,...,...,...,...,...,...,...,...
ZTS,2023-10-31,0.869302,-0.834121,1.600359,1.197742e+06,169.593016,0.080816,-17.877163,164.766911,-0.805695,-0.373260
ZTS,2023-11-30,0.282459,-2.950261,0.272431,2.266943e+06,174.057786,-0.034319,12.453762,167.921701,-0.347597,-0.878009
ZTS,2023-12-31,-0.419951,-0.960687,0.795331,2.110408e+05,192.351182,-0.084008,16.522532,188.522484,0.657172,-0.209525
ZTS,2024-01-31,0.135963,-1.625424,3.714576,-7.741048e+06,194.328013,0.018006,-7.755513,192.341439,-0.137218,3.620000


In [52]:
factor.to_pickle('factor_TP.pkl')