In [None]:
import pandas as pd
import numpy as np
from ta import add_all_ta_features

In [None]:
def momentum_235(data, factors):
    '''
    Adds open-price momentum factors over 2-, 3- and 5-day horizons.

    For each horizon n, the column 'Mom_<n>day' holds the per-ticker
    percentage change of 'prcod' relative to n rows earlier. The first n
    rows of every ticker have no earlier reference row and stay NaN.

    Input: data (needs 'tic' and 'prcod' columns), factors (list of names)
    Output: the same data object with the new columns, and factors with the
            three new column names appended
    '''
    for horizon in (2, 3, 5):
        column = f'Mom_{horizon}day'
        data[column] = data.groupby('tic')['prcod'].pct_change(periods=horizon)
        factors.append(column)
    return data, factors

In [None]:
def MA_1050(data, factors):
    '''
    Adds 10- and 50-day simple moving averages of the open price.

    The averages are computed per ticker. min_periods=1 means the first rows
    of each ticker average over however many rows are available, so no NaN
    is produced by the warm-up period.

    Input: data (needs 'tic' and 'prcod' columns), factors (list of names)
    Output: the same data object with 'MA_10day' and 'MA_50day' added, and
            factors with those two names appended
    '''
    new_columns = []
    for window in (10, 50):
        column = f'MA_{window}day'
        data[column] = data.groupby('tic')['prcod'].transform(
            lambda prices: prices.rolling(window, min_periods=1).mean()
        )
        new_columns.append(column)
    factors.extend(new_columns)
    return data, factors

In [None]:
def price_vs_MA(data, factors):
    '''
    Adds the ratio of the open price to its 10- and 50-day moving averages.

    Reads the 'MA_10day' and 'MA_50day' columns, so those must already be
    present in data when this is called.

    Input: data (needs 'prcod', 'MA_10day', 'MA_50day'), factors (list)
    Output: the same data object with 'open/MA10' and 'open/MA50' added, and
            factors with those two names appended
    '''
    ratio_columns = []
    for window in (10, 50):
        ratio_name = f'open/MA{window}'
        data[ratio_name] = data['prcod'].div(data[f'MA_{window}day'])
        ratio_columns.append(ratio_name)
    factors.extend(ratio_columns)
    return data, factors

In [None]:
def STD_10(data, factors):
    '''
    Adds the 10-day rolling standard deviation of the open price.

    Computed per ticker. With min_periods=1 the window is allowed to start
    at the first row, but the sample standard deviation of a single value is
    undefined, so the first row of every ticker is NaN.

    Input: data (needs 'tic' and 'prcod' columns), factors (list of names)
    Output: the same data object with 'STD_10day' added, and factors with
            that name appended
    '''
    def _rolling_std(prices):
        return prices.rolling(10, min_periods=1).std()

    open_by_ticker = data.groupby('tic')['prcod']
    data['STD_10day'] = open_by_ticker.transform(_rolling_std)
    factors.append('STD_10day')
    return data, factors

In [None]:
def H_L(data, factors):
    '''
    Adds the daily price spread (high minus low) as column 'H-L'.

    Input: data (needs 'prchd' and 'prcld' columns), factors (list of names)
    Output: the same data object with 'H-L' added, and factors with that
            name appended
    '''
    spread = data['prchd'].sub(data['prcld'])
    data['H-L'] = spread
    factors.append('H-L')
    return data, factors

In [None]:
def RSI_14(data, factors):
    '''
    Calculates the relative strength index (RSI) using 14-day period

    RSI = 100 - 100 / (1 + avg_gain / avg_loss), where avg_gain and avg_loss
    are the per-ticker 14-row rolling means (min_periods=1) of the positive
    and negative parts of the day-over-day change in open price.

    Rows where RSI is undefined (the first row of each ticker, and rows where
    both averages are zero) are filled with that ticker's mean RSI. If a
    ticker has no defined RSI at all, those rows fall back to 0 through the
    frame-wide fill below. A genuine RSI of 0 (only losses in the window) is
    kept as 0 rather than being treated as missing.

    Input: data (needs 'tic' and 'prcod' columns), factors (list of names)
    Output: a copy of data with the 'RSI' column added and every remaining
            NaN in any column replaced by 0, and factors with 'RSI' appended
    '''
    tickers = data['tic']

    def _avg_by_ticker(series):
        # transform keeps the original row index, so the result lines up
        # with data regardless of how the frame is indexed.
        return series.groupby(tickers).transform(
            lambda x: x.rolling(window=14, min_periods=1).mean()
        )

    delta = data.groupby('tic')['prcod'].diff()
    avg_gain = _avg_by_ticker(delta.clip(lower=0))
    avg_loss = _avg_by_ticker((-delta).clip(lower=0))

    # avg_loss == 0 with avg_gain > 0 gives a ratio of inf and an RSI of 100;
    # 0 / 0 gives NaN, which is handled as "undefined" below.
    rsi = 100 - (100 / (1 + avg_gain / avg_loss))

    # Fill on the NaN mask itself instead of using 0 as a sentinel, so that
    # real zeros survive and no NaN is reintroduced after the fill.
    rsi = rsi.fillna(rsi.groupby(tickers).transform('mean'))

    data['RSI'] = rsi
    data = data.fillna(0)
    factors.append('RSI')
    return data, factors

In [None]:
def MACD_Line(data, factors):
    '''
    Calculates the Moving Average Convergence Divergence (MACD) of the open
    price, separately for each ticker.
    MACD = EMA12 - EMA26
    MACD_Signal_Line = 9-day exponential moving average of MACD

    The exponential averages are computed within each 'tic' group so that one
    ticker's prices never feed into another ticker's EMA when several tickers
    are stacked in the same frame. The results keep the frame's own row
    index, so they line up with data however it is indexed.

    Input: data (needs 'tic' and 'prcod' columns), factors (list of names)
    Output: a copy of data with 'MACD' and 'MACD_Signal_Line' added and every
            remaining NaN in any column replaced by 0, and factors with the
            two new names appended
    '''
    def _ema_by_ticker(column, span):
        # adjust=False selects the recursive EMA form; min_periods=1 makes
        # the first row of each ticker equal to its own value.
        return data.groupby('tic')[column].transform(
            lambda x: x.ewm(span=span, adjust=False, min_periods=1).mean()
        )

    data['MACD'] = _ema_by_ticker('prcod', 12) - _ema_by_ticker('prcod', 26)
    data['MACD_Signal_Line'] = _ema_by_ticker('MACD', 9)

    data = data.fillna(0)

    factors.extend(['MACD', 'MACD_Signal_Line'])

    return data, factors

In [None]:
def all_features_ta(data):
    '''
    Adds every feature provided by the ta (Technical Analysis) library.

    The "open" and "close" roles are deliberately swapped when handed to ta
    ('prccd' is passed as open, 'prcod' as close) because all trading is done
    at open prices. fillna=True is passed through to ta.

    Input: data (needs 'prccd', 'prchd', 'prcld', 'prcod', 'cshtrd' columns)
    Output: whatever add_all_ta_features returns for data
    '''
    column_roles = {
        'open': "prccd",
        'high': "prchd",
        'low': "prcld",
        'close': "prcod",
        'volume': "cshtrd",
    }
    return add_all_ta_features(data, fillna=True, **column_roles)

In [None]:
def feature_engineer(data, factors):
    '''
    Input: data, factors
    1. Add new features that are defined above, in this order: momentum,
       moving averages, price/MA ratios, rolling std, high-low spread, RSI,
       MACD. price_vs_MA reads the columns MA_1050 creates, so it must run
       after it.
    2. Print whether the resulting frame is free of NaN values
    Output: data, factors
    '''
    steps = (
        momentum_235,
        MA_1050,
        price_vs_MA,
        STD_10,
        H_L,
        RSI_14,
        MACD_Line,
    )
    for step in steps:
        data, factors = step(data, factors)

    # Use logical `not` rather than bitwise `~`: on a plain Python bool `~`
    # yields -1 / -2 instead of a truth value.
    has_no_nas = not data.isna().any().any()
    print(f'Confirm data has no NAs: {has_no_nas}')

    return data, factors
