In [1]:
#concat function

def concat_df(df1, df2):
    """Stack ``df2`` below ``df1`` and keep one row per ``RowId``.

    The two frames are concatenated with a fresh integer index; when the same
    ``RowId`` occurs more than once, only its first occurrence (in stacking
    order) is kept. Neither input frame is modified.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Frames to combine; the combined frame must contain a ``RowId`` column.

    Returns
    -------
    pandas.DataFrame
        The de-duplicated concatenation.
    """
    stacked = pd.concat([df1, df2], ignore_index=True, sort=False)
    deduplicated = stacked.drop_duplicates(subset=["RowId"], keep="first")
    return deduplicated

In [2]:
from datetime import datetime
import time

# auxiliary function: converts a "YYYY-MM-DD" date string to a Unix timestamp
def totimestamp(s):
    """Convert a ``"%Y-%m-%d"`` date string into a Unix timestamp.

    The string is parsed into a naive ``datetime`` and handed to
    ``time.mktime``, which interprets it in the machine's local time zone.
    The result is returned as ``np.int32``, so only timestamps that fit in a
    signed 32-bit integer are representable.
    """
    parsed = datetime.strptime(s, "%Y-%m-%d")
    seconds = time.mktime(parsed.timetuple())
    return np.int32(seconds)

In [3]:

# define function to compute log returns
def log_return(series, periods=1):
    """Return the ``periods``-step difference of the natural log of ``series``.

    Parameters
    ----------
    series : pandas.Series
        Values to take logarithms of.
    periods : int, default 1
        Lag passed to ``Series.diff``.

    Returns
    -------
    pandas.Series
        ``log(series[t]) - log(series[t - periods])``; the leading entries
        that have no lagged counterpart are NaN.
    """
    log_values = np.log(series)
    return log_values.diff(periods=periods)


# def fill_the_gaps(df):
#     new_df = pd.DataFrame(columns= df.columns)
#     for i in range(len(df['SecuritiesCode'].unique())):
#         new_df = new_df.append(df[df['SecuritiesCode'] == i].reindex(range(df[df['SecuritiesCode'] == i].index[0],df[df['SecuritiesCode'] == i].index[-1]+60,60),method='pad'))
#     new_df.fillna(method = 'pad', inplace = True) 
#     return new_df


def rsiFunc(prices, n=14):
    """Compute a Relative Strength Index over ``prices``.

    Average gains and losses are seeded from the first price changes and then
    updated recursively as ``avg = (avg * (n - 1) + new) / n``.

    Parameters
    ----------
    prices : array-like
        One-dimensional sequence of prices, in the order they should be
        processed. Converted to a float array internally, so integer input
        no longer truncates the result to whole numbers.
    n : int, default 14
        Smoothing period; must be at least 1.

    Returns
    -------
    numpy.ndarray
        Float array with one RSI value per price. The first ``n`` entries all
        hold the seed value. Entries are 100 when the average loss is zero and
        the average gain is positive, and NaN when both averages are zero.

    Raises
    ------
    ValueError
        If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer")

    # Work in float: np.zeros_like on an integer array would yield an integer
    # output buffer and silently truncate every RSI value written into it.
    prices = np.asarray(prices, dtype=float)
    deltas = np.diff(prices)
    rsi = np.zeros_like(prices)

    # A zero average loss makes up/down evaluate to +-inf (RSI 100) or NaN
    # (0/0). Those results are intended, so silence the floating-point
    # warnings numpy would otherwise emit for them.
    with np.errstate(divide="ignore", invalid="ignore"):
        # NOTE: the seed takes n+1 deltas but averages over n, and the loop
        # below starts at deltas[n-1], so deltas[n-1] and deltas[n] are used
        # twice. Kept as-is so existing feature values do not change.
        seed = deltas[:n + 1]
        up = seed[seed >= 0].sum() / n
        down = -seed[seed < 0].sum() / n
        rsi[:n] = 100. - 100. / (1. + up / down)

        for i in range(n, len(prices)):
            delta = deltas[i - 1]  # deltas is one element shorter than prices

            if delta > 0:
                upval, downval = delta, 0.
            else:
                upval, downval = 0., -delta

            up = (up * (n - 1) + upval) / n
            down = (down * (n - 1) + downval) / n

            rsi[i] = 100. - 100. / (1. + up / down)

    return rsi



def get_features(df, float_dtype=None):
    """Add price-derived indicator columns to ``df`` and return it.

    ``df`` is modified in place (every feature is assigned as a column on the
    frame that was passed in) and the same object is returned.

    Columns read: ``Close``, ``Open``, ``High``, ``Low``, ``Volume`` and
    ``dt`` (``dt`` must support the ``.dt`` accessor). All indicators run over
    the rows in their current order.
    NOTE(review): presumably the caller passes a single security sorted by
    date -- confirm, otherwise the rolling/EWM values mix instruments.

    Columns written, in this order: ``log_return_5``, ``log_return``,
    ``upper_shadow``, ``lower_shadow``, ``EMA_21``, ``EMA_55``, ``EMA_315``,
    ``EMA_825``, then for window 7 and window 5 ``EMA_{w}``, ``EMA_{w}_std``,
    ``EMA_{w}_BB_high``, ``MA_{w}MA_BB_low``, then ``MACD``, ``rsi_5``,
    ``rsi_7``, ``rsi_21``, ``VWAP`` and ``dayofweek``.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame with the columns listed above.
    float_dtype : str or numpy dtype, optional
        When given, the float feature columns (everything written except
        ``VWAP`` and ``dayofweek``) are cast to this dtype. The default
        ``None`` leaves the dtypes untouched. The previous implementation
        called ``.astype('float16')`` on these columns but discarded the
        result, so no cast ever happened; the default preserves that output.
        Be careful with ``'float16'``: its largest finite value is 65504, so
        larger prices or EMAs would become ``inf``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the feature columns added.
    """
    close = df['Close']

    df['log_return_5'] = log_return(close, periods=5)
    df['log_return'] = log_return(close, periods=1)

    # Candle wicks: distance from the candle body to the high / low.
    df['upper_shadow'] = df['High'] - np.maximum(close, df['Open'])
    df['lower_shadow'] = np.minimum(close, df['Open']) - df['Low']

    for span in (21, 55, 315, 825):
        df[f'EMA_{span}'] = close.ewm(span=span).mean()

    # Bands around the EMA, using the simple rolling standard deviation.
    no_of_std = 2
    for window in (7, 5):
        ema = close.ewm(span=window).mean()
        rolling_std = close.rolling(window=window).std()
        df[f'EMA_{window}'] = ema
        df[f'EMA_{window}_std'] = rolling_std
        df[f'EMA_{window}_BB_high'] = ema + no_of_std * rolling_std
        # The 'MA_{w}MA_BB_low' spelling does not match 'EMA_{w}_BB_high';
        # it is kept because code outside this function may select the
        # column by this exact name.
        df[f'MA_{window}MA_BB_low'] = ema - no_of_std * rolling_std

    df['MACD'] = df['EMA_7'] - df['EMA_5']

    for period in (5, 7, 21):
        df[f'rsi_{period}'] = rsiFunc(close.values, period)

    # NOTE(review): this expression reduces to Close (NaN where Volume is 0);
    # it is not a volume-weighted average over any window. Left unchanged
    # because the intended window is not visible here.
    df['VWAP'] = (close * df['Volume']) / df['Volume']

    df['dayofweek'] = df['dt'].dt.dayofweek

    if float_dtype is not None:
        float_features = (
            'log_return_5', 'log_return', 'upper_shadow', 'lower_shadow',
            'EMA_21', 'EMA_55', 'EMA_315', 'EMA_825', 'EMA_7', 'EMA_7_std',
            'EMA_7_BB_high', 'MA_7MA_BB_low', 'EMA_5', 'EMA_5_std',
            'EMA_5_BB_high', 'MA_5MA_BB_low', 'MACD', 'rsi_5', 'rsi_7',
            'rsi_21',
        )
        for column in float_features:
            df[column] = df[column].astype(float_dtype)

    return df



In [4]:
def add_rank(df, col_name="pred"):
    """Write a zero-based, per-``Date`` descending rank of ``col_name`` to ``df["Rank"]``.

    Within each ``Date`` group the largest value gets rank 0; ties are broken
    by row order (``method="first"``). ``df`` is modified in place and
    returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``Date`` column and the column named by ``col_name``.
    col_name : str, default "pred"
        Column whose values are ranked.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with an integer ``Rank`` column.
    """
    scores_by_date = df.groupby("Date")[col_name]
    zero_based = scores_by_date.rank(method="first", ascending=False) - 1
    # Stored as float first, then replaced by the integer version, so the
    # column already exists if the integer cast raises.
    df["Rank"] = zero_based
    df["Rank"] = zero_based.astype("int")
    return df