In [1]:
# importing Relevant Packages
import numpy as np 
import pandas as pd 
import gc
import pickle

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))


/kaggle/input/jpx-my-baseline-lgboost/__results__.html
/kaggle/input/jpx-my-baseline-lgboost/simple_LGBM.txt
/kaggle/input/jpx-my-baseline-lgboost/simple_LGBM.sav
/kaggle/input/jpx-my-baseline-lgboost/submission.csv
/kaggle/input/jpx-my-baseline-lgboost/__notebook__.ipynb
/kaggle/input/jpx-my-baseline-lgboost/__output__.json
/kaggle/input/jpx-my-baseline-lgboost/custom.css
/kaggle/input/jpx-my-baseline-xgboost/simple_xgb.txt
/kaggle/input/jpx-my-baseline-xgboost/__results__.html
/kaggle/input/jpx-my-baseline-xgboost/submission.csv
/kaggle/input/jpx-my-baseline-xgboost/__notebook__.ipynb
/kaggle/input/jpx-my-baseline-xgboost/__output__.json
/kaggle/input/jpx-my-baseline-xgboost/simple_xgb.sav
/kaggle/input/jpx-my-baseline-xgboost/custom.css
/kaggle/input/jpx-my-baseline-skgb/__results__.html
/kaggle/input/jpx-my-baseline-skgb/simple_skgb.sav
/kaggle/input/jpx-my-baseline-skgb/submission.csv
/kaggle/input/jpx-my-baseline-skgb/__notebook__.ipynb
/kaggle/input/jpx-my-baseline-skgb/__output

## Loading the Gradient Boosting Regression Models 

In [2]:
# Loading models.
# NOTE(security): pickle.load can execute arbitrary code stored in the file, so
# only point these paths at artifacts produced by notebooks you trust.
def _load_pickle(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    loading finishes (or fails), instead of being left open.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


model_1 = _load_pickle('../input/jpx-my-baseline-xgboost/simple_xgb.sav')
model_2 = _load_pickle('../input/jpx-my-baseline-lgboost/simple_LGBM.sav')
model_3 = _load_pickle('../input/jpx-my-baseline-skgb/simple_skgb.sav')

# Setting the weights: w[0], w[1] and w[2] multiply the predictions of
# model_1, model_2 and model_3 respectively when they are blended.
w = [0.4, 0.4, 0.2]




## Feature Engineering

In [3]:
from datetime import datetime
import time

 # auxiliary function, from datetime to timestamp
def totimestamp(s):
    """Convert a ``YYYY-MM-DD`` date string into a timestamp in seconds.

    The string is parsed with ``datetime.strptime``, turned into a time tuple,
    passed through ``time.mktime`` (which interprets it as local time) and the
    result is returned as an ``np.int32``.
    """
    parsed = datetime.strptime(s, "%Y-%m-%d")
    seconds = time.mktime(parsed.timetuple())
    return np.int32(seconds)

In [4]:

# Log returns: difference of the natural log of a series over a given lag.
def log_return(series, periods=1):
    """Return ``log(series)`` differenced over ``periods`` rows.

    Parameters
    ----------
    series : pandas.Series
        Values to take the logarithm of.
    periods : int, default 1
        Lag, in rows, passed to ``Series.diff``.

    Returns
    -------
    pandas.Series
        Same index as ``series``; the first ``periods`` entries are NaN.
    """
    logged = np.log(series)
    return logged.diff(periods=periods)



def rsiFunc(prices, n=14):
    """Compute a Relative Strength Index series for a 1-D sequence of prices.

    The average gain and loss are seeded from the first deltas and then
    updated recursively as ``avg = (avg * (n - 1) + current) / n``. Each output
    value is ``100 - 100 / (1 + avg_gain / avg_loss)``.

    Parameters
    ----------
    prices : array-like
        Price values in row order.
    n : int, default 14
        Smoothing period.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``prices``. The first ``n`` entries all
        hold the seed value. The result is always floating point: a float
        input keeps its dtype, any other dtype is promoted to float64 so the
        RSI values are not truncated to integers.

    Notes
    -----
    * When the average loss is zero the ratio is infinite and the RSI is 100;
      when both averages are zero the RSI is NaN. These values are produced
      on purpose, so the corresponding NumPy warnings are silenced.
    * NOTE(review): the seed takes ``n + 1`` deltas but divides by ``n``. This
      is kept exactly as it was so the feature values do not change.
    """
    prices = np.asarray(prices)
    deltas = np.diff(prices)

    # An integer price array must not force an integer result buffer.
    out_dtype = prices.dtype if np.issubdtype(prices.dtype, np.floating) else np.float64
    rsi = np.zeros(prices.shape, dtype=out_dtype)

    # Division by a zero average loss is expected (flat or only-rising prices).
    with np.errstate(divide='ignore', invalid='ignore'):
        seed = deltas[:n+1]
        up = seed[seed >= 0].sum() / n
        down = -seed[seed < 0].sum() / n
        rs = up / down
        rsi[:n] = 100. - 100. / (1. + rs)

        for i in range(n, len(prices)):
            delta = deltas[i-1]  # deltas is one element shorter than prices

            if delta > 0:
                upval = delta
                downval = 0.
            else:
                upval = 0.
                downval = -delta

            up = (up * (n - 1) + upval) / n
            down = (down * (n - 1) + downval) / n

            rs = up / down
            rsi[i] = 100. - 100. / (1. + rs)

    return rsi



def get_features(df):
    """Add the engineered price features to ``df`` in place and return it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Open``, ``High``, ``Low`` and ``Close`` columns. Every
        rolling / exponentially weighted feature is computed over the rows in
        the order they appear in ``df``.

    Returns
    -------
    pandas.DataFrame
        The same object, with these columns added in this order:
        ``log_return_5``, ``log_return``, ``upper_shadow``, ``lower_shadow``,
        ``EMA_21``, ``EMA_55``, ``EMA_315``, ``EMA_825``, then for window 7
        and window 5 ``EMA_{w}``, ``EMA_{w}_std``, ``EMA_{w}_BB_high``,
        ``MA_{w}MA_BB_low``, then ``MACD``, ``rsi_5``, ``rsi_7``, ``rsi_21``.

    Notes
    -----
    * Column names (including the irregular ``MA_{w}MA_BB_low``) are kept
      exactly because the notebook's feature lists refer to them by name.
    * An earlier version ended with ``df[[...]].astype('float16')`` whose
      result was never assigned, so it had no effect. It is removed rather
      than applied: really casting to float16 would change the feature values
      (float16 cannot represent values above about 65504).
    """
    close = df['Close']

    df['log_return_5'] = log_return(close, periods=5)
    df['log_return'] = log_return(close, periods=1)

    # Candle shadows: distance from the candle body to the high / low.
    df['upper_shadow'] = df['High'] - np.maximum(df['Close'], df['Open'])
    df['lower_shadow'] = np.minimum(df['Close'], df['Open']) - df['Low']

    # Exponentially weighted moving averages of the close.
    for span in (21, 55, 315, 825):
        df[f'EMA_{span}'] = close.ewm(span=span).mean()

    # Band features: EMA plus/minus a multiple of the rolling standard deviation.
    no_of_std = 2
    for window in (7, 5):
        df[f'EMA_{window}'] = close.ewm(span=window).mean()
        df[f'EMA_{window}_std'] = close.rolling(window=window).std()
        df[f'EMA_{window}_BB_high'] = df[f'EMA_{window}'] + no_of_std * df[f'EMA_{window}_std']
        df[f'MA_{window}MA_BB_low'] = df[f'EMA_{window}'] - no_of_std * df[f'EMA_{window}_std']

    # Difference between the two short EMAs computed above.
    df['MACD'] = df['EMA_7'] - df['EMA_5']

    for period in (5, 7, 21):
        df[f'rsi_{period}'] = rsiFunc(df['Close'].values, period)

    return df



In [5]:
from sklearn.preprocessing import StandardScaler
# One StandardScaler instance for the whole notebook. The submission loop calls
# fit_transform on it for every batch, so it is re-fitted on each batch.
scaler = StandardScaler()

In [6]:
# Selecting the features.

# Columns that are standardised before prediction.
scaled_features = [
    'Open', 'High', 'Low', 'Close', 'Volume',
    'log_return_5', 'log_return',
    'upper_shadow', 'lower_shadow',
    'EMA_21', 'EMA_55', 'EMA_315', 'EMA_825',
    'EMA_7', 'EMA_7_std', 'EMA_7_BB_high', 'MA_7MA_BB_low',
    'EMA_5', 'EMA_5_std', 'EMA_5_BB_high', 'MA_5MA_BB_low',
    'MACD',
    'rsi_5', 'rsi_7', 'rsi_21',
]

# Columns passed to the models: the same names in the same order, kept as a
# separate list object.
features = list(scaled_features)



## Submitting

In [7]:
# Ranking function used to turn predictions into the submission's Rank column.

def add_rank(df, col_name="pred"):
    """Write a zero-based, per-date descending rank of ``col_name`` into ``df["Rank"]``.

    Rows are grouped by ``Date``; within each group the largest value gets
    rank 0, and ties are broken with ``method="first"``. The column is stored
    as integers. ``df`` is modified in place and also returned.
    """
    position = df.groupby("Date")[col_name].rank(method="first", ascending=False)
    df["Rank"] = position.sub(1)
    df["Rank"] = df["Rank"].astype("int")
    return df

In [8]:
import jpx_tokyo_market_prediction


env = jpx_tokyo_market_prediction.make_env()   # initialize the environment
iter_test = env.iter_test()    # an iterator which loops over the test files
for (prices, options, financials, trades, secondary_prices, sample_prediction) in iter_test:
    # Build the features, then replace infinities and missing values with 0.
    # NOTE(review): the rolling / EWM features are computed over the rows of
    # each batch in row order — confirm this matches how the models were trained.
    prices = (
        get_features(prices)
        .replace((np.inf, -np.inf, np.nan), 0)
        .reset_index(drop=True)
        .fillna(0)
    )

    # NOTE(review): the scaler is re-fitted on every batch, so each batch is
    # standardised with its own mean and variance.
    prices[scaled_features] = scaler.fit_transform(prices[scaled_features])

    # One prediction vector per model, blended with the weights in w.
    pred_1, pred_2, pred_3 = (
        model.predict(prices[features]) for model in (model_1, model_2, model_3)
    )
    prices['pred'] = pred_1*w[0] + pred_2*w[1] + pred_3*w[2]
    prices = add_rank(prices)

    # Copy the ranks onto the submission frame by security code.
    rank_by_code = prices.set_index('SecuritiesCode')['Rank'].to_dict()
    sample_prediction['Rank'] = sample_prediction['SecuritiesCode'].map(rank_by_code)
    env.predict(sample_prediction)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
