импорт всех необходимых зависимостей

In [1]:
import os
from decimal import ROUND_HALF_UP, Decimal
import numpy as np
import pandas as pd
from tqdm import tqdm
import warnings
import jpx_tokyo_market_prediction

# Silence every warning category for the remainder of the notebook.
# NOTE(review): this blanket filter also hides pandas deprecation and
# chained-assignment warnings raised by the cells below — consider narrowing it.
warnings.filterwarnings('ignore')

инициализация путей

In [2]:
# Root folder of the competition dataset, relative to the notebook's working directory.
base_dir = "../input/jpx-tokyo-stock-exchange-prediction"

# Sub-folders holding the training files and the supplemental files.
train_files_dir = "/".join((base_dir, "train_files"))
supplemental_files_dir = "/".join((base_dir, "supplemental_files"))

предобработка данных

In [3]:
def adjust_price(price):
    """Compute an adjusted close price for every security in ``price``.

    The input is not modified: the date conversion and all derived columns
    are written to new frames, so the function can be called repeatedly on
    the same raw history.

    Args:
        price: DataFrame with at least the columns ``Date`` (``%Y-%m-%d``
            strings or datetimes), ``SecuritiesCode``, ``Close`` and
            ``AdjustmentFactor``.

    Returns:
        A new DataFrame indexed by ``Date`` (datetime), ordered by
        ``SecuritiesCode`` and then ``Date``, holding the input columns plus
        ``CumulativeAdjustmentFactor`` and ``AdjustedClose``.
    """
    # ``assign`` builds a new frame, so the caller's "Date" column keeps its
    # original dtype instead of being overwritten in place.
    price = price.assign(Date=pd.to_datetime(price["Date"], format="%Y-%m-%d"))

    def generate_adjusted_close(df):
        """Add the cumulative factor and adjusted close for one security."""
        # Newest row first, so the running product at each row covers that
        # row's factor and every later one.
        df = df.sort_values("Date", ascending=False, ignore_index=True)

        df["CumulativeAdjustmentFactor"] = df["AdjustmentFactor"].cumprod()
        adjusted = (df["CumulativeAdjustmentFactor"] * df["Close"]).astype(str)

        # Round to one decimal place with half-up rounding; going through
        # Decimal avoids binary-float rounding artefacts.
        df["AdjustedClose"] = adjusted.apply(
            lambda x: float(Decimal(x).quantize(Decimal("0.1"), rounding=ROUND_HALF_UP))
        )

        # Back to chronological order; a zero adjusted close is treated as
        # missing and filled with the last earlier non-zero value.
        df = df.sort_values("Date", ignore_index=True)
        df["AdjustedClose"] = df["AdjustedClose"].replace(0, np.nan).ffill()
        return df

    ordered = price.sort_values(["SecuritiesCode", "Date"])

    # Iterate the groups explicitly instead of using ``groupby.apply``:
    # handing the grouping column to ``apply`` is deprecated since pandas 2.2,
    # and the result must keep "SecuritiesCode" for downstream filtering.
    per_security = [
        generate_adjusted_close(group)
        for _, group in ordered.groupby("SecuritiesCode")
    ]

    if not per_security:
        # No rows to process: return an empty frame with the same schema.
        return ordered.assign(
            CumulativeAdjustmentFactor=np.nan,
            AdjustedClose=np.nan,
        ).set_index("Date")

    return pd.concat(per_security).set_index("Date")

генерация признаков для предсказания

In [4]:
def get_features_for_predict(price, code):
    """Build the prediction features for a single security.

    Args:
        price: DataFrame containing the columns ``SecuritiesCode``,
            ``AdjustedClose`` and ``ExpectedDividend``.
        code: Value of ``SecuritiesCode`` whose rows are selected.

    Returns:
        A new DataFrame, on the index of the selected rows, with the columns
        ``SecuritiesCode``, ``ExpectedDividend`` (positive values replaced by
        1) and ``return_1day`` (row-over-row relative change of the adjusted
        close). Missing and infinite values are replaced by 0.
    """
    price_column = "AdjustedClose"
    wanted_columns = ["SecuritiesCode", price_column, "ExpectedDividend"]

    selected = price.loc[price["SecuritiesCode"] == code, wanted_columns].copy()

    # Relative change of the adjusted close versus the previous row.
    selected["return_1day"] = selected[price_column].pct_change(1)

    # Collapse any positive expected dividend to the flag value 1.
    dividend = selected["ExpectedDividend"]
    selected["ExpectedDividend"] = dividend.mask(dividend > 0, 1)

    cleaned = selected.fillna(0).replace([np.inf, -np.inf], 0)
    return cleaned.drop([price_column], axis=1)

загрузка тренировочных и дополнительных данных

In [5]:
# Columns kept from every price table used in this notebook.
price_cols = ["Date", "SecuritiesCode", "Close", "AdjustmentFactor", "ExpectedDividend"]

# Read the training and supplemental price files, keeping only price_cols.
df_price_raw = pd.read_csv(f"{train_files_dir}/stock_prices.csv")[price_cols]
df_price_supplemental = pd.read_csv(f"{supplemental_files_dir}/stock_prices.csv")[price_cols]

# Stack both tables into one raw history.
df_price_raw = pd.concat([df_price_raw, df_price_supplemental])

# Keep only rows dated on or after the hard-coded cutoff. "Date" is compared
# as read from the CSV, i.e. as a string against the string literal.
df_price_raw = df_price_raw[df_price_raw["Date"] >= "2022-07-01"]

создание среды соревнования

In [6]:
# Create the competition environment and take the iterator that the
# prediction loop consumes.
# NOTE(review): presumably make_env() can only be called once per kernel
# session — confirm against the competition API documentation.
env = jpx_tokyo_market_prediction.make_env()
iter_test = env.iter_test()

генерация выходных данных

In [7]:
# Consume the test iterator one batch at a time: extend the raw price history
# with the new batch, rebuild the features, rank the requested securities and
# submit the ranks through env.predict.
for counter, (prices, options, financials, trades, secondary_prices, sample_prediction) in enumerate(iter_test):
    current_date = prices["Date"].iloc[0]

    if counter == 0:
        # Drop stored rows dated on or after the first served date, so the
        # concat below does not duplicate them.
        df_price_raw = df_price_raw[df_price_raw["Date"] < current_date]

    df_price_raw = pd.concat([df_price_raw, prices[price_cols]])

    # adjust_price is assumed to be idempotent. Hand it a copy so that any
    # in-place conversion it performs cannot leak into the accumulated raw
    # history that the next iteration concatenates onto.
    df_price = adjust_price(df_price_raw.copy())

    codes = sorted(prices["SecuritiesCode"].unique())

    feature = pd.concat([get_features_for_predict(df_price, code) for code in codes])
    # Explicit copy: a new column is added below, and writing to a slice of
    # another frame is chained assignment.
    feature = feature.loc[feature.index == current_date].copy()

    # Score = one-day return plus 100 for rows carrying the dividend flag.
    feature["predict"] = (
        feature["return_1day"] +
        feature["ExpectedDividend"] * 100
    )

    # Ascending sort: the lowest score ends up with Rank 0.
    ranked_features = (
        feature
        .sort_values("predict", ascending=True)
        .drop_duplicates(subset=["SecuritiesCode"])
        .reset_index(drop=True)
    )

    target_codes = sample_prediction["SecuritiesCode"]

    # Rank only the requested securities so the ranks stay contiguous even if
    # `prices` carries codes that are not part of the request.
    is_requested = ranked_features["SecuritiesCode"].isin(target_codes)
    ranked_codes = ranked_features.loc[is_requested, "SecuritiesCode"].tolist()

    # Requested codes without a feature row for this date would map to NaN and
    # make the integer cast fail; slot them into the middle of the ordering.
    scored_codes = set(ranked_codes)
    unscored_codes = [code for code in target_codes.unique() if code not in scored_codes]
    if unscored_codes:
        middle = len(ranked_codes) // 2
        ranked_codes[middle:middle] = unscored_codes

    rank_mapping = {code: rank for rank, code in enumerate(ranked_codes)}

    sample_prediction["Rank"] = target_codes.map(rank_mapping).astype(int)

    env.predict(sample_prediction)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


In [8]:
! head submission.csv

Date,SecuritiesCode,Rank
2021-12-06,1301,0
2021-12-06,1332,1339
2021-12-06,1333,1338
2021-12-06,1375,1337
2021-12-06,1376,1336
2021-12-06,1377,1335
2021-12-06,1379,1334
2021-12-06,1381,1333
2021-12-06,1407,1332


In [9]:
! tail submission.csv

2021-12-07,9982,472
2021-12-07,9983,109
2021-12-07,9984,1980
2021-12-07,9987,960
2021-12-07,9989,329
2021-12-07,9990,1419
2021-12-07,9991,881
2021-12-07,9993,56
2021-12-07,9994,233
2021-12-07,9997,528
