# Overview
- LightGBM

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import mean_absolute_error
from datetime import timedelta
from tqdm.notebook import tqdm
import lightgbm as lgb
from typing import List, Union, Optional
import time
from contextlib import contextmanager
import sys
import logging
import category_encoders as ce
import plotly
from plotly import express as px
from scipy.stats import norm
# from optuna.integration import lightgbm as lgb
# import mlb
import os
import warnings
from typing import Optional, Tuple
from tqdm.notebook import tqdm
from collections import defaultdict

pd.options.display.max_columns = 200

DEBUG = True

## Config

In [2]:
class CFG:
    ######################
    # global #
    ######################
    INPUT_DIR = "../input/mlb-player-digital-engagement-forecasting"
    # INPUT_DIR = "../input/mlb-unnested-dataset" # for kaggle kernel
    OBJECT_ID = ["playerId", "date"]
    TARGETS = ["target1", "target2", "target3", "target4"]
    ######################
    # model #
    ######################
    # target1
    LGB_TARGET1_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 54,
        "lambda_l1": 1.8294424356946235e-07,
        "lambda_l2": 8.269494679852943e-05,
        "bagging_fraction": 0.9311050425278897,
        "bagging_freq": 2,
        "feature_fraction": 0.9840000000000001,
        "min_data_in_leaf": 20,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    # target2
    LGB_TARGET2_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 14,
        "lambda_l1": 4.379035512071324e-06,
        "lambda_l2": 5.940520594400555e-05,
        "bagging_fraction": 1.0,
        "bagging_freq": 0,
        "feature_fraction": 0.5,
        "min_data_in_leaf": 20,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    # target3
    LGB_TARGET3_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 64,
        "lambda_l1": 0.011234793539671765,
        "lambda_l2": 4.766835836661758e-06,
        "bagging_fraction": 0.8841726515054378,
        "bagging_freq": 3,
        "feature_fraction": 1.0,
        "min_data_in_leaf": 100,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    # target4
    LGB_TARGET4_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 51,
        "lambda_l1": 5.506356543008336,
        "lambda_l2": 9.683199146595637,
        "bagging_fraction": 0.7376768451562011,
        "bagging_freq": 2,
        "feature_fraction": 0.5,
        "min_data_in_leaf": 20,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    SEEDS = [2434]
    MODEL_PATH = "../output/nb022"
    # MODEL_PATH = "../input/mlb-nb022-lgb-weights" # for kaggle kernel

## Utils

In [3]:
def get_logger(out_file=None):
    logger = logging.getLogger()  # loggerの呼び出し
    formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] [%(message)s]")  # ログ出力の際のフォーマットを定義
    logger.handlers = []  # ハンドラーを追加するためのリスト
    logger.setLevel(logging.INFO)  # ロギングのレベルを設定, 'INFO' : 想定された通りのことが起こったことの確認

    handler = logging.StreamHandler(sys.stdout)  # StreamHandler(コンソールに出力するハンドラ)を追加
    handler.setFormatter(formatter)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)

    # ログをファイルとして出力する際のハンドラ(FileHandler)
    if out_file is not None:
        fh = logging.FileHandler(out_file)
        fh.setFormatter(formatter)
        fh.setLevel(logging.INFO)
        logger.addHandler(fh)

    logger.info("logger set up")  # "logger set up"を表示
    return logger


@contextmanager
def timer(name: str, logger: Optional[logging.Logger] = None):
    t0 = time.time()
    msg = f"<{name}> start"
    if logger is None:
        print(msg)
    else:
        logger.info(msg)
    yield

    msg = f"<{name}> done in {time.time() - t0:.2f} s"
    if logger is None:
        print(msg)
    else:
        logger.info(msg)


def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int64)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float32)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float64)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

## Loading dataset

In [4]:
class MLBDataset(object):
    def __init__(self, input_path):
        self.input_path = Path(input_path)
        #################
        # train #
        #################
        self.train_next = pd.read_pickle(self.input_path / "train_updated_nextDayPlayerEngagement.pickle")
        self.train_rosters = pd.read_pickle(self.input_path / "train_updated_rosters.pickle")
        self.train_ptf = pd.read_pickle(self.input_path / "train_updated_playerTwitterFollowers.pickle")
        self.train_scores = pd.read_pickle(self.input_path / "train_updated_playerBoxScores.pickle")
        self.train_games = pd.read_pickle(self.input_path / "train_updated_games.pickle")
        self.train_standings = pd.read_pickle(self.input_path / "train_updated_standings.pickle")
        self.train_tbs = pd.read_pickle(self.input_path / "train_updated_teamBoxScores.pickle")
        self.train_ttf = pd.read_pickle(self.input_path / "train_updated_teamTwitterFollowers.pickle")
        self.train_trans = pd.read_pickle(self.input_path / "train_updated_transactions.pickle")
        self.train_awards = pd.read_pickle(self.input_path / "train_updated_awards.pickle")
        self.train_events = pd.read_pickle(self.input_path / "train_updated_events.pickle")
        #################
        # Additional #
        #################
        self.players = pd.read_pickle(self.input_path / "players.pickle")
        self.awards = pd.read_pickle(self.input_path / "awards.pickle")
        self.seasons = pd.read_pickle(self.input_path / "seasons.pickle")
        self.teams = pd.read_pickle(self.input_path / "teams.pickle")
        #################
        # test #
        #################
        self.example_test = pd.read_csv(self.input_path / "example_test.csv")
        self.sample_submission = pd.read_csv(self.input_path / "example_sample_submission.csv")

        # # only players in test set
        # target_playerids = self.players[self.players["playerForTestSetAndFuturePreds"] == True]["playerId"].unique()
        # self.train_next = self.train_next[self.train_next["playerId"].isin(target_playerids)].reset_index(drop=True)

mlb_train_ds = MLBDataset(CFG.INPUT_DIR)

## Feature blocks

In [5]:
def merge_by_key(left: Union[pd.DataFrame, pd.Series], right: pd.DataFrame, on=CFG.OBJECT_ID) -> pd.DataFrame:
    if not isinstance(left, pd.Series):
        left = left[on]
    return pd.merge(left, right, on=on, how="left").drop(columns=on)


class BaseBlock(object):
    def fit(self, input_df: pd.DataFrame, y=None) -> pd.DataFrame:
        return self.transform(input_df)

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        return NotImplementedError()


def quantile25(x: pd.Series):
    return x.quantile(q=0.25)


def quantile75(x: pd.Series):
    return x.quantile(q=0.75)


def prob(x: pd.Series):
    x = x.reset_index(drop=True).values.tolist()
    mean = np.mean(x)
    std = np.std(x)
    distribution = norm(mean, std)
    min_weight = min(x)
    max_weight = max(x)
    values = list(np.linspace(min_weight, max_weight))
    probabilities = [distribution.pdf(v) for v in values]
    max_values = max(probabilities)
    max_index = probabilities.index(max_values)

    return x[max_index]


class TargetAggregateBlock(BaseBlock):
    def __init__(self, periods: List[int]):
        self.periods = periods
        
    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        only_fit: bool,
    ):
        output_df = pd.DataFrame()
        for i in self.periods:
            _target_df = target_df[(target_df["date"] >= i) & (target_df["date"] < (i + 100))].copy()

            cols = _target_df.drop(columns=["date", "engagementMetricsDate", "playerId"], axis=1).columns.tolist()
            dfs = [_target_df.groupby(["playerId"])[col].agg([
                "mean", "std", "max", "min", "median", quantile25, quantile75, prob
                ]).add_prefix(f"{col}_{str(i)[:-2]}_") for col in cols]
            tmp_df = pd.concat(dfs, axis=1)
            output_df = pd.concat([output_df, tmp_df], axis=1)

            del _target_df, dfs, tmp_df

        self.agg_df = output_df.reset_index()

        return self.transform(
            input_df,
            target_df,
            rosters_df,
            playerBoxScores_df
        )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame
    ):
        return merge_by_key(input_df, self.agg_df, on="playerId")


class PlayersLabelEncodingBlock(BaseBlock):
    def __init__(self, columns: List[str]):
        self.columns = columns + ["playerId"]
        self.players_df = mlb_train_ds.players.copy()
        self.players_df["DOB_year"] = pd.to_datetime(self.players_df["DOB"]).dt.year
        self.players_df["playerAge"] = 2021 - self.players_df["DOB_year"]
        self.labeled_df = self.players_df[self.columns].copy()
        self.labeled_df["playerid"] = self.labeled_df["playerId"]
        self.columns = self.columns + ["playerid"]
        self.encoder = None
    
    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        only_fit: bool
    ):
        self.encoder = ce.OrdinalEncoder(handle_unknown="value", handle_missing="values")
        self.encoder.fit(self.labeled_df[self.columns])
        self.labeled_df[self.columns] = self.encoder.transform(self.labeled_df[self.columns])

        return self.transform(
            input_df,
            target_df,
            rosters_df,
            playerBoxScores_df
        )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame
    ):
        return merge_by_key(input_df, self.labeled_df, on="playerId").add_prefix("Players_LE_")


class RostersLabelEncodingBlock(BaseBlock):
    def __init__(self, columns: List[str]):
        self.columns = columns
        self.encoder = None

    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        only_fit: bool
    ):
        self.encoder = ce.OrdinalEncoder(handle_unknown="value", handle_missing="value")
        self.encoder.fit(rosters_df[self.columns])

        return self.transform(
            input_df,
            target_df,
            rosters_df,
            playerBoxScores_df
        )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame
    ):
        self.labeled_df = rosters_df[CFG.OBJECT_ID + self.columns].copy()
        self.labeled_df[self.columns] = self.encoder.transform(self.labeled_df[self.columns])

        return merge_by_key(input_df, self.labeled_df).add_prefix("Rosters_LE_")


class PlayerStatsBlock(BaseBlock):
    def __init__(self, columns):
        self.columns = columns
        # game info
        self.feat_home_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_home_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_positionCode_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_battingOrder_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        # batting
        self.feat_gamesPlayedBatting_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesPlayedBatting_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_plateAppearances_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_plateAppearances_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBats_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBats_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hits_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hits_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doubles_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doubles_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triples_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triples_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRuns_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRuns_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbi_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbi_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsScored_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsScored_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_totalBases_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_totalBases_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBalls_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBalls_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitch_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitch_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalks_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalks_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoDoublePlay_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoDoublePlay_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoTriplePlay_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoTriplePlay_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBunts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBunts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFlies_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFlies_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealing_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealing_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBases_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBases_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_leftOnBase_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_leftOnBase_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterference_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterference_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffs_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffs_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        # pitching
        self.feat_gamesPlayedPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesPlayedPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesStartedPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesStartedPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_winsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_winsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_lossesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_lossesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_completeGamesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_completeGamesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_shutoutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_shutoutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saves_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saves_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saveOpportunities_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saveOpportunities_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_blownSaves_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_blownSaves_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_holds_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_holds_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inningsPitched_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inningsPitched_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_earnedRuns_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_earnedRuns_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pitchesThrown_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pitchesThrown_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRunsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRunsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBallsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBallsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitchPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitchPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalksPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalksPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balks_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balks_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_wildPitches_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_wildPitches_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBatsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBatsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_battersFaced_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_battersFaced_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBuntsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBuntsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFliesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFliesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunners_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunners_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunnersScored_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunnersScored_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbiPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbiPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_airOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_airOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doublesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doublesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triplesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triplesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealingPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealingPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBasesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBasesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_outsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_outsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balls_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balls_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikes_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikes_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitBatsmen_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitBatsmen_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterferencePitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterferencePitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_assists_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_assists_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_putOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_putOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_errors_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_errors_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_chances_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_chances_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesFinishedPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesFinishedPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        only_fit: bool
    ):
        if playerBoxScores_df is not None:
            for i, row in enumerate(tqdm(playerBoxScores_df[self.columns].values)):
                season = int(str(row[1])[:4])

                self.feat_home_dict[row[0]][season][row[1]] += row[2]
                self.feat_positionCode_dict[row[0]][season][row[1]] += row[3]
                self.feat_battingOrder_dict[row[0]][season][row[1]] += row[4]
                self.feat_gamesPlayedBatting_dict[row[0]][season][row[1]] += row[5]
                self.feat_plateAppearances_dict[row[0]][season][row[1]] += row[6]
                self.feat_atBats_dict[row[0]][season][row[1]] += row[7]
                self.feat_hits_dict[row[0]][season][row[1]] += row[8]
                self.feat_doubles_dict[row[0]][season][row[1]] += row[9]
                self.feat_triples_dict[row[0]][season][row[1]] += row[10]
                self.feat_homeRuns_dict[row[0]][season][row[1]] += row[11]
                self.feat_rbi_dict[row[0]][season][row[1]] += row[12]
                self.feat_runsScored_dict[row[0]][season][row[1]] += row[13]
                self.feat_totalBases_dict[row[0]][season][row[1]] += row[14]
                self.feat_baseOnBalls_dict[row[0]][season][row[1]] += row[15]
                self.feat_hitByPitch_dict[row[0]][season][row[1]] += row[16]
                self.feat_intentionalWalks_dict[row[0]][season][row[1]] += row[17]
                self.feat_groundIntoDoublePlay_dict[row[0]][season][row[1]] += row[18]
                self.feat_groundIntoTriplePlay_dict[row[0]][season][row[1]] += row[19]
                self.feat_strikeOuts_dict[row[0]][season][row[1]] += row[20]
                self.feat_sacBunts_dict[row[0]][season][row[1]] += row[21]
                self.feat_sacFlies_dict[row[0]][season][row[1]] += row[22]
                self.feat_caughtStealing_dict[row[0]][season][row[1]] += row[23]
                self.feat_stolenBases_dict[row[0]][season][row[1]] += row[24]
                self.feat_leftOnBase_dict[row[0]][season][row[1]] += row[25]
                self.feat_catchersInterference_dict[row[0]][season][row[1]] += row[26]
                self.feat_pickoffs_dict[row[0]][season][row[1]] += row[27]
                self.feat_flyOuts_dict[row[0]][season][row[1]] += row[28]
                self.feat_groundOuts_dict[row[0]][season][row[1]] += row[29]

                self.feat_gamesPlayedPitching_dict[row[0]][season][row[1]] += row[30]
                self.feat_gamesStartedPitching_dict[row[0]][season][row[1]] += row[31]
                self.feat_winsPitching_dict[row[0]][season][row[1]] += row[32]
                self.feat_lossesPitching_dict[row[0]][season][row[1]] += row[33]
                self.feat_completeGamesPitching_dict[row[0]][season][row[1]] += row[34]
                self.feat_shutoutsPitching_dict[row[0]][season][row[1]] += row[35]
                self.feat_saves_dict[row[0]][season][row[1]] += row[36]
                self.feat_saveOpportunities_dict[row[0]][season][row[1]] += row[37]
                self.feat_blownSaves_dict[row[0]][season][row[1]] += row[38]
                self.feat_holds_dict[row[0]][season][row[1]] += row[39]
                self.feat_inningsPitched_dict[row[0]][season][row[1]] += row[40]
                self.feat_runsPitching_dict[row[0]][season][row[1]] += row[41]
                self.feat_earnedRuns_dict[row[0]][season][row[1]] += row[42]
                self.feat_pitchesThrown_dict[row[0]][season][row[1]] += row[43]
                self.feat_hitsPitching_dict[row[0]][season][row[1]] += row[44]
                self.feat_homeRunsPitching_dict[row[0]][season][row[1]] += row[45]
                self.feat_strikeOutsPitching_dict[row[0]][season][row[1]] += row[46]
                self.feat_baseOnBallsPitching_dict[row[0]][season][row[1]] += row[47]
                self.feat_hitByPitchPitching_dict[row[0]][season][row[1]] += row[48]
                self.feat_intentionalWalksPitching_dict[row[0]][season][row[1]] += row[49]
                self.feat_balks_dict[row[0]][season][row[1]] += row[50]
                self.feat_wildPitches_dict[row[0]][season][row[1]] += row[51]
                self.feat_atBatsPitching_dict[row[0]][season][row[1]] += row[52]
                self.feat_battersFaced_dict[row[0]][season][row[1]] += row[53]
                self.feat_sacBuntsPitching_dict[row[0]][season][row[1]] += row[54]
                self.feat_sacFliesPitching_dict[row[0]][season][row[1]] += row[55]
                self.feat_inheritedRunners_dict[row[0]][season][row[1]] += row[56]
                self.feat_inheritedRunnersScored_dict[row[0]][season][row[1]] += row[57]
                self.feat_rbiPitching_dict[row[0]][season][row[1]] += row[58]
                self.feat_flyOutsPitching_dict[row[0]][season][row[1]] += row[59]
                self.feat_airOutsPitching_dict[row[0]][season][row[1]] += row[60]
                self.feat_doublesPitching_dict[row[0]][season][row[1]] += row[61]
                self.feat_triplesPitching_dict[row[0]][season][row[1]] += row[62]
                self.feat_caughtStealingPitching_dict[row[0]][season][row[1]] += row[63]
                self.feat_stolenBasesPitching_dict[row[0]][season][row[1]] += row[64]
                self.feat_outsPitching_dict[row[0]][season][row[1]] += row[65]
                self.feat_balls_dict[row[0]][season][row[1]] += row[66]
                self.feat_strikes_dict[row[0]][season][row[1]] += row[67]
                self.feat_hitBatsmen_dict[row[0]][season][row[1]] += row[68]
                self.feat_pickoffsPitching_dict[row[0]][season][row[1]] += row[69]
                self.feat_catchersInterferencePitching_dict[row[0]][season][row[1]] += row[70]
                self.feat_assists_dict[row[0]][season][row[1]] += row[71]
                self.feat_putOuts_dict[row[0]][season][row[1]] += row[72]
                self.feat_errors_dict[row[0]][season][row[1]] += row[73]
                self.feat_chances_dict[row[0]][season][row[1]] += row[74]
                self.feat_gamesFinishedPitching_dict[row[0]][season][row[1]] += row[75]
                self.feat_groundOutsPitching_dict[row[0]][season][row[1]] += row[76]
                
                self.feat_home_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_home_cum_dict, row[2])
                self.feat_gamesPlayedBatting_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesPlayedBatting_cum_dict, row[5])
                self.feat_plateAppearances_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_plateAppearances_cum_dict, row[6])
                self.feat_atBats_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_atBats_cum_dict, row[7])
                self.feat_hits_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hits_cum_dict, row[8])
                self.feat_doubles_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_doubles_cum_dict, row[9])
                self.feat_triples_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_triples_cum_dict, row[10])
                self.feat_homeRuns_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_homeRuns_cum_dict, row[11])
                self.feat_rbi_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_rbi_cum_dict, row[12])
                self.feat_runsScored_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_runsScored_cum_dict, row[13])
                self.feat_totalBases_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_totalBases_cum_dict, row[14])
                self.feat_baseOnBalls_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_baseOnBalls_cum_dict, row[15])
                self.feat_hitByPitch_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitByPitch_cum_dict, row[16])
                self.feat_intentionalWalks_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_intentionalWalks_cum_dict, row[17])
                self.feat_groundIntoDoublePlay_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundIntoDoublePlay_cum_dict, row[18])
                self.feat_groundIntoTriplePlay_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundIntoTriplePlay_cum_dict, row[19])
                self.feat_strikeOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikeOuts_cum_dict, row[20])
                self.feat_sacBunts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacBunts_cum_dict, row[21])
                self.feat_sacFlies_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacFlies_cum_dict, row[22])
                self.feat_caughtStealing_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_caughtStealing_cum_dict, row[23])
                self.feat_stolenBases_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_stolenBases_cum_dict, row[24])
                self.feat_leftOnBase_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_leftOnBase_cum_dict, row[25])
                self.feat_catchersInterference_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_catchersInterference_cum_dict, row[26])
                self.feat_pickoffs_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_pickoffs_cum_dict, row[27])
                self.feat_flyOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_flyOuts_cum_dict, row[28])
                self.feat_groundOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundOuts_cum_dict, row[29])

                self.feat_gamesPlayedPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesPlayedPitching_cum_dict, row[30])
                self.feat_gamesStartedPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesStartedPitching_cum_dict, row[31])
                self.feat_winsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_winsPitching_cum_dict, row[32])
                self.feat_lossesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_lossesPitching_cum_dict, row[33])
                self.feat_completeGamesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_completeGamesPitching_cum_dict, row[34])
                self.feat_shutoutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_shutoutsPitching_cum_dict, row[35])
                self.feat_saves_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_saves_cum_dict, row[36])
                self.feat_saveOpportunities_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_saveOpportunities_cum_dict, row[37])
                self.feat_blownSaves_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_blownSaves_cum_dict, row[38])
                self.feat_holds_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_holds_cum_dict, row[39])
                self.feat_inningsPitched_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_inningsPitched_cum_dict, row[40])
                self.feat_runsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_runsPitching_cum_dict, row[41])
                self.feat_earnedRuns_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_earnedRuns_cum_dict, row[42])
                self.feat_pitchesThrown_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_pitchesThrown_cum_dict, row[43])
                self.feat_hitsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitsPitching_cum_dict, row[44])
                self.feat_homeRunsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_homeRunsPitching_cum_dict, row[45])
                self.feat_strikeOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikeOutsPitching_cum_dict, row[46])
                self.feat_baseOnBallsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikeOutsPitching_cum_dict, row[47])
                self.feat_hitByPitchPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitByPitchPitching_cum_dict, row[48])
                self.feat_intentionalWalksPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_intentionalWalksPitching_cum_dict, row[49])
                self.feat_balks_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_balks_cum_dict, row[50])
                self.feat_wildPitches_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_wildPitches_cum_dict, row[51])
                self.feat_atBatsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_atBatsPitching_cum_dict, row[52])
                self.feat_battersFaced_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_battersFaced_cum_dict, row[53])
                self.feat_sacBuntsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacBuntsPitching_cum_dict, row[54])
                self.feat_sacFliesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacFliesPitching_cum_dict, row[55])
                self.feat_inheritedRunners_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_inheritedRunners_cum_dict, row[56])
                self.feat_inheritedRunnersScored_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_inheritedRunnersScored_cum_dict, row[57])
                self.feat_rbiPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_rbiPitching_cum_dict, row[58])
                self.feat_flyOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_flyOutsPitching_cum_dict, row[59])
                self.feat_airOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_airOutsPitching_cum_dict, row[60])
                self.feat_doublesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_doublesPitching_cum_dict, row[61])
                self.feat_triplesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_triplesPitching_cum_dict, row[62])
                self.feat_caughtStealingPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_caughtStealingPitching_cum_dict, row[63])
                self.feat_stolenBasesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_stolenBasesPitching_cum_dict, row[64])
                self.feat_outsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_outsPitching_cum_dict, row[65])
                self.feat_balls_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_balls_cum_dict, row[66])
                self.feat_strikes_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikes_cum_dict, row[67])
                self.feat_hitBatsmen_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitBatsmen_cum_dict, row[68])
                self.feat_pickoffsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_pickoffsPitching_cum_dict, row[69])
                self.feat_catchersInterferencePitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_catchersInterferencePitching_cum_dict, row[70])
                self.feat_assists_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_assists_cum_dict, row[71])
                self.feat_putOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_putOuts_cum_dict, row[72])
                self.feat_errors_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_errors_cum_dict, row[73])
                self.feat_chances_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_chances_cum_dict, row[74])
                self.feat_gamesFinishedPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesFinishedPitching_cum_dict, row[75])
                self.feat_groundOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundOutsPitching_cum_dict, row[76])

        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame
    ):
        date = np.zeros(len(input_df), dtype=np.int32)
        playerid = np.zeros(len(input_df), dtype=np.int32)
        home = np.zeros(len(input_df), dtype=np.float32)
        home_cum = np.zeros(len(input_df), dtype=np.float32)
        positionCode = np.zeros(len(input_df), dtype=np.float32)
        battingOrder = np.zeros(len(input_df), dtype=np.float32)
        # batting
        gamesPlayedBatting = np.zeros(len(input_df), dtype=np.float32)
        gamesPlayedBatting_cum = np.zeros(len(input_df), dtype=np.float32)
        plateAppearances = np.zeros(len(input_df), dtype=np.float32)
        plateAppearances_cum = np.zeros(len(input_df), dtype=np.float32)
        atBats = np.zeros(len(input_df), dtype=np.float32)
        atBats_cum = np.zeros(len(input_df), dtype=np.float32)
        hits = np.zeros(len(input_df), dtype=np.float32)
        hits_cum = np.zeros(len(input_df), dtype=np.float32)
        doubles = np.zeros(len(input_df), dtype=np.float32)
        doubles_cum = np.zeros(len(input_df), dtype=np.float32)
        triples = np.zeros(len(input_df), dtype=np.float32)
        triples_cum = np.zeros(len(input_df), dtype=np.float32)
        homeRuns = np.zeros(len(input_df), dtype=np.float32)
        homeRuns_cum = np.zeros(len(input_df), dtype=np.float32)
        rbi = np.zeros(len(input_df), dtype=np.float32)
        rbi_cum = np.zeros(len(input_df), dtype=np.float32)
        runsScored = np.zeros(len(input_df), dtype=np.float32)
        runsScored_cum = np.zeros(len(input_df), dtype=np.float32)
        totalBases = np.zeros(len(input_df), dtype=np.float32)
        totalBases_cum = np.zeros(len(input_df), dtype=np.float32)
        baseOnBalls = np.zeros(len(input_df), dtype=np.float32)
        baseOnBalls_cum = np.zeros(len(input_df), dtype=np.float32)
        hitByPitch = np.zeros(len(input_df), dtype=np.float32)
        hitByPitch_cum = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalks = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalks_cum = np.zeros(len(input_df), dtype=np.float32)
        groundIntoDoublePlay = np.zeros(len(input_df), dtype=np.float32)
        groundIntoDoublePlay_cum = np.zeros(len(input_df), dtype=np.float32)
        groundIntoTriplePlay = np.zeros(len(input_df), dtype=np.float32)
        groundIntoTriplePlay_cum = np.zeros(len(input_df), dtype=np.float32)
        strikeOuts = np.zeros(len(input_df), dtype=np.float32)
        strikeOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        sacBunts = np.zeros(len(input_df), dtype=np.float32)
        sacBunts_cum = np.zeros(len(input_df), dtype=np.float32)
        sacFlies = np.zeros(len(input_df), dtype=np.float32)
        sacFlies_cum = np.zeros(len(input_df), dtype=np.float32)
        caughtStealing = np.zeros(len(input_df), dtype=np.float32)
        caughtStealing_cum = np.zeros(len(input_df), dtype=np.float32)
        stolenBases = np.zeros(len(input_df), dtype=np.float32)
        stolenBases_cum = np.zeros(len(input_df), dtype=np.float32)
        leftOnBase = np.zeros(len(input_df), dtype=np.float32)
        leftOnBase_cum = np.zeros(len(input_df), dtype=np.float32)
        catchersInterference = np.zeros(len(input_df), dtype=np.float32)
        catchersInterference_cum = np.zeros(len(input_df), dtype=np.float32)
        pickoffs = np.zeros(len(input_df), dtype=np.float32)
        pickoffs_cum = np.zeros(len(input_df), dtype=np.float32)
        flyOuts = np.zeros(len(input_df), dtype=np.float32)
        flyOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        groundOuts = np.zeros(len(input_df), dtype=np.float32)
        groundOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        # pitching
        gamesPlayedPitching = np.zeros(len(input_df), dtype=np.float32)
        gamesPlayedPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        gamesStartedPitching = np.zeros(len(input_df), dtype=np.float32)
        gamesStartedPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        winsPitching = np.zeros(len(input_df), dtype=np.float32)
        winsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        lossesPitching = np.zeros(len(input_df), dtype=np.float32)
        lossesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        completeGamesPitching = np.zeros(len(input_df), dtype=np.float32)
        completeGamesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        shutoutsPitching = np.zeros(len(input_df), dtype=np.float32)
        shutoutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        saves = np.zeros(len(input_df), dtype=np.float32)
        saves_cum = np.zeros(len(input_df), dtype=np.float32)
        saveOpportunities = np.zeros(len(input_df), dtype=np.float32)
        saveOpportunities_cum = np.zeros(len(input_df), dtype=np.float32)
        blownSaves = np.zeros(len(input_df), dtype=np.float32)
        blownSaves_cum = np.zeros(len(input_df), dtype=np.float32)
        holds = np.zeros(len(input_df), dtype=np.float32)
        holds_cum = np.zeros(len(input_df), dtype=np.float32)
        inningsPitched = np.zeros(len(input_df), dtype=np.float32)
        inningsPitched_cum = np.zeros(len(input_df), dtype=np.float32)
        runsPitching = np.zeros(len(input_df), dtype=np.float32)
        runsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        earnedRuns = np.zeros(len(input_df), dtype=np.float32)
        earnedRuns_cum = np.zeros(len(input_df), dtype=np.float32)
        pitchesThrown = np.zeros(len(input_df), dtype=np.float32)
        pitchesThrown_cum = np.zeros(len(input_df), dtype=np.float32)
        hitsPitching = np.zeros(len(input_df), dtype=np.float32)
        hitsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        homeRunsPitching = np.zeros(len(input_df), dtype=np.float32)
        homeRunsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        strikeOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        strikeOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        baseOnBallsPitching = np.zeros(len(input_df), dtype=np.float32)
        baseOnBallsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        hitByPitchPitching = np.zeros(len(input_df), dtype=np.float32)
        hitByPitchPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalksPitching = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalksPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        balks = np.zeros(len(input_df), dtype=np.float32)
        balks_cum = np.zeros(len(input_df), dtype=np.float32)
        wildPitches = np.zeros(len(input_df), dtype=np.float32)
        wildPitches_cum = np.zeros(len(input_df), dtype=np.float32)
        atBatsPitching = np.zeros(len(input_df), dtype=np.float32)
        atBatsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        battersFaced = np.zeros(len(input_df), dtype=np.float32)
        battersFaced_cum = np.zeros(len(input_df), dtype=np.float32)
        sacBuntsPitching = np.zeros(len(input_df), dtype=np.float32)
        sacBuntsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        sacFliesPitching = np.zeros(len(input_df), dtype=np.float32)
        sacFliesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunners = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunners_cum = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunnersScored = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunnersScored_cum = np.zeros(len(input_df), dtype=np.float32)
        rbiPitching = np.zeros(len(input_df), dtype=np.float32)
        rbiPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        flyOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        flyOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        airOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        airOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        doublesPitching = np.zeros(len(input_df), dtype=np.float32)
        doublesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        triplesPitching = np.zeros(len(input_df), dtype=np.float32)
        triplesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        caughtStealingPitching = np.zeros(len(input_df), dtype=np.float32)
        caughtStealingPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        stolenBasesPitching = np.zeros(len(input_df), dtype=np.float32)
        stolenBasesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        outsPitching = np.zeros(len(input_df), dtype=np.float32)
        outsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        balls = np.zeros(len(input_df), dtype=np.float32)
        balls_cum = np.zeros(len(input_df), dtype=np.float32)
        strikes = np.zeros(len(input_df), dtype=np.float32)
        strikes_cum = np.zeros(len(input_df), dtype=np.float32)
        hitBatsmen = np.zeros(len(input_df), dtype=np.float32)
        hitBatsmen_cum = np.zeros(len(input_df), dtype=np.float32)
        pickoffsPitching = np.zeros(len(input_df), dtype=np.float32)
        pickoffsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        catchersInterferencePitching = np.zeros(len(input_df), dtype=np.float32)
        catchersInterferencePitching_cum = np.zeros(len(input_df), dtype=np.float32)
        assists = np.zeros(len(input_df), dtype=np.float32)
        assists_cum = np.zeros(len(input_df), dtype=np.float32)
        putOuts = np.zeros(len(input_df), dtype=np.float32)
        putOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        errors = np.zeros(len(input_df), dtype=np.float32)
        errors_cum = np.zeros(len(input_df), dtype=np.float32)
        chances = np.zeros(len(input_df), dtype=np.float32)
        chances_cum = np.zeros(len(input_df), dtype=np.float32)
        gamesFinishedPitching = np.zeros(len(input_df), dtype=np.float32)
        gamesFinishedPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        groundOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        groundOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)

        for i, row in enumerate(tqdm(input_df[["playerId", "date"]].values)):
            season = int(str(row[1])[:4])
            playerid[i] = row[0]
            date[i] = row[1]
            home[i] = self.extract_score(row[0], row[1], season, self.feat_home_dict)
            positionCode[i] = self.extract_score(row[0], row[1], season, self.feat_positionCode_dict)
            battingOrder[i] = self.extract_score(row[0], row[1], season, self.feat_battingOrder_dict)
            gamesPlayedBatting[i] = self.extract_score(row[0], row[1], season, self.feat_gamesPlayedBatting_dict)
            plateAppearances[i] = self.extract_score(row[0], row[1], season, self.feat_plateAppearances_dict)
            atBats[i] = self.extract_score(row[0], row[1], season, self.feat_atBats_dict)
            hits[i] = self.extract_score(row[0], row[1], season, self.feat_hits_dict)
            doubles[i] = self.extract_score(row[0], row[1], season, self.feat_doubles_dict)
            triples[i] = self.extract_score(row[0], row[1], season, self.feat_triples_dict)
            homeRuns[i] = self.extract_score(row[0], row[1], season, self.feat_homeRuns_dict)
            rbi[i] = self.extract_score(row[0], row[1], season, self.feat_rbi_dict)
            runsScored[i] = self.extract_score(row[0], row[1], season, self.feat_runsScored_dict)
            totalBases[i] = self.extract_score(row[0], row[1], season, self.feat_totalBases_dict)
            baseOnBalls[i] = self.extract_score(row[0], row[1], season, self.feat_baseOnBalls_dict)
            hitByPitch[i] = self.extract_score(row[0], row[1], season, self.feat_hitByPitch_dict)
            intentionalWalks[i] = self.extract_score(row[0], row[1], season, self.feat_intentionalWalks_dict)
            groundIntoDoublePlay[i] = self.extract_score(row[0], row[1], season, self.feat_groundIntoDoublePlay_dict)
            groundIntoTriplePlay[i] = self.extract_score(row[0], row[1], season, self.feat_groundIntoTriplePlay_dict)
            strikeOuts[i] = self.extract_score(row[0], row[1], season, self.feat_strikeOuts_dict)
            sacBunts[i] = self.extract_score(row[0], row[1], season, self.feat_sacBunts_dict)
            sacFlies[i] = self.extract_score(row[0], row[1], season, self.feat_sacFlies_dict)
            caughtStealing[i] = self.extract_score(row[0], row[1], season, self.feat_caughtStealing_dict)
            stolenBases[i] = self.extract_score(row[0], row[1], season, self.feat_stolenBases_dict)
            leftOnBase[i] = self.extract_score(row[0], row[1], season, self.feat_leftOnBase_dict)
            catchersInterference[i] = self.extract_score(row[0], row[1], season, self.feat_catchersInterference_dict)
            pickoffs[i] = self.extract_score(row[0], row[1], season, self.feat_pickoffs_dict)
            flyOuts[i] = self.extract_score(row[0], row[1], season, self.feat_flyOuts_dict)
            groundOuts[i] = self.extract_score(row[0], row[1], season, self.feat_groundOuts_dict)

            gamesPlayedPitching[i] = self.extract_score(row[0], row[1], season, self.feat_gamesPlayedPitching_dict)
            gamesStartedPitching[i] = self.extract_score(row[0], row[1], season, self.feat_gamesStartedPitching_dict)
            winsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_winsPitching_dict)
            lossesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_lossesPitching_dict)
            completeGamesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_completeGamesPitching_dict)
            shutoutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_shutoutsPitching_dict)
            saves[i] = self.extract_score(row[0], row[1], season, self.feat_saves_dict)
            saveOpportunities[i] = self.extract_score(row[0], row[1], season, self.feat_saveOpportunities_dict)
            blownSaves[i] = self.extract_score(row[0], row[1], season, self.feat_blownSaves_dict)
            holds[i] = self.extract_score(row[0], row[1], season, self.feat_holds_dict)
            inningsPitched[i] = self.extract_score(row[0], row[1], season, self.feat_inningsPitched_dict)
            runsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_runsPitching_dict)
            earnedRuns[i] = self.extract_score(row[0], row[1], season, self.feat_earnedRuns_dict)
            pitchesThrown[i] = self.extract_score(row[0], row[1], season, self.feat_pitchesThrown_dict)
            hitsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_hitsPitching_dict)
            homeRunsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_homeRunsPitching_dict)
            strikeOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_strikeOutsPitching_dict)
            baseOnBallsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_baseOnBallsPitching_dict)
            hitByPitchPitching[i] = self.extract_score(row[0], row[1], season, self.feat_hitByPitchPitching_dict)
            intentionalWalksPitching[i] = self.extract_score(row[0], row[1], season, self.feat_intentionalWalksPitching_dict)
            balks[i] = self.extract_score(row[0], row[1], season, self.feat_balks_dict)
            wildPitches[i] = self.extract_score(row[0], row[1], season, self.feat_wildPitches_dict)
            atBatsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_atBatsPitching_dict)
            battersFaced[i] = self.extract_score(row[0], row[1], season, self.feat_battersFaced_dict)
            sacBuntsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_sacBuntsPitching_dict)
            sacFliesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_sacFliesPitching_dict)
            inheritedRunners[i] = self.extract_score(row[0], row[1], season, self.feat_inheritedRunners_dict)
            inheritedRunnersScored[i] = self.extract_score(row[0], row[1], season, self.feat_inheritedRunnersScored_dict)
            rbiPitching[i] = self.extract_score(row[0], row[1], season, self.feat_rbiPitching_dict)
            flyOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_flyOutsPitching_dict)
            airOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_airOutsPitching_dict)
            doublesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_doublesPitching_dict)
            triplesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_triplesPitching_dict)
            caughtStealingPitching[i] = self.extract_score(row[0], row[1], season, self.feat_caughtStealingPitching_dict)
            stolenBasesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_stolenBasesPitching_dict)
            outsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_outsPitching_dict)
            balls[i] = self.extract_score(row[0], row[1], season, self.feat_balls_dict)
            strikes[i] = self.extract_score(row[0], row[1], season, self.feat_strikes_dict)
            hitBatsmen[i] = self.extract_score(row[0], row[1], season, self.feat_hitBatsmen_dict)
            pickoffsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_pickoffsPitching_dict)
            catchersInterferencePitching[i] = self.extract_score(row[0], row[1], season, self.feat_catchersInterferencePitching_dict)
            assists[i] = self.extract_score(row[0], row[1], season, self.feat_assists_dict)
            putOuts[i] = self.extract_score(row[0], row[1], season, self.feat_putOuts_dict)
            errors[i] = self.extract_score(row[0], row[1], season, self.feat_errors_dict)
            chances[i] = self.extract_score(row[0], row[1], season, self.feat_chances_dict)
            gamesFinishedPitching[i] = self.extract_score(row[0], row[1], season, self.feat_gamesFinishedPitching_dict)
            groundOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_groundOutsPitching_dict)

            home_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_home_cum_dict)
            gamesPlayedBatting_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesPlayedBatting_cum_dict)
            plateAppearances_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_plateAppearances_cum_dict)
            atBats_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_atBats_cum_dict)
            hits_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hits_cum_dict)
            doubles_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_doubles_cum_dict)
            triples_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_triples_cum_dict)
            homeRuns_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_homeRuns_cum_dict)
            rbi_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_rbi_cum_dict)
            runsScored_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_runsScored_cum_dict)
            totalBases_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_totalBases_cum_dict)
            baseOnBalls_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_baseOnBalls_cum_dict)
            hitByPitch_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitByPitch_cum_dict)
            intentionalWalks_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_intentionalWalks_cum_dict)
            groundIntoDoublePlay_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundIntoDoublePlay_cum_dict)
            groundIntoTriplePlay_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundIntoTriplePlay_cum_dict)
            strikeOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_strikeOuts_cum_dict)
            sacBunts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacBunts_cum_dict)
            sacFlies_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacFlies_cum_dict)
            caughtStealing_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_caughtStealing_cum_dict)
            stolenBases_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_stolenBases_cum_dict)
            leftOnBase_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_leftOnBase_cum_dict)
            catchersInterference_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_catchersInterference_cum_dict)
            pickoffs_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_pickoffs_cum_dict)
            flyOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_flyOuts_cum_dict)
            groundOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundOuts_cum_dict)

            gamesPlayedPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesPlayedPitching_cum_dict)
            gamesStartedPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesStartedPitching_cum_dict)
            winsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_winsPitching_cum_dict)
            lossesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_lossesPitching_cum_dict)
            completeGamesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_completeGamesPitching_cum_dict)
            shutoutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_shutoutsPitching_cum_dict)
            saves_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_saves_cum_dict)
            saveOpportunities_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_saveOpportunities_cum_dict)
            blownSaves_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_blownSaves_cum_dict)
            holds_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_holds_cum_dict)
            inningsPitched_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_inningsPitched_cum_dict)
            runsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_runsPitching_cum_dict)
            earnedRuns_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_earnedRuns_cum_dict)
            pitchesThrown_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_pitchesThrown_cum_dict)
            hitsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitsPitching_cum_dict)
            homeRunsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_homeRunsPitching_cum_dict)
            strikeOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_strikeOutsPitching_cum_dict)
            baseOnBallsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_baseOnBallsPitching_cum_dict)
            hitByPitchPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitByPitchPitching_cum_dict)
            intentionalWalksPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_intentionalWalksPitching_cum_dict)
            balks_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_balks_cum_dict)
            wildPitches_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_wildPitches_cum_dict)
            atBatsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_atBatsPitching_cum_dict)
            battersFaced_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_battersFaced_cum_dict)
            sacBuntsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacBuntsPitching_cum_dict)
            sacFliesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacFliesPitching_cum_dict)
            inheritedRunners_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_inheritedRunners_cum_dict)
            inheritedRunnersScored_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_inheritedRunnersScored_cum_dict)
            rbiPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_rbiPitching_cum_dict)
            flyOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_flyOutsPitching_cum_dict)
            airOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_airOutsPitching_cum_dict)
            doublesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_doublesPitching_cum_dict)
            triplesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_triplesPitching_cum_dict)
            caughtStealingPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_caughtStealingPitching_cum_dict)
            stolenBasesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_stolenBasesPitching_cum_dict)
            outsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_outsPitching_cum_dict)
            balls_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_balls_cum_dict)
            strikes_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_strikes_cum_dict)
            hitBatsmen_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitBatsmen_cum_dict)
            pickoffsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_pickoffsPitching_cum_dict)
            catchersInterferencePitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_catchersInterferencePitching_cum_dict)
            assists_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_assists_cum_dict)
            putOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_putOuts_cum_dict)
            errors_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_errors_cum_dict)
            chances_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_chances_cum_dict)
            gamesFinishedPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesFinishedPitching_cum_dict)
            groundOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundOutsPitching_cum_dict)

        output_df = pd.DataFrame({
            "date": date,
            "home": home, 
            "home_cum": home_cum,
            "positionCode": positionCode,
            "battingOrder": battingOrder,
            "gamesPlayedBatting": gamesPlayedBatting,
            "gamesPlayedBatting_cum": gamesPlayedBatting_cum,
            "plateAppearances": plateAppearances,
            "plateAppearances_cum": plateAppearances_cum,
            "atBats": atBats,
            "atBats_cum": atBats_cum,
            "hits": hits,
            "hits_cum": hits_cum,
            "doubles": doubles,
            "doubles_cum": doubles_cum,
            "triples": triples,
            "triples_cum": triples_cum,
            "homeRuns": homeRuns,
            "homeRuns_cum": homeRuns_cum,
            "rbi": rbi,
            "rbi_cum": rbi_cum,
            "runsScored": runsScored,
            "runsScored_cum": runsScored_cum,
            "totalBases": totalBases,
            "totalBases_cum": totalBases_cum,
            "baseOnBalls": baseOnBalls,
            "baseOnBalls_cum": baseOnBalls_cum,
            "hitByPitch": hitByPitch,
            "hitByPitch_cum": hitByPitch_cum,
            "intentionalWalks": intentionalWalks,
            "intentionalWalks_cum": intentionalWalks_cum,
            "groundIntoDoublePlay": groundIntoDoublePlay,
            "groundIntoDoublePlay_cum": groundIntoDoublePlay_cum,
            "groundIntoTriplePlay": groundIntoTriplePlay,
            "groundIntoTriplePlay_cum": groundIntoTriplePlay_cum,
            "strikeOuts": strikeOuts,
            "strikeOuts_cum": strikeOuts_cum,
            "sacBunts": sacBunts,
            "sacBunts_cum": sacBunts_cum,
            "sacFlies": sacFlies,
            "sacFlies_cum": sacFlies_cum,
            "caughtStealing": caughtStealing,
            "caughtStealing_cum": caughtStealing_cum,
            "stolenBases": stolenBases,
            "stolenBases_cum": stolenBases_cum,
            "leftOnBase": leftOnBase,
            "leftOnBase_cum": leftOnBase_cum,
            "catchersInterference": catchersInterference,
            "catchersInterference_cum": catchersInterference_cum,
            "pickoffs": pickoffs,
            "pickoffs_cum": pickoffs_cum,
            "flyOuts": flyOuts,
            "flyOuts_cum": flyOuts_cum,
            "groundOuts": groundOuts,
            "groundOuts_cum": groundOuts_cum,
            "gamesPlayedPitching": gamesPlayedPitching,
            "gamesPlayedPitching_cum": gamesPlayedPitching_cum,
            "gamesStartedPitching": gamesStartedPitching,
            "gamesStartedPitching_cum": gamesStartedPitching_cum,
            "winsPitching": winsPitching,
            "winsPitching_cum": winsPitching_cum,
            "lossesPitching": lossesPitching,
            "lossesPitching_cum": lossesPitching_cum,
            "completeGamesPitching": completeGamesPitching,
            "completeGamesPitching_cum": completeGamesPitching_cum,
            "shutoutsPitching": shutoutsPitching,
            "shutoutsPitching_cum": shutoutsPitching_cum,
            "saves": saves,
            "saves_cum": saves_cum,
            "saveOpportunities": saveOpportunities,
            "saveOpportunities_cum": saveOpportunities_cum,
            "blownSaves": blownSaves,
            "blownSaves_cum": blownSaves_cum,
            "holds": holds,
            "holds_cum": holds_cum,
            "inningsPitched": inningsPitched,
            "inningsPitched_cum": inningsPitched_cum,
            "runsPitching": runsPitching,
            "runsPitching_cum": runsPitching_cum,
            "earnedRuns": earnedRuns,
            "earnedRuns_cum": earnedRuns_cum,
            "pitchesThrown": pitchesThrown,
            "pitchesThrown_cum": pitchesThrown_cum,
            "hitsPitching": hitsPitching,
            "hitsPitching_cum": hitsPitching_cum,
            "homeRunsPitching": homeRunsPitching,
            "homeRunsPitching_cum": homeRunsPitching_cum,
            "strikeOutsPitching": strikeOutsPitching,
            "strikeOutsPitching_cum": strikeOutsPitching_cum,
            "baseOnBallsPitching": baseOnBallsPitching,
            "baseOnBallsPitching_cum": baseOnBallsPitching_cum,
            "hitByPitchPitching": hitByPitchPitching,
            "hitByPitchPitching_cum": hitByPitchPitching_cum,
            "intentionalWalksPitching": intentionalWalksPitching,
            "intentionalWalksPitching_cum": intentionalWalksPitching_cum,
            "balks": balks,
            "balks_cum": balks_cum,
            "wildPitches": wildPitches,
            "wildPitches_cum": wildPitches_cum,
            "atBatsPitching": atBatsPitching,
            "atBatsPitching_cum": atBatsPitching_cum,
            "battersFaced": battersFaced,
            "battersFaced_cum": battersFaced_cum,
            "sacBuntsPitching": sacBuntsPitching,
            "sacBuntsPitching_cum": sacBuntsPitching_cum,
            "sacFliesPitching": sacFliesPitching,
            "sacFliesPitching_cum": sacFliesPitching_cum,
            "inheritedRunners": inheritedRunners,
            "inheritedRunners_cum": inheritedRunners_cum,
            "inheritedRunnersScored": inheritedRunnersScored,
            "inheritedRunnersScored_cum": inheritedRunnersScored_cum,
            "rbiPitching": rbiPitching,
            "rbiPitching_cum": rbiPitching_cum,
            "flyOutsPitching": flyOutsPitching,
            "flyOutsPitching_cum": flyOutsPitching_cum,
            "airOutsPitching": airOutsPitching,
            "airOutsPitching_cum": airOutsPitching_cum,
            "doublesPitching": doublesPitching,
            "doublesPitching_cum": doublesPitching_cum,
            "triplesPitching": triplesPitching,
            "triplesPitching_cum": triplesPitching_cum,
            "caughtStealingPitching": caughtStealingPitching,
            "caughtStealingPitching_cum": caughtStealingPitching_cum,
            "stolenBasesPitching": stolenBasesPitching,
            "stolenBasesPitching_cum": stolenBasesPitching_cum,
            "outsPitching": outsPitching,
            "outsPitching_cum": outsPitching_cum,
            "balls": balls,
            "balls_cum": balls_cum,
            "strikes": strikes,
            "strikes_cum": strikes_cum,
            "hitBatsmen": hitBatsmen,
            "hitBatsmen_cum": hitBatsmen_cum,
            "pickoffsPitching": pickoffsPitching,
            "pickoffsPitching_cum": pickoffsPitching_cum,
            "catchersInterferencePitching": catchersInterferencePitching,
            "catchersInterferencePitching_cum": catchersInterferencePitching_cum,
            "assists": assists,
            "assists_cum": assists_cum,
            "putOuts": putOuts,
            "putOuts_cum": putOuts_cum,
            "errors": errors,
            "errors_cum": errors_cum,
            "chances": chances,
            "chances_cum": chances_cum,
            "gamesFinishedPitching": gamesFinishedPitching,
            "gamesFinishedPitching_cum": gamesFinishedPitching_cum,
            "groundOutsPitching": groundOutsPitching,
            "groundOutsPitching_cum": groundOutsPitching_cum,
        })

        # 打撃指標
        output_df["battingAverage"] = output_df["hits_cum"] / output_df["atBats_cum"] # 打率
        output_df["sluggingPercentage"] = output_df["totalBases_cum"] / output_df["atBats_cum"] # 長打率
        output_df["onBasePercentage"] = (output_df["hits_cum"] + output_df["baseOnBalls_cum"] + output_df["hitByPitch_cum"])/(output_df["atBats_cum"] + output_df["baseOnBalls_cum"] + output_df["hitByPitch_cum"] + output_df["sacFlies_cum"]) # 出塁率
        output_df["ops"] = output_df["sluggingPercentage"] + output_df["onBasePercentage"] # OPS
        output_df["isop"] = output_df["sluggingPercentage"] - output_df["battingAverage"] # IsoP
        output_df["isod"] = output_df["onBasePercentage"] - output_df["battingAverage"] # IsoD
        output_df["rc"] = (output_df["hits_cum"] + output_df["baseOnBalls_cum"]) * output_df["totalBases_cum"] / (output_df["atBats_cum"] + output_df["baseOnBalls_cum"]) # RC
        output_df["rc27"] = output_df["rc"] / (output_df["atBats_cum"] - output_df["hits_cum"] + output_df["caughtStealing_cum"] + output_df["sacBunts_cum"] + output_df["sacFlies_cum"] + output_df["groundIntoDoublePlay_cum"]) * 27 # RC27
        output_df["rc27"] = output_df["rc27"].replace({np.inf: 0})
        output_df["babip"] = (output_df["hits_cum"] - output_df["homeRuns_cum"]) / (output_df["atBats_cum"] - output_df["strikeOuts_cum"] - output_df["homeRuns_cum"] + output_df["sacFlies_cum"]) # BABIP
        output_df["bb_k"] = output_df["baseOnBalls_cum"] / output_df["strikeOuts_cum"] # BB/K
        output_df["k%"] = output_df["strikeOuts_cum"] / output_df["plateAppearances_cum"] # K%

        # 投手指標
        output_df["qs"] = ((output_df["inningsPitched"] >= 6) & (output_df["runsPitching"] <= 3)) * output_df["gamesStartedPitching"]
        output_df["hqs"] = ((output_df["inningsPitched"] >= 7) & (output_df["runsPitching"] <= 2)) * output_df["gamesStartedPitching"]
        output_df["era"] = output_df["earnedRuns_cum"] * 9 / output_df["inningsPitched_cum"] # 防御率
        output_df["wp"] = output_df["winsPitching_cum"] / (output_df["winsPitching_cum"] + output_df["lossesPitching_cum"]) # 勝率
        output_df["k9"] = output_df["strikeOutsPitching_cum"] * 9 / output_df["inningsPitched_cum"] # K/9(奪三振率)
        output_df["hits_allowed_average"] = output_df["hitsPitching_cum"] / output_df["atBatsPitching_cum"] # 被打率
        output_df["hp"] = output_df["holds_cum"] + output_df["winsPitching_cum"] # HP
        # output_df["qs%"] = output_df["qs_cum"] / output_df["gamesStartedPitching_cum"] # QS率
        # output_df["hqs%"] = output_df["hqs_cum"] / output_df["gamesStartedPitching_cum"] # HQS率
        output_df["k%_pitching"] = output_df["strikeOutsPitching_cum"] / output_df["battersFaced_cum"] # K%
        output_df["bb9"] = output_df["baseOnBallsPitching_cum"] * 9 / output_df["inningsPitched_cum"] # BB/9
        output_df["bb%"] = output_df["baseOnBallsPitching_cum"] / output_df["battersFaced_cum"] # BB%
        output_df["k_bb"] = output_df["strikeOutsPitching_cum"] / (output_df["baseOnBallsPitching_cum"] + output_df["hitByPitchPitching_cum"]) # K/BB
        output_df["hr9"] = output_df["homeRunsPitching_cum"] * 9 / output_df["inningsPitched_cum"] # HR/9
        output_df["babip_pitching"] = (output_df["hitsPitching_cum"] - output_df["homeRunsPitching_cum"]) / (output_df["atBatsPitching_cum"] - output_df["strikeOutsPitching_cum"] - output_df["homeRunsPitching_cum"] + output_df["sacFliesPitching_cum"]) # BABIP
        output_df["whip"] = (output_df["baseOnBallsPitching_cum"] + output_df["hitsPitching_cum"]) / output_df["inningsPitched_cum"] # WHIP

        # ランキング
        output_df["homeRuns_rank"] = output_df.groupby(["date"])["homeRuns_cum"].rank(ascending=False, method="min")
        output_df["hits_rank"] = output_df.groupby(["date"])["hits_cum"].rank(ascending=False, method="min")
        output_df["rbi_rank"] = output_df.groupby(["date"])["rbi_cum"].rank(ascending=False, method="min")
        output_df["doubles_rank"] = output_df.groupby(["date"])["doubles_cum"].rank(ascending=False, method="min")
        output_df["triples_rank"] = output_df.groupby(["date"])["triples_cum"].rank(ascending=False, method="min")
        output_df["runsScored_rank"] = output_df.groupby(["date"])["runsScored_cum"].rank(ascending=False, method="min")
        output_df["totalBases_rank"] = output_df.groupby(["date"])["totalBases_cum"].rank(ascending=False, method="min")
        output_df["baseOnBalls_rank"] = output_df.groupby(["date"])["baseOnBalls_cum"].rank(ascending=False, method="min")
        output_df["hitByPitch_rank"] = output_df.groupby(["date"])["hitByPitch_cum"].rank(ascending=False, method="min")
        output_df["strikeOuts_rank"] = output_df.groupby(["date"])["strikeOuts_cum"].rank(ascending=False, method="min")
        output_df["stolenBases_rank"] = output_df.groupby(["date"])["stolenBases_cum"].rank(ascending=False, method="min")
        output_df["plateAppearances_rank"] = output_df.groupby(["date"])["plateAppearances_cum"].rank(ascending=False, method="min")
        output_df["atBats_rank"] = output_df.groupby(["date"])["atBats_cum"].rank(ascending=False, method="min")

        output_df["battingAverage_rank"] = output_df[output_df["gamesPlayedBatting"] == 1.0].groupby(["date"])["battingAverage"].rank(ascending=False, method="min")
        output_df["ops_rank"] = output_df.groupby(["date"])["ops"].rank(ascending=False, method="min")
        output_df["isop_rank"] = output_df.groupby(["date"])["isop"].rank(ascending=False, method="min")
        output_df["isod_rank"] = output_df.groupby(["date"])["isod"].rank(ascending=False, method="min")
        output_df["rc_rank"] = output_df.groupby(["date"])["rc"].rank(ascending=False, method="min")
        output_df["rc27_rank"] = output_df.groupby(["date"])["rc27"].rank(ascending=False, method="min")
        output_df["babip_rank"] = output_df.groupby(["date"])["babip"].rank(ascending=False, method="min")
        output_df["bb_k_rank"] = output_df.groupby(["date"])["bb_k"].rank(ascending=False, method="min")
        output_df["k%_rank"] = output_df.groupby(["date"])["k%"].rank(ascending=False, method="min")

        output_df["winsPitching_rank"] = output_df.groupby(["date"])["winsPitching_cum"].rank(ascending=False, method="min")
        output_df["lossesPitching_rank"] = output_df.groupby(["date"])["lossesPitching_cum"].rank(ascending=False, method="min")
        output_df["saves_rank"] = output_df.groupby(["date"])["saves_cum"].rank(ascending=False, method="min")
        output_df["holds_rank"] = output_df.groupby(["date"])["holds_cum"].rank(ascending=False, method="min")
        output_df["completeGamesPitching_rank"] = output_df.groupby(["date"])["completeGamesPitching_cum"].rank(ascending=False, method="min")
        output_df["shutoutsPitching_rank"] = output_df.groupby(["date"])["shutoutsPitching_cum"].rank(ascending=False, method="min")
        output_df["inningsPitched_rank"] = output_df.groupby(["date"])["inningsPitched_cum"].rank(ascending=False, method="min")
        output_df["runsPitching_rank"] = output_df.groupby(["date"])["runsPitching_cum"].rank(ascending=False, method="min")
        output_df["earnedRuns_rank"] = output_df.groupby(["date"])["earnedRuns_cum"].rank(ascending=False, method="min")

        output_df["era_rank"] = output_df.groupby(["date"])["era"].rank(ascending=False, method="min")
        output_df["whip_rank"] = output_df.groupby(["date"])["whip"].rank(ascending=False, method="min")
        
        return output_df.drop(columns=["date"], axis=1).add_prefix("PlayerStats_")

    def extract_score(self, x, y, z, dict):
        if y in dict[x][z].keys():
            return dict[x][z][y]
        else:
            return np.nan

    def extract_cumsum_score(self, x, y, z, dict):
        previous_gamedates = [i for i in dict[x][z].keys() if i < y]
        if y in dict[x][z].keys():
            return dict[x][z][y]
        elif len(previous_gamedates) != 0:
            return dict[x][z][max(previous_gamedates)]
        elif len(previous_gamedates) == 0:
            return np.nan

    def add_cumsum2dict(self, x, y, z, dict, value):
        value = value if value == value else 0
        previous_gamedates = [i for i in dict[x][z].keys() if i < y]
        if len(previous_gamedates) != 0:
            return value + dict[x][z][max(previous_gamedates)]
        else:
            return value

In [6]:
def create_train_feature(
    input_df: pd.DataFrame,
    target_df: pd.DataFrame,
    rosters_df: pd.DataFrame,
    playerBoxScores_df: pd.DataFrame,
    update_blocks: list,
    non_update_blocks: list,
    only_fit: bool
    ) -> pd.DataFrame:
    feat_df = pd.DataFrame()
    blocks = update_blocks + non_update_blocks

    for block in blocks:
        with timer(name=f"{str(block) + '_fit'}", logger=logger):
            try:
                out_feat_block = block.fit(
                    input_df,
                    target_df,
                    rosters_df,
                    playerBoxScores_df,
                    only_fit=False
                    )
            except Exception as e:
                print(f"Error on {block} fit.")
                raise e from e

            assert len(out_feat_block) == len(input_df), block

        feat_df = pd.concat([feat_df, out_feat_block], axis=1)

        del out_feat_block

    return feat_df


def fit_train_feature(
    input_df: pd.DataFrame,
    target_df: pd.DataFrame,
    rosters_df: pd.DataFrame,
    playerBoxScores_df: pd.DataFrame,
    update_blocks: list,
    non_update_blocks: list,
    only_fit: bool
    ) -> pd.DataFrame:
    blocks = update_blocks + non_update_blocks

    for block in blocks:
        with timer(name=f"{str(block) + '_fit'}", logger=logger):
            try:
                block.fit(
                    input_df,
                    target_df,
                    rosters_df,
                    playerBoxScores_df,
                    only_fit
                    )
            except Exception as e:
                print(f"Error on {block} fit.")
                raise e from e


def create_test_feature(
    input_df: pd.DataFrame,
    target_df: pd.DataFrame,
    rosters_df: pd.DataFrame,
    playerBoxScores_df: pd.DataFrame,
    update_blocks: list,
    non_update_blocks: list
    ) -> pd.DataFrame:
    is_update = True if len(update_blocks) != 0 else False
    update_feat_df = pd.DataFrame()
    non_update_feat_df = pd.DataFrame()
    
    if is_update:
        for block in update_blocks:
            with timer(name=f"{str(block) + '_update'}", logger=logger):
                try:
                    out_feat_block = block.fit(
                        input_df,
                        target_df,
                        rosters_df,
                        playerBoxScores_df,
                        only_fit=False,
                        )
                except Exception as e:
                    print(f"Error on {block} transform.")
                    raise e from e

                assert len(out_feat_block) == len(input_df), block

            update_feat_df = pd.concat([update_feat_df, out_feat_block], axis=1)

            del out_feat_block
        

    for block in non_update_blocks:
        with timer(name=f"{str(block) + '_transform'}", logger=logger):
            try:
                out_feat_block = block.transform(
                    input_df,
                    target_df,
                    rosters_df,
                    playerBoxScores_df,
                )
            except Exception as e:
                print(f"Error on {block} transform.")
                raise e from e

            assert len(out_feat_block) == len(input_df), block

        non_update_feat_df = pd.concat([non_update_feat_df, out_feat_block], axis=1)

        del out_feat_block

    feat_df = pd.concat([update_feat_df, non_update_feat_df], axis=1) if is_update else non_update_feat_df

    del update_feat_df, non_update_feat_df

    return feat_df

## CV

In [7]:
def get_timeseries_holdout(train_df: pd.DataFrame, valid_start_date: int) -> List[tuple]:
    fold = []
    train_idx = np.array(train_df[train_df["date"] < valid_start_date].index)
    valid_idx = np.array(train_df[(train_df["date"] >= valid_start_date) & (train_df["date"] < (valid_start_date + 100))].index)
    valid_preds_idx = np.array(train_df[(train_df["date"] >= valid_start_date) & (train_df["date"] < (valid_start_date + 100)) & (train_df["playerForTestSetAndFuturePreds"] == True)].index)
    fold.append((train_idx, valid_idx, valid_preds_idx))

    return fold

## Model

In [8]:
class LightGBMTrainer:
    def __init__(self, params: dict, seeds: List[int]):
        self.params = params
        self.seeds = seeds
        self.models = []

    def fit(self, X_train: np.ndarray, y_train: np.ndarray, cv: List[tuple]):
        oof_all = []
        for i, seed in enumerate(self.seeds):
            self.params["seed"] = seed

            for train_idx, valid_idx, valid_preds_idx in cv:
                X_train_fold = X_train[train_idx]
                X_valid_fold = X_train[valid_preds_idx]

                y_train_fold = y_train[train_idx]
                y_valid_fold = y_train[valid_preds_idx]

                train_set = lgb.Dataset(X_train_fold, y_train_fold)
                valid_set = lgb.Dataset(X_valid_fold, y_valid_fold, reference=train_set)

                model = lgb.train(
                    params=self.params,
                    train_set=train_set,
                    valid_sets=[train_set, valid_set],
                    valid_names=["train", "valid"],
                    verbose_eval=100,
                )

                y_oof = model.predict(X_valid_fold, num_iteration=model.best_iteration)
                oof_all.append(y_oof)
                self.models.append(model)

        oof_all = np.mean(oof_all, axis=0)
        oof_all = np.clip(oof_all, 0, 100)
        oof_score = mean_absolute_error(oof_all, y_valid_fold)

        return oof_score, self.models

    def predict(self, X_test: np.ndarray):
        y_pred = np.mean([model.predict(X_test, num_iteration=model.best_iteration) for model in self.models], axis=0)

        return y_pred


def run_lgb(X_train, targets, seeds, fold):
    cv_scores = []
    models = []

    for i, target in enumerate(targets):
        print(f"Training for Target{i+1}")
        if i == 0:
            params = CFG.LGB_TARGET1_PARAMS
        elif i == 1:
            params = CFG.LGB_TARGET2_PARAMS
        elif i == 2:
            params = CFG.LGB_TARGET3_PARAMS
        elif i == 3:
            params = CFG.LGB_TARGET4_PARAMS
        trainer = LightGBMTrainer(params, seeds)
        oof_score_tmp, models_tmp = trainer.fit(X_train, target, fold)
        cv_scores.append(oof_score_tmp)
        models.append(models_tmp)
        print(f"Local Target{i+1} OOF-MAE : {np.mean(oof_score_tmp)}")
        print("-"*50)

    print(f"Local MCMAE : {np.mean(cv_scores)}")

    return models

## Training

In [9]:
# # set-up logger
# logger = get_logger()

# # create feature
# update_blocks = [
#     PlayerStatsBlock(columns=[
#         "playerId", 
#         "date", 
#         "home", 
#         "positionCode", 
#         "battingOrder", 
#         "gamesPlayedBatting", 
#         "plateAppearances",
#         "atBats",
#         "hits",
#         "doubles",
#         "triples",
#         "homeRuns",
#         "rbi",
#         "runsScored",
#         "totalBases",
#         "baseOnBalls",
#         "hitByPitch",
#         "intentionalWalks",
#         "groundIntoDoublePlay",
#         "groundIntoTriplePlay",
#         "strikeOuts",
#         "sacBunts",
#         "sacFlies",
#         "caughtStealing",
#         "stolenBases",
#         "leftOnBase",
#         "catchersInterference",
#         "pickoffs",
#         "flyOuts",
#         "groundOuts",
#         "gamesPlayedPitching",
#         "gamesStartedPitching",
#         "winsPitching",
#         "lossesPitching",
#         "completeGamesPitching",
#         "shutoutsPitching",
#         "saves",
#         "saveOpportunities",
#         "blownSaves",
#         "holds",
#         "inningsPitched",
#         "runsPitching",
#         "earnedRuns",
#         "pitchesThrown",
#         "hitsPitching",
#         "homeRunsPitching",
#         "strikeOutsPitching",
#         "baseOnBallsPitching",
#         "hitByPitchPitching",
#         "intentionalWalksPitching",
#         "balks",
#         "wildPitches",
#         "atBatsPitching",
#         "battersFaced",
#         "sacBuntsPitching",
#         "sacFliesPitching",
#         "inheritedRunners",
#         "inheritedRunnersScored",
#         "rbiPitching",
#         "flyOutsPitching",
#         "airOutsPitching",
#         "doublesPitching",
#         "triplesPitching",
#         "caughtStealingPitching",
#         "stolenBasesPitching",
#         "outsPitching",
#         "balls",
#         "strikes",
#         "hitBatsmen",
#         "pickoffsPitching",
#         "catchersInterferencePitching",
#         "assists",
#         "putOuts",
#         "errors",
#         "chances",
#         "gamesFinishedPitching",
#         "groundOutsPitching",
#     ]),
# ]
# non_update_blocks = [
#     TargetAggregateBlock(periods=[
#         # 20210201,
#         # 20210301,
#         20210401,
#     ]),
#     RostersLabelEncodingBlock(columns=[
#         "teamId",
#         "status"
#     ]),
#     PlayersLabelEncodingBlock(columns=[
#         "birthCountry",
#         # "birthCity",
#         # "birthStateProvince",
#         # "heightInches",
#         # "weight",
#         "primaryPositionCode",
#         # "playerAge"
#     ]),
# ]

# # create features
# input_df = mlb_train_ds.train_next[CFG.OBJECT_ID].copy()
# input_df = input_df.merge(mlb_train_ds.players[["playerId", "playerForTestSetAndFuturePreds"]], on="playerId", how="left")
# target_df = mlb_train_ds.train_next
# rosters_df = mlb_train_ds.train_rosters
# playerBoxScores_df = mlb_train_ds.train_scores

# # for inference
# fit_train_feature(
#     input_df,
#     target_df,
#     rosters_df,
#     playerBoxScores_df,
#     update_blocks,
#     non_update_blocks,
#     only_fit=True,
# )

In [10]:
# set-up logger
logger = get_logger()

# create feature
update_blocks = [
    PlayerStatsBlock(columns=[
        "playerId", 
        "date", 
        "home", 
        "positionCode", 
        "battingOrder", 
        "gamesPlayedBatting", 
        "plateAppearances",
        "atBats",
        "hits",
        "doubles",
        "triples",
        "homeRuns",
        "rbi",
        "runsScored",
        "totalBases",
        "baseOnBalls",
        "hitByPitch",
        "intentionalWalks",
        "groundIntoDoublePlay",
        "groundIntoTriplePlay",
        "strikeOuts",
        "sacBunts",
        "sacFlies",
        "caughtStealing",
        "stolenBases",
        "leftOnBase",
        "catchersInterference",
        "pickoffs",
        "flyOuts",
        "groundOuts",
        "gamesPlayedPitching",
        "gamesStartedPitching",
        "winsPitching",
        "lossesPitching",
        "completeGamesPitching",
        "shutoutsPitching",
        "saves",
        "saveOpportunities",
        "blownSaves",
        "holds",
        "inningsPitched",
        "runsPitching",
        "earnedRuns",
        "pitchesThrown",
        "hitsPitching",
        "homeRunsPitching",
        "strikeOutsPitching",
        "baseOnBallsPitching",
        "hitByPitchPitching",
        "intentionalWalksPitching",
        "balks",
        "wildPitches",
        "atBatsPitching",
        "battersFaced",
        "sacBuntsPitching",
        "sacFliesPitching",
        "inheritedRunners",
        "inheritedRunnersScored",
        "rbiPitching",
        "flyOutsPitching",
        "airOutsPitching",
        "doublesPitching",
        "triplesPitching",
        "caughtStealingPitching",
        "stolenBasesPitching",
        "outsPitching",
        "balls",
        "strikes",
        "hitBatsmen",
        "pickoffsPitching",
        "catchersInterferencePitching",
        "assists",
        "putOuts",
        "errors",
        "chances",
        "gamesFinishedPitching",
        "groundOutsPitching",
    ]),
]
non_update_blocks = [
    TargetAggregateBlock(periods=[
        # 20210201,
        # 20210301,
        20210401,
    ]),
    RostersLabelEncodingBlock(columns=[
        "teamId",
        "status"
    ]),
    PlayersLabelEncodingBlock(columns=[
        "birthCountry",
        # "birthCity",
        # "birthStateProvince",
        # "heightInches",
        # "weight",
        "primaryPositionCode",
        # "playerAge"
    ]),
]

# create features
input_df = mlb_train_ds.train_next[CFG.OBJECT_ID].copy()
input_df = input_df.merge(mlb_train_ds.players[["playerId", "playerForTestSetAndFuturePreds"]], on="playerId", how="left")
target_df = mlb_train_ds.train_next
rosters_df = mlb_train_ds.train_rosters
playerBoxScores_df = mlb_train_ds.train_scores

if DEBUG:
    input_df = input_df[input_df["date"] >= 20210101].reset_index(drop=True)
    target_df = target_df[target_df["date"] >= 20210101].reset_index(drop=True)
    rosters_df = rosters_df[rosters_df["date"] >= 20210101].reset_index(drop=True)
    playerBoxScores_df = playerBoxScores_df[playerBoxScores_df["date"] >= 20210101].reset_index(drop=True)

train_feat_df = create_train_feature(
    input_df,
    target_df,
    rosters_df,
    playerBoxScores_df,
    update_blocks,
    non_update_blocks,
    only_fit=False,
)

X_train = train_feat_df.values
targets = [target_df[col].values for col in CFG.TARGETS]

if DEBUG:
    targets = [target_df[col].values for col in CFG.TARGETS]

# split train/valid
fold = get_timeseries_holdout(input_df, valid_start_date=20210501)

# training
models = run_lgb(X_train, targets, CFG.SEEDS, fold)

[2021-07-29 02:21:57,165] [INFO] [logger set up]
[2021-07-29 02:21:57,837] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_fit> start]


  0%|          | 0/39714 [00:00<?, ?it/s]

  0%|          | 0/408078 [00:00<?, ?it/s]

[2021-07-29 02:24:31,800] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_fit> done in 153.96 s]
[2021-07-29 02:24:31,894] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_fit> start]


  x = np.asarray((x - loc)/scale, dtype=dtyp)


[2021-07-29 02:25:14,209] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_fit> done in 42.31 s]
[2021-07-29 02:25:14,307] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_fit> start]
[2021-07-29 02:25:14,485] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_fit> done in 0.18 s]
[2021-07-29 02:25:15,067] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_fit> start]
[2021-07-29 02:25:15,118] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_fit> done in 0.05 s]
Training for Target1




Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.617397	valid's l1: 1.13188
Early stopping, best iteration is:
[60]	train's l1: 0.617533	valid's l1: 1.13165
Local Target1 OOF-MAE : 1.1316521086914029
--------------------------------------------------
Training for Target2




Training until validation scores don't improve for 100 rounds
[100]	train's l1: 1.66717	valid's l1: 2.12572
[200]	train's l1: 1.64799	valid's l1: 2.11302
[300]	train's l1: 1.63766	valid's l1: 2.10498
[400]	train's l1: 1.63187	valid's l1: 2.10463
Early stopping, best iteration is:
[358]	train's l1: 1.63347	valid's l1: 2.10119
Local Target2 OOF-MAE : 2.1007766975635582
--------------------------------------------------
Training for Target3




Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.670249	valid's l1: 0.869504
[200]	train's l1: 0.67	valid's l1: 0.868907
[300]	train's l1: 0.669993	valid's l1: 0.86891
Early stopping, best iteration is:
[257]	train's l1: 0.669999	valid's l1: 0.868907
Local Target3 OOF-MAE : 0.8688997227866097
--------------------------------------------------
Training for Target4




Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.956147	valid's l1: 1.35483
Early stopping, best iteration is:
[12]	train's l1: 1.04765	valid's l1: 1.24074
Local Target4 OOF-MAE : 1.2407423322569975
--------------------------------------------------
Local MCMAE : 1.335517715324642


In [11]:
def visualize_feature_importance(models, feat_train_df) -> plotly.graph_objects.Figure:
    '''LightGBMのfeature importanceを可視化
    '''
    for target_i in range(len(models)):
        feature_importance_df = pd.DataFrame()
        for i, model in enumerate(models[target_i]):
            _df = pd.DataFrame()
            _df['feature_importance'] = model.feature_importance(importance_type="gain")
            _df['feature'] = feat_train_df.columns
            _df['model_no'] = i + 1
            feature_importance_df = pd.concat([feature_importance_df, _df], 
                                            axis=0, ignore_index=True)

        order = feature_importance_df.groupby('feature')\
            .mean()[['feature_importance']]\
            .sort_values('feature_importance', ascending=False).index[:50]
        
        fig = px.box(
            feature_importance_df.query("feature in @order"),
            x="feature_importance",
            y="feature",
            category_orders={"feature": order},
            width=1250,
            height=900,
            title=f"Target{target_i+1} Top 50 feature importance",
        )
        fig.update_yaxes(showgrid=True)
        fig.show()

In [12]:
visualize_feature_importance(models, train_feat_df)

In [13]:
# for target, model_tmps in zip(CFG.TARGETS, models):
#     for i, model_tmp in enumerate(model_tmps):
#         model_tmp.save_model(f"{CFG.MODEL_PATH}/lgb_{target}_{i}.txt")

## Inference

In [15]:
def update_dataset(input_df: pd.DataFrame, sample_prediction_df: pd.DataFrame):
    '''datasetの更新
    '''
    # columnがNaNでなければ分岐処理が実行される(pd.isnaとかでも代用可)
    # NaN == NaNはFalseになる
    ####################
    # rosters #
    ####################
    if (input_df["rosters"].iloc[0] == input_df["rosters"].iloc[0]) & (input_df.iloc[0]["date"] > 20210430):
        test_rosters_df = pd.read_json(input_df["rosters"].iloc[0])
    else:
        test_rosters_df = sample_prediction_df[["playerId"]].copy()
        for col in mlb_train_ds.train_rosters.columns:
            if col == "playerId": continue
            test_rosters_df[col] = np.nan

    test_rosters_df["date"] = input_df.iloc[0]["date"]
    ####################
    # playerBoxScores #
    ####################
    if (input_df["playerBoxScores"].iloc[0] == input_df["playerBoxScores"].iloc[0]) & (input_df.iloc[0]["date"] > 20210430):
        test_playerBoxScores_df = pd.read_json(input_df["playerBoxScores"].iloc[0])
        test_playerBoxScores_df["date"] = input_df.iloc[0]["date"]
    else:
        test_playerBoxScores_df = None
    # ####################
    # # teamBoxScores #
    # ####################
    # if input_df["teamBoxScores"].iloc[0] == input_df["teamBoxScores"].iloc[0]:
    #     test_teamBoxScores = pd.read_json(input_df["teamBoxScores"].iloc[0])
    # else:
    #     cols = mlb_train_ds.train_tbs.drop(columns="date", axis=1).columns.tolist()
    #     test_teamBoxScores = pd.DataFrame(columns=cols)
    #     test_teamBoxScores["teamId"] = mlb_train_ds.train_tbs.teamId.unique()

    # test_teamBoxScores["date"] = input_df.iloc[0]["date"]
    # if input_df.iloc[0]["date"] > 20210430:
    #     mlb_train_ds.train_tbs = reduce_mem_usage(pd.concat([mlb_train_ds.train_tbs, test_teamBoxScores], axis=0, ignore_index=True))
    # ####################
    # # games #
    # ####################
    # if input_df["games"].iloc[0] == input_df["games"].iloc[0]:
    #     test_games = pd.read_json(input_df["games"].iloc[0])
    # else:
    #     cols = mlb_train_ds.train_games.drop(columns="date", axis=1).columns.tolist()
    #     test_games = pd.DataFrame(columns=cols)

    # test_games["date"] = input_df.iloc[0]["date"]
    # if input_df.iloc[0]["date"] > 20210430:
    #     mlb_train_ds.train_games = reduce_mem_usage(pd.concat([mlb_train_ds.train_games, test_games], axis=0, ignore_index=True))
    # ####################
    # # staindings #
    # ####################
    # if input_df["standings"].iloc[0] == input_df["standings"].iloc[0]:
    #     test_standings = pd.read_json(input_df["standings"].iloc[0])
    # else:
    #     cols = mlb_train_ds.train_standings.drop(columns="date", axis=1).columns.tolist()
    #     test_standings = pd.DataFrame(columns=cols)
    
    # test_standings["date"] = input_df.iloc[0]["date"]
    # if input_df.iloc[0]["date"] > 20210430:
    #     mlb_train_ds.train_standings = reduce_mem_usage(pd.concat([mlb_train_ds.train_standings, test_standings], axis=0, ignore_index=True))
    # ####################
    # # awards #
    # ####################
    # if input_df["awards"].iloc[0] == input_df["awards"].iloc[0]:
    #     test_awards = pd.read_json(input_df["awards"].iloc[0])
    # else:
    #     cols = mlb_train_ds.train_awards.drop(columns="date", axis=1).columns.tolist()
    #     test_awards = pd.DataFrame(columns=cols)
    
    # test_awards["date"] = input_df.iloc[0]["date"]
    # if input_df.iloc[0]["date"] > 20210430:
    #     mlb_train_ds.train_awards = reduce_mem_usage(pd.concat([mlb_train_ds.train_awards, test_awards], axis=0, ignore_index=True))

    return test_rosters_df, test_playerBoxScores_df

In [None]:
# models = []
# for target in CFG.TARGETS:
#     tmp = [lgb.Booster(model_file=f"{CFG.MODEL_PATH}/lgb_{target}_{i}.txt") for i in range(len(CFG.SEEDS))]
#     models.append(tmp)

In [None]:
# env = mlb.make_env()
# iter_test = env.iter_test()

# for (test_df, sample_prediction_df) in iter_test:
#     sample_prediction_df = sample_prediction_df.reset_index(drop=True)
#     # create dataset
#     test_df = test_df.reset_index()
#     test_df = test_df.rename(columns={"index": "date"})
#     sample_prediction_df["date"] = test_df.iloc[0]["date"]
#     sample_prediction_df["playerId"] = sample_prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))
#     test_rosters_df, test_playerBoxScores_df = update_dataset(test_df, sample_prediction_df)
#     # create features
#     test_feat_df = create_test_feature(
#         sample_prediction_df,
#         target_df,
#         test_rosters_df,
#         test_playerBoxScores_df,
#         update_blocks, 
#         non_update_blocks)
#     X_test = test_feat_df.values
#     # prediction
#     for target, model in zip(CFG.TARGETS, models):
#         pred = np.mean([model_.predict(X_test, num_iteration=model_.best_iteration) for model_ in model], axis=0)
#         sample_prediction_df[target] = np.clip(pred, 0, 100)

#     sample_prediction_df = sample_prediction_df.drop(columns=["playerId", "date"], axis=1)

#     env.predict(sample_prediction_df)
#     del test_feat_df, test_df, sample_prediction_df

In [None]:
# Local inference check
# test_df = mlb_train_ds.example_test.query("date <= 20210426").copy()
# sample_prediction_df = mlb_train_ds.sample_submission.query("date <= 20210426").copy()
# sample_prediction_df["playerId"] = sample_prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))

In [16]:
class Environment:
    def __init__(self,
                 data_dir: str,
                 eval_start_day: int,
                 eval_end_day: Optional[int],
                 use_updated: bool,
                 multiple_days_per_iter: bool):
        warnings.warn('this is mock module for mlb')

        postfix = '_updated' if use_updated else ''
        
        # recommend to replace this with pickle, feather etc to speedup preparing data
        df_train = pd.read_csv(os.path.join(data_dir, f'train{postfix}.csv'))

        players = pd.read_csv(os.path.join(data_dir, 'players.csv'))

        self.players = players[players['playerForTestSetAndFuturePreds'] == True]['playerId'].astype(str)
        if eval_end_day is not None:
            self.df_train = df_train.set_index('date').loc[eval_start_day:eval_end_day]
        else:
            self.df_train = df_train.set_index('date').loc[eval_start_day:]
        self.date = self.df_train.index.values
        self.n_rows = len(self.df_train)
        self.multiple_days_per_iter = multiple_days_per_iter

        assert self.n_rows > 0, 'no data to emulate'

        self.prediction_df = pd.DataFrame()

    def predict(self, df: pd.DataFrame) -> None:
        # if you want to emulate public LB, store your prediction here and calculate MAE
        self.prediction_df = pd.concat([self.prediction_df, df], axis=0).reset_index(drop=True)

    def iter_test(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
        if self.multiple_days_per_iter:
            for i in range(self.n_rows // 2):
                date1 = self.date[2 * i]
                date2 = self.date[2 * i + 1]
                sample_sub1 = self._make_sample_sub(date1)
                sample_sub2 = self._make_sample_sub(date2)
                sample_sub = pd.concat([sample_sub1, sample_sub2]).reset_index(drop=True)
                df = self.df_train.loc[date1:date2]

                yield df, sample_sub.set_index('date')
        else:
            for i in range(self.n_rows):
                date = self.date[i]
                sample_sub = self._make_sample_sub(date)
                df = self.df_train.loc[date:date]

                yield df, sample_sub.set_index('date')

    def _make_sample_sub(self, date: int) -> pd.DataFrame:
        next_day = (pd.to_datetime(date, format='%Y%m%d') + pd.to_timedelta(1, 'd')).strftime('%Y%m%d')
        sample_sub = pd.DataFrame()
        sample_sub['date_playerId'] = next_day + '_' + self.players
        sample_sub['target1'] = 0
        sample_sub['target2'] = 0
        sample_sub['target3'] = 0
        sample_sub['target4'] = 0
        sample_sub['date'] = date
        return sample_sub


class MLBEmulator:
    def __init__(self,
                 data_dir: str = '../input/mlb-player-digital-engagement-forecasting',
                 eval_start_day: int = 20210401,
                 eval_end_day: Optional[int] = 20210430,
                 use_updated: bool = True,
                 multiple_days_per_iter: bool = False):
        self.data_dir = data_dir
        self.eval_start_day = eval_start_day
        self.eval_end_day = eval_end_day
        self.use_updated = use_updated
        self.multiple_days_per_iter = multiple_days_per_iter

    def make_env(self) -> Environment:
        return Environment(self.data_dir,
                           self.eval_start_day,
                           self.eval_end_day,
                           self.use_updated,
                           self.multiple_days_per_iter)


emulation_mode = True

if emulation_mode:
    mlb = MLBEmulator(eval_start_day=20210501, eval_end_day=20210531)
else:
    import mlb

env = mlb.make_env()
iter_test = env.iter_test()


for (test_df, sample_prediction_df) in iter_test:
    sample_prediction_df = sample_prediction_df.reset_index(drop=True)
    # # create dataset
    test_df = test_df.reset_index()
    test_df = test_df.rename(columns={"index": "date"})
    sample_prediction_df["date"] = test_df.iloc[0]["date"]
    sample_prediction_df["playerId"] = sample_prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))
    test_rosters_df, test_playerBoxScores_df = update_dataset(test_df, sample_prediction_df)
    # # create features
    test_feat_df = create_test_feature(
        sample_prediction_df,
        target_df,
        test_rosters_df,
        test_playerBoxScores_df,
        update_blocks, 
        non_update_blocks)
    X_test = test_feat_df.values
    # prediction
    for target, model in zip(CFG.TARGETS, models):
        pred = np.mean([model_.predict(X_test, num_iteration=model_.best_iteration) for model_ in model], axis=0)
        sample_prediction_df[target] = np.clip(pred, 0, 100)

    sample_prediction_df = sample_prediction_df.drop(columns=["playerId", "date"], axis=1)

    env.predict(sample_prediction_df)


this is mock module for mlb



[2021-07-29 02:27:55,669] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/463 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:27:56,860] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 1.19 s]
[2021-07-29 02:27:56,862] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:27:56,870] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:27:56,872] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:27:56,881] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:27:56,884] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:27:56,889] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:27:56,977] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/438 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:27:57,873] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.90 s]
[2021-07-29 02:27:57,874] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:27:57,882] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:27:57,884] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:27:57,891] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:27:57,894] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:27:57,898] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:27:58,293] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:27:59,038] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.74 s]
[2021-07-29 02:27:59,039] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:27:59,045] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:27:59,047] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:27:59,054] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:27:59,058] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:27:59,062] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:27:59,155] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/478 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:00,068] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.91 s]
[2021-07-29 02:28:00,069] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:00,077] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:00,079] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:00,088] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:00,090] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:00,095] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:00,191] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/470 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:01,110] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.92 s]
[2021-07-29 02:28:01,112] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:01,119] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:01,121] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:01,129] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:01,133] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:01,138] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:01,224] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/286 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:02,069] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.84 s]
[2021-07-29 02:28:02,071] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:02,079] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:02,081] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:02,089] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:02,092] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:02,097] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:02,191] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/422 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:03,087] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.90 s]
[2021-07-29 02:28:03,088] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:03,096] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:03,098] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:03,108] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:03,111] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:03,115] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:03,205] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/462 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:04,124] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.92 s]
[2021-07-29 02:28:04,125] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:04,134] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:04,136] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:04,145] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:04,148] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:04,152] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:04,243] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/376 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:05,121] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.88 s]
[2021-07-29 02:28:05,122] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:05,130] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:05,132] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:05,141] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:05,144] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:05,148] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:05,222] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/141 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:06,008] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.79 s]
[2021-07-29 02:28:06,009] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:06,017] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:06,020] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:06,028] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:06,031] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:06,036] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:06,123] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/447 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:07,058] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.93 s]
[2021-07-29 02:28:07,060] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:07,068] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:07,070] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:07,078] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:07,081] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:07,085] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:07,170] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/435 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:08,073] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.90 s]
[2021-07-29 02:28:08,074] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:08,081] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:08,083] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:08,090] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:08,094] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:08,098] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:08,185] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/346 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:09,057] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.87 s]
[2021-07-29 02:28:09,058] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:09,066] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:09,068] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:09,076] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:09,079] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:09,083] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:09,168] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/460 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:10,082] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.91 s]
[2021-07-29 02:28:10,084] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:10,092] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:10,094] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:10,102] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:10,105] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:10,110] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:10,194] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/449 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:11,109] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.91 s]
[2021-07-29 02:28:11,110] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:11,118] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:11,120] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:11,127] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:11,131] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:11,135] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:11,225] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/445 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:12,146] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.92 s]
[2021-07-29 02:28:12,148] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:12,156] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:12,159] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:12,168] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:12,171] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:12,175] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:12,257] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/257 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:13,100] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.84 s]
[2021-07-29 02:28:13,102] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:13,109] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:13,111] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:13,119] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:13,122] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:13,126] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:13,219] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/431 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:14,315] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 1.10 s]
[2021-07-29 02:28:14,317] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:14,325] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:14,327] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:14,336] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:14,339] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:14,344] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:14,428] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/430 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:15,387] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.96 s]
[2021-07-29 02:28:15,389] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:15,397] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:15,399] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:15,407] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:15,410] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:15,414] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:15,498] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/318 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:16,393] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.90 s]
[2021-07-29 02:28:16,395] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:16,402] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:16,404] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:16,412] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:16,415] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:16,420] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:16,515] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/451 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:17,446] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.93 s]
[2021-07-29 02:28:17,448] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:17,456] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:17,458] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:17,467] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:17,470] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:17,475] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:17,555] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/438 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:18,478] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.92 s]
[2021-07-29 02:28:18,479] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:18,487] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:18,489] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:18,497] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:18,500] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:18,504] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:18,588] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/447 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:19,520] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.93 s]
[2021-07-29 02:28:19,521] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:19,529] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:19,532] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:19,540] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:19,543] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:19,548] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:19,621] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/228 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:20,464] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.84 s]
[2021-07-29 02:28:20,465] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:20,473] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:20,475] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:20,483] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:20,486] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:20,491] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:20,579] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/414 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:21,485] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.91 s]
[2021-07-29 02:28:21,487] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:21,494] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:21,496] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:21,504] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:21,507] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:21,511] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:21,596] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/377 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:22,510] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.91 s]
[2021-07-29 02:28:22,511] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:22,519] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:22,521] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:22,530] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:22,533] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:22,538] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:22,623] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/416 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:23,547] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.92 s]
[2021-07-29 02:28:23,548] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:23,555] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:23,558] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:23,566] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:23,569] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:23,573] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:23,649] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/286 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:24,535] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.89 s]
[2021-07-29 02:28:24,537] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:24,544] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:24,546] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:24,555] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:24,559] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:24,563] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:24,659] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/484 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:25,631] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.97 s]
[2021-07-29 02:28:25,633] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:25,640] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:25,642] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:25,649] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:25,653] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:25,658] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.01 s]
[2021-07-29 02:28:25,740] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/386 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:26,678] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.94 s]
[2021-07-29 02:28:26,680] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:26,688] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:26,690] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:26,699] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:26,702] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:26,707] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.00 s]
[2021-07-29 02:28:26,795] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> start]


  0%|          | 0/405 [00:00<?, ?it/s]

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-07-29 02:28:27,717] [INFO] [<<__main__.PlayerStatsBlock object at 0x141a1b3a0>_update> done in 0.92 s]
[2021-07-29 02:28:27,719] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> start]
[2021-07-29 02:28:27,727] [INFO] [<<__main__.TargetAggregateBlock object at 0x141a1bbe0>_transform> done in 0.01 s]
[2021-07-29 02:28:27,729] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> start]
[2021-07-29 02:28:27,737] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x141a1b790>_transform> done in 0.01 s]
[2021-07-29 02:28:27,740] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> start]
[2021-07-29 02:28:27,746] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x141a1be20>_transform> done in 0.01 s]


In [17]:
prediction_df = env.prediction_df.copy()
prediction_df.columns = ["date_playerId", "target1_pred", "target2_pred", "target3_pred", "target4_pred"]
prediction_df["playerId"] = prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))
prediction_df["engagementMetricsDate"] = prediction_df["date_playerId"].map(lambda x: x.split("_")[0])
prediction_df["engagementMetricsDate"] = prediction_df["engagementMetricsDate"].map(lambda x: x[:4] + "-" + x[4:6] + "-" + x[6:])
prediction_df = prediction_df.merge(mlb_train_ds.train_next.drop(columns="date"), on=["playerId", "engagementMetricsDate"], how="left")
prediction_df

Unnamed: 0,date_playerId,target1_pred,target2_pred,target3_pred,target4_pred,playerId,engagementMetricsDate,target1,target2,target3,target4
0,20210502_593590,0.000002,0.062290,2.883843e-15,0.105623,593590,2021-05-02,0.000000,0.011553,0.000000,0.000000
1,20210502_661269,0.000099,0.277554,5.673574e-03,0.193293,661269,2021-05-02,0.000136,0.107825,0.001989,0.085262
2,20210502_669212,0.000002,0.231441,2.883843e-15,0.119204,669212,2021-05-02,0.000000,0.057763,0.000000,0.012180
3,20210502_666201,0.007443,0.268310,1.604534e-02,0.367268,666201,2021-05-02,0.107086,0.061614,0.025851,0.816078
4,20210502_680911,0.000346,0.301540,1.554175e-03,0.132942,680911,2021-05-02,0.000272,0.042360,0.000000,0.085262
...,...,...,...,...,...,...,...,...,...,...,...
36792,20210601_667674,0.000002,0.051142,2.883843e-15,0.187348,667674,2021-06-01,0.000000,0.074042,0.000000,0.065641
36793,20210601_672695,0.007901,0.047099,1.387167e-01,0.166795,672695,2021-06-01,0.000284,0.058617,0.119955,0.103151
36794,20210601_676103,0.000002,0.334118,5.880379e-03,0.527947,676103,2021-06-01,0.000000,0.055532,0.003272,0.853338
36795,20210601_676755,0.000002,0.133208,2.883843e-15,0.182330,676755,2021-06-01,0.000000,0.058617,0.000000,0.121905


In [18]:
score = []
for i in CFG.TARGETS:
    pred = prediction_df[f"{i}_pred"]
    true = prediction_df[i]
    mae = mean_absolute_error(pred, true)
    print(f"{i} mae : {mae}")
    score.append(mae)

print(f"MCMAE:{np.mean(score)}")

target1 mae : 1.1404860437551771
target2 mae : 2.302236116276499
target3 mae : 0.8757826346202646
target4 mae : 1.237248090953393
MCMAE:1.3889382214013333
