# Overview
- LightGBM

In [1]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import mean_absolute_error
from datetime import timedelta
from tqdm.notebook import tqdm
import lightgbm as lgb
from typing import List, Union, Optional
import time
from contextlib import contextmanager
import sys
import logging
import category_encoders as ce
import plotly
from plotly import express as px
from scipy.stats import norm
# from optuna.integration import lightgbm as lgb
# import mlb
import os
import warnings
from typing import Optional, Tuple
from tqdm.notebook import tqdm
from collections import defaultdict

pd.options.display.max_columns = 200

DEBUG = True

## Config

In [2]:
class CFG:
    ######################
    # global #
    ######################
    INPUT_DIR = "../input/mlb-player-digital-engagement-forecasting"
    # INPUT_DIR = "../input/mlb-unnested-dataset-v2" # for kaggle kernel
    OBJECT_ID = ["playerId", "date"]
    TARGETS = ["target1", "target2", "target3", "target4"]
    ######################
    # model #
    ######################
    # target1
    LGB_TARGET1_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 64,
        "lambda_l1": 0.17484677266606874,
        "lambda_l2": 0.6844652737530655,
        "bagging_fraction": 0.42972410695670027,
        "bagging_freq": 4,
        "feature_fraction": 0.9799999999999999,
        "min_data_in_leaf": 20,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    # target2
    LGB_TARGET2_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 64,
        "lambda_l1": 6.622946832036268e-08,
        "lambda_l2": 3.432487952682862,
        "bagging_fraction": 0.623104049167172,
        "bagging_freq": 4,
        "feature_fraction": 1.0,
        "min_data_in_leaf": 20,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    # target3
    LGB_TARGET3_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 57,
        "lambda_l1": 0.0011093571794053496,
        "lambda_l2": 2.1716397278227532e-07,
        "bagging_fraction": 1.0,
        "bagging_freq": 0,
        "feature_fraction": 1.0,
        "min_data_in_leaf": 100,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    # target4
    LGB_TARGET4_PARAMS = {
        "objective": "mae",
        "metric": "l1",
        "boosting_type": "gbdt",
        "learning_rate": 0.1,
        "max_depth": 6,
        "num_leaves": 60,
        "lambda_l1": 0.0,
        "lambda_l2": 0.0,
        "bagging_fraction": 0.40678529508780487,
        "bagging_freq": 5,
        "feature_fraction": 0.44800000000000006,
        "min_data_in_leaf": 20,
        "num_threads": 8,
        "verbosity": -1,
        "num_iterations": 10000,
        "early_stopping_round": 100,
    }
    SEEDS = [2434, 98]
    MODEL_PATH = "../output/nb026"
    # MODEL_PATH = "../input/mlb-nb026-lgb-weights" # for kaggle kernel

In [3]:
# class CFG:
#     ######################
#     # global #
#     ######################
#     INPUT_DIR = "../input/mlb-player-digital-engagement-forecasting"
#     # INPUT_DIR = "../input/mlb-unnested-dataset-v2" # for kaggle kernel
#     OBJECT_ID = ["playerId", "date"]
#     TARGETS = ["target1", "target2", "target3", "target4"]
#     ######################
#     # model #
#     ######################
#     # target1
#     LGB_TARGET1_PARAMS = {
#         "objective": "mae",
#         "metric": "l1",
#         "boosting_type": "gbdt",
#         "learning_rate": 0.1,
#         "max_depth": 6,
#         "num_leaves": 54,
#         "lambda_l1": 1.8294424356946235e-07,
#         "lambda_l2": 8.269494679852943e-05,
#         "bagging_fraction": 0.9311050425278897,
#         "bagging_freq": 2,
#         "feature_fraction": 0.9840000000000001,
#         "min_data_in_leaf": 20,
#         "num_threads": 8,
#         "verbosity": -1,
#         "num_iterations": 10000,
#         "early_stopping_round": 100,
#     }
#     # target2
#     LGB_TARGET2_PARAMS = {
#         "objective": "mae",
#         "metric": "l1",
#         "boosting_type": "gbdt",
#         "learning_rate": 0.1,
#         "max_depth": 6,
#         "num_leaves": 14,
#         "lambda_l1": 4.379035512071324e-06,
#         "lambda_l2": 5.940520594400555e-05,
#         "bagging_fraction": 1.0,
#         "bagging_freq": 0,
#         "feature_fraction": 0.5,
#         "min_data_in_leaf": 20,
#         "num_threads": 8,
#         "verbosity": -1,
#         "num_iterations": 10000,
#         "early_stopping_round": 100,
#     }
#     # target3
#     LGB_TARGET3_PARAMS = {
#         "objective": "mae",
#         "metric": "l1",
#         "boosting_type": "gbdt",
#         "learning_rate": 0.1,
#         "max_depth": 6,
#         "num_leaves": 64,
#         "lambda_l1": 0.011234793539671765,
#         "lambda_l2": 4.766835836661758e-06,
#         "bagging_fraction": 0.8841726515054378,
#         "bagging_freq": 3,
#         "feature_fraction": 1.0,
#         "min_data_in_leaf": 100,
#         "num_threads": 8,
#         "verbosity": -1,
#         "num_iterations": 10000,
#         "early_stopping_round": 100,
#     }
#     # target4
#     LGB_TARGET4_PARAMS = {
#         "objective": "mae",
#         "metric": "l1",
#         "boosting_type": "gbdt",
#         "learning_rate": 0.1,
#         "max_depth": 6,
#         "num_leaves": 51,
#         "lambda_l1": 5.506356543008336,
#         "lambda_l2": 9.683199146595637,
#         "bagging_fraction": 0.7376768451562011,
#         "bagging_freq": 2,
#         "feature_fraction": 0.5,
#         "min_data_in_leaf": 20,
#         "num_threads": 8,
#         "verbosity": -1,
#         "num_iterations": 10000,
#         "early_stopping_round": 100,
#     }
#     SEEDS = [2434, 98]
#     MODEL_PATH = "../output/nb025"
#     # MODEL_PATH = "../input/mlb-nb025-lgb-weights" # for kaggle kernel

## Utils

In [4]:
def get_logger(out_file=None):
    logger = logging.getLogger()  # loggerの呼び出し
    formatter = logging.Formatter("[%(asctime)s] [%(levelname)s] [%(message)s]")  # ログ出力の際のフォーマットを定義
    logger.handlers = []  # ハンドラーを追加するためのリスト
    logger.setLevel(logging.INFO)  # ロギングのレベルを設定, 'INFO' : 想定された通りのことが起こったことの確認

    handler = logging.StreamHandler(sys.stdout)  # StreamHandler(コンソールに出力するハンドラ)を追加
    handler.setFormatter(formatter)
    handler.setLevel(logging.INFO)
    logger.addHandler(handler)

    # ログをファイルとして出力する際のハンドラ(FileHandler)
    if out_file is not None:
        fh = logging.FileHandler(out_file)
        fh.setFormatter(formatter)
        fh.setLevel(logging.INFO)
        logger.addHandler(fh)

    logger.info("logger set up")  # "logger set up"を表示
    return logger


@contextmanager
def timer(name: str, logger: Optional[logging.Logger] = None):
    t0 = time.time()
    msg = f"<{name}> start"
    if logger is None:
        print(msg)
    else:
        logger.info(msg)
    yield

    msg = f"<{name}> done in {time.time() - t0:.2f} s"
    if logger is None:
        print(msg)
    else:
        logger.info(msg)


def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int64)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float32)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float64)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024**2
    if verbose: print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

## Loading dataset

In [5]:
class MLBDataset(object):
    def __init__(self, input_path):
        self.input_path = Path(input_path)
        #################
        # train #
        #################
        self.train_next = pd.read_pickle(self.input_path / "train_updated_nextDayPlayerEngagement.pickle")
        self.train_rosters = pd.read_pickle(self.input_path / "train_updated_rosters.pickle")
        self.train_ptf = pd.read_pickle(self.input_path / "train_updated_playerTwitterFollowers.pickle")
        self.train_scores = pd.read_pickle(self.input_path / "train_updated_playerBoxScores.pickle")
        self.train_games = pd.read_pickle(self.input_path / "train_updated_games.pickle")
        self.train_standings = pd.read_pickle(self.input_path / "train_updated_standings.pickle")
        self.train_tbs = pd.read_pickle(self.input_path / "train_updated_teamBoxScores.pickle")
        self.train_ttf = pd.read_pickle(self.input_path / "train_updated_teamTwitterFollowers.pickle")
        self.train_trans = pd.read_pickle(self.input_path / "train_updated_transactions.pickle")
        self.train_awards = pd.read_pickle(self.input_path / "train_updated_awards.pickle")
        self.train_events = pd.read_pickle(self.input_path / "train_updated_events.pickle")
        #################
        # Additional #
        #################
        self.players = pd.read_pickle(self.input_path / "players.pickle")
        self.awards = pd.read_pickle(self.input_path / "awards.pickle")
        self.seasons = pd.read_pickle(self.input_path / "seasons.pickle")
        self.teams = pd.read_pickle(self.input_path / "teams.pickle")
        #################
        # test #
        #################
        self.example_test = pd.read_csv(self.input_path / "example_test.csv")
        self.sample_submission = pd.read_csv(self.input_path / "example_sample_submission.csv")

        # # only players in test set
        # target_playerids = self.players[self.players["playerForTestSetAndFuturePreds"] == True]["playerId"].unique()
        # self.train_next = self.train_next[self.train_next["playerId"].isin(target_playerids)].reset_index(drop=True)

mlb_train_ds = MLBDataset(CFG.INPUT_DIR)

## Feature blocks

In [6]:
def merge_by_key(left: Union[pd.DataFrame, pd.Series], right: pd.DataFrame, on=CFG.OBJECT_ID) -> pd.DataFrame:
    if not isinstance(left, pd.Series):
        left = left[on]
    return pd.merge(left, right, on=on, how="left").drop(columns=on)


class BaseBlock(object):
    def fit(self, input_df: pd.DataFrame, y=None) -> pd.DataFrame:
        return self.transform(input_df)

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        return NotImplementedError()


def quantile25(x: pd.Series):
    return x.quantile(q=0.25)


def quantile75(x: pd.Series):
    return x.quantile(q=0.75)


def prob(x: pd.Series):
    x = x.reset_index(drop=True).values.tolist()
    mean = np.mean(x)
    std = np.std(x)
    distribution = norm(mean, std)
    min_weight = min(x)
    max_weight = max(x)
    values = list(np.linspace(min_weight, max_weight, len(x)))
    probabilities = [distribution.pdf(v) for v in values]
    max_values = max(probabilities)
    max_index = probabilities.index(max_values)

    return x[max_index]


class TargetAggregateBlock(BaseBlock):
    def __init__(self, periods: List[int]):
        self.periods = periods
        
    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool,
    ):
        output_df = pd.DataFrame()
        for i in self.periods:
            _target_df = target_df[(target_df["date"] >= i) & (target_df["date"] < (i + 100))].copy()

            cols = _target_df.drop(columns=["date", "engagementMetricsDate", "playerId"], axis=1).columns.tolist()
            dfs = [_target_df.groupby(["playerId"])[col].agg([
                "mean", "std", "max", "min", "median", quantile25, quantile75, prob
                ]).add_prefix(f"{col}_{str(i)[:-2]}_") for col in cols]
            tmp_df = pd.concat(dfs, axis=1)
            output_df = pd.concat([output_df, tmp_df], axis=1)

            del _target_df, dfs, tmp_df

        self.agg_df = output_df.reset_index()

        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df,
                standings_df,
                playerTwitterFollowers_df,
                teamTwitterFollowers_df
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame
    ):
        return merge_by_key(input_df, self.agg_df, on="playerId")


class PlayersLabelEncodingBlock(BaseBlock):
    def __init__(self, columns: List[str]):
        self.columns = columns + ["playerId"]
        self.players_df = mlb_train_ds.players.copy()
        self.players_df["DOB_year"] = pd.to_datetime(self.players_df["DOB"]).dt.year
        self.players_df["playerAge"] = 2021 - self.players_df["DOB_year"]
        self.labeled_df = self.players_df[self.columns].copy()
        self.labeled_df["playerid"] = self.labeled_df["playerId"]
        self.columns = self.columns + ["playerid"]
        self.encoder = None
    
    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool
    ):
        self.encoder = ce.OrdinalEncoder(handle_unknown="value", handle_missing="values")
        self.encoder.fit(self.labeled_df[self.columns])
        self.labeled_df[self.columns] = self.encoder.transform(self.labeled_df[self.columns])

        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df,
                standings_df,
                playerTwitterFollowers_df,
                teamTwitterFollowers_df
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame
    ):
        return merge_by_key(input_df, self.labeled_df, on="playerId").add_prefix("Players_LE_")


class RostersLabelEncodingBlock(BaseBlock):
    def __init__(self, columns: List[str]):
        self.columns = columns
        self.encoder = None

    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool
    ):
        self.encoder = ce.OrdinalEncoder(handle_unknown="value", handle_missing="value")
        self.encoder.fit(rosters_df[self.columns])

        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df,
                standings_df,
                playerTwitterFollowers_df,
                teamTwitterFollowers_df
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
    ):
        self.labeled_df = rosters_df[CFG.OBJECT_ID + self.columns].copy()
        self.labeled_df[self.columns] = self.encoder.transform(self.labeled_df[self.columns])

        return merge_by_key(input_df, self.labeled_df).add_prefix("Rosters_LE_")


class PlayerStatsBlock(BaseBlock):
    def __init__(self, columns):
        self.columns = columns
        # game info
        self.feat_home_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_home_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_positionCode_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_battingOrder_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        # batting
        self.feat_gamesPlayedBatting_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesPlayedBatting_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_plateAppearances_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_plateAppearances_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBats_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBats_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hits_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hits_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doubles_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doubles_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triples_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triples_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRuns_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRuns_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbi_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbi_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsScored_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsScored_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_totalBases_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_totalBases_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBalls_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBalls_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitch_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitch_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalks_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalks_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoDoublePlay_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoDoublePlay_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoTriplePlay_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundIntoTriplePlay_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBunts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBunts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFlies_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFlies_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealing_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealing_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBases_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBases_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_leftOnBase_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_leftOnBase_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterference_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterference_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffs_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffs_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        # pitching
        self.feat_gamesPlayedPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesPlayedPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesStartedPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesStartedPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_winsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_winsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_lossesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_lossesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_completeGamesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_completeGamesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_shutoutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_shutoutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saves_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saves_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saveOpportunities_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_saveOpportunities_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_blownSaves_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_blownSaves_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_holds_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_holds_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inningsPitched_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inningsPitched_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_runsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_earnedRuns_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_earnedRuns_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pitchesThrown_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pitchesThrown_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRunsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_homeRunsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikeOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBallsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_baseOnBallsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitchPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitByPitchPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalksPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_intentionalWalksPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balks_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balks_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_wildPitches_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_wildPitches_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBatsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_atBatsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_battersFaced_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_battersFaced_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBuntsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacBuntsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFliesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_sacFliesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunners_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunners_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunnersScored_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_inheritedRunnersScored_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbiPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_rbiPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_flyOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_airOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_airOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doublesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_doublesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triplesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_triplesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealingPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_caughtStealingPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBasesPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_stolenBasesPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_outsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_outsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balls_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_balls_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikes_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_strikes_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitBatsmen_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_hitBatsmen_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_pickoffsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterferencePitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_catchersInterferencePitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_assists_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_assists_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_putOuts_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_putOuts_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_errors_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_errors_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_chances_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_chances_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesFinishedPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_gamesFinishedPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOutsPitching_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
        self.feat_groundOutsPitching_cum_dict = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))

    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool
    ):
        if playerBoxScores_df is not None:
            for i, row in enumerate(tqdm(playerBoxScores_df[self.columns].values)):
                season = int(str(row[1])[:4])

                self.feat_home_dict[row[0]][season][row[1]] += row[2]
                self.feat_positionCode_dict[row[0]][season][row[1]] += row[3]
                self.feat_battingOrder_dict[row[0]][season][row[1]] += row[4]
                self.feat_gamesPlayedBatting_dict[row[0]][season][row[1]] += row[5]
                self.feat_plateAppearances_dict[row[0]][season][row[1]] += row[6]
                self.feat_atBats_dict[row[0]][season][row[1]] += row[7]
                self.feat_hits_dict[row[0]][season][row[1]] += row[8]
                self.feat_doubles_dict[row[0]][season][row[1]] += row[9]
                self.feat_triples_dict[row[0]][season][row[1]] += row[10]
                self.feat_homeRuns_dict[row[0]][season][row[1]] += row[11]
                self.feat_rbi_dict[row[0]][season][row[1]] += row[12]
                self.feat_runsScored_dict[row[0]][season][row[1]] += row[13]
                self.feat_totalBases_dict[row[0]][season][row[1]] += row[14]
                self.feat_baseOnBalls_dict[row[0]][season][row[1]] += row[15]
                self.feat_hitByPitch_dict[row[0]][season][row[1]] += row[16]
                self.feat_intentionalWalks_dict[row[0]][season][row[1]] += row[17]
                self.feat_groundIntoDoublePlay_dict[row[0]][season][row[1]] += row[18]
                self.feat_groundIntoTriplePlay_dict[row[0]][season][row[1]] += row[19]
                self.feat_strikeOuts_dict[row[0]][season][row[1]] += row[20]
                self.feat_sacBunts_dict[row[0]][season][row[1]] += row[21]
                self.feat_sacFlies_dict[row[0]][season][row[1]] += row[22]
                self.feat_caughtStealing_dict[row[0]][season][row[1]] += row[23]
                self.feat_stolenBases_dict[row[0]][season][row[1]] += row[24]
                self.feat_leftOnBase_dict[row[0]][season][row[1]] += row[25]
                self.feat_catchersInterference_dict[row[0]][season][row[1]] += row[26]
                self.feat_pickoffs_dict[row[0]][season][row[1]] += row[27]
                self.feat_flyOuts_dict[row[0]][season][row[1]] += row[28]
                self.feat_groundOuts_dict[row[0]][season][row[1]] += row[29]

                self.feat_gamesPlayedPitching_dict[row[0]][season][row[1]] += row[30]
                self.feat_gamesStartedPitching_dict[row[0]][season][row[1]] += row[31]
                self.feat_winsPitching_dict[row[0]][season][row[1]] += row[32]
                self.feat_lossesPitching_dict[row[0]][season][row[1]] += row[33]
                self.feat_completeGamesPitching_dict[row[0]][season][row[1]] += row[34]
                self.feat_shutoutsPitching_dict[row[0]][season][row[1]] += row[35]
                self.feat_saves_dict[row[0]][season][row[1]] += row[36]
                self.feat_saveOpportunities_dict[row[0]][season][row[1]] += row[37]
                self.feat_blownSaves_dict[row[0]][season][row[1]] += row[38]
                self.feat_holds_dict[row[0]][season][row[1]] += row[39]
                self.feat_inningsPitched_dict[row[0]][season][row[1]] += row[40]
                self.feat_runsPitching_dict[row[0]][season][row[1]] += row[41]
                self.feat_earnedRuns_dict[row[0]][season][row[1]] += row[42]
                self.feat_pitchesThrown_dict[row[0]][season][row[1]] += row[43]
                self.feat_hitsPitching_dict[row[0]][season][row[1]] += row[44]
                self.feat_homeRunsPitching_dict[row[0]][season][row[1]] += row[45]
                self.feat_strikeOutsPitching_dict[row[0]][season][row[1]] += row[46]
                self.feat_baseOnBallsPitching_dict[row[0]][season][row[1]] += row[47]
                self.feat_hitByPitchPitching_dict[row[0]][season][row[1]] += row[48]
                self.feat_intentionalWalksPitching_dict[row[0]][season][row[1]] += row[49]
                self.feat_balks_dict[row[0]][season][row[1]] += row[50]
                self.feat_wildPitches_dict[row[0]][season][row[1]] += row[51]
                self.feat_atBatsPitching_dict[row[0]][season][row[1]] += row[52]
                self.feat_battersFaced_dict[row[0]][season][row[1]] += row[53]
                self.feat_sacBuntsPitching_dict[row[0]][season][row[1]] += row[54]
                self.feat_sacFliesPitching_dict[row[0]][season][row[1]] += row[55]
                self.feat_inheritedRunners_dict[row[0]][season][row[1]] += row[56]
                self.feat_inheritedRunnersScored_dict[row[0]][season][row[1]] += row[57]
                self.feat_rbiPitching_dict[row[0]][season][row[1]] += row[58]
                self.feat_flyOutsPitching_dict[row[0]][season][row[1]] += row[59]
                self.feat_airOutsPitching_dict[row[0]][season][row[1]] += row[60]
                self.feat_doublesPitching_dict[row[0]][season][row[1]] += row[61]
                self.feat_triplesPitching_dict[row[0]][season][row[1]] += row[62]
                self.feat_caughtStealingPitching_dict[row[0]][season][row[1]] += row[63]
                self.feat_stolenBasesPitching_dict[row[0]][season][row[1]] += row[64]
                self.feat_outsPitching_dict[row[0]][season][row[1]] += row[65]
                self.feat_balls_dict[row[0]][season][row[1]] += row[66]
                self.feat_strikes_dict[row[0]][season][row[1]] += row[67]
                self.feat_hitBatsmen_dict[row[0]][season][row[1]] += row[68]
                self.feat_pickoffsPitching_dict[row[0]][season][row[1]] += row[69]
                self.feat_catchersInterferencePitching_dict[row[0]][season][row[1]] += row[70]
                self.feat_assists_dict[row[0]][season][row[1]] += row[71]
                self.feat_putOuts_dict[row[0]][season][row[1]] += row[72]
                self.feat_errors_dict[row[0]][season][row[1]] += row[73]
                self.feat_chances_dict[row[0]][season][row[1]] += row[74]
                self.feat_gamesFinishedPitching_dict[row[0]][season][row[1]] += row[75]
                self.feat_groundOutsPitching_dict[row[0]][season][row[1]] += row[76]
                
                self.feat_home_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_home_cum_dict, row[2])
                self.feat_gamesPlayedBatting_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesPlayedBatting_cum_dict, row[5])
                self.feat_plateAppearances_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_plateAppearances_cum_dict, row[6])
                self.feat_atBats_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_atBats_cum_dict, row[7])
                self.feat_hits_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hits_cum_dict, row[8])
                self.feat_doubles_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_doubles_cum_dict, row[9])
                self.feat_triples_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_triples_cum_dict, row[10])
                self.feat_homeRuns_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_homeRuns_cum_dict, row[11])
                self.feat_rbi_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_rbi_cum_dict, row[12])
                self.feat_runsScored_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_runsScored_cum_dict, row[13])
                self.feat_totalBases_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_totalBases_cum_dict, row[14])
                self.feat_baseOnBalls_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_baseOnBalls_cum_dict, row[15])
                self.feat_hitByPitch_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitByPitch_cum_dict, row[16])
                self.feat_intentionalWalks_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_intentionalWalks_cum_dict, row[17])
                self.feat_groundIntoDoublePlay_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundIntoDoublePlay_cum_dict, row[18])
                self.feat_groundIntoTriplePlay_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundIntoTriplePlay_cum_dict, row[19])
                self.feat_strikeOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikeOuts_cum_dict, row[20])
                self.feat_sacBunts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacBunts_cum_dict, row[21])
                self.feat_sacFlies_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacFlies_cum_dict, row[22])
                self.feat_caughtStealing_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_caughtStealing_cum_dict, row[23])
                self.feat_stolenBases_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_stolenBases_cum_dict, row[24])
                self.feat_leftOnBase_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_leftOnBase_cum_dict, row[25])
                self.feat_catchersInterference_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_catchersInterference_cum_dict, row[26])
                self.feat_pickoffs_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_pickoffs_cum_dict, row[27])
                self.feat_flyOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_flyOuts_cum_dict, row[28])
                self.feat_groundOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundOuts_cum_dict, row[29])

                self.feat_gamesPlayedPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesPlayedPitching_cum_dict, row[30])
                self.feat_gamesStartedPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesStartedPitching_cum_dict, row[31])
                self.feat_winsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_winsPitching_cum_dict, row[32])
                self.feat_lossesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_lossesPitching_cum_dict, row[33])
                self.feat_completeGamesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_completeGamesPitching_cum_dict, row[34])
                self.feat_shutoutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_shutoutsPitching_cum_dict, row[35])
                self.feat_saves_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_saves_cum_dict, row[36])
                self.feat_saveOpportunities_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_saveOpportunities_cum_dict, row[37])
                self.feat_blownSaves_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_blownSaves_cum_dict, row[38])
                self.feat_holds_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_holds_cum_dict, row[39])
                self.feat_inningsPitched_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_inningsPitched_cum_dict, row[40])
                self.feat_runsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_runsPitching_cum_dict, row[41])
                self.feat_earnedRuns_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_earnedRuns_cum_dict, row[42])
                self.feat_pitchesThrown_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_pitchesThrown_cum_dict, row[43])
                self.feat_hitsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitsPitching_cum_dict, row[44])
                self.feat_homeRunsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_homeRunsPitching_cum_dict, row[45])
                self.feat_strikeOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikeOutsPitching_cum_dict, row[46])
                self.feat_baseOnBallsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikeOutsPitching_cum_dict, row[47])
                self.feat_hitByPitchPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitByPitchPitching_cum_dict, row[48])
                self.feat_intentionalWalksPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_intentionalWalksPitching_cum_dict, row[49])
                self.feat_balks_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_balks_cum_dict, row[50])
                self.feat_wildPitches_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_wildPitches_cum_dict, row[51])
                self.feat_atBatsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_atBatsPitching_cum_dict, row[52])
                self.feat_battersFaced_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_battersFaced_cum_dict, row[53])
                self.feat_sacBuntsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacBuntsPitching_cum_dict, row[54])
                self.feat_sacFliesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_sacFliesPitching_cum_dict, row[55])
                self.feat_inheritedRunners_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_inheritedRunners_cum_dict, row[56])
                self.feat_inheritedRunnersScored_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_inheritedRunnersScored_cum_dict, row[57])
                self.feat_rbiPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_rbiPitching_cum_dict, row[58])
                self.feat_flyOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_flyOutsPitching_cum_dict, row[59])
                self.feat_airOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_airOutsPitching_cum_dict, row[60])
                self.feat_doublesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_doublesPitching_cum_dict, row[61])
                self.feat_triplesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_triplesPitching_cum_dict, row[62])
                self.feat_caughtStealingPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_caughtStealingPitching_cum_dict, row[63])
                self.feat_stolenBasesPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_stolenBasesPitching_cum_dict, row[64])
                self.feat_outsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_outsPitching_cum_dict, row[65])
                self.feat_balls_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_balls_cum_dict, row[66])
                self.feat_strikes_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_strikes_cum_dict, row[67])
                self.feat_hitBatsmen_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_hitBatsmen_cum_dict, row[68])
                self.feat_pickoffsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_pickoffsPitching_cum_dict, row[69])
                self.feat_catchersInterferencePitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_catchersInterferencePitching_cum_dict, row[70])
                self.feat_assists_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_assists_cum_dict, row[71])
                self.feat_putOuts_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_putOuts_cum_dict, row[72])
                self.feat_errors_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_errors_cum_dict, row[73])
                self.feat_chances_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_chances_cum_dict, row[74])
                self.feat_gamesFinishedPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_gamesFinishedPitching_cum_dict, row[75])
                self.feat_groundOutsPitching_cum_dict[row[0]][season][row[1]] += self.add_cumsum2dict(row[0], row[1], season, self.feat_groundOutsPitching_cum_dict, row[76])

        if only_fit == False:
            return self.transform(
            input_df,
            target_df,
            rosters_df,
            playerBoxScores_df,
            standings_df,
            playerTwitterFollowers_df,
            teamTwitterFollowers_df
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
    ):
        date = np.zeros(len(input_df), dtype=np.int32)
        playerid = np.zeros(len(input_df), dtype=np.int32)
        home = np.zeros(len(input_df), dtype=np.float32)
        home_cum = np.zeros(len(input_df), dtype=np.float32)
        positionCode = np.zeros(len(input_df), dtype=np.float32)
        battingOrder = np.zeros(len(input_df), dtype=np.float32)
        # batting
        gamesPlayedBatting = np.zeros(len(input_df), dtype=np.float32)
        gamesPlayedBatting_cum = np.zeros(len(input_df), dtype=np.float32)
        plateAppearances = np.zeros(len(input_df), dtype=np.float32)
        plateAppearances_cum = np.zeros(len(input_df), dtype=np.float32)
        atBats = np.zeros(len(input_df), dtype=np.float32)
        atBats_cum = np.zeros(len(input_df), dtype=np.float32)
        hits = np.zeros(len(input_df), dtype=np.float32)
        hits_cum = np.zeros(len(input_df), dtype=np.float32)
        doubles = np.zeros(len(input_df), dtype=np.float32)
        doubles_cum = np.zeros(len(input_df), dtype=np.float32)
        triples = np.zeros(len(input_df), dtype=np.float32)
        triples_cum = np.zeros(len(input_df), dtype=np.float32)
        homeRuns = np.zeros(len(input_df), dtype=np.float32)
        homeRuns_cum = np.zeros(len(input_df), dtype=np.float32)
        rbi = np.zeros(len(input_df), dtype=np.float32)
        rbi_cum = np.zeros(len(input_df), dtype=np.float32)
        runsScored = np.zeros(len(input_df), dtype=np.float32)
        runsScored_cum = np.zeros(len(input_df), dtype=np.float32)
        totalBases = np.zeros(len(input_df), dtype=np.float32)
        totalBases_cum = np.zeros(len(input_df), dtype=np.float32)
        baseOnBalls = np.zeros(len(input_df), dtype=np.float32)
        baseOnBalls_cum = np.zeros(len(input_df), dtype=np.float32)
        hitByPitch = np.zeros(len(input_df), dtype=np.float32)
        hitByPitch_cum = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalks = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalks_cum = np.zeros(len(input_df), dtype=np.float32)
        groundIntoDoublePlay = np.zeros(len(input_df), dtype=np.float32)
        groundIntoDoublePlay_cum = np.zeros(len(input_df), dtype=np.float32)
        groundIntoTriplePlay = np.zeros(len(input_df), dtype=np.float32)
        groundIntoTriplePlay_cum = np.zeros(len(input_df), dtype=np.float32)
        strikeOuts = np.zeros(len(input_df), dtype=np.float32)
        strikeOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        sacBunts = np.zeros(len(input_df), dtype=np.float32)
        sacBunts_cum = np.zeros(len(input_df), dtype=np.float32)
        sacFlies = np.zeros(len(input_df), dtype=np.float32)
        sacFlies_cum = np.zeros(len(input_df), dtype=np.float32)
        caughtStealing = np.zeros(len(input_df), dtype=np.float32)
        caughtStealing_cum = np.zeros(len(input_df), dtype=np.float32)
        stolenBases = np.zeros(len(input_df), dtype=np.float32)
        stolenBases_cum = np.zeros(len(input_df), dtype=np.float32)
        leftOnBase = np.zeros(len(input_df), dtype=np.float32)
        leftOnBase_cum = np.zeros(len(input_df), dtype=np.float32)
        catchersInterference = np.zeros(len(input_df), dtype=np.float32)
        catchersInterference_cum = np.zeros(len(input_df), dtype=np.float32)
        pickoffs = np.zeros(len(input_df), dtype=np.float32)
        pickoffs_cum = np.zeros(len(input_df), dtype=np.float32)
        flyOuts = np.zeros(len(input_df), dtype=np.float32)
        flyOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        groundOuts = np.zeros(len(input_df), dtype=np.float32)
        groundOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        # pitching
        gamesPlayedPitching = np.zeros(len(input_df), dtype=np.float32)
        gamesPlayedPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        gamesStartedPitching = np.zeros(len(input_df), dtype=np.float32)
        gamesStartedPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        winsPitching = np.zeros(len(input_df), dtype=np.float32)
        winsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        lossesPitching = np.zeros(len(input_df), dtype=np.float32)
        lossesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        completeGamesPitching = np.zeros(len(input_df), dtype=np.float32)
        completeGamesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        shutoutsPitching = np.zeros(len(input_df), dtype=np.float32)
        shutoutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        saves = np.zeros(len(input_df), dtype=np.float32)
        saves_cum = np.zeros(len(input_df), dtype=np.float32)
        saveOpportunities = np.zeros(len(input_df), dtype=np.float32)
        saveOpportunities_cum = np.zeros(len(input_df), dtype=np.float32)
        blownSaves = np.zeros(len(input_df), dtype=np.float32)
        blownSaves_cum = np.zeros(len(input_df), dtype=np.float32)
        holds = np.zeros(len(input_df), dtype=np.float32)
        holds_cum = np.zeros(len(input_df), dtype=np.float32)
        inningsPitched = np.zeros(len(input_df), dtype=np.float32)
        inningsPitched_cum = np.zeros(len(input_df), dtype=np.float32)
        runsPitching = np.zeros(len(input_df), dtype=np.float32)
        runsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        earnedRuns = np.zeros(len(input_df), dtype=np.float32)
        earnedRuns_cum = np.zeros(len(input_df), dtype=np.float32)
        pitchesThrown = np.zeros(len(input_df), dtype=np.float32)
        pitchesThrown_cum = np.zeros(len(input_df), dtype=np.float32)
        hitsPitching = np.zeros(len(input_df), dtype=np.float32)
        hitsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        homeRunsPitching = np.zeros(len(input_df), dtype=np.float32)
        homeRunsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        strikeOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        strikeOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        baseOnBallsPitching = np.zeros(len(input_df), dtype=np.float32)
        baseOnBallsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        hitByPitchPitching = np.zeros(len(input_df), dtype=np.float32)
        hitByPitchPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalksPitching = np.zeros(len(input_df), dtype=np.float32)
        intentionalWalksPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        balks = np.zeros(len(input_df), dtype=np.float32)
        balks_cum = np.zeros(len(input_df), dtype=np.float32)
        wildPitches = np.zeros(len(input_df), dtype=np.float32)
        wildPitches_cum = np.zeros(len(input_df), dtype=np.float32)
        atBatsPitching = np.zeros(len(input_df), dtype=np.float32)
        atBatsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        battersFaced = np.zeros(len(input_df), dtype=np.float32)
        battersFaced_cum = np.zeros(len(input_df), dtype=np.float32)
        sacBuntsPitching = np.zeros(len(input_df), dtype=np.float32)
        sacBuntsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        sacFliesPitching = np.zeros(len(input_df), dtype=np.float32)
        sacFliesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunners = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunners_cum = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunnersScored = np.zeros(len(input_df), dtype=np.float32)
        inheritedRunnersScored_cum = np.zeros(len(input_df), dtype=np.float32)
        rbiPitching = np.zeros(len(input_df), dtype=np.float32)
        rbiPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        flyOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        flyOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        airOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        airOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        doublesPitching = np.zeros(len(input_df), dtype=np.float32)
        doublesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        triplesPitching = np.zeros(len(input_df), dtype=np.float32)
        triplesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        caughtStealingPitching = np.zeros(len(input_df), dtype=np.float32)
        caughtStealingPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        stolenBasesPitching = np.zeros(len(input_df), dtype=np.float32)
        stolenBasesPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        outsPitching = np.zeros(len(input_df), dtype=np.float32)
        outsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        balls = np.zeros(len(input_df), dtype=np.float32)
        balls_cum = np.zeros(len(input_df), dtype=np.float32)
        strikes = np.zeros(len(input_df), dtype=np.float32)
        strikes_cum = np.zeros(len(input_df), dtype=np.float32)
        hitBatsmen = np.zeros(len(input_df), dtype=np.float32)
        hitBatsmen_cum = np.zeros(len(input_df), dtype=np.float32)
        pickoffsPitching = np.zeros(len(input_df), dtype=np.float32)
        pickoffsPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        catchersInterferencePitching = np.zeros(len(input_df), dtype=np.float32)
        catchersInterferencePitching_cum = np.zeros(len(input_df), dtype=np.float32)
        assists = np.zeros(len(input_df), dtype=np.float32)
        assists_cum = np.zeros(len(input_df), dtype=np.float32)
        putOuts = np.zeros(len(input_df), dtype=np.float32)
        putOuts_cum = np.zeros(len(input_df), dtype=np.float32)
        errors = np.zeros(len(input_df), dtype=np.float32)
        errors_cum = np.zeros(len(input_df), dtype=np.float32)
        chances = np.zeros(len(input_df), dtype=np.float32)
        chances_cum = np.zeros(len(input_df), dtype=np.float32)
        gamesFinishedPitching = np.zeros(len(input_df), dtype=np.float32)
        gamesFinishedPitching_cum = np.zeros(len(input_df), dtype=np.float32)
        groundOutsPitching = np.zeros(len(input_df), dtype=np.float32)
        groundOutsPitching_cum = np.zeros(len(input_df), dtype=np.float32)

        for i, row in enumerate(tqdm(input_df[["playerId", "date"]].values)):
            season = int(str(row[1])[:4])
            playerid[i] = row[0]
            date[i] = row[1]
            home[i] = self.extract_score(row[0], row[1], season, self.feat_home_dict)
            positionCode[i] = self.extract_score(row[0], row[1], season, self.feat_positionCode_dict)
            battingOrder[i] = self.extract_score(row[0], row[1], season, self.feat_battingOrder_dict)
            gamesPlayedBatting[i] = self.extract_score(row[0], row[1], season, self.feat_gamesPlayedBatting_dict)
            plateAppearances[i] = self.extract_score(row[0], row[1], season, self.feat_plateAppearances_dict)
            atBats[i] = self.extract_score(row[0], row[1], season, self.feat_atBats_dict)
            hits[i] = self.extract_score(row[0], row[1], season, self.feat_hits_dict)
            doubles[i] = self.extract_score(row[0], row[1], season, self.feat_doubles_dict)
            triples[i] = self.extract_score(row[0], row[1], season, self.feat_triples_dict)
            homeRuns[i] = self.extract_score(row[0], row[1], season, self.feat_homeRuns_dict)
            rbi[i] = self.extract_score(row[0], row[1], season, self.feat_rbi_dict)
            runsScored[i] = self.extract_score(row[0], row[1], season, self.feat_runsScored_dict)
            totalBases[i] = self.extract_score(row[0], row[1], season, self.feat_totalBases_dict)
            baseOnBalls[i] = self.extract_score(row[0], row[1], season, self.feat_baseOnBalls_dict)
            hitByPitch[i] = self.extract_score(row[0], row[1], season, self.feat_hitByPitch_dict)
            intentionalWalks[i] = self.extract_score(row[0], row[1], season, self.feat_intentionalWalks_dict)
            groundIntoDoublePlay[i] = self.extract_score(row[0], row[1], season, self.feat_groundIntoDoublePlay_dict)
            groundIntoTriplePlay[i] = self.extract_score(row[0], row[1], season, self.feat_groundIntoTriplePlay_dict)
            strikeOuts[i] = self.extract_score(row[0], row[1], season, self.feat_strikeOuts_dict)
            sacBunts[i] = self.extract_score(row[0], row[1], season, self.feat_sacBunts_dict)
            sacFlies[i] = self.extract_score(row[0], row[1], season, self.feat_sacFlies_dict)
            caughtStealing[i] = self.extract_score(row[0], row[1], season, self.feat_caughtStealing_dict)
            stolenBases[i] = self.extract_score(row[0], row[1], season, self.feat_stolenBases_dict)
            leftOnBase[i] = self.extract_score(row[0], row[1], season, self.feat_leftOnBase_dict)
            catchersInterference[i] = self.extract_score(row[0], row[1], season, self.feat_catchersInterference_dict)
            pickoffs[i] = self.extract_score(row[0], row[1], season, self.feat_pickoffs_dict)
            flyOuts[i] = self.extract_score(row[0], row[1], season, self.feat_flyOuts_dict)
            groundOuts[i] = self.extract_score(row[0], row[1], season, self.feat_groundOuts_dict)

            gamesPlayedPitching[i] = self.extract_score(row[0], row[1], season, self.feat_gamesPlayedPitching_dict)
            gamesStartedPitching[i] = self.extract_score(row[0], row[1], season, self.feat_gamesStartedPitching_dict)
            winsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_winsPitching_dict)
            lossesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_lossesPitching_dict)
            completeGamesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_completeGamesPitching_dict)
            shutoutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_shutoutsPitching_dict)
            saves[i] = self.extract_score(row[0], row[1], season, self.feat_saves_dict)
            saveOpportunities[i] = self.extract_score(row[0], row[1], season, self.feat_saveOpportunities_dict)
            blownSaves[i] = self.extract_score(row[0], row[1], season, self.feat_blownSaves_dict)
            holds[i] = self.extract_score(row[0], row[1], season, self.feat_holds_dict)
            inningsPitched[i] = self.extract_score(row[0], row[1], season, self.feat_inningsPitched_dict)
            runsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_runsPitching_dict)
            earnedRuns[i] = self.extract_score(row[0], row[1], season, self.feat_earnedRuns_dict)
            pitchesThrown[i] = self.extract_score(row[0], row[1], season, self.feat_pitchesThrown_dict)
            hitsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_hitsPitching_dict)
            homeRunsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_homeRunsPitching_dict)
            strikeOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_strikeOutsPitching_dict)
            baseOnBallsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_baseOnBallsPitching_dict)
            hitByPitchPitching[i] = self.extract_score(row[0], row[1], season, self.feat_hitByPitchPitching_dict)
            intentionalWalksPitching[i] = self.extract_score(row[0], row[1], season, self.feat_intentionalWalksPitching_dict)
            balks[i] = self.extract_score(row[0], row[1], season, self.feat_balks_dict)
            wildPitches[i] = self.extract_score(row[0], row[1], season, self.feat_wildPitches_dict)
            atBatsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_atBatsPitching_dict)
            battersFaced[i] = self.extract_score(row[0], row[1], season, self.feat_battersFaced_dict)
            sacBuntsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_sacBuntsPitching_dict)
            sacFliesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_sacFliesPitching_dict)
            inheritedRunners[i] = self.extract_score(row[0], row[1], season, self.feat_inheritedRunners_dict)
            inheritedRunnersScored[i] = self.extract_score(row[0], row[1], season, self.feat_inheritedRunnersScored_dict)
            rbiPitching[i] = self.extract_score(row[0], row[1], season, self.feat_rbiPitching_dict)
            flyOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_flyOutsPitching_dict)
            airOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_airOutsPitching_dict)
            doublesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_doublesPitching_dict)
            triplesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_triplesPitching_dict)
            caughtStealingPitching[i] = self.extract_score(row[0], row[1], season, self.feat_caughtStealingPitching_dict)
            stolenBasesPitching[i] = self.extract_score(row[0], row[1], season, self.feat_stolenBasesPitching_dict)
            outsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_outsPitching_dict)
            balls[i] = self.extract_score(row[0], row[1], season, self.feat_balls_dict)
            strikes[i] = self.extract_score(row[0], row[1], season, self.feat_strikes_dict)
            hitBatsmen[i] = self.extract_score(row[0], row[1], season, self.feat_hitBatsmen_dict)
            pickoffsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_pickoffsPitching_dict)
            catchersInterferencePitching[i] = self.extract_score(row[0], row[1], season, self.feat_catchersInterferencePitching_dict)
            assists[i] = self.extract_score(row[0], row[1], season, self.feat_assists_dict)
            putOuts[i] = self.extract_score(row[0], row[1], season, self.feat_putOuts_dict)
            errors[i] = self.extract_score(row[0], row[1], season, self.feat_errors_dict)
            chances[i] = self.extract_score(row[0], row[1], season, self.feat_chances_dict)
            gamesFinishedPitching[i] = self.extract_score(row[0], row[1], season, self.feat_gamesFinishedPitching_dict)
            groundOutsPitching[i] = self.extract_score(row[0], row[1], season, self.feat_groundOutsPitching_dict)

            home_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_home_cum_dict)
            gamesPlayedBatting_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesPlayedBatting_cum_dict)
            plateAppearances_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_plateAppearances_cum_dict)
            atBats_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_atBats_cum_dict)
            hits_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hits_cum_dict)
            doubles_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_doubles_cum_dict)
            triples_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_triples_cum_dict)
            homeRuns_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_homeRuns_cum_dict)
            rbi_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_rbi_cum_dict)
            runsScored_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_runsScored_cum_dict)
            totalBases_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_totalBases_cum_dict)
            baseOnBalls_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_baseOnBalls_cum_dict)
            hitByPitch_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitByPitch_cum_dict)
            intentionalWalks_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_intentionalWalks_cum_dict)
            groundIntoDoublePlay_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundIntoDoublePlay_cum_dict)
            groundIntoTriplePlay_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundIntoTriplePlay_cum_dict)
            strikeOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_strikeOuts_cum_dict)
            sacBunts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacBunts_cum_dict)
            sacFlies_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacFlies_cum_dict)
            caughtStealing_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_caughtStealing_cum_dict)
            stolenBases_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_stolenBases_cum_dict)
            leftOnBase_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_leftOnBase_cum_dict)
            catchersInterference_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_catchersInterference_cum_dict)
            pickoffs_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_pickoffs_cum_dict)
            flyOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_flyOuts_cum_dict)
            groundOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundOuts_cum_dict)

            gamesPlayedPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesPlayedPitching_cum_dict)
            gamesStartedPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesStartedPitching_cum_dict)
            winsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_winsPitching_cum_dict)
            lossesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_lossesPitching_cum_dict)
            completeGamesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_completeGamesPitching_cum_dict)
            shutoutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_shutoutsPitching_cum_dict)
            saves_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_saves_cum_dict)
            saveOpportunities_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_saveOpportunities_cum_dict)
            blownSaves_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_blownSaves_cum_dict)
            holds_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_holds_cum_dict)
            inningsPitched_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_inningsPitched_cum_dict)
            runsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_runsPitching_cum_dict)
            earnedRuns_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_earnedRuns_cum_dict)
            pitchesThrown_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_pitchesThrown_cum_dict)
            hitsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitsPitching_cum_dict)
            homeRunsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_homeRunsPitching_cum_dict)
            strikeOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_strikeOutsPitching_cum_dict)
            baseOnBallsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_baseOnBallsPitching_cum_dict)
            hitByPitchPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitByPitchPitching_cum_dict)
            intentionalWalksPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_intentionalWalksPitching_cum_dict)
            balks_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_balks_cum_dict)
            wildPitches_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_wildPitches_cum_dict)
            atBatsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_atBatsPitching_cum_dict)
            battersFaced_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_battersFaced_cum_dict)
            sacBuntsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacBuntsPitching_cum_dict)
            sacFliesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_sacFliesPitching_cum_dict)
            inheritedRunners_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_inheritedRunners_cum_dict)
            inheritedRunnersScored_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_inheritedRunnersScored_cum_dict)
            rbiPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_rbiPitching_cum_dict)
            flyOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_flyOutsPitching_cum_dict)
            airOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_airOutsPitching_cum_dict)
            doublesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_doublesPitching_cum_dict)
            triplesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_triplesPitching_cum_dict)
            caughtStealingPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_caughtStealingPitching_cum_dict)
            stolenBasesPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_stolenBasesPitching_cum_dict)
            outsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_outsPitching_cum_dict)
            balls_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_balls_cum_dict)
            strikes_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_strikes_cum_dict)
            hitBatsmen_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_hitBatsmen_cum_dict)
            pickoffsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_pickoffsPitching_cum_dict)
            catchersInterferencePitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_catchersInterferencePitching_cum_dict)
            assists_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_assists_cum_dict)
            putOuts_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_putOuts_cum_dict)
            errors_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_errors_cum_dict)
            chances_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_chances_cum_dict)
            gamesFinishedPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_gamesFinishedPitching_cum_dict)
            groundOutsPitching_cum[i] = self.extract_cumsum_score(row[0], row[1], season, self.feat_groundOutsPitching_cum_dict)

        output_df = pd.DataFrame({
            "date": date,
            "home": home, 
            "home_cum": home_cum,
            "positionCode": positionCode,
            "battingOrder": battingOrder,
            "gamesPlayedBatting": gamesPlayedBatting,
            "gamesPlayedBatting_cum": gamesPlayedBatting_cum,
            "plateAppearances": plateAppearances,
            "plateAppearances_cum": plateAppearances_cum,
            "atBats": atBats,
            "atBats_cum": atBats_cum,
            "hits": hits,
            "hits_cum": hits_cum,
            "doubles": doubles,
            "doubles_cum": doubles_cum,
            "triples": triples,
            "triples_cum": triples_cum,
            "homeRuns": homeRuns,
            "homeRuns_cum": homeRuns_cum,
            "rbi": rbi,
            "rbi_cum": rbi_cum,
            "runsScored": runsScored,
            "runsScored_cum": runsScored_cum,
            "totalBases": totalBases,
            "totalBases_cum": totalBases_cum,
            "baseOnBalls": baseOnBalls,
            "baseOnBalls_cum": baseOnBalls_cum,
            "hitByPitch": hitByPitch,
            "hitByPitch_cum": hitByPitch_cum,
            "intentionalWalks": intentionalWalks,
            "intentionalWalks_cum": intentionalWalks_cum,
            "groundIntoDoublePlay": groundIntoDoublePlay,
            "groundIntoDoublePlay_cum": groundIntoDoublePlay_cum,
            "groundIntoTriplePlay": groundIntoTriplePlay,
            "groundIntoTriplePlay_cum": groundIntoTriplePlay_cum,
            "strikeOuts": strikeOuts,
            "strikeOuts_cum": strikeOuts_cum,
            "sacBunts": sacBunts,
            "sacBunts_cum": sacBunts_cum,
            "sacFlies": sacFlies,
            "sacFlies_cum": sacFlies_cum,
            "caughtStealing": caughtStealing,
            "caughtStealing_cum": caughtStealing_cum,
            "stolenBases": stolenBases,
            "stolenBases_cum": stolenBases_cum,
            "leftOnBase": leftOnBase,
            "leftOnBase_cum": leftOnBase_cum,
            "catchersInterference": catchersInterference,
            "catchersInterference_cum": catchersInterference_cum,
            "pickoffs": pickoffs,
            "pickoffs_cum": pickoffs_cum,
            "flyOuts": flyOuts,
            "flyOuts_cum": flyOuts_cum,
            "groundOuts": groundOuts,
            "groundOuts_cum": groundOuts_cum,
            "gamesPlayedPitching": gamesPlayedPitching,
            "gamesPlayedPitching_cum": gamesPlayedPitching_cum,
            "gamesStartedPitching": gamesStartedPitching,
            "gamesStartedPitching_cum": gamesStartedPitching_cum,
            "winsPitching": winsPitching,
            "winsPitching_cum": winsPitching_cum,
            "lossesPitching": lossesPitching,
            "lossesPitching_cum": lossesPitching_cum,
            "completeGamesPitching": completeGamesPitching,
            "completeGamesPitching_cum": completeGamesPitching_cum,
            "shutoutsPitching": shutoutsPitching,
            "shutoutsPitching_cum": shutoutsPitching_cum,
            "saves": saves,
            "saves_cum": saves_cum,
            "saveOpportunities": saveOpportunities,
            "saveOpportunities_cum": saveOpportunities_cum,
            "blownSaves": blownSaves,
            "blownSaves_cum": blownSaves_cum,
            "holds": holds,
            "holds_cum": holds_cum,
            "inningsPitched": inningsPitched,
            "inningsPitched_cum": inningsPitched_cum,
            "runsPitching": runsPitching,
            "runsPitching_cum": runsPitching_cum,
            "earnedRuns": earnedRuns,
            "earnedRuns_cum": earnedRuns_cum,
            "pitchesThrown": pitchesThrown,
            "pitchesThrown_cum": pitchesThrown_cum,
            "hitsPitching": hitsPitching,
            "hitsPitching_cum": hitsPitching_cum,
            "homeRunsPitching": homeRunsPitching,
            "homeRunsPitching_cum": homeRunsPitching_cum,
            "strikeOutsPitching": strikeOutsPitching,
            "strikeOutsPitching_cum": strikeOutsPitching_cum,
            "baseOnBallsPitching": baseOnBallsPitching,
            "baseOnBallsPitching_cum": baseOnBallsPitching_cum,
            "hitByPitchPitching": hitByPitchPitching,
            "hitByPitchPitching_cum": hitByPitchPitching_cum,
            "intentionalWalksPitching": intentionalWalksPitching,
            "intentionalWalksPitching_cum": intentionalWalksPitching_cum,
            "balks": balks,
            "balks_cum": balks_cum,
            "wildPitches": wildPitches,
            "wildPitches_cum": wildPitches_cum,
            "atBatsPitching": atBatsPitching,
            "atBatsPitching_cum": atBatsPitching_cum,
            "battersFaced": battersFaced,
            "battersFaced_cum": battersFaced_cum,
            "sacBuntsPitching": sacBuntsPitching,
            "sacBuntsPitching_cum": sacBuntsPitching_cum,
            "sacFliesPitching": sacFliesPitching,
            "sacFliesPitching_cum": sacFliesPitching_cum,
            "inheritedRunners": inheritedRunners,
            "inheritedRunners_cum": inheritedRunners_cum,
            "inheritedRunnersScored": inheritedRunnersScored,
            "inheritedRunnersScored_cum": inheritedRunnersScored_cum,
            "rbiPitching": rbiPitching,
            "rbiPitching_cum": rbiPitching_cum,
            "flyOutsPitching": flyOutsPitching,
            "flyOutsPitching_cum": flyOutsPitching_cum,
            "airOutsPitching": airOutsPitching,
            "airOutsPitching_cum": airOutsPitching_cum,
            "doublesPitching": doublesPitching,
            "doublesPitching_cum": doublesPitching_cum,
            "triplesPitching": triplesPitching,
            "triplesPitching_cum": triplesPitching_cum,
            "caughtStealingPitching": caughtStealingPitching,
            "caughtStealingPitching_cum": caughtStealingPitching_cum,
            "stolenBasesPitching": stolenBasesPitching,
            "stolenBasesPitching_cum": stolenBasesPitching_cum,
            "outsPitching": outsPitching,
            "outsPitching_cum": outsPitching_cum,
            "balls": balls,
            "balls_cum": balls_cum,
            "strikes": strikes,
            "strikes_cum": strikes_cum,
            "hitBatsmen": hitBatsmen,
            "hitBatsmen_cum": hitBatsmen_cum,
            "pickoffsPitching": pickoffsPitching,
            "pickoffsPitching_cum": pickoffsPitching_cum,
            "catchersInterferencePitching": catchersInterferencePitching,
            "catchersInterferencePitching_cum": catchersInterferencePitching_cum,
            "assists": assists,
            "assists_cum": assists_cum,
            "putOuts": putOuts,
            "putOuts_cum": putOuts_cum,
            "errors": errors,
            "errors_cum": errors_cum,
            "chances": chances,
            "chances_cum": chances_cum,
            "gamesFinishedPitching": gamesFinishedPitching,
            "gamesFinishedPitching_cum": gamesFinishedPitching_cum,
            "groundOutsPitching": groundOutsPitching,
            "groundOutsPitching_cum": groundOutsPitching_cum,
        })

        # 打撃指標
        output_df["battingAverage"] = output_df["hits_cum"] / output_df["atBats_cum"] # 打率
        output_df["sluggingPercentage"] = output_df["totalBases_cum"] / output_df["atBats_cum"] # 長打率
        output_df["onBasePercentage"] = (output_df["hits_cum"] + output_df["baseOnBalls_cum"] + output_df["hitByPitch_cum"])/(output_df["atBats_cum"] + output_df["baseOnBalls_cum"] + output_df["hitByPitch_cum"] + output_df["sacFlies_cum"]) # 出塁率
        output_df["ops"] = output_df["sluggingPercentage"] + output_df["onBasePercentage"] # OPS
        output_df["isop"] = output_df["sluggingPercentage"] - output_df["battingAverage"] # IsoP
        output_df["isod"] = output_df["onBasePercentage"] - output_df["battingAverage"] # IsoD
        output_df["rc"] = (output_df["hits_cum"] + output_df["baseOnBalls_cum"]) * output_df["totalBases_cum"] / (output_df["atBats_cum"] + output_df["baseOnBalls_cum"]) # RC
        output_df["rc27"] = output_df["rc"] / (output_df["atBats_cum"] - output_df["hits_cum"] + output_df["caughtStealing_cum"] + output_df["sacBunts_cum"] + output_df["sacFlies_cum"] + output_df["groundIntoDoublePlay_cum"]) * 27 # RC27
        output_df["rc27"] = output_df["rc27"].replace({np.inf: 0})
        output_df["babip"] = (output_df["hits_cum"] - output_df["homeRuns_cum"]) / (output_df["atBats_cum"] - output_df["strikeOuts_cum"] - output_df["homeRuns_cum"] + output_df["sacFlies_cum"]) # BABIP
        output_df["bb_k"] = output_df["baseOnBalls_cum"] / output_df["strikeOuts_cum"] # BB/K
        output_df["k%"] = output_df["strikeOuts_cum"] / output_df["plateAppearances_cum"] # K%

        # 投手指標
        output_df["qs"] = ((output_df["inningsPitched"] >= 6) & (output_df["runsPitching"] <= 3)) * output_df["gamesStartedPitching"]
        output_df["hqs"] = ((output_df["inningsPitched"] >= 7) & (output_df["runsPitching"] <= 2)) * output_df["gamesStartedPitching"]
        output_df["era"] = output_df["earnedRuns_cum"] * 9 / output_df["inningsPitched_cum"] # 防御率
        output_df["wp"] = output_df["winsPitching_cum"] / (output_df["winsPitching_cum"] + output_df["lossesPitching_cum"]) # 勝率
        output_df["k9"] = output_df["strikeOutsPitching_cum"] * 9 / output_df["inningsPitched_cum"] # K/9(奪三振率)
        output_df["hits_allowed_average"] = output_df["hitsPitching_cum"] / output_df["atBatsPitching_cum"] # 被打率
        output_df["hp"] = output_df["holds_cum"] + output_df["winsPitching_cum"] # HP
        # output_df["qs%"] = output_df["qs_cum"] / output_df["gamesStartedPitching_cum"] # QS率
        # output_df["hqs%"] = output_df["hqs_cum"] / output_df["gamesStartedPitching_cum"] # HQS率
        output_df["k%_pitching"] = output_df["strikeOutsPitching_cum"] / output_df["battersFaced_cum"] # K%
        output_df["bb9"] = output_df["baseOnBallsPitching_cum"] * 9 / output_df["inningsPitched_cum"] # BB/9
        output_df["bb%"] = output_df["baseOnBallsPitching_cum"] / output_df["battersFaced_cum"] # BB%
        output_df["k_bb"] = output_df["strikeOutsPitching_cum"] / (output_df["baseOnBallsPitching_cum"] + output_df["hitByPitchPitching_cum"]) # K/BB
        output_df["hr9"] = output_df["homeRunsPitching_cum"] * 9 / output_df["inningsPitched_cum"] # HR/9
        output_df["babip_pitching"] = (output_df["hitsPitching_cum"] - output_df["homeRunsPitching_cum"]) / (output_df["atBatsPitching_cum"] - output_df["strikeOutsPitching_cum"] - output_df["homeRunsPitching_cum"] + output_df["sacFliesPitching_cum"]) # BABIP
        output_df["whip"] = (output_df["baseOnBallsPitching_cum"] + output_df["hitsPitching_cum"]) / output_df["inningsPitched_cum"] # WHIP

        # ランキング
        output_df["homeRuns_rank"] = output_df.groupby(["date"])["homeRuns_cum"].rank(ascending=False, method="min")
        output_df["hits_rank"] = output_df.groupby(["date"])["hits_cum"].rank(ascending=False, method="min")
        output_df["rbi_rank"] = output_df.groupby(["date"])["rbi_cum"].rank(ascending=False, method="min")
        output_df["doubles_rank"] = output_df.groupby(["date"])["doubles_cum"].rank(ascending=False, method="min")
        output_df["triples_rank"] = output_df.groupby(["date"])["triples_cum"].rank(ascending=False, method="min")
        output_df["runsScored_rank"] = output_df.groupby(["date"])["runsScored_cum"].rank(ascending=False, method="min")
        output_df["totalBases_rank"] = output_df.groupby(["date"])["totalBases_cum"].rank(ascending=False, method="min")
        output_df["baseOnBalls_rank"] = output_df.groupby(["date"])["baseOnBalls_cum"].rank(ascending=False, method="min")
        output_df["hitByPitch_rank"] = output_df.groupby(["date"])["hitByPitch_cum"].rank(ascending=False, method="min")
        output_df["strikeOuts_rank"] = output_df.groupby(["date"])["strikeOuts_cum"].rank(ascending=False, method="min")
        output_df["stolenBases_rank"] = output_df.groupby(["date"])["stolenBases_cum"].rank(ascending=False, method="min")
        output_df["plateAppearances_rank"] = output_df.groupby(["date"])["plateAppearances_cum"].rank(ascending=False, method="min")
        output_df["atBats_rank"] = output_df.groupby(["date"])["atBats_cum"].rank(ascending=False, method="min")

        output_df["battingAverage_rank"] = output_df[output_df["gamesPlayedBatting"] == 1.0].groupby(["date"])["battingAverage"].rank(ascending=False, method="min")
        output_df["ops_rank"] = output_df.groupby(["date"])["ops"].rank(ascending=False, method="min")
        output_df["isop_rank"] = output_df.groupby(["date"])["isop"].rank(ascending=False, method="min")
        output_df["isod_rank"] = output_df.groupby(["date"])["isod"].rank(ascending=False, method="min")
        output_df["rc_rank"] = output_df.groupby(["date"])["rc"].rank(ascending=False, method="min")
        output_df["rc27_rank"] = output_df.groupby(["date"])["rc27"].rank(ascending=False, method="min")
        output_df["babip_rank"] = output_df.groupby(["date"])["babip"].rank(ascending=False, method="min")
        output_df["bb_k_rank"] = output_df.groupby(["date"])["bb_k"].rank(ascending=False, method="min")
        output_df["k%_rank"] = output_df.groupby(["date"])["k%"].rank(ascending=False, method="min")

        output_df["winsPitching_rank"] = output_df.groupby(["date"])["winsPitching_cum"].rank(ascending=False, method="min")
        output_df["lossesPitching_rank"] = output_df.groupby(["date"])["lossesPitching_cum"].rank(ascending=False, method="min")
        output_df["saves_rank"] = output_df.groupby(["date"])["saves_cum"].rank(ascending=False, method="min")
        output_df["holds_rank"] = output_df.groupby(["date"])["holds_cum"].rank(ascending=False, method="min")
        output_df["completeGamesPitching_rank"] = output_df.groupby(["date"])["completeGamesPitching_cum"].rank(ascending=False, method="min")
        output_df["shutoutsPitching_rank"] = output_df.groupby(["date"])["shutoutsPitching_cum"].rank(ascending=False, method="min")
        output_df["inningsPitched_rank"] = output_df.groupby(["date"])["inningsPitched_cum"].rank(ascending=False, method="min")
        output_df["runsPitching_rank"] = output_df.groupby(["date"])["runsPitching_cum"].rank(ascending=False, method="min")
        output_df["earnedRuns_rank"] = output_df.groupby(["date"])["earnedRuns_cum"].rank(ascending=False, method="min")

        output_df["era_rank"] = output_df.groupby(["date"])["era"].rank(ascending=False, method="min")
        output_df["whip_rank"] = output_df.groupby(["date"])["whip"].rank(ascending=False, method="min")
        
        return output_df.drop(columns=["date"], axis=1).add_prefix("PlayerStats_")

    def extract_score(self, x, y, z, dict):
        if y in dict[x][z].keys():
            return dict[x][z][y]
        else:
            return np.nan

    def extract_cumsum_score(self, x, y, z, dict):
        previous_gamedates = [i for i in dict[x][z].keys() if i < y]
        if y in dict[x][z].keys():
            return dict[x][z][y]
        elif len(previous_gamedates) != 0:
            return dict[x][z][max(previous_gamedates)]
        elif len(previous_gamedates) == 0:
            return np.nan

    def add_cumsum2dict(self, x, y, z, dict, value):
        value = value if value == value else 0
        previous_gamedates = [i for i in dict[x][z].keys() if i < y]
        if len(previous_gamedates) != 0:
            return value + dict[x][z][max(previous_gamedates)]
        else:
            return value


class StandingsMetaBlock(BaseBlock):
    def __init__(self, columns: List[str]):
        self.columns = columns

    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool
    ):
        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df,
                standings_df,
                playerTwitterFollowers_df,
                teamTwitterFollowers_df
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame
    ):
        self.standings_df = rosters_df[["date", "playerId", "teamId"]].merge(standings_df[self.columns], on=["date", "teamId"], how="left")

        return merge_by_key(input_df, self.standings_df.drop(columns=["teamId"], axis=1)).add_prefix("StandingsMeta_")


def convert_cos(x, range):
    return np.cos(2 * np.pi * (x / range))


def convert_sin(x, range):
    return np.sin(2 * np.pi * (x / range))


class Datetime2trigonometricEncodingBlock(BaseBlock):
    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool
    ):
        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df,
                standings_df,
                playerTwitterFollowers_df,
                teamTwitterFollowers_df
            )
    
    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
    ):
        _input_df = input_df.copy()
        _input_df["day"] = pd.to_datetime(_input_df["date"].astype(str), format="%Y%m%d").dt.day
        _input_df["dow"] = pd.to_datetime(_input_df["date"].astype(str), format="%Y%m%d").dt.dayofweek

        _input_df["day2cos"] = _input_df["day"].apply(lambda x: convert_cos(x, 31))
        _input_df["day2sin"] = _input_df["day"].apply(lambda x: convert_sin(x, 31))
        _input_df["dow2cos"] = _input_df["dow"].apply(lambda x: convert_cos(x, 7))
        _input_df["dow2sin"] = _input_df["dow"].apply(lambda x: convert_sin(x, 7))

        return _input_df[["day2cos", "day2sin", "dow2cos", "dow2sin"]].add_prefix("DatetimeEncoding_")


class PlayerTwitterFollowerAggregateBlock(BaseBlock):
    def __init__(self):
        self.feat_playerTwitterFollowers_dict = defaultdict(lambda: defaultdict(int))

    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool
    ):
        if playerTwitterFollowers_df is not None:
            playerTwitterFollowers_df["year_month"] = [int(i[:6]) for i in playerTwitterFollowers_df["date"].astype(str).values]
            for i, row in enumerate(playerTwitterFollowers_df[["playerId", "numberOfFollowers", "year_month"]].values):
                self.feat_playerTwitterFollowers_dict[row[0]][row[2]] = row[1]

        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df,
                standings_df,
                playerTwitterFollowers_df,
                teamTwitterFollowers_df,
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
    ):
        numberOfFollowers = np.zeros(len(input_df), np.float32)

        for i, row in enumerate(tqdm(input_df[["playerId", "date"]].values)):
            year_month = int(row[1].astype(str)[:6])
            if row[0] in self.feat_playerTwitterFollowers_dict.keys():
                if year_month in self.feat_playerTwitterFollowers_dict[row[0]].keys():
                    numberOfFollowers[i] = self.feat_playerTwitterFollowers_dict[row[0]][year_month]
                else:
                    numberOfFollowers[i] = np.nan
            else:
                numberOfFollowers[i] = np.nan

        output_df = pd.DataFrame({"numberOfFollowers": numberOfFollowers})

        return output_df.add_prefix("PlayerTwitterFollowerAggregate_")


class TeamTwitterFollowerAggregateBlock(BaseBlock):
    def __init__(self):
        self.feat_teamTwitterFollowers_dict = defaultdict(lambda: defaultdict(int))

    def fit(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame,
        only_fit: bool
    ):
        if teamTwitterFollowers_df is not None:
            teamTwitterFollowers_df["year_month"] = [int(i[:6]) for i in teamTwitterFollowers_df["date"].astype(str).values]
            for i, row in enumerate(teamTwitterFollowers_df[["teamId", "numberOfFollowers", "year_month"]].values):
                self.feat_teamTwitterFollowers_dict[row[0]][row[2]] = row[1]

        if only_fit == False:
            return self.transform(
                input_df,
                target_df,
                rosters_df,
                playerBoxScores_df,
                standings_df,
                playerTwitterFollowers_df,
                teamTwitterFollowers_df,
            )

    def transform(
        self,
        input_df: pd.DataFrame,
        target_df: pd.DataFrame,
        rosters_df: pd.DataFrame,
        playerBoxScores_df: pd.DataFrame,
        standings_df: pd.DataFrame,
        playerTwitterFollowers_df: pd.DataFrame,
        teamTwitterFollowers_df: pd.DataFrame
    ):
        input_df = input_df.merge(rosters_df, on=["date", "playerId"], how="left")
        numberOfFollowers = np.zeros(len(input_df), np.float32)

        for i, row in enumerate(tqdm(input_df[["teamId", "date"]].values)):
            year_month = int(row[1].astype(str)[:6])
            if row[0] in self.feat_teamTwitterFollowers_dict.keys():
                if year_month in self.feat_teamTwitterFollowers_dict[row[0]].keys():
                    numberOfFollowers[i] = self.feat_teamTwitterFollowers_dict[row[0]][year_month]
                else:
                    numberOfFollowers[i] = np.nan
            else:
                numberOfFollowers[i] = np.nan

        output_df = pd.DataFrame({"numberOfFollowers": numberOfFollowers})

        return output_df.add_prefix("TeamTwitterFollowerAggregate_")

In [7]:
def create_train_feature(
    input_df: pd.DataFrame,
    target_df: pd.DataFrame,
    rosters_df: pd.DataFrame,
    playerBoxScores_df: pd.DataFrame,
    standings_df: pd.DataFrame,
    playerTwitterFollowers_df: pd.DataFrame,
    teamTwitterFollowers_df: pd.DataFrame,
    update_blocks: list,
    non_update_blocks: list,
    only_fit: bool
    ) -> pd.DataFrame:
    feat_df = pd.DataFrame()
    blocks = update_blocks + non_update_blocks

    for block in blocks:
        with timer(name=f"{str(block) + '_fit'}", logger=logger):
            try:
                out_feat_block = block.fit(
                    input_df,
                    target_df,
                    rosters_df,
                    playerBoxScores_df,
                    standings_df,
                    playerTwitterFollowers_df,
                    teamTwitterFollowers_df,
                    only_fit=False
                    )
            except Exception as e:
                print(f"Error on {block} fit.")
                raise e from e

            assert len(out_feat_block) == len(input_df), block

        feat_df = pd.concat([feat_df, out_feat_block], axis=1)

        del out_feat_block

    return feat_df


def fit_train_feature(
    input_df: pd.DataFrame,
    target_df: pd.DataFrame,
    rosters_df: pd.DataFrame,
    playerBoxScores_df: pd.DataFrame,
    standings_df: pd.DataFrame,
    playerTwitterFollowers_df: pd.DataFrame,
    teamTwitterFollowers_df: pd.DataFrame,
    update_blocks: list,
    non_update_blocks: list,
    only_fit: bool
    ) -> pd.DataFrame:
    blocks = update_blocks + non_update_blocks

    for block in blocks:
        with timer(name=f"{str(block) + '_fit'}", logger=logger):
            try:
                block.fit(
                    input_df,
                    target_df,
                    rosters_df,
                    playerBoxScores_df,
                    standings_df,
                    playerTwitterFollowers_df,
                    teamTwitterFollowers_df,
                    only_fit
                    )
            except Exception as e:
                print(f"Error on {block} fit.")
                raise e from e


def create_test_feature(
    input_df: pd.DataFrame,
    target_df: pd.DataFrame,
    rosters_df: pd.DataFrame,
    playerBoxScores_df: pd.DataFrame,
    standings_df: pd.DataFrame,
    playerTwitterFollowers_df: pd.DataFrame,
    teamTwitterFollowers_df: pd.DataFrame,
    update_blocks: list,
    non_update_blocks: list
    ) -> pd.DataFrame:
    is_update = True if len(update_blocks) != 0 else False
    update_feat_df = pd.DataFrame()
    non_update_feat_df = pd.DataFrame()
    
    if is_update:
        for block in update_blocks:
            with timer(name=f"{str(block) + '_update'}", logger=logger):
                try:
                    out_feat_block = block.fit(
                        input_df,
                        target_df,
                        rosters_df,
                        playerBoxScores_df,
                        standings_df,
                        playerTwitterFollowers_df,
                        teamTwitterFollowers_df,
                        only_fit=False,
                        )
                except Exception as e:
                    print(f"Error on {block} transform.")
                    raise e from e

                assert len(out_feat_block) == len(input_df), block

            update_feat_df = pd.concat([update_feat_df, out_feat_block], axis=1)

            del out_feat_block
        

    for block in non_update_blocks:
        with timer(name=f"{str(block) + '_transform'}", logger=logger):
            try:
                out_feat_block = block.transform(
                    input_df,
                    target_df,
                    rosters_df,
                    playerBoxScores_df,
                    standings_df,
                    playerTwitterFollowers_df,
                    teamTwitterFollowers_df
                )
            except Exception as e:
                print(f"Error on {block} transform.")
                raise e from e

            assert len(out_feat_block) == len(input_df), block

        non_update_feat_df = pd.concat([non_update_feat_df, out_feat_block], axis=1)

        del out_feat_block

    feat_df = pd.concat([update_feat_df, non_update_feat_df], axis=1) if is_update else non_update_feat_df

    del update_feat_df, non_update_feat_df

    return feat_df

## CV

In [8]:
def get_timeseries_holdout(train_df: pd.DataFrame, valid_start_date: int) -> List[tuple]:
    fold = []
    train_idx = np.array(train_df[train_df["date"] < valid_start_date].index)
    valid_idx = np.array(train_df[(train_df["date"] >= valid_start_date) & (train_df["date"] < (valid_start_date + 100))].index)
    valid_preds_idx = np.array(train_df[(train_df["date"] >= valid_start_date) & (train_df["date"] < (valid_start_date + 100)) & (train_df["playerForTestSetAndFuturePreds"] == True)].index)
    fold.append((train_idx, valid_idx, valid_preds_idx))

    return fold

## Model

In [9]:
class LightGBMTrainer:
    def __init__(self, params: dict, seeds: List[int]):
        self.params = params
        self.seeds = seeds
        self.models = []

    def fit(self, X_train: np.ndarray, y_train: np.ndarray, cv: List[tuple]):
        oof_all = []
        for i, seed in enumerate(self.seeds):
            self.params["seed"] = seed

            for train_idx, valid_idx, valid_preds_idx in cv:
                X_train_fold = X_train[train_idx]
                X_valid_fold = X_train[valid_preds_idx]

                y_train_fold = y_train[train_idx]
                y_valid_fold = y_train[valid_preds_idx]

                train_set = lgb.Dataset(X_train_fold, y_train_fold)
                valid_set = lgb.Dataset(X_valid_fold, y_valid_fold, reference=train_set)

                model = lgb.train(
                    params=self.params,
                    train_set=train_set,
                    valid_sets=[train_set, valid_set],
                    valid_names=["train", "valid"],
                    verbose_eval=100,
                )

                y_oof = model.predict(X_valid_fold, num_iteration=model.best_iteration)
                oof_all.append(y_oof)
                self.models.append(model)

        oof_all = np.mean(oof_all, axis=0)
        oof_all = np.clip(oof_all, 0, 100)
        oof_score = mean_absolute_error(oof_all, y_valid_fold)

        return oof_score, self.models

    def predict(self, X_test: np.ndarray):
        y_pred = np.mean([model.predict(X_test, num_iteration=model.best_iteration) for model in self.models], axis=0)

        return y_pred


def run_lgb(X_train, targets, seeds, fold):
    cv_scores = []
    models = []

    for i, target in enumerate(targets):
        print(f"Training for Target{i+1}")
        if i == 0:
            params = CFG.LGB_TARGET1_PARAMS
        elif i == 1:
            params = CFG.LGB_TARGET2_PARAMS
        elif i == 2:
            params = CFG.LGB_TARGET3_PARAMS
        elif i == 3:
            params = CFG.LGB_TARGET4_PARAMS
        trainer = LightGBMTrainer(params, seeds)
        oof_score_tmp, models_tmp = trainer.fit(X_train, target, fold)
        cv_scores.append(oof_score_tmp)
        models.append(models_tmp)
        print(f"Local Target{i+1} OOF-MAE : {np.mean(oof_score_tmp)}")
        print("-"*50)

    print(f"Local MCMAE : {np.mean(cv_scores)}")

    return models

## Training

In [10]:
# # set-up logger
# logger = get_logger()

# # create feature
# update_blocks = [
#     PlayerStatsBlock(columns=[
#         "playerId", 
#         "date", 
#         "home", 
#         "positionCode", 
#         "battingOrder", 
#         "gamesPlayedBatting", 
#         "plateAppearances",
#         "atBats",
#         "hits",
#         "doubles",
#         "triples",
#         "homeRuns",
#         "rbi",
#         "runsScored",
#         "totalBases",
#         "baseOnBalls",
#         "hitByPitch",
#         "intentionalWalks",
#         "groundIntoDoublePlay",
#         "groundIntoTriplePlay",
#         "strikeOuts",
#         "sacBunts",
#         "sacFlies",
#         "caughtStealing",
#         "stolenBases",
#         "leftOnBase",
#         "catchersInterference",
#         "pickoffs",
#         "flyOuts",
#         "groundOuts",
#         "gamesPlayedPitching",
#         "gamesStartedPitching",
#         "winsPitching",
#         "lossesPitching",
#         "completeGamesPitching",
#         "shutoutsPitching",
#         "saves",
#         "saveOpportunities",
#         "blownSaves",
#         "holds",
#         "inningsPitched",
#         "runsPitching",
#         "earnedRuns",
#         "pitchesThrown",
#         "hitsPitching",
#         "homeRunsPitching",
#         "strikeOutsPitching",
#         "baseOnBallsPitching",
#         "hitByPitchPitching",
#         "intentionalWalksPitching",
#         "balks",
#         "wildPitches",
#         "atBatsPitching",
#         "battersFaced",
#         "sacBuntsPitching",
#         "sacFliesPitching",
#         "inheritedRunners",
#         "inheritedRunnersScored",
#         "rbiPitching",
#         "flyOutsPitching",
#         "airOutsPitching",
#         "doublesPitching",
#         "triplesPitching",
#         "caughtStealingPitching",
#         "stolenBasesPitching",
#         "outsPitching",
#         "balls",
#         "strikes",
#         "hitBatsmen",
#         "pickoffsPitching",
#         "catchersInterferencePitching",
#         "assists",
#         "putOuts",
#         "errors",
#         "chances",
#         "gamesFinishedPitching",
#         "groundOutsPitching",
#     ]),
#     PlayerTwitterFollowerAggregateBlock(),
#     TeamTwitterFollowerAggregateBlock(),
# ]
# non_update_blocks = [
#     TargetAggregateBlock(periods=[
#         # 20210201,
#         # 20210301,
#         20210401,
#         20210501,
#         20210601,
#         20210701,
#     ]),
#     RostersLabelEncodingBlock(columns=[
#         "teamId",
#         "status"
#     ]),
#     PlayersLabelEncodingBlock(columns=[
#         "birthCountry",
#         # "birthCity",
#         # "birthStateProvince",
#         # "heightInches",
#         # "weight",
#         "primaryPositionCode",
#         # "playerAge"
#     ]),
#     StandingsMetaBlock(columns=[
#         'date',
#         # 'season',
#         # 'gameDate',
#         'divisionId',
#         'teamId',
#         # 'teamName',
#         # 'streakCode',
#         'divisionRank',
#         'leagueRank',
#         'wildCardRank',
#         # 'leagueGamesBack',
#         # 'sportGamesBack',
#         # 'divisionGamesBack',
#         'wins',
#         'losses',
#         'pct',
#         'runsAllowed',
#         'runsScored',
#         # 'divisionChamp',
#         # 'divisionLeader',
#         # 'wildCardLeader',
#         # 'eliminationNumber',
#         # 'wildCardEliminationNumber',
#         'homeWins',
#         'homeLosses',
#         'awayWins',
#         'awayLosses',
#         'lastTenWins',
#         'lastTenLosses',
#         'extraInningWins',
#         'extraInningLosses',
#         'oneRunWins',
#         'oneRunLosses',
#         'dayWins',
#         'dayLosses',
#         'nightWins',
#         'nightLosses',
#         'grassWins',
#         'grassLosses',
#         'turfWins',
#         'turfLosses',
#         'divWins',
#         'divLosses',
#         'alWins',
#         'alLosses',
#         'nlWins',
#         'nlLosses',
#         'xWinLossPct'
#     ]),
#     Datetime2trigonometricEncodingBlock(),
# ]

# # create features
# input_df = mlb_train_ds.train_next[CFG.OBJECT_ID].copy()
# target_df = mlb_train_ds.train_next
# rosters_df = mlb_train_ds.train_rosters
# playerBoxScores_df = mlb_train_ds.train_scores
# standings_df = mlb_train_ds.train_standings
# playerTwitterFollowers_df = mlb_train_ds.train_ptf
# teamTwitterFollowers_df = mlb_train_ds.train_ttf

# # for inference
# fit_train_feature(
#     input_df,
#     target_df,
#     rosters_df,
#     playerBoxScores_df,
#     standings_df,
#     playerTwitterFollowers_df,
#     teamTwitterFollowers_df,
#     update_blocks,
#     non_update_blocks,
#     only_fit=True,
# )

In [11]:
# set-up logger
logger = get_logger()

# create feature
update_blocks = [
    PlayerStatsBlock(columns=[
        "playerId", 
        "date", 
        "home", 
        "positionCode", 
        "battingOrder", 
        "gamesPlayedBatting", 
        "plateAppearances",
        "atBats",
        "hits",
        "doubles",
        "triples",
        "homeRuns",
        "rbi",
        "runsScored",
        "totalBases",
        "baseOnBalls",
        "hitByPitch",
        "intentionalWalks",
        "groundIntoDoublePlay",
        "groundIntoTriplePlay",
        "strikeOuts",
        "sacBunts",
        "sacFlies",
        "caughtStealing",
        "stolenBases",
        "leftOnBase",
        "catchersInterference",
        "pickoffs",
        "flyOuts",
        "groundOuts",
        "gamesPlayedPitching",
        "gamesStartedPitching",
        "winsPitching",
        "lossesPitching",
        "completeGamesPitching",
        "shutoutsPitching",
        "saves",
        "saveOpportunities",
        "blownSaves",
        "holds",
        "inningsPitched",
        "runsPitching",
        "earnedRuns",
        "pitchesThrown",
        "hitsPitching",
        "homeRunsPitching",
        "strikeOutsPitching",
        "baseOnBallsPitching",
        "hitByPitchPitching",
        "intentionalWalksPitching",
        "balks",
        "wildPitches",
        "atBatsPitching",
        "battersFaced",
        "sacBuntsPitching",
        "sacFliesPitching",
        "inheritedRunners",
        "inheritedRunnersScored",
        "rbiPitching",
        "flyOutsPitching",
        "airOutsPitching",
        "doublesPitching",
        "triplesPitching",
        "caughtStealingPitching",
        "stolenBasesPitching",
        "outsPitching",
        "balls",
        "strikes",
        "hitBatsmen",
        "pickoffsPitching",
        "catchersInterferencePitching",
        "assists",
        "putOuts",
        "errors",
        "chances",
        "gamesFinishedPitching",
        "groundOutsPitching",
    ]),
    PlayerTwitterFollowerAggregateBlock(),
    TeamTwitterFollowerAggregateBlock(),
]
non_update_blocks = [
    TargetAggregateBlock(periods=[
        # 20210201,
        # 20210301,
        20210401,
        20210501,
        20210601,
        20210701,
    ]),
    RostersLabelEncodingBlock(columns=[
        "teamId",
        "status"
    ]),
    PlayersLabelEncodingBlock(columns=[
        "birthCountry",
        # "birthCity",
        # "birthStateProvince",
        # "heightInches",
        # "weight",
        "primaryPositionCode",
        # "playerAge"
    ]),
    StandingsMetaBlock(columns=[
        'date',
        # 'season',
        # 'gameDate',
        'divisionId',
        'teamId',
        # 'teamName',
        # 'streakCode',
        'divisionRank',
        'leagueRank',
        'wildCardRank',
        # 'leagueGamesBack',
        # 'sportGamesBack',
        # 'divisionGamesBack',
        'wins',
        'losses',
        'pct',
        'runsAllowed',
        'runsScored',
        # 'divisionChamp',
        # 'divisionLeader',
        # 'wildCardLeader',
        # 'eliminationNumber',
        # 'wildCardEliminationNumber',
        'homeWins',
        'homeLosses',
        'awayWins',
        'awayLosses',
        'lastTenWins',
        'lastTenLosses',
        'extraInningWins',
        'extraInningLosses',
        'oneRunWins',
        'oneRunLosses',
        'dayWins',
        'dayLosses',
        'nightWins',
        'nightLosses',
        'grassWins',
        'grassLosses',
        'turfWins',
        'turfLosses',
        'divWins',
        'divLosses',
        'alWins',
        'alLosses',
        'nlWins',
        'nlLosses',
        'xWinLossPct'
    ]),
    Datetime2trigonometricEncodingBlock(),
]

# create features
input_df = mlb_train_ds.train_next[CFG.OBJECT_ID].copy()

# input_df["_date"] = pd.to_datetime(input_df["date"], format="%Y%m%d")
# input_2018 = input_df[(input_df["_date"] >= "2018-03-29") & (input_df["_date"] <= "2018-10-28")]
# input_2019 = input_df[(input_df["_date"] >= "2019-03-20") & (input_df["_date"] <= "2019-10-30")]
# input_2020 = input_df[(input_df["_date"] >= "2020-07-23") & (input_df["_date"] <= "2020-10-28")]
# input_2021 = input_df[(input_df["_date"] >= "2021-02-28") & (input_df["_date"] <= "2021-10-31")]
# input_df = pd.concat([input_2018, input_2019, input_2020, input_2021], axis=0).drop(columns=["_date"], axis=1).reset_index(drop=True)

input_df = input_df.merge(mlb_train_ds.players[["playerId", "playerForTestSetAndFuturePreds"]], on="playerId", how="left")
target_df = mlb_train_ds.train_next
rosters_df = mlb_train_ds.train_rosters
playerBoxScores_df = mlb_train_ds.train_scores
standings_df = mlb_train_ds.train_standings
playerTwitterFollowers_df = mlb_train_ds.train_ptf
teamTwitterFollowers_df = mlb_train_ds.train_ttf

if DEBUG:
    input_df = input_df[input_df["date"] >= 20210401].reset_index(drop=True)
    target_df = target_df[target_df["date"] >= 20210401].reset_index(drop=True)
    rosters_df = rosters_df[rosters_df["date"] >= 20210401].reset_index(drop=True)
    playerBoxScores_df = playerBoxScores_df[playerBoxScores_df["date"] >= 20210401].reset_index(drop=True)
    standings_df = standings_df[standings_df["date"] >= 20210401].reset_index(drop=True)
    playerTwitterFollowers_df = playerTwitterFollowers_df[playerTwitterFollowers_df["date"] >= 20210401].reset_index(drop=True)
    teamTwitterFollowers_df = teamTwitterFollowers_df[teamTwitterFollowers_df["date"] >= 20210401].reset_index(drop=True)

train_feat_df = create_train_feature(
    input_df,
    target_df,
    rosters_df,
    playerBoxScores_df,
    standings_df,
    playerTwitterFollowers_df,
    teamTwitterFollowers_df,
    update_blocks,
    non_update_blocks,
    only_fit=False,
)

X_train = train_feat_df.values
targets = [target_df[col].values for col in CFG.TARGETS]

if DEBUG:
    targets = [target_df[col].values for col in CFG.TARGETS]

# split train/valid
fold = get_timeseries_holdout(input_df, valid_start_date=20210701)

# training
models = run_lgb(X_train, targets, CFG.SEEDS, fold)

[2021-08-01 02:52:56,034] [INFO] [logger set up]
[2021-08-01 02:52:56,734] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_fit> start]


  0%|          | 0/39714 [00:00<?, ?it/s]

  0%|          | 0/222588 [00:00<?, ?it/s]

[2021-08-01 02:54:37,862] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_fit> done in 101.13 s]
[2021-08-01 02:54:37,916] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_fit> start]


  0%|          | 0/222588 [00:00<?, ?it/s]

[2021-08-01 02:54:39,042] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_fit> done in 1.13 s]
[2021-08-01 02:54:39,091] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_fit> start]


  0%|          | 0/222588 [00:00<?, ?it/s]

[2021-08-01 02:54:41,179] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_fit> done in 2.09 s]
[2021-08-01 02:54:41,375] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_fit> start]


  x = np.asarray((x - loc)/scale, dtype=dtyp)
  x = np.asarray((x - loc)/scale, dtype=dtyp)
  x = np.asarray((x - loc)/scale, dtype=dtyp)
  x = np.asarray((x - loc)/scale, dtype=dtyp)


[2021-08-01 02:56:51,339] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_fit> done in 129.96 s]
[2021-08-01 02:56:51,581] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_fit> start]
[2021-08-01 02:56:51,679] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_fit> done in 0.10 s]
[2021-08-01 02:56:52,075] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_fit> start]
[2021-08-01 02:56:52,102] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_fit> done in 0.03 s]
[2021-08-01 02:56:52,344] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_fit> start]
[2021-08-01 02:56:52,564] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_fit> done in 0.22 s]
[2021-08-01 02:56:52,820] [INFO] [<<__main__.Datetime2trigonometricEncodingBlock object at 0x105db9490>_fit> start]
[2021-08-01 02:56:54,319] [INFO] [<<__main__.Datetime2trigonometricEncodingBlock object at 0x105db9490>_fit> done in 1.50 s



Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.61968	valid's l1: 0.912317
[200]	train's l1: 0.61878	valid's l1: 0.911206
[300]	train's l1: 0.617773	valid's l1: 0.909749
[400]	train's l1: 0.614284	valid's l1: 0.906536
[500]	train's l1: 0.614266	valid's l1: 0.90654
Early stopping, best iteration is:
[441]	train's l1: 0.61428	valid's l1: 0.906509
Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.61696	valid's l1: 0.912286
[200]	train's l1: 0.616683	valid's l1: 0.912094
[300]	train's l1: 0.616665	valid's l1: 0.912069
Early stopping, best iteration is:
[290]	train's l1: 0.616665	valid's l1: 0.912067
Local Target1 OOF-MAE : 0.9074583972649649
--------------------------------------------------
Training for Target2




Training until validation scores don't improve for 100 rounds
[100]	train's l1: 1.04274	valid's l1: 1.11742
[200]	train's l1: 0.986633	valid's l1: 1.09605
Early stopping, best iteration is:
[199]	train's l1: 0.987203	valid's l1: 1.09573
Training until validation scores don't improve for 100 rounds
[100]	train's l1: 1.04054	valid's l1: 1.13371
[200]	train's l1: 0.993205	valid's l1: 1.13543
Early stopping, best iteration is:
[165]	train's l1: 1.0033	valid's l1: 1.12726
Local Target2 OOF-MAE : 1.1006564912581913
--------------------------------------------------
Training for Target3




Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.500376	valid's l1: 0.693462
[200]	train's l1: 0.498128	valid's l1: 0.689088
[300]	train's l1: 0.492322	valid's l1: 0.690493
Early stopping, best iteration is:
[216]	train's l1: 0.495791	valid's l1: 0.686705
Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.500376	valid's l1: 0.693462
[200]	train's l1: 0.498128	valid's l1: 0.689088
[300]	train's l1: 0.492322	valid's l1: 0.690493
Early stopping, best iteration is:
[216]	train's l1: 0.495791	valid's l1: 0.686705
Local Target3 OOF-MAE : 0.6867017141576985
--------------------------------------------------
Training for Target4




Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.750591	valid's l1: 0.710646
Early stopping, best iteration is:
[7]	train's l1: 1.07315	valid's l1: 0.624701
Training until validation scores don't improve for 100 rounds
[100]	train's l1: 0.741231	valid's l1: 0.756988
Early stopping, best iteration is:
[9]	train's l1: 1.02977	valid's l1: 0.616279
Local Target4 OOF-MAE : 0.6160397902965397
--------------------------------------------------
Local MCMAE : 0.8277140982443486


In [12]:
def visualize_feature_importance(models, feat_train_df) -> plotly.graph_objects.Figure:
    '''LightGBMのfeature importanceを可視化
    '''
    for target_i in range(len(models)):
        feature_importance_df = pd.DataFrame()
        for i, model in enumerate(models[target_i]):
            _df = pd.DataFrame()
            _df['feature_importance'] = model.feature_importance(importance_type="gain")
            _df['feature'] = feat_train_df.columns
            _df['model_no'] = i + 1
            feature_importance_df = pd.concat([feature_importance_df, _df], 
                                            axis=0, ignore_index=True)

        order = feature_importance_df.groupby('feature')\
            .mean()[['feature_importance']]\
            .sort_values('feature_importance', ascending=False).index[:50]
        
        fig = px.box(
            feature_importance_df.query("feature in @order"),
            x="feature_importance",
            y="feature",
            category_orders={"feature": order},
            width=1250,
            height=900,
            title=f"Target{target_i+1} Top 50 feature importance",
        )
        fig.update_yaxes(showgrid=True)
        fig.show()

In [13]:
visualize_feature_importance(models, train_feat_df)

In [24]:
# for target, model_tmps in zip(CFG.TARGETS, models):
#     for i, model_tmp in enumerate(model_tmps):
#         model_tmp.save_model(f"{CFG.MODEL_PATH}/lgb_{target}_{i}.txt")

## Inference

In [15]:
def update_dataset(input_df: pd.DataFrame, sample_prediction_df: pd.DataFrame, eval_start_date: int):
    '''datasetの更新
    '''
    # columnがNaNでなければ分岐処理が実行される(pd.isnaとかでも代用可)
    # NaN == NaNはFalseになる
    ####################
    # rosters #
    ####################
    if input_df["rosters"].iloc[0] == input_df["rosters"].iloc[0]:
        test_rosters_df = pd.read_json(input_df["rosters"].iloc[0])
    else:
        test_rosters_df = sample_prediction_df[["playerId"]].copy()
        for col in mlb_train_ds.train_rosters.columns:
            if col == "playerId": continue
            test_rosters_df[col] = np.nan

    test_rosters_df["date"] = input_df.iloc[0]["date"]
    ####################
    # playerBoxScores #
    ####################
    if (input_df["playerBoxScores"].iloc[0] == input_df["playerBoxScores"].iloc[0]) & (input_df.iloc[0]["date"] >= eval_start_date):
        test_playerBoxScores_df = pd.read_json(input_df["playerBoxScores"].iloc[0])
        test_playerBoxScores_df["date"] = input_df.iloc[0]["date"]
    else:
        test_playerBoxScores_df = None
    # ####################
    # # teamBoxScores #
    # ####################
    # if input_df["teamBoxScores"].iloc[0] == input_df["teamBoxScores"].iloc[0]:
    #     test_teamBoxScores = pd.read_json(input_df["teamBoxScores"].iloc[0])
    # else:
    #     cols = mlb_train_ds.train_tbs.drop(columns="date", axis=1).columns.tolist()
    #     test_teamBoxScores = pd.DataFrame(columns=cols)
    #     test_teamBoxScores["teamId"] = mlb_train_ds.train_tbs.teamId.unique()

    # test_teamBoxScores["date"] = input_df.iloc[0]["date"]
    # if input_df.iloc[0]["date"] > 20210430:
    #     mlb_train_ds.train_tbs = reduce_mem_usage(pd.concat([mlb_train_ds.train_tbs, test_teamBoxScores], axis=0, ignore_index=True))
    # ####################
    # # games #
    # ####################
    # if input_df["games"].iloc[0] == input_df["games"].iloc[0]:
    #     test_games = pd.read_json(input_df["games"].iloc[0])
    # else:
    #     cols = mlb_train_ds.train_games.drop(columns="date", axis=1).columns.tolist()
    #     test_games = pd.DataFrame(columns=cols)

    # test_games["date"] = input_df.iloc[0]["date"]
    # if input_df.iloc[0]["date"] > 20210430:
    #     mlb_train_ds.train_games = reduce_mem_usage(pd.concat([mlb_train_ds.train_games, test_games], axis=0, ignore_index=True))
    ####################
    # staindings #
    ####################
    if input_df["standings"].iloc[0] == input_df["standings"].iloc[0]:
        test_standings_df = pd.read_json(input_df["standings"].iloc[0])
    else:
        cols = mlb_train_ds.train_standings.drop(columns="date", axis=1).columns.tolist()
        test_standings_df = pd.DataFrame(columns=cols)
    
    test_standings_df["date"] = input_df.iloc[0]["date"]
    # ####################
    # # awards #
    # ####################
    # if input_df["awards"].iloc[0] == input_df["awards"].iloc[0]:
    #     test_awards = pd.read_json(input_df["awards"].iloc[0])
    # else:
    #     cols = mlb_train_ds.train_awards.drop(columns="date", axis=1).columns.tolist()
    #     test_awards = pd.DataFrame(columns=cols)
    
    # test_awards["date"] = input_df.iloc[0]["date"]
    # if input_df.iloc[0]["date"] > 20210430:
    #     mlb_train_ds.train_awards = reduce_mem_usage(pd.concat([mlb_train_ds.train_awards, test_awards], axis=0, ignore_index=True))
    ####################
    # playerTwitterFollowers #
    ####################
    if (input_df["playerTwitterFollowers"].iloc[0] == input_df["playerTwitterFollowers"].iloc[0]) & (input_df.iloc[0]["date"] >= eval_start_date):
        test_playerTwitterFollowers_df = pd.read_json(input_df["playerTwitterFollowers"].iloc[0])
        test_playerTwitterFollowers_df["date"] = input_df.iloc[0]["date"]
    else:
        test_playerTwitterFollowers_df = None
    ####################
    # teamTwitterFollowers #
    ####################
    if (input_df["teamTwitterFollowers"].iloc[0] == input_df["teamTwitterFollowers"].iloc[0]) & (input_df.iloc[0]["date"] >= eval_start_date):
        test_teamTwitterFollowers_df = pd.read_json(input_df["teamTwitterFollowers"].iloc[0])
        test_teamTwitterFollowers_df["date"] = input_df.iloc[0]["date"]
    else:
        test_teamTwitterFollowers_df = None

    return test_rosters_df, test_playerBoxScores_df, test_standings_df, test_playerTwitterFollowers_df, test_teamTwitterFollowers_df

In [16]:
# models = []
# for target in CFG.TARGETS:
#     tmp = [lgb.Booster(model_file=f"{CFG.MODEL_PATH}/lgb_{target}_{i}.txt") for i in range(len(CFG.SEEDS))]
#     models.append(tmp)

In [17]:
# env = mlb.make_env()
# iter_test = env.iter_test()

# for (test_df, sample_prediction_df) in iter_test:
#     sample_prediction_df = sample_prediction_df.reset_index(drop=True)
#     # create dataset
#     test_df = test_df.reset_index()
#     test_df = test_df.rename(columns={"index": "date"})
#     sample_prediction_df["date"] = test_df.iloc[0]["date"]
#     sample_prediction_df["playerId"] = sample_prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))
#     test_rosters_df, test_playerBoxScores_df, test_standings_df, test_playerTwitterFollowers_df, test_teamTwitterFollowers_df = update_dataset(test_df, sample_prediction_df, 20210718)
#     # create features
#     test_feat_df = create_test_feature(
#         sample_prediction_df,
#         target_df,
#         test_rosters_df,
#         test_playerBoxScores_df,
#         test_standings_df,
#         test_playerTwitterFollowers_df,
#         test_teamTwitterFollowers_df,
#         update_blocks, 
#         non_update_blocks)
#     X_test = test_feat_df.values
#     # prediction
#     for target, model in zip(CFG.TARGETS, models):
#         pred = np.mean([model_.predict(X_test, num_iteration=model_.best_iteration) for model_ in model], axis=0)
#         sample_prediction_df[target] = np.clip(pred, 0, 100)

#     sample_prediction_df = sample_prediction_df.drop(columns=["playerId", "date"], axis=1)

#     env.predict(sample_prediction_df)
#     del test_feat_df, test_df, sample_prediction_df

In [18]:
# Local inference check
# test_df = mlb_train_ds.example_test.query("date <= 20210426").copy()
# sample_prediction_df = mlb_train_ds.sample_submission.query("date <= 20210426").copy()
# sample_prediction_df["playerId"] = sample_prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))

In [19]:
class Environment:
    def __init__(self,
                 data_dir: str,
                 eval_start_day: int,
                 eval_end_day: Optional[int],
                 use_updated: bool,
                 multiple_days_per_iter: bool):
        warnings.warn('this is mock module for mlb')

        postfix = '_updated' if use_updated else ''
        
        # recommend to replace this with pickle, feather etc to speedup preparing data
        df_train = pd.read_csv(os.path.join(data_dir, f'train{postfix}.csv'))

        players = pd.read_csv(os.path.join(data_dir, 'players.csv'))

        self.players = players[players['playerForTestSetAndFuturePreds'] == True]['playerId'].astype(str)
        if eval_end_day is not None:
            self.df_train = df_train.set_index('date').loc[eval_start_day:eval_end_day]
        else:
            self.df_train = df_train.set_index('date').loc[eval_start_day:]
        self.date = self.df_train.index.values
        self.n_rows = len(self.df_train)
        self.multiple_days_per_iter = multiple_days_per_iter

        assert self.n_rows > 0, 'no data to emulate'

        self.prediction_df = pd.DataFrame()

    def predict(self, df: pd.DataFrame) -> None:
        # if you want to emulate public LB, store your prediction here and calculate MAE
        self.prediction_df = pd.concat([self.prediction_df, df], axis=0).reset_index(drop=True)

    def iter_test(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
        if self.multiple_days_per_iter:
            for i in range(self.n_rows // 2):
                date1 = self.date[2 * i]
                date2 = self.date[2 * i + 1]
                sample_sub1 = self._make_sample_sub(date1)
                sample_sub2 = self._make_sample_sub(date2)
                sample_sub = pd.concat([sample_sub1, sample_sub2]).reset_index(drop=True)
                df = self.df_train.loc[date1:date2]

                yield df, sample_sub.set_index('date')
        else:
            for i in range(self.n_rows):
                date = self.date[i]
                sample_sub = self._make_sample_sub(date)
                df = self.df_train.loc[date:date]

                yield df, sample_sub.set_index('date')

    def _make_sample_sub(self, date: int) -> pd.DataFrame:
        next_day = (pd.to_datetime(date, format='%Y%m%d') + pd.to_timedelta(1, 'd')).strftime('%Y%m%d')
        sample_sub = pd.DataFrame()
        sample_sub['date_playerId'] = next_day + '_' + self.players
        sample_sub['target1'] = 0
        sample_sub['target2'] = 0
        sample_sub['target3'] = 0
        sample_sub['target4'] = 0
        sample_sub['date'] = date
        return sample_sub


class MLBEmulator:
    def __init__(self,
                 data_dir: str = '../input/mlb-player-digital-engagement-forecasting',
                 eval_start_day: int = 20210401,
                 eval_end_day: Optional[int] = 20210430,
                 use_updated: bool = True,
                 multiple_days_per_iter: bool = False):
        self.data_dir = data_dir
        self.eval_start_day = eval_start_day
        self.eval_end_day = eval_end_day
        self.use_updated = use_updated
        self.multiple_days_per_iter = multiple_days_per_iter

    def make_env(self) -> Environment:
        return Environment(self.data_dir,
                           self.eval_start_day,
                           self.eval_end_day,
                           self.use_updated,
                           self.multiple_days_per_iter)


emulation_mode = True

if emulation_mode:
    mlb = MLBEmulator(eval_start_day=20210701, eval_end_day=20210731)
else:
    import mlb

env = mlb.make_env()
iter_test = env.iter_test()


for (test_df, sample_prediction_df) in iter_test:
    sample_prediction_df = sample_prediction_df.reset_index(drop=True)
    # create dataset
    test_df = test_df.reset_index()
    test_df = test_df.rename(columns={"index": "date"})
    sample_prediction_df["date"] = test_df.iloc[0]["date"]
    sample_prediction_df["playerId"] = sample_prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))
    test_rosters_df, test_playerBoxScores_df, test_standings_df, test_playerTwitterFollowers_df, test_teamTwitterFollowers_df = update_dataset(test_df, sample_prediction_df, 20210718)
    # create features
    test_feat_df = create_test_feature(
        sample_prediction_df,
        target_df,
        test_rosters_df,
        test_playerBoxScores_df,
        test_standings_df,
        test_playerTwitterFollowers_df,
        test_teamTwitterFollowers_df,
        update_blocks, 
        non_update_blocks)
    X_test = test_feat_df.values
    # prediction
    for target, model in zip(CFG.TARGETS, models):
        pred = np.mean([model_.predict(X_test, num_iteration=model_.best_iteration) for model_ in model], axis=0)
        sample_prediction_df[target] = np.clip(pred, 0, 100)

    sample_prediction_df = sample_prediction_df.drop(columns=["playerId", "date"], axis=1)

    env.predict(sample_prediction_df)


this is mock module for mlb



[2021-08-01 03:00:00,062] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:01,039] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.98 s]
[2021-08-01 03:00:01,040] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:01,074] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:01,078] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:01,123] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.04 s]
[2021-08-01 03:00:01,127] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:01,147] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:01,152] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:01,163] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:01,170] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:01,175] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:01,183] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:01,196] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:02,101] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.76 s]
[2021-08-01 03:00:02,102] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:02,135] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:02,137] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:02,183] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:02,185] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:02,202] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:02,207] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:02,217] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:02,224] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:02,230] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:02,237] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:02,251] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:03,146] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.75 s]
[2021-08-01 03:00:03,148] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:03,178] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:03,180] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:03,224] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.04 s]
[2021-08-01 03:00:03,226] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:03,242] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:03,248] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:03,256] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:03,262] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:03,267] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.00 s]
[2021-08-01 03:00:03,273] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:03,287] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:04,222] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.79 s]
[2021-08-01 03:00:04,223] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:04,256] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:04,258] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:04,312] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:04,315] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:04,332] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:04,337] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:04,348] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:04,354] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:04,359] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:04,366] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:04,380] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:05,493] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.98 s]
[2021-08-01 03:00:05,494] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:05,537] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.04 s]
[2021-08-01 03:00:05,540] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:05,599] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.06 s]
[2021-08-01 03:00:05,602] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:05,627] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.03 s]
[2021-08-01 03:00:05,633] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:05,643] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:05,651] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:05,655] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.00 s]
[2021-08-01 03:00:05,661] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:05,673] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:06,629] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.81 s]
[2021-08-01 03:00:06,631] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:06,670] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.04 s]
[2021-08-01 03:00:06,672] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:06,754] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.08 s]
[2021-08-01 03:00:06,758] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:06,783] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:06,788] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:06,799] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:06,810] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:06,817] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:06,825] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:06,843] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:07,787] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.76 s]
[2021-08-01 03:00:07,789] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:07,819] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:07,821] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:07,868] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:07,871] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:07,894] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:07,899] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:07,912] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:07,920] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:07,925] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:07,932] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:07,947] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:08,844] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.76 s]
[2021-08-01 03:00:08,845] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:08,874] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:08,877] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:08,921] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.04 s]
[2021-08-01 03:00:08,923] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:08,940] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:08,948] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:08,957] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:08,963] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:08,968] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:08,975] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:08,988] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:09,861] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.74 s]
[2021-08-01 03:00:09,862] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:09,893] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:09,894] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:09,937] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.04 s]
[2021-08-01 03:00:09,940] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:09,957] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:09,962] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:09,971] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:09,978] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:09,983] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.00 s]
[2021-08-01 03:00:09,989] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:10,001] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:10,892] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.76 s]
[2021-08-01 03:00:10,893] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:10,922] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:10,924] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:10,970] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:10,973] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:10,993] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:10,999] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:11,008] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:11,016] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:11,020] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.00 s]
[2021-08-01 03:00:11,026] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:11,040] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:11,966] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.77 s]
[2021-08-01 03:00:11,967] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:12,003] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.04 s]
[2021-08-01 03:00:12,005] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:12,055] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:12,059] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:12,082] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:12,088] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:12,099] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:12,108] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:12,113] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:12,119] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:12,133] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:13,121] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.83 s]
[2021-08-01 03:00:13,123] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:13,157] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:13,159] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:13,205] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:13,207] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:13,226] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:13,240] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:13,258] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.02 s]
[2021-08-01 03:00:13,266] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:13,273] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:13,283] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:13,299] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:14,196] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.75 s]
[2021-08-01 03:00:14,198] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:14,228] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:14,229] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:14,271] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.04 s]
[2021-08-01 03:00:14,273] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:14,290] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:14,296] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:14,305] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:14,311] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:14,316] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:14,323] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:14,336] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:15,280] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.81 s]
[2021-08-01 03:00:15,282] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:15,318] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.04 s]
[2021-08-01 03:00:15,320] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:15,368] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:15,371] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:15,394] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:15,400] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:15,411] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:15,418] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:15,424] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:15,432] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:15,450] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:16,498] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.90 s]
[2021-08-01 03:00:16,500] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:16,538] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.04 s]
[2021-08-01 03:00:16,542] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:16,600] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.06 s]
[2021-08-01 03:00:16,604] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:16,623] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:16,629] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:16,642] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:16,650] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:16,655] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:16,662] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:16,679] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:17,726] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.90 s]
[2021-08-01 03:00:17,727] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:17,766] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.04 s]
[2021-08-01 03:00:17,768] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:17,822] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:17,825] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:17,853] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.03 s]
[2021-08-01 03:00:17,859] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:17,871] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.01 s]
[2021-08-01 03:00:17,881] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:17,887] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:17,894] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:17,911] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:18,935] [INFO] [<<__main__.PlayerStatsBlock object at 0x1d29499a0>_update> done in 0.85 s]
[2021-08-01 03:00:18,937] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:18,970] [INFO] [<<__main__.PlayerTwitterFollowerAggregateBlock object at 0x1d2949550>_update> done in 0.03 s]
[2021-08-01 03:00:18,972] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> start]


  0%|          | 0/1187 [00:00<?, ?it/s]

[2021-08-01 03:00:19,024] [INFO] [<<__main__.TeamTwitterFollowerAggregateBlock object at 0x1d29495b0>_update> done in 0.05 s]
[2021-08-01 03:00:19,027] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> start]
[2021-08-01 03:00:19,049] [INFO] [<<__main__.TargetAggregateBlock object at 0x105d41610>_transform> done in 0.02 s]
[2021-08-01 03:00:19,057] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> start]
[2021-08-01 03:00:19,077] [INFO] [<<__main__.RostersLabelEncodingBlock object at 0x105db9ca0>_transform> done in 0.02 s]
[2021-08-01 03:00:19,091] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> start]
[2021-08-01 03:00:19,099] [INFO] [<<__main__.PlayersLabelEncodingBlock object at 0x105db9040>_transform> done in 0.01 s]
[2021-08-01 03:00:19,109] [INFO] [<<__main__.StandingsMetaBlock object at 0x105db9e80>_transform> start]
[2021-08-01 03:00:19,128] [INFO] [<<__main__.StandingsMetaBlock object at 0x105d

In [20]:
prediction_df = env.prediction_df.copy()
prediction_df.columns = ["date_playerId", "target1_pred", "target2_pred", "target3_pred", "target4_pred"]
prediction_df["playerId"] = prediction_df["date_playerId"].map(lambda x: int(x.split("_")[1]))
prediction_df["engagementMetricsDate"] = prediction_df["date_playerId"].map(lambda x: x.split("_")[0])
prediction_df["engagementMetricsDate"] = prediction_df["engagementMetricsDate"].map(lambda x: x[:4] + "-" + x[4:6] + "-" + x[6:])
prediction_df = prediction_df.merge(mlb_train_ds.train_next.drop(columns="date"), on=["playerId", "engagementMetricsDate"], how="left")
prediction_df

Unnamed: 0,date_playerId,target1_pred,target2_pred,target3_pred,target4_pred,playerId,engagementMetricsDate,target1,target2,target3,target4
0,20210702_593590,0.010412,1.461297,1.082860e-03,0.413605,593590,2021-07-02,0.003043,0.721672,0.000431,0.052601
1,20210702_661269,0.052562,0.828938,2.015518e-02,0.459270,661269,2021-07-02,0.064273,0.527790,0.004307,0.086636
2,20210702_669212,0.013463,0.643550,7.451958e-03,0.345183,669212,2021-07-02,0.001331,1.863421,0.000431,0.108295
3,20210702_666201,0.020784,0.739822,9.754789e-02,0.528273,666201,2021-07-02,1.377115,30.377531,1.855146,7.057768
4,20210702_680911,0.005341,0.730530,7.117690e-03,0.365521,680911,2021-07-02,0.007796,0.250431,0.320031,0.068071
...,...,...,...,...,...,...,...,...,...,...,...
20174,20210718_667674,0.000011,0.094655,3.605929e-13,0.174543,667674,2021-07-18,0.000000,0.007180,0.000000,0.098678
20175,20210718_672695,0.000188,0.136616,7.987533e-02,0.193293,672695,2021-07-18,0.000570,0.020641,0.095770,0.026314
20176,20210718_676103,0.000002,0.047392,2.108936e-03,0.473413,676103,2021-07-18,0.000000,0.045769,0.006197,0.723637
20177,20210718_676755,0.000007,0.177807,3.605929e-13,0.195945,676755,2021-07-18,0.000000,0.058334,0.000000,0.138149


In [21]:
score = []
for i in CFG.TARGETS:
    pred = prediction_df[f"{i}_pred"]
    true = prediction_df[i]
    mae = mean_absolute_error(pred, true)
    print(f"{i} mae : {mae}")
    score.append(mae)

print(f"MCMAE: {np.mean(score)}")

target1 mae : 0.9074238057928222
target2 mae : 1.0942760344796019
target3 mae : 0.6864901912141205
target4 mae : 0.616698731960827
MCMAE: 0.8262221908618429


In [22]:
fig = px.histogram(prediction_df["target2_pred"], marginal="box")
fig.show()

In [23]:
fig = px.histogram(prediction_df["target2"], marginal="box")
fig.show()

### objective, train_start, eval_start, num_seed
### mae, 20210401, 20210701, 1
- target1 mae : 0.9128717986374614
- target2 mae : 1.1283504320327473
- target3 mae : 0.6900468012955854
- target4 mae : 0.6177521508948599
- MCMAE: `0.8372552957151634`
### mae, 20210401, 20210601, 1
- target1 mae : 1.0212936236876593
- target2 mae : 1.8342641262424106
- target3 mae : 0.8054767366631018
- target4 mae : 1.5092283617882403
- MCMAE: `1.292565712095353`
### mae, 20210401, 20210501, 1
- target1 mae : 1.139073636820008
- target2 mae : 2.1477705069460926
- target3 mae : 0.8713704105493966
- target4 mae : 1.2844785162978885
- MCMAE: `1.3606732676533464`
### mae, 20180101, 20210701, 1
- target1 mae : 0.9165710510887174
- target2 mae : 1.3264290864592856
- target3 mae : 0.69577650290206
- target4 mae : 0.6191253027322183
- MCMAE: `0.8894754857955703`
### mae, 20180101, 20210601, 1
- target1 mae : 1.0250149819496686
- target2 mae : 1.9375601324494858
- target3 mae : 0.8143215983036728
- target4 mae : 1.449480718437135
- MCMAE: 1.3065943577849906
### mae, 20180101, 20210601, 1, nb020_param
- target1 mae : 1.0234830566971027
- target2 mae : 1.9701266539794722
- target3 mae : 0.8055250040295286
- target4 mae : 1.4151376638098436
- MCMAE: `1.3035680946289867`
### mae, 20180101, 20210501, 1
- target1 mae : 1.106424604048002
- target2 mae : 2.171465680802542
- target3 mae : 0.8700259725675753
- target4 mae : 1.2259871158434414
- MCMAE: `1.3434758433153902`
### mae, 20180101, 20210501, 1, nb020_param
- target1 mae : 1.1084402703120875
- target2 mae : 2.1638784575658745
- target3 mae : 0.869082230167717
- target4 mae : 1.2237380608246546
- MCMAE: `1.3412847547175835`
### mae, 2018010(2020drop), 20210501, 1, nb020_param
- target1 mae : 1.3166670039795423
- target2 mae : 2.3847494915660588
- target3 mae : 0.8845128956142885
- target4 mae : 1.5355907042575094
- MCMAE: `1.5303800238543497`
### mae, 2018010(on season), 20210501, 1, nb020_param
- target1 mae : 1.3195009664745603
- target2 mae : 2.7765172545669965
- target3 mae : 0.8881481867642985
- target4 mae : 1.540486776366689
- MCMAE: `1.631163296043136`
### mae, 20210301, 20210701, 1
- target1 mae : 0.914735882257999
- target2 mae : 1.1552626168001527
- target3 mae : 0.6920478597452296
- target4 mae : 0.6410733556711191
- MCMAE: `0.8507799286186252`
### mae, 20210301, 20210601, 1
- target1 mae : 1.0232905958812242
- target2 mae : 1.8845605869948752
- target3 mae : 0.8079941037181132
- target4 mae : 1.470936372521796
- MCMAE: `1.296695414779002`
### mae, 20210301, 20210501, 1
- target1 mae : 1.1302500380483467
- target2 mae : 2.164689861939187
- target3 mae : 0.8734350917041106
- target4 mae : 1.2789805879856968
- MCMAE: `1.3618388949193352`