In [1]:
import pandas as pd
import numpy as np
from features import create, h2h_acessor

In [None]:
# Load the raw events table and parse the event timestamp.
df = pd.read_csv(r'training_testing/dados/dados_eventos.csv')
df['date'] = pd.to_datetime(df['date'])

# drop_duplicates() and sort_values() return NEW frames; their results must be
# assigned, otherwise the frame stays unsorted and still contains duplicates.
# A stable sort keeps the file order for rows sharing the same date, and the
# index is reset so the frame keeps a clean 0..n-1 RangeIndex after reordering.
df = (
    df.drop_duplicates()
    .sort_values(by='date', kind='stable')
    .reset_index(drop=True)
)
df.info()

In [3]:
# Map the source column names onto the English names used by the rest of the
# notebook; columns not listed here are left as they are.
column_translations = dict(
    jogador_casa='home_player',
    jogador_fora='away_player',
    time_casa='home_team',
    time_fora='away_team',
    gols_casa='home_score',
    gols_fora='away_score',
    gols_totais='total_score',
)
df = df.rename(columns=column_translations)

In [4]:
# Positional (row-order) split: the first 70% of rows go to train, the next
# 20% to test, and whatever remains to validation. No shuffling is done, so
# the split follows the row order `df` has at this point.
n_rows = len(df)
train_size = int(0.7 * n_rows)
test_size = int(0.2 * n_rows)
test_end = train_size + test_size

train_df = df.iloc[:train_size]
test_df = df.iloc[train_size:test_end]
val_df = df.iloc[test_end:]

In [5]:
# Build the feature tables for each split and drop rows with any missing value.
# NOTE(review): `lookback_data` presumably supplies earlier matches as history
# for the features — confirm against features.create.

# Train: no lookback data is passed.
train_featured = create.features(data=train_df, live=False).dropna()

# Test: the train rows are passed as lookback data.
test_featured = create.features(
    data=test_df,
    live=False,
    lookback_data=train_df,
).dropna()

# Validation: train and test rows together are passed as lookback data.
val_lookback = pd.concat([train_df, test_df])
val_featured = create.features(
    data=val_df,
    live=False,
    lookback_data=val_lookback,
).dropna()

In [None]:
# Show the last five rows of the validation feature table.
val_featured.tail(n=5)

In [None]:
# Show the last five rows of the validation split before feature creation.
val_df.tail(n=5)

In [8]:
from features.required import REQUIRED_FEATURES

# Every split uses the same feature columns and the same target column.
target_column = 'total_score'

train_X, train_y = train_featured[REQUIRED_FEATURES], train_featured[target_column]
test_X, test_y = test_featured[REQUIRED_FEATURES], test_featured[target_column]
val_X, val_y = val_featured[REQUIRED_FEATURES], val_featured[target_column]

In [None]:
from model.config import grid_search
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from model import calculate


def _print_metrics(title, y_true, y_pred):
    """Print MAE, RMSE and Poisson log loss for one evaluation split.

    Args:
        title: Heading line printed above the metrics.
        y_true: Observed target values.
        y_pred: Model predictions aligned with ``y_true``.
    """
    print(60 * '-')
    print(title)
    print(f"MAE: {mean_absolute_error(y_true, y_pred):.4f}")
    print(f"RMSE: {np.sqrt(mean_squared_error(y_true, y_pred)):.4f}")
    print(f"Log Loss: {calculate.poisson_log_loss(y_true, y_pred):.4f}")


# Hyper-parameter search on the training split.
grid_search.fit(train_X, train_y)
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Merge through a dict so the explicit settings below override any same-named
# key in best_params instead of raising a duplicate-keyword TypeError.
final_params = {
    **best_params,
    'early_stopping_rounds': 50,
    'objective': 'count:poisson',
    # The metric name is 'poisson-nloglik'; the previous spelling
    # 'poisson-nlogik' is not a metric XGBoost knows.
    'eval_metric': 'poisson-nloglik',
}
final_model = XGBRegressor(**final_params)

# The model must be fitted before predict(), and early stopping needs an
# eval_set to monitor.
# NOTE(review): the validation split is used for early stopping here, so the
# "Validação" metrics below are optimistic; the test split stays untouched by
# training. Swap the eval_set if validation is meant to be the clean hold-out.
final_model.fit(
    train_X,
    train_y,
    eval_set=[(val_X, val_y)],
    verbose=False,
)

# Evaluation
test_pred = final_model.predict(test_X)
val_pred = final_model.predict(val_X)

_print_metrics('Teste:', test_y, test_pred)
_print_metrics('Validação:', val_y, val_pred)