In [None]:
import pandas as pd

from Models.Pairs.Feature_engineering.dependences import DependenciesPlots
from Models.Pairs.Feature_engineering.model_explainer import ExplainModel
from Models.Pairs.Hyperparameters.optuna_hyperparameters import OptunaXgboost
from Models.Pairs.pairs_xgboost_model import TrainXgBoost
from Models.variables import PAIRS_DATASET


def create_model(data, hyperparameters=False):
    """Build, fit and evaluate an XGBoost pairs classifier for ``data``.

    Args:
        data: Dataset passed unchanged as the first argument of
            ``TrainXgBoost``.
        hyperparameters: Forwarded as the trainer's ``hyperparameters``
            keyword; defaults to ``False``.

    Returns:
        The ``TrainXgBoost`` instance, after ``create_classification_model()``
        and ``evaluate_classification()`` have been called on it in that order.
    """
    trainer_options = {
        # NOTE(review): presumably the season(s) held out for testing —
        # confirm against TrainXgBoost.
        'test_range': [2024],
        'hyperparameters': hyperparameters,
        'prints': True,
        'plots': False,
        'details': False,
    }
    trainer = TrainXgBoost(data, **trainer_options)

    trainer.create_classification_model()
    trainer.evaluate_classification()
    return trainer

def run_optuna(data, number_of_runs=5, accuracy_threshold=0.709):
    """Run several independent Optuna searches and return the best one.

    Each run constructs a fresh ``OptunaXgboost(data, prints=False, ...)`` and
    records its ``pairs``, ``accuracy``, ``accuracy_with_bonuses`` and
    ``optuna_results`` attributes. A one-line summary of every run (everything
    except the Optuna results) is printed as it finishes.

    Args:
        data: Dataset handed unchanged to ``OptunaXgboost``.
        number_of_runs: How many searches to execute. Must be at least 1;
            with no runs the result table is empty and the best-row lookup
            fails.
        accuracy_threshold: Forwarded to ``OptunaXgboost`` as its
            ``accuracy_threshold`` keyword. Defaults to ``0.709``, the value
            that used to be hard-coded here.

    Returns:
        pandas.Series: The row of the run with the highest
        ``Accuracy_with_bonuses``, indexed by ``Run``, ``Model``,
        ``Pairs_accuracy``, ``Accuracy``, ``Accuracy_with_bonuses`` and
        ``Optuna_results``. ``Model`` holds the ``OptunaXgboost`` instance.
    """
    columns = ['Run', 'Model', 'Pairs_accuracy', 'Accuracy', 'Accuracy_with_bonuses', 'Optuna_results']

    results = []
    for run_number in range(1, number_of_runs + 1):
        hyper = OptunaXgboost(data, prints=False, accuracy_threshold=accuracy_threshold)

        results.append([
            run_number,
            hyper,
            hyper.pairs,
            hyper.accuracy,
            hyper.accuracy_with_bonuses,
            hyper.optuna_results,
        ])
        # Progress line: skip the last entry (the Optuna results) to keep it short.
        print(results[-1][:-1])

    runs = pd.DataFrame(data=results, columns=columns)
    runs = runs.sort_values(by=['Accuracy_with_bonuses'], ascending=False).reset_index(drop=True)

    # After the descending sort and index reset, label 0 is the best run.
    best_run = runs.loc[0, :]
    print(best_run['Accuracy_with_bonuses'])

    return best_run


def amended_dataset_test(data, tested_features, optuna_runs):
    """Evaluate a dataset variant and show dependence plots for chosen features.

    Steps, in order:
      1. ``create_model(data)`` is called with default arguments; its return
         value is discarded.
      2. ``run_optuna(data, optuna_runs)`` picks the best tuned run.
      3. ``DependenciesPlots`` is built from that run's ``'Model'`` entry and
         every figure in its ``dependencies_plots`` is shown.

    Args:
        data: Dataset passed to both ``create_model`` and ``run_optuna``.
        tested_features: Feature names forwarded to ``DependenciesPlots`` as
            ``features_to_plot``.
        optuna_runs: Number of Optuna runs requested from ``run_optuna``.

    Returns:
        None.
    """
    # Untuned reference run; only its side effects matter here.
    create_model(data)

    best_run = run_optuna(data, optuna_runs)
    plotter = DependenciesPlots(
        trained_model=best_run['Model'],
        features_to_plot=tested_features,
    )

    for figure in plotter.dependencies_plots:
        figure.show()

    # Disabled: additional model explanations for the best run.
    # model_explainer = ExplainModel(best_run['Model'])
    # model_explainer.importances_heatmap.show()
    # model_explainer.permutation_importance.show()

In [None]:
# Benchmark: load the pairs dataset as stored on disk and evaluate it
# before any extra columns are added (no features to plot, 3 Optuna runs).
dataset = pd.read_parquet(
    f"../../../../Dataset/Datasets/{PAIRS_DATASET}"
)
amended_dataset_test(dataset, tested_features=[], optuna_runs=3)

In [None]:
# Each row names (new difference column, rider column, opponent column).
# The same table drives both the column creation and the list of features
# to plot, so the two cannot drift apart.
feature_triples = [
    ('Diff_overall_heats_no', 'Rider_overall_heats_no', 'Opponent_overall_heats_no'),
    ('Diff_comp_avg', 'Rider_comp_avg', 'Opponent_comp_avg'),
    ('Diff_season_avg', 'Rider_season_avg', 'Opponent_season_avg'),
    ('Diff_season_sum', 'Rider_season_sum', 'Opponent_season_sum'),
    ('Diff_gate_avg_year', 'Rider_gate_avg_year', 'Opponent_gate_avg_year'),
    ('Diff_home_away_track_avg_year', 'Rider_home_away_track_avg_year', 'Opponent_home_away_track_avg_year'),
]

# Add rider-minus-opponent columns to the shared ``dataset`` frame in place;
# later cells rely on these columns being present.
for diff_col, rider_col, opponent_col in feature_triples:
    dataset[diff_col] = dataset[rider_col] - dataset[opponent_col]

# Flatten to Diff, Rider, Opponent for each statistic, in table order.
features = [column for triple in feature_triples for column in triple]

amended_dataset_test(dataset, tested_features=features, optuna_runs=5)

In [None]:
# Disabled: dropping the columns left out of the list below. While this stays
# commented out, ``dataset`` itself is unchanged and only the plotted feature
# list is narrowed.
# dataset.drop(['Diff_gate_avg_year', 'Rider_gate_avg_year', 'Opponent_gate_avg_year',
#               'Rider_home_away_track_avg_year', 'Opponent_home_away_track_avg_year',
#               'Rider_overall_heats_no', 'Opponent_overall_heats_no'], axis=1, inplace=True)

features = [
    # Season average
    'Diff_season_avg', 'Rider_season_avg', 'Opponent_season_avg',
    # Season sum
    'Diff_season_sum', 'Rider_season_sum', 'Opponent_season_sum',
    # Competition average
    'Diff_comp_avg', 'Rider_comp_avg', 'Opponent_comp_avg',
    # Difference-only features
    'Diff_overall_heats_no', 'Diff_home_away_track_avg_year',
]

amended_dataset_test(dataset, tested_features=features, optuna_runs=10)

In [None]:
# NOTE(review): this cell uses the same feature subset and run count as the
# preceding cell — presumably a repeat to compare results between runs; confirm.
# Disabled: dropping the columns left out of the list below.
# dataset.drop(['Diff_gate_avg_year', 'Rider_gate_avg_year', 'Opponent_gate_avg_year',
#               'Rider_home_away_track_avg_year', 'Opponent_home_away_track_avg_year',
#               'Rider_overall_heats_no', 'Opponent_overall_heats_no'], axis=1, inplace=True)

features = [
    # Season average
    'Diff_season_avg', 'Rider_season_avg', 'Opponent_season_avg',
    # Season sum
    'Diff_season_sum', 'Rider_season_sum', 'Opponent_season_sum',
    # Competition average
    'Diff_comp_avg', 'Rider_comp_avg', 'Opponent_comp_avg',
    # Difference-only features
    'Diff_overall_heats_no', 'Diff_home_away_track_avg_year',
]

amended_dataset_test(dataset, tested_features=features, optuna_runs=10)