In [None]:
import pandas as pd
import numpy as np
import penaltyblog as pb
from types import SimpleNamespace
from tqdm import tqdm

import plotly.express as px
import plotly.graph_objects as go

In [None]:
import sys
sys.path.append('../src')
from dixon_coles import create_team_ratings, simulate_league, process_simulation_results

In [None]:
# Played matches; later cells read the goals_home / goals_away /
# team_home / team_away columns from this frame.
results = pd.read_csv(filepath_or_buffer='../data/results_kevat25.csv')
results

In [None]:
# Fixture list loaded from CSV (presumably the matches still to be played — TODO confirm).
fixtures = pd.read_csv(filepath_or_buffer='../data/fixtures_kevat25.csv')
fixtures

In [None]:
# League table loaded from CSV (presumably the current standings — TODO confirm).
table = pd.read_csv(filepath_or_buffer='../data/table_syksy1_25.csv')
table

# Modelling

Repeatedly re-sample the season's match results and fit a Dixon-Coles model to each sub-sample. Retain the attacking and defensive coefficients for each team from every fit.

Sub-samples are drawn without replacement. Ratings are removed for teams that end up with too few matches in a given sub-sample.

In [None]:
# Draw the row labels of `results` that make up each sub-sample.
nr_samples_per_season = 100  # matches drawn (without replacement) per sub-sample
nr_season_samples = 10       # number of independent sub-samples / model fits
random_seed = 42             # fixed so that Restart & Run All reproduces the same sub-samples

# A dedicated, seeded generator keeps the draws reproducible and independent
# of any other code that touches NumPy's global random state.
rng = np.random.default_rng(random_seed)

# Each entry is an array of `results` index labels. `replace=False` means a
# match appears at most once per sub-sample, and raises ValueError if
# `nr_samples_per_season` exceeds the number of available matches.
all_samples = [
    rng.choice(results.index, size=nr_samples_per_season, replace=False)
    for _ in range(nr_season_samples)
]

In [None]:
# Create a Dixon-Coles model for each sample of the season.
#
# For every sub-sample in `all_samples`: fit a model on the sampled matches,
# turn it into a per-team ratings frame, and keep only the teams that played at
# least `min_games_to_include_in_sample` matches within that sub-sample.
# After the loop, `clf` holds the model fitted on the LAST sub-sample.

min_games_to_include_in_sample = 6
all_ratings = []
all_sample_results = []
args = SimpleNamespace(save_simulation_results=False, league='Kevät_2025')

# The team list does not depend on the sub-sample, so compute it once.
teams = results['team_home'].unique()

for ix, sample_array in enumerate(tqdm(all_samples, desc='Fitting models and creating ratings')):
    sample_results = results.loc[sample_array]
    all_sample_results.append(sample_results)

    clf = pb.models.DixonColesGoalModel(
        sample_results["goals_home"],
        sample_results["goals_away"],
        sample_results["team_home"],
        sample_results["team_away"],
    )
    clf.fit()

    # NOTE(review): create_team_ratings is assumed to return a DataFrame with a
    # 'team' column (the merge below relies on it) — confirm in dixon_coles.py.
    ratings_df = create_team_ratings(clf, teams, args, export_ratings=False)

    # Count games per team in this sub-sample (home + away). `fill_value=0`
    # covers teams that only appear on one side.
    home_counts = sample_results['team_home'].value_counts()
    away_counts = sample_results['team_away'].value_counts()
    team_counts = home_counts.add(away_counts, fill_value=0)

    # Name the index and the values explicitly instead of relying on the
    # auto-generated 'index' / 'count' labels, which differ between pandas
    # versions and depend on whether the two count indexes needed alignment.
    team_counts_df = (
        team_counts
        .rename_axis('team')
        .rename('games_played')
        .reset_index()
    )

    # Teams absent from the sub-sample get NaN games_played after the left
    # merge and are dropped by the comparison. `.assign` returns a new frame,
    # so no SettingWithCopyWarning is raised on the filtered slice.
    ratings_df = (
        ratings_df
        .merge(team_counts_df, how='left', on='team')
        .loc[lambda df: df['games_played'] >= min_games_to_include_in_sample]
        .assign(sample_index=ix)
    )

    all_ratings.append(ratings_df)

# Stack the per-sample frames into one long frame: one row per (team, sample).
all_ratings = pd.concat(all_ratings, axis=0).reset_index(drop=True)
display(all_ratings)

In [None]:
# Flag the team of interest: 'red' for Trikiinit rows, 'blue' for every other team.
all_ratings['color'] = all_ratings['team'].eq('Trikiinit').map({True: 'red', False: 'blue'})

In [None]:
# One point per (team, sub-sample). Plotly Express treats the `color` column as
# a category and would otherwise assign colours from its default palette, so
# the label 'red' could be drawn in blue. The explicit map pins each label to
# the colour it names.
px.scatter(
    all_ratings,
    x='attack_rating',
    y='defense_rating',
    color='color',
    color_discrete_map={'red': 'red', 'blue': 'blue'},
    title='Attack vs defence rating per team and sub-sample',
)

In [None]:
# Rows with the strongest attack rating first.
all_ratings.sort_values('attack_rating', ascending=False)

In [None]:
# All per-sample ratings for Trikiinit.
all_ratings.loc[all_ratings['team'].eq('Trikiinit')]

In [None]:
# Average each team's ratings over all sub-samples it was rated in, then rank
# teams by attack minus defence rating.
#
# The rating columns are selected BEFORE aggregating: `all_ratings` also holds
# the string `color` column, and `groupby(...).mean()` over the whole frame
# raises TypeError on non-numeric columns in pandas >= 2.0.
all_ratings_grouped = (
    all_ratings
    .groupby('team')[['attack_rating', 'defense_rating']]
    .mean()
    .assign(goal_difference_rating=lambda df: df['attack_rating'] - df['defense_rating'])
    .sort_values(by='goal_difference_rating', ascending=False)
    .reset_index()
)
all_ratings_grouped

In [None]:
# Pull the attack / defence parameters out of `clf`, i.e. whichever model was
# fitted most recently, by matching on the parameter-name prefix.
fitted_params = clf.get_params()
attack_values = [val for name, val in fitted_params.items() if name.startswith('attack')]
defence_values = [val for name, val in fitted_params.items() if name.startswith('defence')]

# Build one figure holding both distributions as semi-transparent histograms.
fig = go.Figure()
for label, values, colour in (
    ('Attack', attack_values, 'blue'),
    ('Defence', defence_values, 'red'),
):
    fig.add_trace(go.Histogram(x=values, name=label, opacity=0.6, marker_color=colour))

# Titles, axis labels and overlay mode.
fig.update_layout(
    barmode='overlay',  # or 'group' if you want them side by side
    title='Attack vs Defence Parameter Distributions',
    xaxis_title='Value',
    yaxis_title='Count',
    legend_title='Parameter Type',
)

fig.show()


# Use ratings from individual groups

In [None]:
# Ratings previously written to the output folder (presumably by an earlier per-group run — TODO confirm).
data = pd.read_csv(filepath_or_buffer='../output/ratings_Kevät.csv')
data