# Importing Requirements

In [71]:
import pandas as pd
import numpy as np

# Declaring Helper Functions

In [72]:
def adjusted_probs(home_odds, draw_odds, away_odds):
    """Turn decimal 1X2 odds into probabilities that sum to one.

    Every price is first converted to an implied percentage
    (``(1 / odds) * 100``). The three percentages are then divided by
    their total, so whatever they add up to beyond 100% (the bookmaker's
    margin) is scaled away.

    Parameters
    ----------
    home_odds, draw_odds, away_odds
        Decimal odds for the home win, the draw and the away win.

    Returns
    -------
    tuple
        Normalised probabilities in the order ``(home, draw, away)``.
    """
    implied_pct = [(1/price) * 100 for price in (home_odds, draw_odds, away_odds)]
    overall = sum(implied_pct)

    return tuple(pct/overall for pct in implied_pct)

In [73]:
# Spot check: 1.67 / 4 / 5 are the Heerenveen v Almere City prices that appear
# in the fixture table further down; [0] selects the home-win probability.
adjusted_probs(1.67, 4, 5)[0]

0.5709391949757351

In [74]:
def theil_score(adj_home, adj_draw, adj_away):
    """Return ``sum(p * ln(1 / p))`` over the three outcome probabilities.

    This is the Shannon entropy (natural log) of the home/draw/away
    distribution: it peaks at ``ln(3)`` when all three outcomes are equally
    likely and shrinks as one outcome becomes dominant.

    Parameters
    ----------
    adj_home, adj_draw, adj_away
        Outcome probabilities, e.g. the values produced by ``adjusted_probs``.
        Each must be non-zero, because ``1 / p`` is evaluated for every one.

    Returns
    -------
    The summed entropy terms.
    """
    total = 0
    for prob in (adj_home, adj_draw, adj_away):
        total = total + prob * np.log(1/prob)

    return total

In [75]:
# Spot check using the (rounded) adjusted probabilities listed for
# Volendam v Zwolle in the fixture table further down.
theil_score(0.368604, 0.255565, 0.375831)

1.0843346815152448

# Loading Data

In [76]:
# Load the Bet365 1X2 odds, derive margin-free probabilities plus two
# "closeness" measures per fixture, and keep only the latest round of games.
fixtures_gameweek_15 = (
    pd.read_csv('../data/N1-2.csv')
    .filter(['Date', 'HomeTeam', 'AwayTeam', 'B365H', 'B365D', 'B365A'])
    .rename(columns=str.lower)
    .rename(columns={
        'b365h': 'odds_home',
        'b365d': 'odds_draw',
        'b365a': 'odds_away'
    })
    .assign(date=lambda x: pd.to_datetime(x['date'], format='%d/%m/%Y'))
    # adjusted_probs only uses element-wise arithmetic and sum(), so it accepts
    # whole columns: a single vectorised call yields all three probability
    # columns instead of three separate row-by-row apply() passes.
    .pipe(lambda x: x.assign(**dict(zip(
        ['adj_home', 'adj_draw', 'adj_away'],
        adjusted_probs(x['odds_home'], x['odds_draw'], x['odds_away']),
    ))))
    .assign(
        # Gap between home-win and away-win probability (0 = evenly matched).
        abs_diff_adj_odds=lambda x: (x['adj_home'] - x['adj_away']).abs(),
        # theil_score is likewise element-wise, so it is applied to the columns
        # directly; inside the lambda the name refers to the helper function.
        theil_score=lambda x: theil_score(x['adj_home'], x['adj_draw'], x['adj_away']),
    )
    .query('date >= "2023-12-01"') # selecting the latest round of games
)

In [77]:
# Hand-entered league table, one (ranking, team, points) record per club.
# Team names use the same spelling as the odds file so they can serve as
# join keys.
standings_week_14 = pd.DataFrame(
    [
        (1, "PSV Eindhoven", 42),
        (2, "Feyenoord", 32),
        (3, "AZ Alkmaar", 30),
        (4, "Twente", 30),
        (5, "Go Ahead Eagles", 22),
        (6, "Ajax", 21),
        (7, "Sparta Rotterdam", 21),
        (8, "Heerenveen", 19),
        (9, "Zwolle", 17),
        (10, "Excelsior", 16),
        (11, "For Sittard", 16),
        (12, "Nijmegen", 15),
        (13, "Heracles", 15),
        (14, "Utrecht", 13),
        (15, "Waalwijk", 13),
        (16, "Almere City", 13),
        (17, "Volendam", 8),
        (18, "Vitesse", 8),
    ],
    columns=['ranking', 'team', 'points'],
)

In [83]:
# Attach league position and points for both sides of every fixture.
# The standings columns are prefixed before each join, so the two merges
# cannot clash; how='left' keeps every fixture even when a team name finds
# no match in the standings (its ranking/points are then NaN).
enriched_fixtures_gameweek_15 = (
    fixtures_gameweek_15
    .merge(
        standings_week_14.rename(columns={
            'ranking': 'hometeam_ranking',
            'team': 'hometeam_standings',
            'points': 'hometeam_points',
        }),
        how='left',
        left_on='hometeam',
        right_on='hometeam_standings',
    )
    .merge(
        standings_week_14.rename(columns={
            'ranking': 'awayteam_ranking',
            'team': 'awayteam_standings',
            'points': 'awayteam_points',
        }),
        how='left',
        left_on='awayteam',
        right_on='awayteam_standings',
    )
    # The duplicated team-name key columns are no longer needed after joining.
    .drop(columns=['hometeam_standings', 'awayteam_standings'])
    # Number of league places separating the two teams.
    .assign(rankingdiff=lambda fx: (fx['hometeam_ranking'] - fx['awayteam_ranking']).abs())
)

In [162]:
# Show the hand-entered standings table as this cell's output.
standings_week_14

Unnamed: 0,ranking,team,points
0,1,PSV Eindhoven,42
1,2,Feyenoord,32
2,3,AZ Alkmaar,30
3,4,Twente,30
4,5,Go Ahead Eagles,22
5,6,Ajax,21
6,7,Sparta Rotterdam,21
7,8,Heerenveen,19
8,9,Zwolle,17
9,10,Excelsior,16


In [163]:
# Rank fixtures by a simple "interest" score; the smallest score comes first.
(
    enriched_fixtures_gameweek_15
    .assign(
        # Mean league position of the two teams (small = both near the top).
        average_rank=lambda fx: fx['hometeam_ranking'].add(fx['awayteam_ranking']).div(2),
        # Odds gap plus average rank. The odds gap is a difference of two
        # probabilities (below 1), whereas average_rank moves in steps of 0.5
        # across the league table, so the ranking term drives most of the order.
        composite_score=lambda fx: fx['abs_diff_adj_odds'].add(fx['average_rank']),
    )
    .sort_values(by='composite_score')
    # .sort_values('abs_diff_adj_odds', ascending=True)
    # .reset_index(drop=True)
)

Unnamed: 0,date,hometeam,awayteam,odds_home,odds_draw,odds_away,adj_home,adj_draw,adj_away,abs_diff_adj_odds,theil_score,hometeam_ranking,hometeam_points,awayteam_ranking,awayteam_points,rankingdiff,average_rank,composite_score
5,2023-12-03,Feyenoord,PSV Eindhoven,2.2,3.6,3.0,0.42654,0.260664,0.312796,0.113744,1.077434,2,32,1,42,1,1.5,1.613744
6,2023-12-03,Go Ahead Eagles,Twente,3.8,4.0,1.85,0.249747,0.23726,0.512994,0.263247,1.030216,5,22,4,30,1,4.5,4.763247
8,2023-12-03,Utrecht,AZ Alkmaar,4.0,3.75,1.83,0.235158,0.250835,0.514007,0.278849,1.029367,14,13,3,30,11,8.5,8.778849
7,2023-12-03,Nijmegen,Ajax,4.33,4.33,1.7,0.219922,0.219922,0.560155,0.340233,0.990769,12,15,6,21,6,9.0,9.340233
4,2023-12-02,Heracles,Sparta Rotterdam,2.7,3.6,2.45,0.350626,0.26297,0.386404,0.035778,1.086142,13,15,7,21,6,10.0,10.035778
0,2023-12-01,Heerenveen,Almere City,1.67,4.0,5.0,0.570939,0.238367,0.190694,0.380246,0.977797,8,19,16,13,8,12.0,12.380246
3,2023-12-02,Waalwijk,Excelsior,2.05,4.0,3.25,0.466577,0.239121,0.294302,0.172275,1.057793,15,13,10,16,5,12.5,12.672275
1,2023-12-02,Volendam,Zwolle,2.6,3.75,2.55,0.368604,0.255565,0.375831,0.007228,1.084335,17,8,9,17,8,13.0,13.007228
2,2023-12-02,For Sittard,Vitesse,2.0,3.6,3.6,0.473684,0.263158,0.263158,0.210526,1.056576,11,16,18,8,7,14.5,14.710526


In [164]:
# Using average ranking of teams works ok but there is no hard cut-off for what is an interesting game
# Next step could be to use Elo ratings, which are stickier (a low K-factor makes them slow to change)

# What we actually need is a cut-off value. Maybe use a ratio? If below 1 then it's an interesting game?

# http://clubelo.com/NED

In [None]:
# Maybe what we need is something more recent?
# We want Ron Jans's new Utrecht to also be highlighted
# Ajax's resurgence should also show