In [11]:
import pandas as pd
import numpy as np
from scipy.stats import poisson 
from scipy.optimize import curve_fit
import matplotlib.pyplot as plt
import time
import warnings
warnings.filterwarnings("ignore")

In [12]:
dict_matches_left = pd.read_csv("Remaining.csv")
matches_left = {'HomeTeam': [],
                'AwayTeam': []}
for i in range(len(dict_matches_left.axes[0])):
    matches_left['HomeTeam'].append(dict_matches_left['HomeTeam'][i])
    matches_left['AwayTeam'].append(dict_matches_left['AwayTeam'][i])

remaining = pd.DataFrame(matches_left)


In [13]:
leagues_short = ['la_liga']
dict_historical_data = {}

for league in leagues_short:
    frames = []
    df = pd.read_csv("SP"+str(1)+".csv")
    df = df[['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]
    df = df.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
    df = df.assign(Season=i)
    frames.append(df)
    df_historical_data = pd.concat(frames)
    df_historical_data['Date'] = pd.to_datetime(df_historical_data['Date'])
    dict_historical_data[league] = df_historical_data

for i in range(2, 5):
    frames = []
    df = pd.read_csv("SP"+str(i)+".csv")
    df = df[['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']]
    df = df.rename(columns={'FTHG': 'HomeGoals', 'FTAG': 'AwayGoals'})
    df = df.assign(Season=i)
    frames.append(df)
    df_historical_data = pd.concat(frames)
    df_historical_data['Date'] = pd.to_datetime(df_historical_data['Date'])
    dict_historical_data['la_liga'] = df_historical_data

In [14]:
dict_historical_data

{'la_liga':           Date     HomeTeam    AwayTeam  HomeGoals  AwayGoals  Season
 0   2018-08-17        Betis     Levante          0          3       4
 1   2018-08-17       Girona  Valladolid          0          0       4
 2   2018-08-18    Barcelona      Alaves          3          0       4
 3   2018-08-18        Celta     Espanol          1          1       4
 4   2018-08-18   Villarreal    Sociedad          1          2       4
 ..         ...          ...         ...        ...        ...     ...
 375 2019-05-18      Levante  Ath Madrid          2          2       4
 376 2019-05-18      Sevilla  Ath Bilbao          2          0       4
 377 2019-05-18   Valladolid    Valencia          0          2       4
 378 2019-05-19        Eibar   Barcelona          2          2       4
 379 2019-05-19  Real Madrid       Betis          0          2       4
 
 [380 rows x 6 columns]}

In [15]:
for i in dict_historical_data:
    dict_historical_data[i]['TotalGoals'] = dict_historical_data[i]['HomeGoals'] + dict_historical_data[i]['AwayGoals']

In [16]:
for i in dict_historical_data:
    print(i)
    print(dict_historical_data[i]['TotalGoals'].mean())

la_liga
2.586842105263158


In [17]:
# Team Strength 

def calculate_strength(league):
    home = dict_historical_data[league][['HomeTeam', 'HomeGoals', 'AwayGoals']].rename(
        columns={'HomeTeam':'Team', 'HomeGoals':'HomeScored', 'AwayGoals':'HomeConceded'}).groupby(
        ['Team'], as_index=False)[['HomeScored', 'HomeConceded']].mean()
    away = dict_historical_data[league][['AwayTeam', 'HomeGoals', 'AwayGoals']].rename(
        columns={'AwayTeam':'Team', 'HomeGoals':'AwayConceded', 'AwayGoals':'AwayScored'}).groupby(
        ['Team'], as_index=False)[['AwayScored', 'AwayConceded']].mean()

    df_league_strength = pd.merge(home, away, on='Team')
    average_home_scored, average_home_conceded = home['HomeScored'].mean(), home['HomeConceded'].mean()
    average_away_scored, average_away_conceded = away['AwayScored'].mean(), away['AwayConceded'].mean()

    df_league_strength['HomeScored'] /= average_home_scored
    df_league_strength['HomeConceded'] /= average_home_conceded
    df_league_strength['AwayScored'] /= average_away_scored
    df_league_strength['AwayConceded'] /= average_away_conceded

    df_league_strength.set_index('Team', inplace=True)
    return df_league_strength

In [18]:
def predict_points(home, away):
    if home in df_league_strength.index and away in df_league_strength.index:
        #home_scored * away_conceded
        lamb_home = df_league_strength.at[home,'HomeScored'] * df_league_strength.at[away,'AwayConceded']
        lamb_away = df_league_strength.at[away,'AwayScored'] * df_league_strength.at[home,'HomeConceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11): #number of goals home team
            for y in range(0, 11): #number of goals away team
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)

In [22]:
#  Simulate the matches to predict final standings
#league = 'la_liga' #'la_liga', 'premier_league', 'bundesliga', 'serie_a'
#df_league_strength = calculate_strength(league)

#list_points_home = []
#list_points_away = []
#for index, row in remaining[league].iterrows():
#    home, away = row['HomeTeam'], row['AwayTeam']
#    points_home, points_away = predict_points(home, away)
#    dict_table[league].loc[dict_table[league]['Team'] == home, 'Points'] += points_home
#    dict_table[league].loc[dict_table[league]['Team'] == away, 'Points'] += points_away
#    #storing every match result
#    list_points_home.append(round(points_home, 1))
#    list_points_away.append(round(points_away, 1))
    
#dict_table[league] = dict_table[league].sort_values('Points', ascending=False).reset_index()
#dict_table[league] = dict_table[league][['Team', 'Points']]
#dict_table[league].round(0)

In [10]:
result = calculate_strength("la_liga")