In [53]:
#Importing libraries

import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 
import requests

print("Libraries imported successfully......")

Libraries imported successfully......


In [54]:
teams = pd.read_csv('expanded_teams_data.csv')

print("Data loaded successfully..")

Data loaded successfully..


# Feature Engineering

### General Stats

Games played


In [55]:
# Convert 'Game_played' column to integers (1 for True, 0 for False)
teams['Game_played'].fillna(False, inplace=True)
teams['Game_played'] = teams['Game_played'].astype(int)

# Calculate cumulative 'Games_Count' for each row
teams['Games_Count'] = teams.groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Total goals

In [56]:
teams['Total_Goals'] = teams.groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per game

In [57]:
teams['Goals_per_game'] = teams['Total_Goals'] / teams['Games_Count']

Goals conceded

In [58]:
teams['Total_Conceded'] = teams.groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


In [59]:
teams['Conceded_per_game'] = teams['Total_Conceded'] / teams['Games_Count']

Win

In [60]:
teams['Win'] = teams['team_score'] > teams['opponent_score'] 
teams['Win'] = teams['Win'].astype(int)


Draw

In [61]:
teams['Draw'] = teams['team_score'] == teams['opponent_score'] 
teams['Draw'] = teams['Draw'].astype(int)

Loss

In [62]:
teams['Loss'] = teams['team_score'] < teams['opponent_score'] 
teams['Loss'] = teams['Loss'].astype(int)

Total wins

In [63]:
teams['Total_wins'] = teams.groupby(['team_id', 'team_name'])['Win'].cumsum()


Win percentage

In [64]:
teams['Win_percentage'] = (teams['Total_wins'] / teams['Games_Count']) * 100

Game Results

In [65]:
teams['result'] = teams.apply(lambda row: 1 if row['Win'] else (-1 if row['Loss'] else 0), axis=1)


Form

In [66]:
def calculate_form(group, num_games):
    forms = []
    current_form = ""
    
    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'
            
            if len(current_form) > num_games:
                current_form = current_form[1:]
            
            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Form'] = team_forms




Numeric Form

In [67]:
def calculate_numerical_form(group, num_games):
    numerical_forms = []
    current_form = []
    
    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']
        
        if game_played:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games
            
            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0
            
            current_form.append(value * weight)
            
            if len(current_form) > num_games:
                current_form.pop(0)
            
            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)
        
        else:
            numerical_forms.append(None)  # Append None for games that haven't been played
    
    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Numerical_Form'] = team_numerical_forms



Team strength

In [68]:
teams['Strength'] = teams['Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Win_percentage']*2.0 + teams['Goals_per_game'] + teams['Conceded_per_game']*-1.0

In [69]:
Liverpool = teams[teams['team_id'] == 11]
Liverpool

Unnamed: 0,code,team_id,team_name,team_short_name,unavailable,pulse_id,event_id,fixture_id,fixture_difficulty,opponent_team,...,Conceded_per_game,Win,Draw,Loss,Total_wins,Win_percentage,result,Form,Numerical_Form,Strength
16,14,11,Liverpool,LIV,False,10,1,9.0,3.0,7.0,...,1.0,0,1,0,0,0.0,0,D,0.0,6.0
22,14,11,Liverpool,LIV,False,10,2,14.0,2.0,3.0,...,1.0,1,0,0,1,50.0,1,DW,2.7,111.75
56,14,11,Liverpool,LIV,False,10,3,29.0,4.0,15.0,...,1.0,1,0,0,2,66.666667,1,DWW,5.866667,157.0
72,14,11,Liverpool,LIV,False,10,4,37.0,3.0,2.0,...,0.75,1,0,0,3,75.0,1,DWWW,8.25,178.125
78,14,11,Liverpool,LIV,False,10,5,50.0,2.0,20.0,...,0.8,1,0,0,4,80.0,1,DWWWW,9.92,190.4
115,14,11,Liverpool,LIV,False,10,6,57.0,2.0,19.0,...,0.833333,1,0,0,5,83.333333,1,WWWWW,14.72,209.133333
132,14,11,Liverpool,LIV,False,10,7,68.0,3.0,18.0,...,1.0,0,0,1,5,71.428571,-1,WWWWL,8.16,170.542857
150,14,11,Liverpool,LIV,False,10,8,72.0,3.0,5.0,...,1.125,0,1,0,5,62.5,0,WWWLD,5.72,146.425
159,14,11,Liverpool,LIV,False,10,9,85.0,2.0,9.0,...,,0,0,0,5,62.5,0,,,
190,14,11,Liverpool,LIV,False,10,10,97.0,2.0,16.0,...,,0,0,0,5,62.5,0,,,


### Home Stats

Games played home

In [70]:
# Calculate cumulative 'Games_Count' for each row
teams['Home_Count'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Goals at home

In [71]:
teams['Home_Goals'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per home game

In [72]:
teams['Goals_per_home'] = teams.apply(lambda row: row['Total_Goals'] / row['Home_Count'] if row['is_home'] else None, axis=1)


Goals conceded at home

In [73]:
teams['Home_Conceded'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


Conceded per home game 

In [74]:
teams['Conceded_per_home'] = teams.apply(lambda row: row['Home_Conceded'] / row['Home_Count'] if row['is_home'] else None, axis=1)


Total home wins

In [75]:
teams['Total_Home_wins'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['Win'].cumsum()


Home Win percentage

In [76]:
teams['Home_Win_percentage'] = (teams['Total_Home_wins'] / teams['Home_Count']) * 100

Home Form

In [77]:
def calculate_form_home(group, num_games):
    forms = []
    current_form = ""
    
    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True and row['is_home'] == True:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'
            
            if len(current_form) > num_games:
                current_form = current_form[1:]
            
            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form_home(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Home_Form'] = team_forms




Home Numeric Form

In [78]:
def calculate_numerical_form_home(group, num_games):
    numerical_forms = []
    current_form = []
    
    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']
        
        if game_played and row['is_home'] == True:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games
            
            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0
            
            current_form.append(value * weight)
            
            if len(current_form) > num_games:
                current_form.pop(0)
            
            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)
        
        else:
            numerical_forms.append(None)  # Append None for games that haven't been played
    
    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form_home(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Home_Numerical_Form'] = team_numerical_forms



In [79]:
teams['Home_Strength'] = teams['Home_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Home_Win_percentage']*2.0 + teams['Goals_per_home'] + teams['Conceded_per_home']*-1.0

### Away Stats

Games played away

In [80]:
# Calculate cumulative 'Games_Count' for each row
teams['Away_Count'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Goals away

In [81]:
teams['Away_Goals'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per away game

In [82]:
teams['Goals_per_away'] = teams.apply(lambda row: row['Total_Goals'] / row['Away_Count'] if row['is_home']==False else None, axis=1)


Goals conceded away

In [83]:
teams['Away_Conceded'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


Conceded per away game 

In [84]:
teams['Conceded_per_away'] = teams.apply(lambda row: row['Away_Conceded'] / row['Away_Count'] if row['is_home'] ==False else None, axis=1)


Total Away wins

In [85]:
teams['Total_Away_wins'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['Win'].cumsum()


Away Win percentage

In [86]:
teams['Away_Win_percentage'] = (teams['Total_Away_wins'] / teams['Away_Count']) * 100

Away Form

In [87]:
def calculate_form_away(group, num_games):
    forms = []
    current_form = ""
    
    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True and row['is_home'] == False:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'
            
            if len(current_form) > num_games:
                current_form = current_form[1:]
            
            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form_away(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Away_Form'] = team_forms




Away Numeric Form

In [88]:
def calculate_numerical_form_away(group, num_games):
    numerical_forms = []
    current_form = []
    
    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']
        
        if game_played and row['is_home'] == False:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games
            
            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0
            
            current_form.append(value * weight)
            
            if len(current_form) > num_games:
                current_form.pop(0)
            
            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)
        
        else:
            numerical_forms.append(None)  # Append None for games that haven't been played
    
    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form_away(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Away_Numerical_Form'] = team_numerical_forms



In [89]:
teams['Away_Strength'] = teams['Away_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Away_Win_percentage']*2.0 + teams['Goals_per_away'] + teams['Conceded_per_away']*-1.0

## Defense Strength stats

### Overall

In [90]:
teams['Defence_Strength'] = teams['Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_game']*-1.5 + teams['Win_percentage']*1.5

Home

In [91]:
teams['Home_Defence_Strength'] = teams['Home_Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_home']*-1.5 + teams['Home_Win_percentage']*1.5

Away

In [92]:
teams['Away_Defence_Strength'] = teams['Away_Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_away']*-1.5 + teams['Away_Win_percentage']*1.5

## Attack Strength stats

### Overall

In [93]:
teams['Attack_Strength'] = teams['Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Win_percentage']*0.8 + teams['Goals_per_game']*2.0

Home

In [94]:
teams['Home_Attack_Strength'] = teams['Home_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Home_Win_percentage']*0.8 + teams['Goals_per_home']*2.0

Away

In [95]:
teams['Away_Attack_Strength'] = teams['Away_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Away_Win_percentage']*0.8 + teams['Goals_per_away']*2.0

# Preprocessing

In [96]:
teams.columns

Index(['code', 'team_id', 'team_name', 'team_short_name', 'unavailable',
       'pulse_id', 'event_id', 'fixture_id', 'fixture_difficulty',
       'opponent_team', 'opponent_score', 'team_score', 'Game_played',
       'is_home', 'kickoff_time', 'started', 'Games_Count', 'Total_Goals',
       'Goals_per_game', 'Total_Conceded', 'Conceded_per_game', 'Win', 'Draw',
       'Loss', 'Total_wins', 'Win_percentage', 'result', 'Form',
       'Numerical_Form', 'Strength', 'Home_Count', 'Home_Goals',
       'Goals_per_home', 'Home_Conceded', 'Conceded_per_home',
       'Total_Home_wins', 'Home_Win_percentage', 'Home_Form',
       'Home_Numerical_Form', 'Home_Strength', 'Away_Count', 'Away_Goals',
       'Goals_per_away', 'Away_Conceded', 'Conceded_per_away',
       'Total_Away_wins', 'Away_Win_percentage', 'Away_Form',
       'Away_Numerical_Form', 'Away_Strength', 'Defence_Strength',
       'Home_Defence_Strength', 'Away_Defence_Strength', 'Attack_Strength',
       'Home_Attack_Strength', 'Away_

In [97]:
num_cols_normalize = ['Numerical_Form', 'Home_Numerical_Form','Away_Numerical_Form', 'Strength', 'Home_Strength', 'Away_Strength', 'Defence_Strength', 'Home_Defence_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength', 'Home_Attack_Strength', 'Attack_Strength']

In [98]:
from sklearn.preprocessing import MinMaxScaler


# Fill null values with previous values
teams[num_cols_normalize] = teams[num_cols_normalize]
# Initialize the Min-Max Scaler
scaler = MinMaxScaler()

# Fit and transform the selected columns to scale them between 0 and 1
teams[num_cols_normalize] = scaler.fit_transform(teams[num_cols_normalize])



In [99]:
teams.columns

Index(['code', 'team_id', 'team_name', 'team_short_name', 'unavailable',
       'pulse_id', 'event_id', 'fixture_id', 'fixture_difficulty',
       'opponent_team', 'opponent_score', 'team_score', 'Game_played',
       'is_home', 'kickoff_time', 'started', 'Games_Count', 'Total_Goals',
       'Goals_per_game', 'Total_Conceded', 'Conceded_per_game', 'Win', 'Draw',
       'Loss', 'Total_wins', 'Win_percentage', 'result', 'Form',
       'Numerical_Form', 'Strength', 'Home_Count', 'Home_Goals',
       'Goals_per_home', 'Home_Conceded', 'Conceded_per_home',
       'Total_Home_wins', 'Home_Win_percentage', 'Home_Form',
       'Home_Numerical_Form', 'Home_Strength', 'Away_Count', 'Away_Goals',
       'Goals_per_away', 'Away_Conceded', 'Conceded_per_away',
       'Total_Away_wins', 'Away_Win_percentage', 'Away_Form',
       'Away_Numerical_Form', 'Away_Strength', 'Defence_Strength',
       'Home_Defence_Strength', 'Away_Defence_Strength', 'Attack_Strength',
       'Home_Attack_Strength', 'Away_

In [103]:
filtered_teams = teams.loc[:, ['team_id', 'team_name', 'team_short_name', 
    'unavailable','event_id', 'fixture_id', 'fixture_difficulty','is_home','kickoff_time', 'started',
    'Attack_Strength','team_score',
    'Form', 'Numerical_Form','Win_percentage', 'Strength', 'Defence_Strength',
    'Home_Form','Home_Numerical_Form','Home_Win_percentage','Home_Strength', 'Home_Defence_Strength', 'Home_Attack_Strength',
    'Away_Form', 'Away_Numerical_Form', 'Away_Win_percentage','Away_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength'
    ]]

In [104]:
filtered_teams.to_csv('filtered_teams.csv', index=False)

In [102]:
Man_United = filtered_teams[teams['team_short_name']=='MUN']
Man_United.loc[:,['Form','Numerical_Form','Win_percentage', 'Strength', 'Defence_Strength',
    'Home_Form','Home_Numerical_Form','Home_Win_percentage','Home_Strength', 'Home_Defence_Strength', 'Home_Attack_Strength',
    'Away_Form', 'Away_Numerical_Form', 'Away_Win_percentage','Away_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength'
    ]]

Unnamed: 0,Form,Numerical_Form,Win_percentage,Strength,Defence_Strength,Home_Form,Home_Numerical_Form,Home_Win_percentage,Home_Strength,Home_Defence_Strength,Home_Attack_Strength,Away_Form,Away_Numerical_Form,Away_Win_percentage,Away_Strength,Away_Defence_Strength,Away_Attack_Strength
19,W,0.626943,100.0,0.915354,0.919169,W,0.616911,100.0,0.86299,0.875341,0.740445,,,,,,
29,WL,0.476684,50.0,0.519338,0.526131,,,,,,,L,0.333626,0.0,0.111049,0.11856,0.179464
44,WLW,0.569948,66.666667,0.661186,0.663195,WW,0.691346,100.0,0.884309,0.891073,0.787507,,,,,,
76,WLWL,0.448187,50.0,0.516732,0.517848,,,,,,,LL,0.20967,0.0,0.081935,0.080822,0.142333
81,WLWLL,0.369948,40.0,0.413038,0.413847,WWL,0.448194,66.666667,0.591814,0.595157,0.530329,,,,,,
106,LWLLW,0.460104,50.0,0.506832,0.508975,,,,,,,LLW,0.505934,33.333333,0.386108,0.389347,0.419665
125,WLLWL,0.360622,42.857143,0.423609,0.42614,WWLL,0.261116,50.0,0.417425,0.42068,0.350399,,,,,,
142,LLWLW,0.462176,50.0,0.514909,0.517644,WWLLW,0.478761,60.0,0.555316,0.561236,0.512265,,,,,,
172,,,50.0,,,,,,,,,,,33.333333,,,
196,,,50.0,,,,,60.0,,,,,,,,,
