In [1]:
#Importing libraries

import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 
import requests

print("Libraries imported successfully......")

Libraries imported successfully......


In [2]:
teams = pd.read_csv('expanded_teams_data.csv')

print("Data loaded successfully..")

Data loaded successfully..


# Feature Engineering

### General Stats

Games played


In [3]:
# Convert 'Game_played' column to integers (1 for True, 0 for False)
teams['Game_played'].fillna(False, inplace=True)
teams['Game_played'] = teams['Game_played'].astype(int)

# Calculate cumulative 'Games_Count' for each row
teams['Games_Count'] = teams.groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Total goals

In [4]:
teams['Total_Goals'] = teams.groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per game

In [5]:
teams['Goals_per_game'] = teams['Total_Goals'] / teams['Games_Count']

Goals conceded

In [6]:
teams['Total_Conceded'] = teams.groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


In [7]:
teams['Conceded_per_game'] = teams['Total_Conceded'] / teams['Games_Count']

Win

In [8]:
teams['Win'] = teams['team_score'] > teams['opponent_score'] 
teams['Win'] = teams['Win'].astype(int)


Draw

In [9]:
teams['Draw'] = teams['team_score'] == teams['opponent_score'] 
teams['Draw'] = teams['Draw'].astype(int)

Loss

In [10]:
teams['Loss'] = teams['team_score'] < teams['opponent_score'] 
teams['Loss'] = teams['Loss'].astype(int)

Total wins

In [11]:
teams['Total_wins'] = teams.groupby(['team_id', 'team_name'])['Win'].cumsum()


Win percentage

In [12]:
teams['Win_percentage'] = (teams['Total_wins'] / teams['Games_Count']) * 100

Game Results

In [13]:
teams['result'] = teams.apply(lambda row: 1 if row['Win'] else (-1 if row['Loss'] else 0), axis=1)


Form

In [14]:
def calculate_form(group, num_games):
    forms = []
    current_form = ""
    
    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'
            
            if len(current_form) > num_games:
                current_form = current_form[1:]
            
            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Form'] = team_forms




Numeric Form

In [15]:
def calculate_numerical_form(group, num_games):
    numerical_forms = []
    current_form = []
    
    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']
        
        if game_played:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games
            
            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0
            
            current_form.append(value * weight)
            
            if len(current_form) > num_games:
                current_form.pop(0)
            
            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)
        
        else:
            numerical_forms.append(None)  # Append None for games that haven't been played
    
    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Numerical_Form'] = team_numerical_forms



Team strength

In [16]:
teams['Strength'] = teams['Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Win_percentage']*2.0 + teams['Goals_per_game'] + teams['Conceded_per_game']*-1.0

In [17]:
Liverpool = teams[teams['team_id'] == 11]
Liverpool

Unnamed: 0,code,team_id,team_name,team_short_name,unavailable,pulse_id,event_id,fixture_id,fixture_difficulty,opponent_team,...,Conceded_per_game,Win,Draw,Loss,Total_wins,Win_percentage,result,Form,Numerical_Form,Strength
10,14,11,Liverpool,LIV,False,10,1,9.0,3.0,7.0,...,1.0,0,1,0,0,0.0,0,D,0.0,6.0
30,14,11,Liverpool,LIV,False,10,2,14.0,2.0,3.0,...,1.0,1,0,0,1,50.0,1,DW,3.5,113.75
50,14,11,Liverpool,LIV,False,10,3,29.0,4.0,15.0,...,1.0,1,0,0,2,66.666667,1,DWW,6.0,157.333333
70,14,11,Liverpool,LIV,False,10,4,37.0,3.0,2.0,...,0.75,1,0,0,3,75.0,1,DWWW,8.25,178.125
90,14,11,Liverpool,LIV,False,10,5,50.0,2.0,20.0,...,0.8,1,0,0,4,80.0,1,DWWWW,10.4,191.6
110,14,11,Liverpool,LIV,False,10,6,57.0,2.0,19.0,...,0.833333,1,0,0,5,83.333333,1,WWWWW,15.0,209.833333
130,14,11,Liverpool,LIV,False,10,7,68.0,3.0,18.0,...,1.0,0,0,1,5,71.428571,-1,WWWWL,8.2,170.642857
150,14,11,Liverpool,LIV,False,10,8,72.0,3.0,5.0,...,1.125,0,1,0,5,62.5,0,WWWLD,6.0,147.125
170,14,11,Liverpool,LIV,False,10,9,85.0,2.0,9.0,...,,0,0,0,5,62.5,0,,,
190,14,11,Liverpool,LIV,False,10,10,97.0,2.0,16.0,...,,0,0,0,5,62.5,0,,,


### Home Stats

Games played home

In [18]:
# Calculate cumulative 'Games_Count' for each row
teams['Home_Count'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Goals at home

In [19]:
teams['Home_Goals'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per home game

In [20]:
teams['Goals_per_home'] = teams.apply(lambda row: row['Total_Goals'] / row['Home_Count'] if row['is_home'] else None, axis=1)


Goals conceded at home

In [21]:
teams['Home_Conceded'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


Conceded per home game 

In [22]:
teams['Conceded_per_home'] = teams.apply(lambda row: row['Home_Conceded'] / row['Home_Count'] if row['is_home'] else None, axis=1)


Total home wins

In [23]:
teams['Total_Home_wins'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['Win'].cumsum()


Home Win percentage

In [24]:
teams['Home_Win_percentage'] = (teams['Total_Home_wins'] / teams['Home_Count']) * 100

Home Form

In [25]:
def calculate_form_home(group, num_games):
    forms = []
    current_form = ""
    
    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True and row['is_home'] == True:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'
            
            if len(current_form) > num_games:
                current_form = current_form[1:]
            
            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form_home(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Home_Form'] = team_forms




Home Numeric Form

In [26]:
def calculate_numerical_form_home(group, num_games):
    numerical_forms = []
    current_form = []
    
    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']
        
        if game_played and row['is_home'] == True:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games
            
            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0
            
            current_form.append(value * weight)
            
            if len(current_form) > num_games:
                current_form.pop(0)
            
            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)
        
        else:
            numerical_forms.append(None)  # Append None for games that haven't been played
    
    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form_home(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Home_Numerical_Form'] = team_numerical_forms



In [27]:
teams['Home_Strength'] = teams['Home_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Home_Win_percentage']*2.0 + teams['Goals_per_home'] + teams['Conceded_per_home']*-1.0

### Away Stats

Games played away

In [28]:
# Calculate cumulative 'Games_Count' for each row
teams['Away_Count'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Goals away

In [29]:
teams['Away_Goals'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per away game

In [30]:
teams['Goals_per_away'] = teams.apply(lambda row: row['Total_Goals'] / row['Away_Count'] if row['is_home']==False else None, axis=1)


Goals conceded away

In [31]:
teams['Away_Conceded'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


Conceded per away game 

In [32]:
teams['Conceded_per_away'] = teams.apply(lambda row: row['Away_Conceded'] / row['Away_Count'] if row['is_home'] ==False else None, axis=1)


Total Away wins

In [33]:
teams['Total_Away_wins'] = teams[teams['is_home'] == False].groupby(['team_id', 'team_name'])['Win'].cumsum()


Away Win percentage

In [34]:
teams['Away_Win_percentage'] = (teams['Total_Away_wins'] / teams['Away_Count']) * 100

Away Form

In [35]:
def calculate_form_away(group, num_games):
    forms = []
    current_form = ""
    
    for i, row in group.iterrows():
        result = row['result']
        if row['Game_played'] == True and row['is_home'] == False:
            if result == 1:
                current_form += 'W'
            elif result == -1:
                current_form += 'L'
            else:
                current_form += 'D'
            
            if len(current_form) > num_games:
                current_form = current_form[1:]
            
            forms.append(current_form)

        else:
            forms.append(None)
    return forms

# Calculate the form for each specific 'team_id' and assign it to the 'Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_forms = calculate_form_away(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Away_Form'] = team_forms




Away Numeric Form

In [36]:
def calculate_numerical_form_away(group, num_games):
    numerical_forms = []
    current_form = []
    
    for i, row in group.iterrows():
        result = row['result']
        game_played = row['Game_played']
        
        if game_played and row['is_home'] == False:
            # Assign weights based on the position in the form
            weight = 1 + i / num_games
            
            if result == 1:
                value = 1
            elif result == -1:
                value = -1
            else:
                value = 0
            
            current_form.append(value * weight)
            
            if len(current_form) > num_games:
                current_form.pop(0)
            
            numerical_form = sum(current_form) / len(current_form)
            numerical_forms.append(numerical_form)
        
        else:
            numerical_forms.append(None)  # Append None for games that haven't been played
    
    return numerical_forms

# Calculate the numerical form for each specific 'team_id' and assign it to the 'Numerical_Form' column for that team
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    team_subset = teams[teams['team_id'] == team_id]
    team_numerical_forms = calculate_numerical_form_away(team_subset, 5)
    teams.loc[teams['team_id'] == team_id, 'Away_Numerical_Form'] = team_numerical_forms



In [37]:
teams['Away_Strength'] = teams['Away_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Away_Win_percentage']*2.0 + teams['Goals_per_away'] + teams['Conceded_per_away']*-1.0

## Defense Strength stats

### Overall

In [38]:
teams['Defence_Strength'] = teams['Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_game']*-1.5 + teams['Win_percentage']*1.5

Home

In [39]:
teams['Home_Defence_Strength'] = teams['Home_Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_home']*-1.5 + teams['Home_Win_percentage']*1.5

Away

In [40]:
teams['Away_Defence_Strength'] = teams['Away_Numerical_Form']*2.0 + teams['fixture_difficulty']*1.5 + teams['Conceded_per_away']*-1.5 + teams['Away_Win_percentage']*1.5

## Attack Strength stats

### Overall

In [41]:
teams['Attack_Strength'] = teams['Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Win_percentage']*0.8 + teams['Goals_per_game']*2.0

Home

In [42]:
teams['Home_Attack_Strength'] = teams['Home_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Home_Win_percentage']*0.8 + teams['Goals_per_home']*2.0

Away

In [43]:
teams['Away_Attack_Strength'] = teams['Away_Numerical_Form']*2.5 + teams['fixture_difficulty']*2.0 + teams['Away_Win_percentage']*0.8 + teams['Goals_per_away']*2.0

# Preprocessing

In [None]:
teams.columns

In [45]:
num_cols_normalize = ['Numerical_Form', 'Home_Numerical_Form','Away_Numerical_Form', 'Strength', 'Home_Strength', 'Away_Strength', 'Defence_Strength', 'Home_Defence_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength', 'Home_Attack_Strength', 'Attack_Strength']

In [46]:
from sklearn.preprocessing import MinMaxScaler


# Fill null values with previous values
teams[num_cols_normalize] = teams[num_cols_normalize]
# Initialize the Min-Max Scaler
scaler = MinMaxScaler()

# Fit and transform the selected columns to scale them between 0 and 1
teams[num_cols_normalize] = scaler.fit_transform(teams[num_cols_normalize])



In [47]:
teams.columns

Index(['code', 'team_id', 'team_name', 'team_short_name', 'unavailable',
       'pulse_id', 'event_id', 'fixture_id', 'fixture_difficulty',
       'opponent_team', 'opponent_score', 'team_score', 'Game_played',
       'is_home', 'Games_Count', 'Total_Goals', 'Goals_per_game',
       'Total_Conceded', 'Conceded_per_game', 'Win', 'Draw', 'Loss',
       'Total_wins', 'Win_percentage', 'result', 'Form', 'Numerical_Form',
       'Strength', 'Home_Count', 'Home_Goals', 'Goals_per_home',
       'Home_Conceded', 'Conceded_per_home', 'Total_Home_wins',
       'Home_Win_percentage', 'Home_Form', 'Home_Numerical_Form',
       'Home_Strength', 'Away_Count', 'Away_Goals', 'Goals_per_away',
       'Away_Conceded', 'Conceded_per_away', 'Total_Away_wins',
       'Away_Win_percentage', 'Away_Form', 'Away_Numerical_Form',
       'Away_Strength', 'Defence_Strength', 'Home_Defence_Strength',
       'Away_Defence_Strength', 'Attack_Strength', 'Home_Attack_Strength',
       'Away_Attack_Strength'],
      dt

In [56]:
filtered_teams = teams.loc[:, ['team_id', 'team_name', 'team_short_name', 
    'unavailable','event_id', 'fixture_id', 'fixture_difficulty',
    'Attack_Strength',
    'Form', 'Numerical_Form','Win_percentage', 'Strength', 'Defence_Strength',
    'Home_Form','Home_Numerical_Form','Home_Win_percentage','Home_Strength', 'Home_Defence_Strength', 'Home_Attack_Strength',
    'Away_Form', 'Away_Numerical_Form', 'Away_Win_percentage','Away_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength'
    ]]

In [57]:
filtered_teams.to_csv('filtered_teams.csv', index=False)

In [58]:
Man_United = filtered_teams[teams['team_short_name']=='MUN']
Man_United.loc[:,['Form','Numerical_Form','Win_percentage', 'Strength', 'Defence_Strength',
    'Home_Form','Home_Numerical_Form','Home_Win_percentage','Home_Strength', 'Home_Defence_Strength', 'Home_Attack_Strength',
    'Away_Form', 'Away_Numerical_Form', 'Away_Win_percentage','Away_Strength', 'Away_Defence_Strength', 'Away_Attack_Strength'
    ]]

Unnamed: 0,Form,Numerical_Form,Win_percentage,Strength,Defence_Strength,Home_Form,Home_Numerical_Form,Home_Win_percentage,Home_Strength,Home_Defence_Strength,Home_Attack_Strength,Away_Form,Away_Numerical_Form,Away_Win_percentage,Away_Strength,Away_Defence_Strength,Away_Attack_Strength
13,W,0.606719,100.0,0.896069,0.898936,W,0.553846,100.0,0.842271,0.853391,0.707865,,,,,,
33,WL,0.468379,50.0,0.51141,0.517681,,,,,,,L,0.385563,0.0,0.127323,0.135635,0.206422
53,WLW,0.580369,66.666667,0.657704,0.659509,WW,0.669231,100.0,0.87607,0.882327,0.775281,,,,,,
73,WLWL,0.468379,50.0,0.518188,0.519339,,,,,,,LL,0.27993,0.0,0.104022,0.10399,0.178899
93,WLWLL,0.381423,40.0,0.412788,0.413565,WWL,0.407692,66.666667,0.578639,0.581244,0.509363,,,,,,
113,LWLLW,0.480237,50.0,0.508699,0.510869,,,,,,,LLW,0.589789,33.333333,0.412483,0.417015,0.463303
133,WLLWL,0.381423,42.857143,0.426151,0.428729,WWLL,0.219231,50.0,0.403729,0.406218,0.328652,,,,,,
153,LLWLW,0.480237,50.0,0.5159,0.518609,WWLLW,0.447692,60.0,0.545516,0.550853,0.496629,,,,,,
173,,,50.0,,,,,,,,,,,33.333333,,,
193,,,50.0,,,,,60.0,,,,,,,,,
