In [None]:
#Importing libraries

import pandas as pd 
import numpy as np
import seaborn as sns 
import matplotlib.pyplot as plt 
import requests

print("Libraries imported successfully......")

In [None]:
# Read the per-team fixture table that every later cell works on.
teams = pd.read_csv(filepath_or_buffer='expanded_teams_data.csv')

print("Data loaded successfully..")

# Feature Engineering

### General Stats

Games played


In [None]:
# Encode 'Game_played' as 1/0, treating missing values as not played.
# fillna() is applied to the column and the result assigned back: calling it
# with inplace=True on a column selection is chained assignment, which recent
# pandas versions warn about and which leaves `teams` unchanged under
# copy-on-write (the following astype(int) would then fail on NaN).
teams['Game_played'] = teams['Game_played'].fillna(False).astype(int)

# Running count of played games for each team, in row order.
teams['Games_Count'] = teams.groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Total goals

In [None]:
# Running total of 'team_score' for each team, in row order.
team_scores = teams.groupby(['team_id', 'team_name'])['team_score']
teams['Total_Goals'] = team_scores.cumsum()


Goals per game

In [None]:
# Average goals per played game so far: running goal total over running game count.
teams['Goals_per_game'] = teams['Total_Goals'].div(teams['Games_Count'])

Goals conceded

In [None]:
# Running total of 'opponent_score' for each team, in row order.
opponent_scores = teams.groupby(['team_id', 'team_name'])['opponent_score']
teams['Total_Conceded'] = opponent_scores.cumsum()


In [None]:
# Average goals conceded per played game so far.
teams['Conceded_per_game'] = teams['Total_Conceded'].div(teams['Games_Count'])

Win

In [None]:
# 1 where the team outscored its opponent, 0 otherwise.
teams['Win'] = teams['team_score'].gt(teams['opponent_score']).astype(int)


Draw

In [None]:
# 1 where both sides scored the same number of goals, 0 otherwise.
teams['Draw'] = teams['team_score'].eq(teams['opponent_score']).astype(int)

Loss

In [None]:
# 1 where the opponent outscored the team, 0 otherwise.
teams['Loss'] = teams['team_score'].lt(teams['opponent_score']).astype(int)

Total wins

In [None]:
# Running number of wins for each team, in row order.
wins_by_team = teams.groupby(['team_id', 'team_name'])['Win']
teams['Total_wins'] = wins_by_team.cumsum()


Win percentage

In [None]:
# Share of played games won so far, expressed on a 0-100 scale.
win_ratio = teams['Total_wins'].div(teams['Games_Count'])
teams['Win_percentage'] = win_ratio.mul(100)

Game Results

In [None]:
# Collapse the Win/Loss flags into one signed outcome: 1 for a win, -1 for a
# loss, 0 otherwise. np.select evaluates the conditions in order (win first),
# matching the original nested conditional, but works on whole columns instead
# of calling a Python lambda once per row.
teams['result'] = np.select(
    [teams['Win'].astype(bool), teams['Loss'].astype(bool)],
    [1, -1],
    default=0,
)


Form

In [None]:
def calculate_form(group, num_games):
    """Build the rolling form string for each row of one team's fixtures.

    Rows are visited in order. For every row whose 'Game_played' equals True a
    letter is appended to a running string -- 'W' when 'result' is 1, 'L' when
    it is -1 and 'D' otherwise -- and the oldest letter is dropped whenever the
    string grows longer than `num_games`.

    Args:
        group: DataFrame with 'result' and 'Game_played' columns.
        num_games: Maximum number of letters kept in the form string.

    Returns:
        A list with one entry per row: the form string after that row, or None
        for rows that were not played.
    """
    letters = {1: 'W', -1: 'L'}
    history = []
    window = ""

    for outcome, played in zip(group['result'], group['Game_played']):
        if not (played == True):
            history.append(None)
            continue

        window += letters.get(outcome, 'D')
        # Drop the oldest letter once the window is over-full.
        if len(window) > num_games:
            window = window[1:]
        history.append(window)

    return history

# Fill the 'Form' column one team at a time: each team's rows go through
# calculate_form with a 5-game window and the strings are written back to
# exactly those rows.
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    is_team = teams['team_id'] == team_id
    teams.loc[is_team, 'Form'] = calculate_form(teams[is_team], 5)




Numeric Form

In [None]:
def calculate_numerical_form(group, num_games):
    """Compute a recency-weighted numeric form for each row of one team's fixtures.

    Rows are visited in order. Each played game contributes 1 ('result' == 1),
    -1 ('result' == -1) or 0 (anything else) to a rolling window holding at most
    `num_games` games. The form after a game is the sum of the window's values,
    each multiplied by ``1 + position / num_games`` (position 0 is the oldest
    game in the window), divided by the number of games in the window.

    The weight depends only on a game's position inside the window. It
    previously used the DataFrame index label of the row, so the value depended
    on where the team's rows sat in the full table and grew without bound.

    Args:
        group: DataFrame with 'result' and 'Game_played' columns.
        num_games: Size of the rolling window; must be at least 1.

    Returns:
        A list with one entry per row: the numeric form after that row, or None
        for rows whose 'Game_played' is falsy.

    Raises:
        ValueError: If `num_games` is less than 1.
    """
    if num_games < 1:
        raise ValueError("num_games must be at least 1")

    numerical_forms = []
    recent_results = []  # unweighted outcomes of the window, oldest first

    for result, game_played in zip(group['result'], group['Game_played']):
        if not game_played:
            # Append None for games that haven't been played
            numerical_forms.append(None)
            continue

        if result == 1:
            value = 1
        elif result == -1:
            value = -1
        else:
            value = 0

        recent_results.append(value)
        if len(recent_results) > num_games:
            recent_results.pop(0)

        # Weights are recomputed from the current window so the newest game
        # always carries the largest weight.
        weighted_total = sum(
            outcome * (1 + position / num_games)
            for position, outcome in enumerate(recent_results)
        )
        numerical_forms.append(weighted_total / len(recent_results))

    return numerical_forms

# Fill the 'Numerical_Form' column one team at a time: each team's rows go
# through calculate_numerical_form with a 5-game window and the values are
# written back to exactly those rows.
unique_team_ids = teams['team_id'].unique()
for team_id in unique_team_ids:
    is_team = teams['team_id'] == team_id
    teams.loc[is_team, 'Numerical_Form'] = calculate_numerical_form(teams[is_team], 5)

print(teams)


Team strength

In [None]:
# Composite strength score: a weighted sum of recent form, fixture difficulty,
# win percentage and goals scored per game, minus goals conceded per game.
teams['Strength'] = (
    2.5 * teams['Numerical_Form']
    + 2.0 * teams['fixture_difficulty']
    + 2.0 * teams['Win_percentage']
    + teams['Goals_per_game']
    - teams['Conceded_per_game']
)

In [None]:
# Spot-check the engineered columns on a single team.
# NOTE(review): team_id 11 is presumably Liverpool -- confirm against the data.
Liverpool = teams.loc[teams['team_id'].eq(11)]
Liverpool

### Home Stats

Games played home

In [None]:
# Calculate cumulative 'Games_Count' for each row
teams['Home_Count'] = teams[teams['is_home'] == True].groupby(['team_id', 'team_name'])['Game_played'].cumsum()



Goals at home

In [None]:
# Running total of 'team_score' over each team's home rows; rows that are not
# home games receive NaN through index alignment.
home_rows = teams.loc[teams['is_home'].eq(True)]
teams['Home_Goals'] = home_rows.groupby(['team_id', 'team_name'])['team_score'].cumsum()


Goals per home game

In [None]:
# Average goals scored per home game, defined on home rows only (NaN elsewhere).
# The numerator is the home-only running total 'Home_Goals'; using the all-venue
# 'Total_Goals' would divide goals from every game by the home game count and
# overstate the average. Computed on whole columns rather than row by row.
teams['Goals_per_home'] = (
    teams['Home_Goals'] / teams['Home_Count']
).where(teams['is_home'].astype(bool))


Goals conceded at home

In [None]:
# Running total of 'opponent_score' over each team's home rows; rows that are
# not home games receive NaN through index alignment.
home_rows = teams.loc[teams['is_home'].eq(True)]
teams['Home_Conceded'] = home_rows.groupby(['team_id', 'team_name'])['opponent_score'].cumsum()


Conceded per home game 

In [None]:
# Average goals conceded per home game, defined on home rows only (NaN elsewhere).
# Computed on whole columns instead of a per-row lambda: this is faster, and a
# home game count of 0 yields inf/NaN rather than a Python ZeroDivisionError
# from inside apply().
teams['Conceded_per_home'] = (
    teams['Home_Conceded'] / teams['Home_Count']
).where(teams['is_home'].astype(bool))


In [None]:
# Re-inspect the same team now that the home columns have been added.
# NOTE(review): team_id 11 is presumably Liverpool -- confirm against the data.
Liverpool = teams.loc[teams['team_id'].eq(11)]
Liverpool

In [50]:
##
