The notebook is for pipeline all data preparation of the football match data

In [40]:
import re
import pandas as pd
import os
import numpy as np
from csv import reader
import plotly.express as px
import missingno as msno
import pickle

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.linear_model import LogisticRegression

pd.options.mode.chained_assignment = None

In [41]:
# define recent matches meaning
RECENT_PREFORMANCE_MATCH_COUNT = 3

Functions to get total goals so far for home team and away team in each game. The dataframe will call the apply() so that it will loop all records in the dataframe. And it will filter all records which round < current round and seperated with home and away for each team. The filter records then can calculate home total goals and away total goals

In [42]:
def getLeagueSeasonTeamBeforeRoundTotalGoal(data, league, season, team, round):
    # determine home or away and get the score 
    # get home game of the team
    home_pd = data[(data["League"]==league) & (data["Home_Team"]==team) & (data["Season"]==season) & (data["Round"]<round)]
    home_total_score = home_pd['Home_Score'].astype('Int64').sum()

    # get away game of the team
    away_pd = data[(data["League"]==league) & (data["Away_Team"]==team) & (data["Season"]==season) & (data["Round"]<round)]
    away_total_score = home_pd['Away_Score'].astype('Int64').sum()

    # calculate total goals
    return home_total_score, away_total_score


def fillWithTotalGoalSoFar(record, data):
    # get home team and away team and round
    league = record['League']
    season = record['Season']
    round = record['Round']
    hteam = record['Home_Team']
    ateam = record['Away_Team']
    
    hometeam_home_goal_so_far, hometeam_away_goal_so_far = getLeagueSeasonTeamBeforeRoundTotalGoal(data, league, season, hteam, round)
    awayteam_home_goal_so_far, awayteam_away_goal_so_far = getLeagueSeasonTeamBeforeRoundTotalGoal(data, league, season, ateam, round)

    return [hometeam_home_goal_so_far, hometeam_away_goal_so_far, awayteam_home_goal_so_far, awayteam_away_goal_so_far]

Function to get recent performance with apply() similarly

In [43]:
def findRecentPreviousRounds(currentRound, limit):
    if currentRound<=limit:
        return None
    else:
        r = []
        for l in range(limit):
            r.append(currentRound - (limit-l))
        return r


def findLeagueSeasonTeamRecentPreviousRounds(data, league, season, team, round):
    rounds = findRecentPreviousRounds(round, RECENT_PREFORMANCE_MATCH_COUNT)         # can change for optimization
    if rounds is None:
        return None

    previous_matches_pd =  data[(data["League"]==league) & ((data["Home_Team"]==team) | (data["Away_Team"]==team)) & (data["Season"]==season) & (data["Round"].isin(rounds))]
    recent_perf = 0
    for index, row in previous_matches_pd.iterrows():
        hteam = row['Home_Team']
        ateam = row['Away_Team']
        if hteam==team:
            recent_perf = recent_perf + (row['Home_Score']-row['Away_Score'])
        else:
            recent_perf = recent_perf + (row['Away_Score']-row['Home_Score'])

    return recent_perf


def fillWithRecentPerformance(record, data):
    # get home team and away team and round
    league = record['League']
    season = record['Season']
    round = record['Round']
    hteam = record['Home_Team']
    ateam = record['Away_Team']
    
    home_team_goal_diff = findLeagueSeasonTeamRecentPreviousRounds(data, league, season, hteam, round)
    away_team_goal_diff = findLeagueSeasonTeamRecentPreviousRounds(data, league, season, ateam, round)

    return [home_team_goal_diff, away_team_goal_diff]

In [16]:
# load all directory as league name list
dir = "./Results"
leagues = [name for name in os.listdir(dir) if os.path.isdir(os.path.join(dir, name))]

# loop to open csv
result_with_goal_sofar_pd = pd.DataFrame()
for league in leagues:
    league_folder = os.path.join(dir, league)
    csv_file_for_league = [os.path.join(league_folder, name) for name in os.listdir(league_folder) if name.endswith('.csv')]
    
    for csv_filename in csv_file_for_league:
        print(league, csv_filename)

        current_league_season_pd = pd.read_csv(csv_filename, skiprows=[0], names=["Home_Team", "Away_Team", "Result", "Link", "Season", "Round", "League"])

        # Divide result into home_score and away_score
        df_score =  current_league_season_pd['Result'].str.extract(r'(\d)-(\d)')
        current_league_season_pd.insert(loc=3, column="Home_Score", value=df_score[0].astype('Int64'))     # use Int64 as it support NaN
        current_league_season_pd.insert(loc=4, column="Away_Score", value=df_score[1].astype('Int64')) 

        if len(current_league_season_pd)>0:

            # get home team and away team total goal so far
            home_away_total_goal_sofar = current_league_season_pd.apply(fillWithTotalGoalSoFar, data=current_league_season_pd, axis=1)
            goal_so_far_list = np.array(home_away_total_goal_sofar.values.tolist())         # convert to list
            home_away_total_goal_sofar_pd = pd.DataFrame(goal_so_far_list, columns=["HOMETEAM_HOME_GOAL_SO_FAR", "HOMETEAM_AWAY_GOAL_SO_FAR", "AWAYTEAM_HOME_GOAL_SO_FAR", "AWAYTEAM_AWAY_GOAL_SO_FAR"])    # convert to dataframe
            current_league_season_pd.insert(loc=5, column="HOMETEAM_HOME_GOAL_SO_FAR", value=home_away_total_goal_sofar_pd["HOMETEAM_HOME_GOAL_SO_FAR"].astype('Int64')) 
            current_league_season_pd.insert(loc=6, column="HOMETEAM_AWAY_GOAL_SO_FAR", value=home_away_total_goal_sofar_pd["HOMETEAM_AWAY_GOAL_SO_FAR"].astype('Int64')) 
            current_league_season_pd.insert(loc=7, column="AWAYTEAM_HOME_GOAL_SO_FAR", value=home_away_total_goal_sofar_pd["AWAYTEAM_HOME_GOAL_SO_FAR"].astype('Int64'))     
            current_league_season_pd.insert(loc=8, column="AWAYTEAM_AWAY_GOAL_SO_FAR", value=home_away_total_goal_sofar_pd["AWAYTEAM_AWAY_GOAL_SO_FAR"].astype('Int64'))     

            # get recent performance
            home_away_recent_perf = current_league_season_pd.apply(fillWithRecentPerformance, data=current_league_season_pd, axis=1)
            perf_list = np.array(home_away_recent_perf.values.tolist())
            home_away_perf_pd = pd.DataFrame(perf_list, columns=["HOME_LASTEST_GOAL_DIFF", "AWAY_LASTEST_GOAL_DIFF"])
            current_league_season_pd.insert(loc=9, column="HOME_LASTEST_GOAL_DIFF", value=home_away_perf_pd["HOME_LASTEST_GOAL_DIFF"].astype('Int64')) 
            current_league_season_pd.insert(loc=10, column="AWAY_LASTEST_GOAL_DIFF", value=home_away_perf_pd["AWAY_LASTEST_GOAL_DIFF"].astype('Int64')) 

            result_with_goal_sofar_pd = pd.concat([result_with_goal_sofar_pd, current_league_season_pd])


result_with_goal_sofar_pd

championship ./Results/championship/Results_2014_championship.csv
championship ./Results/championship/Results_2021_championship.csv
championship ./Results/championship/Results_1991_championship.csv
championship ./Results/championship/Results_2000_championship.csv
championship ./Results/championship/Results_2004_championship.csv
championship ./Results/championship/Results_1995_championship.csv
championship ./Results/championship/Results_2010_championship.csv
championship ./Results/championship/Results_2015_championship.csv
championship ./Results/championship/Results_2001_championship.csv
championship ./Results/championship/Results_1990_championship.csv
championship ./Results/championship/Results_2020_championship.csv
championship ./Results/championship/Results_1994_championship.csv
championship ./Results/championship/Results_2005_championship.csv
championship ./Results/championship/Results_2011_championship.csv
championship ./Results/championship/Results_1993_championship.csv
championsh

Unnamed: 0,Home_Team,Away_Team,Result,Home_Score,Away_Score,HOMETEAM_HOME_GOAL_SO_FAR,HOMETEAM_AWAY_GOAL_SO_FAR,AWAYTEAM_HOME_GOAL_SO_FAR,AWAYTEAM_AWAY_GOAL_SO_FAR,HOME_LASTEST_GOAL_DIFF,AWAY_LASTEST_GOAL_DIFF,Link,Season,Round,League
0,Watford,Middlesbrough,1-0,1,0,0,0,0,0,,,https://www.besoccer.com/match/watford-fc/midd...,2021,1,championship
1,Birmingham City,Brentford,1-0,1,0,0,0,0,0,,,https://www.besoccer.com/match/birmingham-city...,2021,1,championship
2,Wycombe Wanderers,Rotherham United,0-1,0,1,0,0,0,0,,,https://www.besoccer.com/match/wycombe-wandere...,2021,1,championship
3,AFC Bournemouth,Blackburn Rovers,3-2,3,2,0,0,0,0,,,https://www.besoccer.com/match/afc-bournemouth...,2021,1,championship
4,Barnsley,Luton Town,0-1,0,1,0,0,0,0,,,https://www.besoccer.com/match/barnsley-fc/lut...,2021,1,championship
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,Pescara,Padova,1-2,1,2,32,15,22,15,5,-3,https://www.besoccer.com/match/pescara-calcio/...,1997,38,serie_b
376,Genoa,Palermo FC,4-1,4,1,33,12,24,24,2,-1,https://www.besoccer.com/match/genoa/palermo/1...,1997,38,serie_b
377,Torino,Ravenna FC,0-4,0,4,27,23,22,18,-2,-2,https://www.besoccer.com/match/torino-fc/raven...,1997,38,serie_b
378,Salernitana,Reggina,1-3,1,3,20,7,23,18,-2,3,https://www.besoccer.com/match/salernitana-cal...,1997,38,serie_b


In [17]:
result_with_goal_sofar_pd = result_with_goal_sofar_pd.dropna()
result_with_goal_sofar_pd

Unnamed: 0,Home_Team,Away_Team,Result,Home_Score,Away_Score,HOMETEAM_HOME_GOAL_SO_FAR,HOMETEAM_AWAY_GOAL_SO_FAR,AWAYTEAM_HOME_GOAL_SO_FAR,AWAYTEAM_AWAY_GOAL_SO_FAR,HOME_LASTEST_GOAL_DIFF,AWAY_LASTEST_GOAL_DIFF,Link,Season,Round,League
36,Coventry City,AFC Bournemouth,1-3,1,3,3,2,4,2,0,2,https://www.besoccer.com/match/coventry-city/a...,2021,4,championship
37,Norwich City,Derby County,0-1,0,1,2,2,0,6,0,-7,https://www.besoccer.com/match/norwich-city-fc...,2021,4,championship
38,Blackburn Rovers,Cardiff City,0-0,0,0,5,0,1,4,8,-1,https://www.besoccer.com/match/blackburn-rover...,2021,4,championship
39,Luton Town,Wycombe Wanderers,2-0,2,0,2,1,0,3,1,-8,https://www.besoccer.com/match/luton-town-fc/w...,2021,4,championship
40,Middlesbrough,Barnsley,2-1,2,1,1,1,0,1,-1,-3,https://www.besoccer.com/match/middlesbrough-f...,2021,4,championship
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,Pescara,Padova,1-2,1,2,32,15,22,15,5,-3,https://www.besoccer.com/match/pescara-calcio/...,1997,38,serie_b
376,Genoa,Palermo FC,4-1,4,1,33,12,24,24,2,-1,https://www.besoccer.com/match/genoa/palermo/1...,1997,38,serie_b
377,Torino,Ravenna FC,0-4,0,4,27,23,22,18,-2,-2,https://www.besoccer.com/match/torino-fc/raven...,1997,38,serie_b
378,Salernitana,Reggina,1-3,1,3,20,7,23,18,-2,3,https://www.besoccer.com/match/salernitana-cal...,1997,38,serie_b


In [18]:
result_with_goal_sofar_pd.count()

Home_Team                    133710
Away_Team                    133710
Result                       133710
Home_Score                   133710
Away_Score                   133710
HOMETEAM_HOME_GOAL_SO_FAR    133710
HOMETEAM_AWAY_GOAL_SO_FAR    133710
AWAYTEAM_HOME_GOAL_SO_FAR    133710
AWAYTEAM_AWAY_GOAL_SO_FAR    133710
HOME_LASTEST_GOAL_DIFF       133710
AWAY_LASTEST_GOAL_DIFF       133710
Link                         133710
Season                       133710
Round                        133710
League                       133710
dtype: int64

In [19]:
# load pickle and read content
d = pickle.load(open('./ELO/elo_dict.pkl', 'rb'))
d

{'https://www.besoccer.com/match/saarbrucken/stuttgarter-kickers/19903487': {'Elo_home': 56.0,
  'Elo_away': 59.0},
 'https://www.besoccer.com/match/sc-freiburg/unterhaching/19903488': {'Elo_home': 53.0,
  'Elo_away': 55.0},
 'https://www.besoccer.com/match/vfl-osnabruck/meppen/19903489': {'Elo_home': 52.0,
  'Elo_away': 53.0},
 'https://www.besoccer.com/match/rot-weiss-essen/schalke-04/19903490': {'Elo_home': 53.0,
  'Elo_away': 62.0},
 'https://www.besoccer.com/match/alemannia-aachen/msv-duisburg/19903491': {'Elo_home': 57.0,
  'Elo_away': 52.0},
 'https://www.besoccer.com/match/hessen-kassel/blau-weiss-1890-berlin/19903492': {'Elo_home': 51.0,
  'Elo_away': 56.0},
 'https://www.besoccer.com/match/wattenscheid-09/bayreuth-spvgg/19903493': {'Elo_home': 53.0,
  'Elo_away': 51.0},
 'https://www.besoccer.com/match/fortuna-koln/darmstadt-98/19903494': {'Elo_home': 56.0,
  'Elo_away': 55.0},
 'https://www.besoccer.com/match/hertha-bsc/preuben-munster/19903495': {'Elo_home': 55.0,
  'Elo_aw

In [20]:
elo_key_df = pd.DataFrame(d.keys(), columns=["link"])
elo_key_df

Unnamed: 0,link
0,https://www.besoccer.com/match/saarbrucken/stu...
1,https://www.besoccer.com/match/sc-freiburg/unt...
2,https://www.besoccer.com/match/vfl-osnabruck/m...
3,https://www.besoccer.com/match/rot-weiss-essen...
4,https://www.besoccer.com/match/alemannia-aache...
...,...
132106,https://www.besoccer.com/match/lecce/spal-1907...
132107,https://www.besoccer.com/match/frosinone-calci...
132108,https://www.besoccer.com/match/reggina/vicenza...
132109,https://www.besoccer.com/match/venezia/nuova-c...


In [21]:
elo_val_df = pd.DataFrame.from_dict(d.values())
elo_val_df

Unnamed: 0,Elo_home,Elo_away
0,56.0,59.0
1,53.0,55.0
2,52.0,53.0
3,53.0,62.0
4,57.0,52.0
...,...,...
132106,59.0,59.0
132107,58.0,57.0
132108,40.0,49.0
132109,54.0,47.0


In [22]:
elo_df = elo_key_df.join(elo_val_df)
elo_df

Unnamed: 0,link,Elo_home,Elo_away
0,https://www.besoccer.com/match/saarbrucken/stu...,56.0,59.0
1,https://www.besoccer.com/match/sc-freiburg/unt...,53.0,55.0
2,https://www.besoccer.com/match/vfl-osnabruck/m...,52.0,53.0
3,https://www.besoccer.com/match/rot-weiss-essen...,53.0,62.0
4,https://www.besoccer.com/match/alemannia-aache...,57.0,52.0
...,...,...,...
132106,https://www.besoccer.com/match/lecce/spal-1907...,59.0,59.0
132107,https://www.besoccer.com/match/frosinone-calci...,58.0,57.0
132108,https://www.besoccer.com/match/reggina/vicenza...,40.0,49.0
132109,https://www.besoccer.com/match/venezia/nuova-c...,54.0,47.0


In [None]:
"""
def fillWithELO(link):
    if link not in d:
        return [pd.NA, pd.NA]
    else:
        return [d[link]['Elo_home'], d[link]['Elo_away']]
"""

In [23]:
full_pd = result_with_goal_sofar_pd.merge(elo_df, left_on='Link', right_on='link')
full_pd

Unnamed: 0,Home_Team,Away_Team,Result,Home_Score,Away_Score,HOMETEAM_HOME_GOAL_SO_FAR,HOMETEAM_AWAY_GOAL_SO_FAR,AWAYTEAM_HOME_GOAL_SO_FAR,AWAYTEAM_AWAY_GOAL_SO_FAR,HOME_LASTEST_GOAL_DIFF,AWAY_LASTEST_GOAL_DIFF,Link,Season,Round,League,link,Elo_home,Elo_away
0,Coventry City,AFC Bournemouth,1-3,1,3,3,2,4,2,0,2,https://www.besoccer.com/match/coventry-city/a...,2021,4,championship,https://www.besoccer.com/match/coventry-city/a...,46.0,62.0
1,Norwich City,Derby County,0-1,0,1,2,2,0,6,0,-7,https://www.besoccer.com/match/norwich-city-fc...,2021,4,championship,https://www.besoccer.com/match/norwich-city-fc...,62.0,60.0
2,Blackburn Rovers,Cardiff City,0-0,0,0,5,0,1,4,8,-1,https://www.besoccer.com/match/blackburn-rover...,2021,4,championship,https://www.besoccer.com/match/blackburn-rover...,58.0,60.0
3,Luton Town,Wycombe Wanderers,2-0,2,0,2,1,0,3,1,-8,https://www.besoccer.com/match/luton-town-fc/w...,2021,4,championship,https://www.besoccer.com/match/luton-town-fc/w...,51.0,41.0
4,Middlesbrough,Barnsley,2-1,2,1,1,1,0,1,-1,-3,https://www.besoccer.com/match/middlesbrough-f...,2021,4,championship,https://www.besoccer.com/match/middlesbrough-f...,61.0,46.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120553,Pescara,Padova,1-2,1,2,32,15,22,15,5,-3,https://www.besoccer.com/match/pescara-calcio/...,1997,38,serie_b,https://www.besoccer.com/match/pescara-calcio/...,59.0,54.0
120554,Genoa,Palermo FC,4-1,4,1,33,12,24,24,2,-1,https://www.besoccer.com/match/genoa/palermo/1...,1997,38,serie_b,https://www.besoccer.com/match/genoa/palermo/1...,61.0,58.0
120555,Torino,Ravenna FC,0-4,0,4,27,23,22,18,-2,-2,https://www.besoccer.com/match/torino-fc/raven...,1997,38,serie_b,https://www.besoccer.com/match/torino-fc/raven...,63.0,54.0
120556,Salernitana,Reggina,1-3,1,3,20,7,23,18,-2,3,https://www.besoccer.com/match/salernitana-cal...,1997,38,serie_b,https://www.besoccer.com/match/salernitana-cal...,52.0,52.0


In [None]:
full_pd = full_pd.dropna()

In [24]:
# delete no value column
full_pd.drop('Result', inplace=True, axis=1)
full_pd.drop('Link', inplace=True, axis=1)
full_pd.drop('link', inplace=True, axis=1)
full_pd

Unnamed: 0,Home_Team,Away_Team,Home_Score,Away_Score,HOMETEAM_HOME_GOAL_SO_FAR,HOMETEAM_AWAY_GOAL_SO_FAR,AWAYTEAM_HOME_GOAL_SO_FAR,AWAYTEAM_AWAY_GOAL_SO_FAR,HOME_LASTEST_GOAL_DIFF,AWAY_LASTEST_GOAL_DIFF,Season,Round,League,Elo_home,Elo_away
0,Coventry City,AFC Bournemouth,1,3,3,2,4,2,0,2,2021,4,championship,46.0,62.0
1,Norwich City,Derby County,0,1,2,2,0,6,0,-7,2021,4,championship,62.0,60.0
2,Blackburn Rovers,Cardiff City,0,0,5,0,1,4,8,-1,2021,4,championship,58.0,60.0
3,Luton Town,Wycombe Wanderers,2,0,2,1,0,3,1,-8,2021,4,championship,51.0,41.0
4,Middlesbrough,Barnsley,2,1,1,1,0,1,-1,-3,2021,4,championship,61.0,46.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120553,Pescara,Padova,1,2,32,15,22,15,5,-3,1997,38,serie_b,59.0,54.0
120554,Genoa,Palermo FC,4,1,33,12,24,24,2,-1,1997,38,serie_b,61.0,58.0
120555,Torino,Ravenna FC,0,4,27,23,22,18,-2,-2,1997,38,serie_b,63.0,54.0
120556,Salernitana,Reggina,1,3,20,7,23,18,-2,3,1997,38,serie_b,52.0,52.0


In [32]:
# find who win H:Home A:Away D:Draw
def get_result(record):
    hscore = record['Home_Score']
    ascore = record['Away_Score']
    if hscore is pd.NA or ascore is pd.NA:
        return pd.NA
    if hscore==ascore:
        return 0
    elif hscore>ascore:
        return 1
    else:
        return -1

result_pd = full_pd.apply(get_result, axis=1)

#result_with_goal_sofar_pd.drop('Home_Score', inplace=True, axis=1)
#result_with_goal_sofar_pd.drop('Away_Score', inplace=True, axis=1)

full_pd.insert(loc=len(full_pd.columns), column="Result", value=result_pd.astype('Int64')) 
full_pd

Unnamed: 0,League,Season,Round,Home_Team,Away_Team,Elo_home,Elo_away,Home_Score,Away_Score,HOMETEAM_HOME_GOAL_SO_FAR,HOMETEAM_AWAY_GOAL_SO_FAR,AWAYTEAM_HOME_GOAL_SO_FAR,AWAYTEAM_AWAY_GOAL_SO_FAR,HOME_LASTEST_GOAL_DIFF,AWAY_LASTEST_GOAL_DIFF,Result
0,championship,2021,4,Coventry City,AFC Bournemouth,46.0,62.0,1,3,3,2,4,2,0,2,-1
1,championship,2021,4,Norwich City,Derby County,62.0,60.0,0,1,2,2,0,6,0,-7,-1
2,championship,2021,4,Blackburn Rovers,Cardiff City,58.0,60.0,0,0,5,0,1,4,8,-1,0
3,championship,2021,4,Luton Town,Wycombe Wanderers,51.0,41.0,2,0,2,1,0,3,1,-8,1
4,championship,2021,4,Middlesbrough,Barnsley,61.0,46.0,2,1,1,1,0,1,-1,-3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120553,serie_b,1997,38,Pescara,Padova,59.0,54.0,1,2,32,15,22,15,5,-3,-1
120554,serie_b,1997,38,Genoa,Palermo FC,61.0,58.0,4,1,33,12,24,24,2,-1,1
120555,serie_b,1997,38,Torino,Ravenna FC,63.0,54.0,0,4,27,23,22,18,-2,-2,-1
120556,serie_b,1997,38,Salernitana,Reggina,52.0,52.0,1,3,20,7,23,18,-2,3,-1


In [37]:
# reorder dataframe column
full_pd.insert(0, 'League', full_pd.pop('League'))
full_pd.insert(1, 'Season', full_pd.pop('Season'))
full_pd.insert(2, 'Round', full_pd.pop('Round'))
full_pd.insert(3, 'Home_Team', full_pd.pop('Home_Team'))
full_pd.insert(4, 'Away_Team', full_pd.pop('Away_Team'))
full_pd.insert(5, 'Elo_home', full_pd.pop('Elo_home'))
full_pd.insert(6, 'Elo_away', full_pd.pop('Elo_away'))
full_pd

Unnamed: 0,League,Season,Round,Home_Team,Away_Team,Elo_home,Elo_away,HOMETEAM_HOME_GOAL_SO_FAR,HOMETEAM_AWAY_GOAL_SO_FAR,AWAYTEAM_HOME_GOAL_SO_FAR,AWAYTEAM_AWAY_GOAL_SO_FAR,HOME_LASTEST_GOAL_DIFF,AWAY_LASTEST_GOAL_DIFF,Home_Score,Away_Score,Result
0,championship,2021,4,Coventry City,AFC Bournemouth,46.0,62.0,3,2,4,2,0,2,1,3,-1
1,championship,2021,4,Norwich City,Derby County,62.0,60.0,2,2,0,6,0,-7,0,1,-1
2,championship,2021,4,Blackburn Rovers,Cardiff City,58.0,60.0,5,0,1,4,8,-1,0,0,0
3,championship,2021,4,Luton Town,Wycombe Wanderers,51.0,41.0,2,1,0,3,1,-8,2,0,1
4,championship,2021,4,Middlesbrough,Barnsley,61.0,46.0,1,1,0,1,-1,-3,2,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120553,serie_b,1997,38,Pescara,Padova,59.0,54.0,32,15,22,15,5,-3,1,2,-1
120554,serie_b,1997,38,Genoa,Palermo FC,61.0,58.0,33,12,24,24,2,-1,4,1,1
120555,serie_b,1997,38,Torino,Ravenna FC,63.0,54.0,27,23,22,18,-2,-2,0,4,-1
120556,serie_b,1997,38,Salernitana,Reggina,52.0,52.0,20,7,23,18,-2,3,1,3,-1


In [38]:
full_pd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 111652 entries, 0 to 120557
Data columns (total 16 columns):
 #   Column                     Non-Null Count   Dtype  
---  ------                     --------------   -----  
 0   League                     111652 non-null  object 
 1   Season                     111652 non-null  int64  
 2   Round                      111652 non-null  int64  
 3   Home_Team                  111652 non-null  object 
 4   Away_Team                  111652 non-null  object 
 5   Elo_home                   111652 non-null  float64
 6   Elo_away                   111652 non-null  float64
 7   HOMETEAM_HOME_GOAL_SO_FAR  111652 non-null  Int64  
 8   HOMETEAM_AWAY_GOAL_SO_FAR  111652 non-null  Int64  
 9   AWAYTEAM_HOME_GOAL_SO_FAR  111652 non-null  Int64  
 10  AWAYTEAM_AWAY_GOAL_SO_FAR  111652 non-null  Int64  
 11  HOME_LASTEST_GOAL_DIFF     111652 non-null  Int64  
 12  AWAY_LASTEST_GOAL_DIFF     111652 non-null  Int64  
 13  Home_Score                 11

In [39]:
# export to csv
full_pd.to_csv('cleaned_dataset.csv', index=False)

In [89]:
#*********************************************************************** END HERE