In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import zipfile
import os
from pickle import dump,load
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression as LR
from sklearn.metrics import plot_confusion_matrix
from sklearn.model_selection import GridSearchCV

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

In [105]:
#loading data set from fivethirtyeight.com
zf = zipfile.ZipFile('soccer-spi.zip')
raw_data=pd.read_csv(zf.open("soccer-spi/spi_matches_latest.csv"))
try:
    os.mkdir("data_sets")
except:
    "File Already Exists"
    pass


In [3]:
raw_data.head()

Unnamed: 0,season,date,league_id,league,team1,team2,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2,xg1,xg2,nsxg1,nsxg2,adj_score1,adj_score2
0,2019,2019-03-01,1979,Chinese Super League,Shandong Luneng,Guizhou Renhe,48.22,37.83,0.5755,0.174,0.2505,1.75,0.84,45.9,22.1,1.0,0.0,1.39,0.26,2.05,0.54,1.05,0.0
1,2019,2019-03-01,1979,Chinese Super League,Guangzhou Evergrande,Tianjin Quanujian,65.59,39.99,0.7832,0.0673,0.1495,2.58,0.62,77.1,28.8,3.0,0.0,0.49,0.45,1.05,0.75,3.15,0.0
2,2019,2019-03-01,1979,Chinese Super League,Shanghai Greenland,Shanghai SIPG,39.81,60.08,0.2387,0.5203,0.241,1.22,1.89,25.6,63.4,0.0,4.0,0.57,2.76,0.8,1.5,0.0,3.26
3,2019,2019-03-01,1979,Chinese Super League,Wuhan Zall,Beijing Guoan,32.25,54.82,0.2276,0.5226,0.2498,1.1,1.79,35.8,58.9,0.0,1.0,1.12,0.97,1.51,0.94,0.0,1.05
4,2019,2019-03-01,1979,Chinese Super League,Chongqing Lifan,Guangzhou RF,38.24,40.45,0.4403,0.2932,0.2665,1.57,1.24,26.2,21.3,2.0,2.0,2.77,3.17,1.05,2.08,2.1,2.1


In [5]:
leagues = ['Belgian Jupiler League', 'French Ligue 1', 'Turkish Turkcell Super Lig', 'Barclays Premier League', 'Spanish Primera Division', 'Dutch Eredivisie', 'Portuguese Liga', 'German Bundesliga', 'Italy Serie A']

In [7]:
#Collecting indexs of relevant leagues
indexes = []
for league in leagues:
    indexes.extend(raw_data["league"][lambda x: x == league ].index)
    
#creating the data set
leagues_datas = raw_data.loc[indexes].rename(columns = { "team1" : "HomeTeam" ,"team2" : "AwayTeam" }).reset_index(drop = True)
leagues_datas["date"]= leagues_datas.apply(lambda row: "/".join(row["date"].split("-")[::-1])[0:6] + "/".join(row["date"].split("-")[::-1])[-2:], axis=1)
leagues_datas["league"] = leagues_datas["league"].map({'Belgian Jupiler League' : 'belgium', 'French Ligue 1' : 'france', 'Turkish Turkcell Super Lig' : 'turkey', 
                                                       'Barclays Premier League' : 'england', 'Spanish Primera Division' : 'spain', 
                                                       'Dutch Eredivisie' : 'netherland', 'Portuguese Liga' : 'portugal', 
                                                       'German Bundesliga' : 'germany', 'Italy Serie A' : 'italy'}) 

In [8]:
#Leagues Datas With Relevant Features
leagues_datas = leagues_datas.iloc[:,:17]
leagues_datas

Unnamed: 0,season,date,league_id,league,HomeTeam,AwayTeam,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2
0,2020,08/08/20,1832,belgium,Club Brugge,Sporting de Charleroi,69.94,51.66,0.6516,0.1283,0.2201,1.87,0.68,,,0.0,1.0
1,2020,08/08/20,1832,belgium,Standard Liege,Cercle Brugge,51.06,32.24,0.6592,0.1316,0.2092,1.99,0.76,,,1.0,0.0
2,2020,08/08/20,1832,belgium,Antwerp,Mouscron-Peruwelz,48.33,37.83,0.5693,0.1831,0.2477,1.68,0.83,,,1.0,1.0
3,2020,09/08/20,1832,belgium,St. Truidense,KAA Gent,36.33,58.04,0.2581,0.5019,0.2401,1.21,1.77,,,2.0,1.0
4,2020,09/08/20,1832,belgium,SV Zulte Waregem,Genk,36.87,53.32,0.3032,0.4619,0.2349,1.43,1.81,,,1.0,2.0
5,2020,09/08/20,1832,belgium,KV Mechelen,Anderlecht,40.68,57.68,0.2939,0.4487,0.2574,1.21,1.55,,,2.0,2.0
6,2020,09/08/20,1832,belgium,KV Kortrijk,Waasland-Beveren,39.46,26.05,0.6135,0.159,0.2275,1.85,0.82,,,1.0,3.0
7,2020,10/08/20,1832,belgium,OH Leuven,Eupen,32.4,31.52,0.4717,0.2632,0.2651,1.5,1.05,,,1.0,1.0
8,2020,10/08/20,1832,belgium,KV Oostende,KFCO Beerschot-Wilrijk,28.49,32.79,0.4121,0.3182,0.2697,1.4,1.19,,,1.0,2.0
9,2020,14/08/20,1832,belgium,Mouscron-Peruwelz,KV Mechelen,37.83,40.68,0.4347,0.3013,0.264,1.48,1.18,,,0.0,1.0


## Aligning team names from two different data set

In [9]:
'''team_names_uk= []
for direction in os.listdir("football_data_uk"):
    
    league = pd.read_csv("football_data_uk/{}".format(direction))
    team_names_uk.extend(list(league["HomeTeam"].unique()))'''

'team_names_uk= []\nfor direction in os.listdir("football_data_uk"):\n    \n    league = pd.read_csv("football_data_uk/{}".format(direction))\n    team_names_uk.extend(list(league["HomeTeam"].unique()))'

In [10]:
#Team names from data set from fivethirtyeight.com
'''team_names_five = list(leagues_datas["HomeTeam"].unique())
team_names_dict = {}

for team_five in team_names_five:
    for team_uk in team_names_uk:
        if team_five == team_uk: 
            #adding match team names in the dictionary
            team_names_dict[team_five] = team_uk
            #removing matching teams for detecting non_mathces
            team_names_uk.remove(team_uk)
            team_names_five.remove(team_five)'''
  

'team_names_five = list(leagues_datas["HomeTeam"].unique())\nteam_names_dict = {}\n\nfor team_five in team_names_five:\n    for team_uk in team_names_uk:\n        if team_five == team_uk: \n            #adding match team names in the dictionary\n            team_names_dict[team_five] = team_uk\n            #removing matching teams for detecting non_mathces\n            team_names_uk.remove(team_uk)\n            team_names_five.remove(team_five)'

In [11]:
'''for i in team_names_uk:
    print(i)'''

'for i in team_names_uk:\n    print(i)'

In [12]:
'''for i in team_names_five:
    
    x = input(" {} : ".format(i)  )
    
    team_names_dict[i] = x
'''    

'for i in team_names_five:\n    \n    x = input(" {} : ".format(i)  )\n    \n    team_names_dict[i] = x\n'

In [13]:
#filehandler = open("team_dicts", "wb")
#dump(team_names_dict,filehandler)

In [14]:
#loading pickled dictonary of team names 
filehandler = open("team_dicts", "rb")
team_names_dict = load(filehandler)
team_names_dict

{'Club Brugge': 'Club Brugge',
 'Antwerp': 'Antwerp',
 'Genk': 'Genk',
 'Cercle Brugge': 'Cercle Brugge',
 'Eupen': 'Eupen',
 'Waasland-Beveren': 'Waasland-Beveren',
 'Lille': 'Lille',
 'Nimes': 'Nimes',
 'Nice': 'Nice',
 'Strasbourg': 'Strasbourg',
 'Nantes': 'Nantes',
 'Metz': 'Metz',
 'Brest': 'Brest',
 'Montpellier': 'Montpellier',
 'Marseille': 'Marseille',
 'Sivasspor': 'Sivasspor',
 'Galatasaray': 'Galatasaray',
 'Kayserispor': 'Kayserispor',
 'Trabzonspor': 'Trabzonspor',
 'Yeni Malatyaspor': 'Yeni Malatyaspor',
 'Denizlispor': 'Denizlispor',
 'Alanyaspor': 'Alanyaspor',
 'Kasimpasa': 'Kasimpasa',
 'Fenerbahce': 'Fenerbahce',
 'Fulham': 'Fulham',
 'Liverpool': 'Liverpool',
 'Sheffield United': 'Sheffield United',
 'Everton': 'Everton',
 'Arsenal': 'Arsenal',
 'Newcastle': 'Newcastle',
 'Aston Villa': 'Aston Villa',
 'Burnley': 'Burnley',
 'Eibar': 'Eibar',
 'Cadiz': 'Cadiz',
 'Villarreal': 'Villarreal',
 'Getafe': 'Getafe',
 'Elche': 'Elche',
 'Barcelona': 'Barcelona',
 'Levant

In [15]:
#Changing varied team names into one for a team
leagues_datas["HomeTeam"] = leagues_datas["HomeTeam"].map( team_names_dict)
leagues_datas["AwayTeam"] = leagues_datas["AwayTeam"].map(team_names_dict)

try:
    os.mkdir("league_datas")
except:
    "File Already Exists"
    pass
leagues = list(leagues_datas["league"].unique())
for league in leagues:
    data_index = leagues_datas["league"][lambda x: x == league].index
    data_frame = leagues_datas.loc[data_index]
    csv_dir= "{}".format(league)
    data_frame.to_csv("league_datas/{}.csv".format( csv_dir),index=False)

## Combining Two Data Sets Together

In [16]:
def column_placer(data_set,updated_data,league ) :
    
    away_home = {"away" : "AwayTeam", "home" : "HomeTeam"}
    
    #Features will be seperated according dictionaries below
    away_features = { "AW" : "W", "AD" : "D", "AL" : "L", "FTAG" : "FTG", "HTAG" : "HTG", "FTAGC" : "FTGC", "HTAGC" : "HTGC",
                     "AS" : "S", "AST" : "ST", "AF": "F", "AC" : "C", "AY" : "Y", "AR" : "R"}
    home_features = { "HW" : "W", "HD" : "D", "HL" : "L", "FTHG" : "FTG", "FTHGC" : "FTGC", "HTHGC" : "HTGC",
                     "HTHG" : "HTG", "HS" : "S", "HST" : "ST", "HF": "F", "HC" : "C", "HY" : "Y", "HR" : "R"}
    
    
    #Condition according to choice of "data_set" argument  
    
    if away_home[data_set] == "AwayTeam":
        
        data_features = away_features
    else:
        
        data_features = home_features
    
    #Itreation of each data feature
    for feature in data_features:
        
        column_scalars= []
        
        for i in range(updated_data.shape[0]):
            
            team = updated_data[ away_home[data_set] ][i]
            date = updated_data["date"][i]
            
            #Loading indiviual team datas
            data = pd.read_csv("team_league_data/{}/{}.csv".format(league, team ))
            
            #Reshaping "date" column scalars accoring to updated data set. 
            if len(data["Date"][0])>8:
                data["Date"] = data.apply(lambda row: row["Date"][0:6] + row["Date"][-2:], axis=1)
            
            #Error handing for nan valued scalar in a team's data set
            try: 
                match_index = data["Date"][data["Date"] == date].index[0]
                column_scalars.append(data[data_features[feature]].loc[match_index])
            except IndexError:
                print("An Error Occured : ", date, team)
                column_scalars.append(None)
        
        #Adding new features with its scalars
        updated_data[feature] = column_scalars
    
    updated_data.to_csv("data_sets/{}.csv".format(league),index = False)

## Creating Belgium Training Data Set

In [17]:
#Data generated from fivethirtytwo data set
belgium_five = pd.read_csv("league_datas/belgium.csv")

In [18]:
belgium_five.shape

(306, 17)

In [19]:
belgium_uk = pd.read_csv("football_data_uk/belgium.csv")

In [20]:
belgium_five

Unnamed: 0,season,date,league_id,league,HomeTeam,AwayTeam,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2
0,2020,08/08/20,1832,belgium,Club Brugge,Charleroi,69.94,51.66,0.6516,0.1283,0.2201,1.87,0.68,,,0.0,1.0
1,2020,08/08/20,1832,belgium,Standard,Cercle Brugge,51.06,32.24,0.6592,0.1316,0.2092,1.99,0.76,,,1.0,0.0
2,2020,08/08/20,1832,belgium,Antwerp,Mouscron,48.33,37.83,0.5693,0.1831,0.2477,1.68,0.83,,,1.0,1.0
3,2020,09/08/20,1832,belgium,St Truiden,Gent,36.33,58.04,0.2581,0.5019,0.2401,1.21,1.77,,,2.0,1.0
4,2020,09/08/20,1832,belgium,Waregem,Genk,36.87,53.32,0.3032,0.4619,0.2349,1.43,1.81,,,1.0,2.0
5,2020,09/08/20,1832,belgium,Mechelen,Anderlecht,40.68,57.68,0.2939,0.4487,0.2574,1.21,1.55,,,2.0,2.0
6,2020,09/08/20,1832,belgium,Kortrijk,Waasland-Beveren,39.46,26.05,0.6135,0.159,0.2275,1.85,0.82,,,1.0,3.0
7,2020,10/08/20,1832,belgium,Oud-Heverlee Leuven,Eupen,32.4,31.52,0.4717,0.2632,0.2651,1.5,1.05,,,1.0,1.0
8,2020,10/08/20,1832,belgium,Oostende,Beerschot VA,28.49,32.79,0.4121,0.3182,0.2697,1.4,1.19,,,1.0,2.0
9,2020,14/08/20,1832,belgium,Mouscron,Mechelen,37.83,40.68,0.4347,0.3013,0.264,1.48,1.18,,,0.0,1.0


In [21]:
belgium_uk

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,PSH,PSD,PSA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA,B365>2.5,B365<2.5,P>2.5,P<2.5,Max>2.5,Max<2.5,Avg>2.5,Avg<2.5,AHh,B365AHH,B365AHA,PAHH,PAHA,MaxAHH,MaxAHA,AvgAHH,AvgAHA,B365CH,B365CD,B365CA,BWCH,BWCD,BWCA,IWCH,IWCD,IWCA,PSCH,PSCD,PSCA,WHCH,WHCD,WHCA,VCCH,VCCD,VCCA,MaxCH,MaxCD,MaxCA,AvgCH,AvgCD,AvgCA,B365C>2.5,B365C<2.5,PC>2.5,PC<2.5,MaxC>2.5,MaxC<2.5,AvgC>2.5,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA
0,B1,08/08/2020,15:30,Club Brugge,Charleroi,0,1,A,0,0,D,17,6,5,4,10,11,10,2,2,3,0,0,1.5,3.8,6.0,1.5,4.0,6.5,1.53,3.8,6.0,1.53,4.13,6.81,1.5,4.0,6.5,1.53,4.0,6.5,1.59,4.33,7.1,1.52,4.0,6.26,1.95,1.85,1.93,1.9,2.0,1.99,1.91,1.87,-1.0,1.95,1.9,1.96,1.88,2.02,1.97,1.94,1.89,1.57,3.75,5.25,1.62,3.8,5.75,1.63,3.6,5.5,1.63,3.97,5.75,1.57,3.8,6.0,1.62,3.9,5.75,1.67,4.05,6.45,1.61,3.81,5.63,2.05,1.75,2.05,1.83,2.13,1.88,2.03,1.78,-0.75,1.77,2.1,1.82,2.09,1.85,2.12,1.79,2.06
1,B1,08/08/2020,18:00,Antwerp,Mouscron,1,1,D,0,0,D,12,4,9,2,8,5,7,3,2,1,0,0,1.3,5.0,10.0,1.3,5.25,9.0,1.33,4.8,8.75,1.32,5.34,10.09,1.29,5.0,11.0,,,,1.37,5.5,11.5,1.31,5.11,9.27,1.66,2.15,1.72,2.17,1.78,2.27,1.68,2.14,-1.5,2.0,1.85,2.09,1.79,2.12,1.9,1.99,1.84,1.44,4.2,6.0,1.48,4.6,6.25,1.47,4.1,6.25,1.52,4.41,6.35,1.44,4.5,6.5,1.45,4.6,7.0,1.52,5.13,7.7,1.46,4.42,6.52,1.7,2.1,1.73,2.19,1.78,2.27,1.69,2.13,-1.0,1.83,2.02,1.89,2.01,1.9,2.25,1.79,2.05
2,B1,08/08/2020,18:00,Standard,Cercle Brugge,1,0,H,0,0,D,11,6,8,1,10,11,5,4,1,1,0,0,1.4,4.5,6.5,1.4,4.75,6.75,1.43,4.4,6.75,1.43,4.75,7.44,1.4,4.5,7.5,1.44,4.5,7.0,1.5,4.98,7.5,1.43,4.56,6.84,1.7,2.1,1.72,2.18,1.78,2.28,1.69,2.14,-1.25,2.0,1.85,2.03,1.82,2.06,1.9,1.98,1.85,1.45,4.5,6.5,1.48,4.4,6.75,1.45,4.1,6.75,1.51,4.16,7.25,1.44,4.33,7.0,1.5,4.33,6.5,1.54,4.65,7.5,1.48,4.26,6.6,1.72,2.07,1.77,2.12,1.83,2.19,1.73,2.09,-1.0,1.85,2.0,1.91,1.99,1.97,2.13,1.86,1.99
3,B1,09/08/2020,12:30,St Truiden,Gent,2,1,H,1,1,D,11,13,8,2,19,13,6,3,2,2,0,0,4.5,3.8,1.65,4.6,4.0,1.67,4.3,3.85,1.7,4.54,4.19,1.72,4.33,4.0,1.7,,,,4.7,4.5,1.76,4.36,4.05,1.7,1.53,2.4,1.56,2.47,1.57,2.53,1.54,2.43,0.75,1.93,1.93,1.96,1.88,1.97,1.97,1.93,1.89,5.5,4.0,1.6,4.5,4.25,1.62,4.9,3.8,1.63,5.05,4.13,1.66,5.25,4.0,1.6,5.25,4.1,1.62,5.55,4.33,1.69,4.96,4.02,1.63,1.75,2.05,1.81,2.07,1.82,2.21,1.73,2.09,0.75,2.05,1.8,2.06,1.84,2.13,1.85,2.04,1.81
4,B1,09/08/2020,15:00,Waregem,Genk,1,2,A,1,0,H,8,15,6,4,15,16,9,8,3,1,0,0,3.75,3.6,1.9,3.7,3.75,1.87,3.45,3.65,1.95,3.65,3.91,1.97,3.5,3.8,1.91,,,,3.75,4.2,2.01,3.52,3.79,1.93,1.5,2.5,1.53,2.57,1.53,2.6,1.49,2.52,0.5,1.93,1.93,1.91,1.94,1.97,1.98,1.89,1.92,3.8,4.0,1.8,3.6,3.75,1.9,3.65,3.65,1.85,3.99,4.03,1.85,3.75,3.9,1.83,4.0,3.9,1.85,4.05,4.15,2.66,3.79,3.88,1.85,1.53,2.4,1.58,2.48,1.93,2.6,1.54,2.43,0.5,2.05,1.8,2.05,1.85,2.1,1.87,2.01,1.83
5,B1,09/08/2020,17:15,Mechelen,Anderlecht,2,2,D,0,0,D,10,7,4,2,21,17,2,7,3,3,0,0,4.5,3.5,1.8,4.4,3.5,1.8,4.0,3.4,1.85,4.33,3.6,1.89,4.2,3.5,1.83,,,,4.6,3.86,1.9,4.14,3.53,1.84,1.75,2.05,1.81,2.02,1.88,2.14,1.77,2.03,0.5,1.98,1.88,1.98,1.86,2.05,1.95,1.97,1.85,3.4,3.4,2.1,3.3,3.4,2.1,3.05,3.25,2.25,3.24,3.44,2.27,3.2,3.3,2.2,3.13,3.5,2.25,3.56,3.54,2.31,3.19,3.37,2.2,1.8,2.0,1.83,2.06,1.9,2.1,1.8,2.0,0.25,1.88,1.98,1.93,1.97,1.98,2.03,1.91,1.94
6,B1,09/08/2020,19:45,Kortrijk,Waasland-Beveren,1,3,A,0,1,A,11,8,4,6,15,14,4,1,2,4,0,0,1.6,3.8,4.75,1.67,4.0,4.5,1.63,3.85,4.8,1.65,4.18,5.08,1.62,4.0,5.0,,,,1.7,4.45,5.4,1.63,4.02,4.89,1.65,2.2,1.66,2.28,1.67,2.28,1.64,2.21,-1.0,2.07,1.72,2.12,1.78,2.19,1.85,2.09,1.76,1.53,4.0,5.75,1.6,4.1,5.0,1.57,3.85,5.5,1.6,4.05,5.85,1.52,4.0,6.0,1.6,4.2,5.5,1.64,4.3,6.6,1.57,4.02,5.64,1.72,2.07,1.76,2.13,1.81,2.16,1.72,2.1,-1.0,2.05,1.8,2.03,1.87,2.12,1.94,2.0,1.84
7,B1,10/08/2020,18:00,Oud-Heverlee Leuven,Eupen,1,1,D,0,0,D,4,16,3,6,12,11,5,8,1,2,0,0,2.3,3.4,3.0,2.3,3.4,2.9,2.35,3.25,2.85,2.4,3.56,2.92,2.35,3.4,2.88,,,,2.48,3.7,3.04,2.35,3.42,2.86,1.85,1.95,1.85,1.97,1.94,2.05,1.84,1.95,-0.25,2.05,1.8,2.06,1.79,2.14,1.84,2.05,1.78,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8,B1,10/08/2020,18:00,Oostende,Beerschot VA,1,2,A,0,1,A,6,6,2,5,18,12,12,7,3,2,0,0,2.3,3.4,3.0,2.3,3.5,2.9,2.3,3.3,2.9,2.34,3.63,2.97,2.3,3.5,2.9,,,,2.42,3.76,3.1,2.29,3.47,2.93,1.8,2.0,1.79,2.07,1.9,2.1,1.79,2.01,-0.25,2.0,1.85,2.01,1.83,2.08,1.9,1.99,1.83,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9,B1,14/08/2020,18:00,Mouscron,Mechelen,0,1,A,0,1,A,9,7,1,4,9,18,5,5,3,3,0,0,3.5,3.4,2.05,3.6,3.5,2.05,3.35,3.3,2.1,3.54,3.62,2.08,3.5,3.4,2.05,3.5,3.5,2.1,3.7,3.62,2.2,3.42,3.48,2.08,1.8,2.0,1.85,2.03,1.91,2.09,1.82,1.98,0.25,2.05,1.8,2.09,1.81,2.14,1.86,2.03,1.81,4.0,3.5,1.9,3.6,3.5,2.05,3.75,3.45,1.9,3.92,3.86,1.91,3.3,3.4,2.1,3.9,3.7,1.9,4.2,3.86,2.1,3.78,3.61,1.92,1.8,2.0,1.88,2.0,1.92,2.11,1.82,1.98,0.5,1.98,1.88,1.99,1.91,2.06,1.94,1.94,1.89


In [22]:
#Indices until first "nan" score
data_updated = belgium_five.loc[:257]
data_updated

Unnamed: 0,season,date,league_id,league,HomeTeam,AwayTeam,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2
0,2020,08/08/20,1832,belgium,Club Brugge,Charleroi,69.94,51.66,0.6516,0.1283,0.2201,1.87,0.68,,,0.0,1.0
1,2020,08/08/20,1832,belgium,Standard,Cercle Brugge,51.06,32.24,0.6592,0.1316,0.2092,1.99,0.76,,,1.0,0.0
2,2020,08/08/20,1832,belgium,Antwerp,Mouscron,48.33,37.83,0.5693,0.1831,0.2477,1.68,0.83,,,1.0,1.0
3,2020,09/08/20,1832,belgium,St Truiden,Gent,36.33,58.04,0.2581,0.5019,0.2401,1.21,1.77,,,2.0,1.0
4,2020,09/08/20,1832,belgium,Waregem,Genk,36.87,53.32,0.3032,0.4619,0.2349,1.43,1.81,,,1.0,2.0
5,2020,09/08/20,1832,belgium,Mechelen,Anderlecht,40.68,57.68,0.2939,0.4487,0.2574,1.21,1.55,,,2.0,2.0
6,2020,09/08/20,1832,belgium,Kortrijk,Waasland-Beveren,39.46,26.05,0.6135,0.159,0.2275,1.85,0.82,,,1.0,3.0
7,2020,10/08/20,1832,belgium,Oud-Heverlee Leuven,Eupen,32.4,31.52,0.4717,0.2632,0.2651,1.5,1.05,,,1.0,1.0
8,2020,10/08/20,1832,belgium,Oostende,Beerschot VA,28.49,32.79,0.4121,0.3182,0.2697,1.4,1.19,,,1.0,2.0
9,2020,14/08/20,1832,belgium,Mouscron,Mechelen,37.83,40.68,0.4347,0.3013,0.264,1.48,1.18,,,0.0,1.0


In [23]:
column_placer("home", data_updated, "belgium")
column_placer("away", data_updated, "belgium")

An Error Occured :  14/02/21 Charleroi


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Charleroi
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge
An Error Occured :  14/02/21 Club Brugge

## Creating England Training Data Set

In [24]:
england_five = pd.read_csv("league_datas/england.csv")
#england_five

In [25]:
england_uk = pd.read_csv("football_data_uk/england.csv")
#england_uk

In [26]:
data_updated = england_five.loc[:262]


In [27]:
column_placer("home", data_updated, "england")
column_placer("away", data_updated, "england")

An Error Occured :  17/01/21 Aston Villa


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Aston Villa
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton
An Error Occured :  17/01/21 Everton


## Creating France Training Data Set

In [28]:
france_five = pd.read_csv("league_datas/france.csv")
france_uk = pd.read_csv("football_data_uk/france.csv")

In [29]:
france_five

Unnamed: 0,season,date,league_id,league,HomeTeam,AwayTeam,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2
0,2020,21/08/20,1843,france,Bordeaux,Nantes,59.92,59.31,0.4538,0.2626,0.2836,1.38,0.98,,,0.0,0.0
1,2020,22/08/20,1843,france,Dijon,Angers,54.91,59.5,0.4038,0.3013,0.2948,1.24,1.03,,,0.0,1.0
2,2020,22/08/20,1843,france,Lille,Rennes,70.39,65.18,0.5088,0.2242,0.267,1.54,0.93,,,1.0,1.0
3,2020,23/08/20,1843,france,Monaco,Reims,67.42,58.47,0.5583,0.194,0.2477,1.72,0.91,,,2.0,2.0
4,2020,23/08/20,1843,france,Nimes,Brest,54.6,54.9,0.4498,0.2865,0.2637,1.53,1.17,,,4.0,0.0
5,2020,23/08/20,1843,france,Lorient,Strasbourg,52.87,60.44,0.3774,0.3471,0.2754,1.33,1.26,,,3.0,1.0
6,2020,23/08/20,1843,france,Nice,Lens,59.29,53.53,0.5143,0.2338,0.2519,1.69,1.05,,,2.0,1.0
7,2020,28/08/20,1843,france,Lyon,Dijon,73.68,53.5,0.6947,0.1019,0.2035,2.0,0.61,58.2,28.4,4.0,1.0
8,2020,29/08/20,1843,france,Rennes,Montpellier,66.79,59.83,0.536,0.2,0.264,1.57,0.85,38.5,23.2,2.0,1.0
9,2020,29/08/20,1843,france,Strasbourg,Nice,58.5,57.34,0.4677,0.2674,0.2649,1.54,1.1,25.2,18.3,0.0,2.0


In [30]:
#france_uk

In [31]:
data_updated = france_five.loc[:278]

In [32]:
column_placer("home", data_updated, "france")
column_placer("away", data_updated, "france")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## Creating Germany Training Data Set

In [33]:
germany_five = pd.read_csv("league_datas/germany.csv")
data_updated = germany_five.loc[:205]

In [34]:
column_placer("home", data_updated, "germany")
column_placer("away", data_updated, "germany")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## Creating Italy Training Data Set

In [35]:
italy_five = pd.read_csv("league_datas/italy.csv")
data_updated = italy_five.loc[:246]

In [36]:
column_placer("home", data_updated, "italy")
column_placer("away", data_updated, "italy")

An Error Occured :  02/03/21 Lazio


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Lazio
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino
An Error Occured :  02/03/21 Torino


## Creating Netherland Training Data Set

In [37]:
netherland_five = pd.read_csv("league_datas/netherland.csv")
#netherland_five

In [38]:
data_updated = netherland_five.loc[:214]
column_placer("home", data_updated, "netherland")
column_placer("away", data_updated, "netherland")

An Error Occured :  07/02/21 Ajax


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Ajax
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht
An Error Occured :  07/02/21 Utrecht


## Creating Portugal Training Data Set

In [39]:
portugal_five = pd.read_csv("league_datas/portugal.csv")
#portugal_five

In [40]:
data_updated = portugal_five.loc[:188]
column_placer("home", data_updated, "portugal")
column_placer("away", data_updated, "portugal")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## Creating Spain Training Data Set

In [41]:
spain_five = pd.read_csv("league_datas/spain.csv")
#spain_five

In [42]:
data_updated = spain_five.loc[:247]
column_placer("home", data_updated, "spain")
column_placer("away", data_updated, "spain")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


## Creating Turkey Training Data Set

In [43]:
turkey_five = pd.read_csv("league_datas/turkey.csv")
turkey_five

Unnamed: 0,season,date,league_id,league,HomeTeam,AwayTeam,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2
0,2020,11/09/20,1882,turkey,Rizespor,Fenerbahce,35.79,53.99,0.2791,0.4795,0.2414,1.31,1.78,,,1.0,2.0
1,2020,12/09/20,1882,turkey,Sivasspor,Alanyaspor,45.58,49.17,0.4071,0.3323,0.2606,1.5,1.33,18.0,29.9,0.0,2.0
2,2020,12/09/20,1882,turkey,Karagumruk,Yeni Malatyaspor,31.06,35.13,0.4097,0.3144,0.2759,1.38,1.17,39.4,34.5,3.0,0.0
3,2020,12/09/20,1882,turkey,Goztep,Denizlispor,38.3,31.3,0.5139,0.2217,0.2644,1.56,0.93,29.0,37.9,5.0,1.0
4,2020,12/09/20,1882,turkey,Galatasaray,Gaziantep,57.76,41.08,0.6505,0.1425,0.207,2.09,0.86,50.2,17.8,3.0,1.0
5,2020,13/09/20,1882,turkey,Ankaragucu,Erzurum BB,31.55,31.42,0.4516,0.2705,0.278,1.41,1.03,42.3,40.4,1.0,2.0
6,2020,13/09/20,1882,turkey,Kayserispor,Kasimpasa,32.56,41.8,0.3726,0.3823,0.2452,1.57,1.6,36.5,19.5,1.0,0.0
7,2020,13/09/20,1882,turkey,Antalyaspor,Genclerbirligi,38.35,32.67,0.5058,0.2304,0.2638,1.57,0.97,29.8,34.9,2.0,0.0
8,2020,13/09/20,1882,turkey,Trabzonspor,Besiktas,62.46,57.76,0.5132,0.2627,0.2241,2.02,1.4,69.9,57.7,1.0,3.0
9,2020,14/09/20,1882,turkey,Hatayspor,Buyuksehyr,33.28,58.89,0.204,0.5597,0.2364,1.02,1.84,36.0,53.2,2.0,0.0


In [44]:
data_updated = turkey_five.loc[:276]
column_placer("home", data_updated, "turkey")
column_placer("away", data_updated, "turkey")

An Error Occured :  30/01/21 Alanyaspor


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Alanyaspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
An Error Occured :  30/01/21 Sivasspor
