# Get predictions
Website: https://projects.fivethirtyeight.com/soccer-predictions/  
Data source (git): https://github.com/fivethirtyeight/data/tree/master/soccer-spi/

Gets the predictions for a selection of leagues (my own choice), covering the matches played today and in the next 30 days.

In [1]:
from datetime import date, timedelta
from pathlib import Path

import pandas as pd

# show all columns: None removes the column limit. (0 asks pandas to auto-detect
# the terminal width, which it cannot do reliably inside a notebook.)
pd.set_option('display.max_columns', None)

In [2]:
# read their entire CSV
# The date column is selected by name rather than by position, so parsing keeps
# targeting the right column even if the upstream column order changes.
SPI_MATCHES_URL = 'https://projects.fivethirtyeight.com/soccer-api/club/spi_matches.csv'
df = pd.read_csv(SPI_MATCHES_URL, parse_dates=['date'])

# transform text fields to lowercase
for text_col in ['league', 'team1', 'team2']:
    df[text_col] = df[text_col].str.lower()

# show the table dimensions and the first rows
print(df.shape)
df.head()

(20898, 22)


Unnamed: 0,date,league_id,league,team1,team2,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2,xg1,xg2,nsxg1,nsxg2,adj_score1,adj_score2
0,2016-08-12,1843,french ligue 1,bastia,paris saint-germain,51.16,85.68,0.0463,0.838,0.1157,0.91,2.36,32.4,67.7,0.0,1.0,0.97,0.63,0.43,0.45,0.0,1.05
1,2016-08-12,1843,french ligue 1,as monaco,guingamp,68.85,56.48,0.5714,0.1669,0.2617,1.82,0.86,53.7,22.9,2.0,2.0,2.45,0.77,1.75,0.42,2.1,2.1
2,2016-08-13,2411,barclays premier league,hull city,leicester city,53.57,66.81,0.3459,0.3621,0.2921,1.16,1.24,38.1,22.2,2.0,1.0,0.85,2.77,0.17,1.25,2.1,1.05
3,2016-08-13,2411,barclays premier league,burnley,swansea city,58.98,59.74,0.4482,0.2663,0.2854,1.37,1.05,36.5,29.1,0.0,1.0,1.24,1.84,1.71,1.56,0.0,1.05
4,2016-08-13,2411,barclays premier league,middlesbrough,stoke city,56.32,60.35,0.438,0.2692,0.2927,1.3,1.01,33.9,32.5,1.0,1.0,1.4,0.55,1.13,1.06,1.05,1.05


In [3]:
# restrict the notebook to a hand-picked set of leagues, identified by id
selected_ids = [1849, 1843, 1845, 1854, 1869, 2411]
leagues = pd.DataFrame({'league_id': selected_ids})

# look up the league name for each id: left-join against the match table, then
# collapse the repeated rows to one per distinct (league_id, league) pair
id_name_pairs = df[['league_id', 'league']]
leagues = leagues.merge(id_name_pairs, how='left', on='league_id')
leagues = leagues.drop_duplicates()
leagues = leagues.reset_index(drop=True)

# show
leagues

Unnamed: 0,league_id,league
0,1849,dutch eredivisie
1,1843,french ligue 1
2,1845,german bundesliga
3,1854,italy serie a
4,1869,spanish primera division
5,2411,barclays premier league


In [4]:
# keep only the matches whose league_id is one of the selected leagues
in_selected_league = df['league_id'].isin(leagues['league_id'])
df = df[in_selected_league]
print(df.shape)

(6090, 22)


In [5]:
# keep only the matches dated from today up to 30 days from now (both ends inclusive)
today = date.today()
delta = timedelta(days=30)
window_end = today + delta
print(today, ':', window_end)

# compare on calendar dates, ignoring any time-of-day component
match_day = df['date'].dt.date
df = df[(match_day >= today) & (match_day <= window_end)]
print(df.shape)

2018-10-29 : 2018-11-28
(177, 22)


In [6]:
# order the matches by league first, then by date within each league
sort_keys = ['league_id', 'date']
df = df.sort_values(sort_keys)

In [7]:
# preview the first five rows of the filtered and sorted table
df.head(n=5)

Unnamed: 0,date,league_id,league,team1,team2,spi1,spi2,prob1,prob2,probtie,proj_score1,proj_score2,importance1,importance2,score1,score2,xg1,xg2,nsxg1,nsxg2,adj_score1,adj_score2
15101,2018-11-02,1843,french ligue 1,paris saint-germain,lille,90.33,63.93,0.8037,0.0639,0.1324,2.78,0.67,12.4,65.7,,,,,,,,
15192,2018-11-03,1843,french ligue 1,lyon,bordeaux,73.47,58.71,0.6671,0.1256,0.2072,2.06,0.76,68.8,17.9,,,,,,,,
15216,2018-11-03,1843,french ligue 1,strasbourg,toulouse,59.63,54.25,0.4875,0.2367,0.2758,1.48,0.94,16.3,33.7,,,,,,,,
15217,2018-11-03,1843,french ligue 1,nice,amiens,61.76,52.89,0.5192,0.199,0.2818,1.45,0.78,19.2,38.6,,,,,,,,
15218,2018-11-03,1843,french ligue 1,reims,as monaco,56.15,64.82,0.3535,0.3559,0.2906,1.19,1.2,27.9,26.1,,,,,,,,


In [8]:
# save to CSV (without the index column)
# Create the target folder first: to_csv does not create missing directories,
# so a fresh checkout without ./data would otherwise fail here.
output_path = Path('./data/This_month_prediction.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(output_path, index=False)