# Get the predictions
Website: https://projects.fivethirtyeight.com/soccer-predictions/  
Data source (git): https://github.com/fivethirtyeight/data/tree/master/soccer-spi/

Gets the predictions for a hand-picked selection of leagues, covering the matches from today through the next 30 days.

Stores them in `./data/This_months_predictions.csv`.

In [None]:
import pandas as pd
from datetime import date, timedelta

# Show all columns: `None` is the documented "unlimited" value, whereas 0
# asks pandas to auto-detect the terminal width and truncate to fit it.
pd.options.display.max_columns = None

In [None]:
# Download the complete FiveThirtyEight club match table.
# The date column is selected by name rather than by position, so the
# later `df.date.dt` filtering keeps working even if the upstream column
# order changes.
df = pd.read_csv(
    'https://projects.fivethirtyeight.com/soccer-api/club/spi_matches.csv',
    parse_dates=['date'],
)

# normalise the text columns to lowercase
for text_col in ('league', 'team1', 'team2'):
    df[text_col] = df[text_col].str.lower()

# show the table dimensions and the first rows
print(df.shape)
df.head()

In [None]:
# ids of the leagues to keep
leagues = pd.DataFrame({'league_id': [1849, 1843, 1845, 1854, 1869, 2411]})

# Attach the league names. The (league_id, league) pairs are de-duplicated
# before the merge so the join does not materialise one row per match; the
# final drop_duplicates keeps the result identical to merging first.
league_names = df[['league_id', 'league']].drop_duplicates()
leagues = (
    leagues
    .merge(league_names, on='league_id', how='left')
    .drop_duplicates()
    .reset_index(drop=True)
)

# show
leagues

In [None]:
# keep only the matches that belong to one of the selected leagues
in_selected_league = df['league_id'].isin(leagues['league_id'])
df = df[in_selected_league]
print(df.shape)

In [None]:
# restrict to matches played between today and 30 days from now (inclusive)
today = date.today()
delta = timedelta(days=30)
print(today, ':', today+delta)
match_day = df['date'].dt.date
df = df[(match_day >= today) & (match_day <= today + delta)]
print(df.shape)

In [None]:
# order the matches per league, chronologically within each league
sort_keys = ['league_id', 'date']
df = df.sort_values(by=sort_keys)

In [None]:
# preview the first rows of the result
df.head(n=5)

In [None]:
# Save to CSV. `to_csv` does not create missing directories, so make sure
# the target folder exists before writing.
from pathlib import Path

out_path = Path('./data/This_months_predictions.csv')
out_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(out_path, index=False)