In [10]:
import requests
import json
import pandas as pd
import datetime as dt

In [11]:
# Read stats from NWSL website
# Column labels for the per-match numbers pulled from the NWSL service:
# for each statistic, the home value comes first and the away value second.
stats_columns = [
    f'{side} {stat}'
    for stat in ('Goals', 'Fouls', 'Yellow Cards', 'Red Cards')
    for side in ('Home', 'Away')
]

def read_stats(url, timeout=30):
    """Download one match payload and flatten its headline statistics.

    Parameters
    ----------
    url : str
        Match endpoint to query; the response body must be JSON.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).
        Without a timeout a stalled connection would block the notebook
        indefinitely.

    Returns
    -------
    list
        ``[home goals, away goals, home fouls, away fouls,
        home yellow cards, away yellow cards, home red cards,
        away red cards]`` -- the same order as ``stats_columns``.
        The payload's ``team`` entry is treated as the home side and
        ``opponent`` as the away side.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    KeyError
        If the payload lacks one of the expected keys.
    """
    page = requests.get(url, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    page.raise_for_status()
    response = json.loads(page.content)

    data = response['data']
    home_stats = data['team']['stats']
    away_stats = data['opponent']['stats']
    results = data['results']

    return [
        results['team_score'],
        results['opponent_score'],
        home_stats['fouls'],
        away_stats['fouls'],
        home_stats['yellow_cards'],
        away_stats['yellow_cards'],
        home_stats['red_cards'],
        away_stats['red_cards'],
    ]

In [12]:
# Create the empty output dataframe: match identity (date and both teams)
# followed by the home/away statistics that are filled in from the NWSL service.
cols = [
    'Date',
    'Home Team',
    'Away Team',
    'Home Goals',
    'Away Goals',
    'Home Fouls',
    'Away Fouls',
    'Home Yellow Cards',
    'Away Yellow Cards',
    'Home Red Cards',
    'Away Red Cards',
]
df = pd.DataFrame(data=[], columns=cols)
df

Unnamed: 0,Date,Home Team,Away Team,Home Goals,Away Goals,Home Fouls,Away Fouls,Home Yellow Cards,Away Yellow Cards,Home Red Cards,Away Red Cards


In [13]:
# Load the FBRef fixture list and prepare it for the NWSL lookups:
#   1. drop rows with no 'Score' value,
#   2. keep only the first space-separated token of 'Time' as 'Local Time',
#   3. parse 'Date' + 'Local Time' into a single datetime 'Date' column,
#   4. rename 'Home'/'Away' to the 'Home Team'/'Away Team' labels used in the output.
df_fbref = (
    pd.read_csv('matches_fbref.csv')
    .dropna(subset=['Score'])
    .assign(**{'Local Time': lambda frame: frame['Time'].str.split(' ').str[0]})
    .assign(Date=lambda frame: pd.to_datetime(frame['Date'] + ' ' + frame['Local Time']))
    .rename(columns={'Home': 'Home Team', 'Away': 'Away Team'})
)

In [14]:
# Team name as it appears in the FBRef data -> team slug used in the NWSL match URL.
mapping = dict([
    ('Chicago', 'chicago-red-stars'),
    ('Washington', 'washington-spirit'),
    ('Reign', 'ol-reign'),
    ('North Carolina', 'north-carolina-courage'),
    ('Houston', 'houston-dash'),
    ('Racing Louisville', 'racing-louisville-fc'),
    ('Gotham FC', 'nj-ny-gotham-fc'),
    ('Kansas City', 'kansas-city'),
    ('Orlando', 'orlando-pride'),
    ('Portland', 'portland-thorns-fc'),
])

In [15]:
# Build the NWSL match URL for every FBRef fixture, fetch its stats, and
# assemble the combined dataframe.
#
# Rows are collected in a plain list and converted to a dataframe once:
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies the whole frame on every iteration.
match_rows = []
for _, row in df_fbref.iterrows():
    # Slug format: <home-slug>-vs-<away-slug>-<YYYY-MM-DD>.
    # A team name missing from `mapping` raises KeyError here.
    param = mapping[row['Home Team']] + '-vs-' + mapping[row['Away Team']] + '-' + row['Date'].strftime('%Y-%m-%d')
    url = 'https://d2nkt8hgeld8zj.cloudfront.net/services/nwsl.ashx/match?slug=' + param
    out = read_stats(url)
    match_rows.append([row['Date'], row['Home Team'], row['Away Team']] + out)

# df is rebuilt from scratch (it was created empty above), so re-running this
# cell does not stack duplicate rows onto a previous run's result.
df = pd.DataFrame(match_rows, columns=cols)

In [16]:
# Persist the combined match table; the row index is not written to the file.
df.to_csv(path_or_buf='matches.csv', index=False)