In [43]:
import pymc3 as pm
import theano as t
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline

In [2]:
import requests
response = requests.get("https://www.mlssoccer.com/results")
response.status_code

200

In [4]:
from bs4 import BeautifulSoup
soup = BeautifulSoup(response.text, 'lxml')
table = soup.find('div', 'results-map')
bool(table)

True

In [5]:
tables = table.find_all('table')
len(tables)

2

In [8]:
teams = [line.text.strip() for line in tables[0].find_all('tr')[1:]]
", ".join(teams)

'ATL, CHI, CLB, COL, DAL, DC, HOU, LA, MIN, MTL, NE, NY, NYC, ORL, PHI, POR, RSL, SEA, SJ, SKC, TOR, VAN'

In [36]:
results = [[result.text.strip().split("\n") for result in line.find_all('div', 'result-map-tip-info')[:32]] for line in tables[1].find_all('tr')[1:]]
results[0]

[['Sunday, March 5', 'vs. NY Red Bulls', '1-2'],
 ['Sunday, March 12', 'at Minnesota', '1-6'],
 ['Saturday, March 18', 'vs. Chicago', '4-0'],
 ['Friday, March 31', 'at Seattle', '0-0'],
 ['Saturday, April 8', 'at Toronto FC', '2-2'],
 ['Saturday, April 15', 'at Montreal', '2-1'],
 ['Saturday, April 22', 'at Real Salt Lake', '1-3'],
 ['Sunday, April 30', 'vs. D.C. United', '1-3'],
 ['Sunday, May 7', 'at NYCFC', '3-1'],
 ['Sunday, May 14', 'at Portland', '1-1'],
 ['Saturday, May 20', 'vs. Houston', '4-1'],
 ['Sunday, May 28', 'vs. NYCFC', '3-1'],
 ['Saturday, June 3', 'at Vancouver', '3-1'],
 ['Saturday, June 10', 'at Chicago', '2-0'],
 ['Saturday, June 17', 'vs. Columbus', '3-1'],
 ['Wednesday, June 21', 'at D.C. United', '2-1'],
 ['Saturday, June 24', 'vs. Colorado', '1-0'],
 ['Saturday, July 1', 'at Columbus', '0-2'],
 ['Tuesday, July 4', 'vs. San Jose', '4-2'],
 ['Friday, July 21', 'at Orlando', '0-1'],
 ['Saturday, July 29', 'vs. Orlando', '1-1'],
 ['Sunday, August 6', 'at Sporting 

In [37]:
opponents = [" ".join(item[1].split()[1:]) for result in results for item in result]
opponents = np.unique(opponents)
opponents[4], opponents[5] = opponents[5], opponents[4]
opponents[10], opponents[11], opponents[12] = opponents[12], opponents[10], opponents[11]
opponents[-5], opponents[-4] = opponents[-4], opponents[-5]
opponents

array(['Atlanta', 'Chicago', 'Colorado', 'Columbus', 'FC Dallas',
       'D.C. United', 'Houston', 'LA Galaxy', 'Minnesota', 'Montreal',
       'New England', 'NY Red Bulls', 'NYCFC', 'Orlando', 'Philadelphia',
       'Portland', 'Real Salt Lake', 'Seattle', 'San Jose', 'Sporting KC',
       'Toronto FC', 'Vancouver'], 
      dtype='<U14')

In [38]:
mapping = list(zip(teams, opponents))
mapping = {long: short for short, long in mapping}
mapping

{'Atlanta': 'ATL',
 'Chicago': 'CHI',
 'Colorado': 'CLB',
 'Columbus': 'COL',
 'D.C. United': 'DC',
 'FC Dallas': 'DAL',
 'Houston': 'HOU',
 'LA Galaxy': 'LA',
 'Minnesota': 'MIN',
 'Montreal': 'MTL',
 'NY Red Bulls': 'NY',
 'NYCFC': 'NYC',
 'New England': 'NE',
 'Orlando': 'ORL',
 'Philadelphia': 'PHI',
 'Portland': 'POR',
 'Real Salt Lake': 'RSL',
 'San Jose': 'SJ',
 'Seattle': 'SEA',
 'Sporting KC': 'SKC',
 'Toronto FC': 'TOR',
 'Vancouver': 'VAN'}

In [42]:
def parse_game(team, game):
    opponent = mapping[" ".join(game[1].split()[1:])]
    home = 0 if game[1].split()[0].startswith("at") else 1
    date = game[0]
    home_score, away_score = map(int, game[2].split("-"))
    
    if home:
        hometeam = team
        awayteam = opponent
    else:
        hometeam = opponent
        awayteam = team
        
    result = {
        "hometeam": hometeam,
        "awayteam": awayteam,
        "homescore": home_score,
        "awayscore": away_score,
        "gamedate": date
    }
    
    return result

games = [parse_game(team, game) for team, games in zip(teams, results) for game in games]
games[0]

{'awayscore': 2,
 'awayteam': 'NY',
 'gamedate': 'Sunday, March 5',
 'homescore': 1,
 'hometeam': 'ATL'}

In [52]:
intmapping = {abbr : i for i, abbr in enumerate(sorted(mapping.values()))}
intmapping

{'ATL': 0,
 'CHI': 1,
 'CLB': 2,
 'COL': 3,
 'DAL': 4,
 'DC': 5,
 'HOU': 6,
 'LA': 7,
 'MIN': 8,
 'MTL': 9,
 'NE': 10,
 'NY': 11,
 'NYC': 12,
 'ORL': 13,
 'PHI': 14,
 'POR': 15,
 'RSL': 16,
 'SEA': 17,
 'SJ': 18,
 'SKC': 19,
 'TOR': 20,
 'VAN': 21}

In [53]:
import pandas as pd
df = pd.DataFrame(games)
df["awayteamint"] = df.awayteam.map(intmapping)
df["hometeamint"] = df.hometeam.map(intmapping)
df.head()

Unnamed: 0,awayscore,awayteam,gamedate,homescore,hometeam,awayteamint,hometeamint
0,2,NY,"Sunday, March 5",1,ATL,11,0
1,6,ATL,"Sunday, March 12",1,MIN,0,8
2,0,CHI,"Saturday, March 18",4,ATL,1,0
3,0,ATL,"Friday, March 31",0,SEA,0,17
4,2,ATL,"Saturday, April 8",2,TOR,0,20


In [None]:
with pm.Model() as model:
    offense = pm.Poisson("offense", 2, shape = len(teams))
    defense = pm.Poisson("defense", 1, shape = len(teams))
    home_adv = pm.Poisson("home_adv", 1, shape = len(teams))
    
    home_teams = pm.Deterministic("home_teams", )
    away_teams = pm.Categorical("away_teams", shape = len(games))
    