In [2]:
import functools
from functools import lru_cache
import json
import os
import xml.etree.ElementTree as ET

import pandas as pd
import py2neo
import requests

In [3]:
def parse_xml(xml_str):
    """Yield one flat dict per game listed in a scorestrip XML document.

    The week (``w``) and year (``y``) are read once from the ``gms`` element
    and copied into every yielded dict. Team names and scores come from the
    ``hnn``/``hs`` (home) and ``vnn``/``vs`` (away) attributes of each child
    element; all values are returned exactly as the XML attributes provide
    them (strings, or ``None`` when an attribute is absent).
    """
    games = ET.fromstring(xml_str).find('gms')
    week, year = games.get('w'), games.get('y')
    for game in games:
        attr = game.get
        yield dict(
            home_team=attr('hnn'),
            home_score=attr('hs'),
            away_team=attr('vnn'),
            away_score=attr('vs'),
            week=week,
            year=year,
        )

In [4]:
url = 'http://nfl.com/ajax/scorestrip/'


def get_scores(season, seasonType, week, timeout=10):
    """Fetch the raw scorestrip XML for one week of a season.

    Parameters
    ----------
    season : int or str
        Season year; sent as a string query parameter.
    seasonType : str
        Season type code passed through unchanged (the notebook uses 'REG').
    week : int or str
        Week number; sent as a string query parameter.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the notebook indefinitely.

    Returns
    -------
    str
        The response body as text.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    requests.Timeout
        If no response arrives within ``timeout`` seconds.
    """
    params = dict(
        season=str(season),
        seasonType=seasonType,
        week=str(week))
    response = requests.get(url, params=params, timeout=timeout)
    # Fail here on an HTTP error instead of returning an error page that the
    # XML parser would choke on later.
    response.raise_for_status()
    return response.text

In [4]:
# Download the raw XML for regular-season ('REG') weeks 1 through 17 of 2017,
# one request per week.
season = [get_scores('2017', 'REG', str(week)) for week in range(1, 18)]

In [5]:
# Inspect the raw XML string returned for the first week fetched.
season[0]

'<?xml version="1.0" encoding="UTF-8"?>\n<ss><gms gd="0" w="1" y="2017" t="R"><g eid="2017090700" gsis="57234" d="Thu" t="8:30" q="P" k="" h="NE" hnn="patriots" hs="" v="KC" vnn="chiefs" vs="" p="" rz="" ga="" gt="REG"/><g eid="2017091000" gsis="57235" d="Sun" t="1:00" q="P" k="" h="BUF" hnn="bills" hs="" v="NYJ" vnn="jets" vs="" p="" rz="" ga="" gt="REG"/><g eid="2017091001" gsis="57236" d="Sun" t="1:00" q="P" k="" h="CHI" hnn="bears" hs="" v="ATL" vnn="falcons" vs="" p="" rz="" ga="" gt="REG"/><g eid="2017091002" gsis="57237" d="Sun" t="1:00" q="P" k="" h="CIN" hnn="bengals" hs="" v="BAL" vnn="ravens" vs="" p="" rz="" ga="" gt="REG"/><g eid="2017091003" gsis="57238" d="Sun" t="1:00" q="P" k="" h="CLE" hnn="browns" hs="" v="PIT" vnn="steelers" vs="" p="" rz="" ga="" gt="REG"/><g eid="2017091004" gsis="57239" d="Sun" t="1:00" q="P" k="" h="DET" hnn="lions" hs="" v="ARI" vnn="cardinals" vs="" p="" rz="" ga="" gt="REG"/><g eid="2017091005" gsis="57240" d="Sun" t="1:00" q="P" k="" h="HOU"

In [6]:
# Flatten every week's XML into a single list of per-game dicts.
parsed = [game for week in season for game in parse_xml(week)]

In [9]:
# Total number of games parsed across all fetched weeks.
len(parsed)

256

In [10]:
# Spot-check the first and last parsed games.
parsed[0], parsed[-1]

({'away_score': '',
  'away_team': 'chiefs',
  'home_score': '',
  'home_team': 'patriots',
  'week': '1',
  'year': '2017'},
 {'away_score': '',
  'away_team': '49ers',
  'home_score': '',
  'home_team': 'rams',
  'week': '17',
  'year': '2017'})

In [7]:
# Persist the parsed schedule as JSON. The file must be opened for writing:
# json.dump writes to the handle, and open() defaults to read-only mode.
with open('./data/nfl_schedule_2017.json', 'w') as f:
    json.dump(parsed, f)

# Pandas Experiments

In [27]:
# One DataFrame row per parsed game, one column per dict key.
df = pd.DataFrame(parsed)

In [36]:
# The scores arrive from parse_xml as XML attribute strings; convert both
# score columns to a numeric dtype so they can be subtracted.
df['home_score'] = pd.to_numeric(df['home_score'])
df['away_score'] = pd.to_numeric(df['away_score'])

In [37]:
# Rows where the Eagles are either the away or the home side; .copy() gives
# an independent frame rather than a slice of df.
is_eagles_game = (df.away_team == 'eagles') | (df.home_team == 'eagles')
eagles = df[is_eagles_game].copy()

In [38]:
def f(team, row):
    """Return the score margin of ``row`` from ``team``'s point of view.

    ``row`` must expose ``home_team``, ``away_team``, ``home_score`` and
    ``away_score`` attributes. The result is ``team``'s score minus the
    opponent's score, or ``None`` when ``team`` is on neither side.
    """
    # Home side: home score minus away score.
    if row.home_team == team:
        return row.home_score - row.away_score
    # Away side: the same margin with the sign flipped.
    if row.away_team == team:
        return row.away_score - row.home_score
    return None

In [None]:
# Score margin from the Eagles' point of view for each of their games.
f_p = functools.partial(f, 'eagles')
# axis=1 hands f one row at a time; the default (axis=0) would pass whole
# columns, which have no home_team/away_team attributes.
eagles.apply(f_p, axis=1)

# Neo4j Experiments

In [42]:
# Read the Neo4j password from the NEO4J_PASSWORD environment variable so the
# credential is not stored in the notebook; raises KeyError when it is unset.
graph = py2neo.Graph(bolt=True, password=os.environ['NEO4J_PASSWORD'])

In [51]:
@lru_cache(maxsize=None)
def mkteam(team):
    """Create a ``Team`` node named ``team`` in the graph and return it.

    The function is memoised with ``lru_cache``: calling it again with the
    same name in the same kernel session returns the cached node without
    calling ``graph.create`` a second time.
    """
    node = py2neo.Node('Team', name=team)
    graph.create(node)
    return node

In [None]:
def mkgame(week):
    """Create a new ``Game`` node tagged with ``week`` and return it.

    Not memoised: every call creates a separate node in the graph.
    """
    node = py2neo.Node('Game', week=week)
    graph.create(node)
    return node

In [None]:
@lru_cache(maxsize=None)
def mkweek(week):
    """Create a ``Week`` node for ``week`` in the graph and return it.

    Memoised with ``lru_cache`` on the ``week`` value alone, so each distinct
    week is created once per kernel session.
    """
    node = py2neo.Node('Week', week=week)
    graph.create(node)
    return node

In [None]:
@lru_cache(maxsize=None)
def mkseason(year):
    """Create a ``Season`` node for ``year`` in the graph and return it.

    Memoised with ``lru_cache``, so each distinct year is created once per
    kernel session.
    """
    node = py2neo.Node('Season', year=year)
    graph.create(node)
    return node

In [52]:
# Load every parsed game into the graph:
#   (Team)-[:PLAYED {points}]->(Game)-[:PART_OF]->(Week)-[:PART_OF]->(Season)
for game in parsed:
    # Season, week and team nodes come from memoised helpers; a fresh Game
    # node is created for every game.
    season_node = mkseason(game['year'])
    week_node = mkweek(game['week'])
    home_team_node = mkteam(game['home_team'])
    away_team_node = mkteam(game['away_team'])
    game_node = mkgame(game['week'])

    relationships = (
        py2neo.Relationship(home_team_node, 'PLAYED', game_node, points=game['home_score']),
        py2neo.Relationship(away_team_node, 'PLAYED', game_node, points=game['away_score']),
        py2neo.Relationship(game_node, 'PART_OF', week_node),
        py2neo.Relationship(week_node, 'PART_OF', season_node),
    )
    for relationship in relationships:
        graph.create(relationship)

In [214]:
# Cypher: every game the Eagles played, paired with the other team that has a
# PLAYED relationship to the same Game node.
query = """
match (team:Team)-[:PLAYED]->(game:Game)<-[:PLAYED]-(opponent:Team)
where team.name = 'eagles'
return team, game, opponent;
"""

In [None]:
# Execute the query against the connected graph.
result = graph.run(query)

In [215]:
# Print each result record as a dict keyed by the names in the return clause.
for item in result:
    print(item.data())

{'team': (eagles:Team {name:"eagles"}), 'game': (cdf6dc5:Game), 'opponent': (cowboys:Team {name:"cowboys"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (db7bac8:Game), 'opponent': (giants:Team {name:"giants"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (f15307b:Game), 'opponent': (ravens:Team {name:"ravens"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (d2eb9c8:Game), 'opponent': (redskins:Team {name:"redskins"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (aed8af4:Game), 'opponent': (bengals:Team {name:"bengals"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (a35cef9:Game), 'opponent': (packers:Team {name:"packers"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (fc7f6df:Game), 'opponent': (seahawks:Team {name:"seahawks"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (cbafa58:Game), 'opponent': (falcons:Team {name:"falcons"})}
{'team': (eagles:Team {name:"eagles"}), 'game': (f078bb0:Game), 'opponent': (giants:Team {name:"giants"})}
{'team': (eagles:Team

In [119]:
# Cypher: paths of zero to four PLAYED relationships (followed in either
# direction) between two teams chosen via the team1/team2 query parameters.
query = """
match path=(:Team {name:{team1}})-[:PLAYED*0..4]-(:Team {name:{team2}})
return path
"""

In [1]:
# Run the path query from the Eagles to the Giants.
result = graph.run(query, team1='eagles', team2='giants')

In [120]:
# Materialise the query result as a list.
result_lst = list(result)

In [192]:
def chunk(arr, size):
    """Yield consecutive slices of ``arr`` holding at most ``size`` items.

    ``arr`` must support ``len()`` and slicing. The final slice is shorter
    when ``len(arr)`` is not a multiple of ``size``.
    """
    starts = range(0, len(arr), size)
    yield from (arr[start:start + size] for start in starts)

In [216]:
# Demonstrate chunk(): nine items in groups of two leave a short final group.
for ch in chunk([1, 2, 3, 4, 5, 6, 7, 8, 9], 2):
    print(ch)

[1, 2]
[3, 4]
[5, 6]
[7, 8]
[9]


In [213]:
# Describe every path between the two teams as a chain of game results, one
# printed line per path.
for path in result_lst:
    # Walk the path and keep only the entries that report relationships;
    # entries whose relationships() is empty are dropped.
    rels = []
    for entity in py2neo.walk(path['path']):
        _rels = entity.relationships()
        if _rels:
            rels.append(_rels)

    # Relationships are consumed two at a time: each pair is treated as the two
    # PLAYED edges of one game, one per participating team.
    msgs = []
    for pair in chunk(rels, 2):
        played1 = pair[0][0]
        played2 = pair[1][0]

        # The start node of a PLAYED relationship is the team; its points are
        # stored as a relationship property.
        # NOTE: int() raises ValueError if points is '' (a game loaded without
        # a score).
        t1 = played1.start_node().get('name')
        t1pts = int(dict(played1).get('points'))

        t2 = played2.start_node().get('name')
        t2pts = int(dict(played2).get('points'))

        diff = t1pts - t2pts
        if diff == 0:
            # A level score is neither a win nor a loss.
            msgs.append(f'{t1} tied {t2} in week ?')
        else:
            outcome = 'beat' if diff > 0 else 'lost to'
            msgs.append(f'{t1} {outcome} {t2} by {abs(diff)} in week ?')

    # join() places the separator only between messages, so nothing has to be
    # trimmed from the end afterwards.
    print(' and '.join(msgs))

eagles lost to cowboys by 6 in week ? and cowboys lost to giants by 1 in week ?
eagles beat cowboys by 14 in week ? and cowboys lost to giants by 1 in week ?
eagles lost to redskins by 7 in week ? and redskins beat giants by 2 in week ?
eagles lost to redskins by 5 in week ? and redskins beat giants by 2 in week ?
eagles beat vikings by 11 in week ? and vikings beat giants by 14 in week ?
eagles lost to packers by 14 in week ? and packers beat giants by 7 in week ?
eagles lost to ravens by 1 in week ? and ravens lost to giants by 4 in week ?
eagles lost to giants by 5 in week ?
eagles lost to bengals by 18 in week ? and bengals lost to giants by 1 in week ?
eagles beat bears by 15 in week ? and bears lost to giants by 6 in week ?
eagles beat browns by 19 in week ? and browns lost to giants by 14 in week ?
eagles beat steelers by 31 in week ? and steelers beat giants by 10 in week ?
eagles lost to cowboys by 6 in week ? and cowboys lost to giants by 3 in week ?
eagles beat cowboys by 14