In [26]:
import asyncio
import datetime
import functools
from functools import lru_cache
import json
import os
from pprint import pprint
import xml.etree.ElementTree as ET

import aiohttp
import async_timeout
import pandas as pd
import py2neo
import requests

In [65]:
# Turn off pandas' HTML repr so DataFrames are displayed as plain text in the notebook.
pd.set_option('notebook_repr_html', False)

In [74]:
def parse_xml(xml_str):
    """Yield one dict per game found in a scorestrip XML document.

    The document must contain a ``gms`` element directly under the root; its
    ``w`` and ``y`` attributes supply the week and year shared by every game,
    and each child element supplies the ``hnn``/``hs`` (home) and
    ``vnn``/``vs`` (away) attributes.

    Args:
        xml_str: XML text to parse.

    Yields:
        dict with keys ``home_team``, ``home_score``, ``away_team``,
        ``away_score``, ``week`` and ``year``. A score is ``None`` when the
        attribute is missing or blank, rather than aborting the whole week.

    Note:
        This is a generator, so parsing errors surface on first iteration,
        not when the function is called.
    """
    def _score(raw):
        # int('') and int(None) both raise; report "no score" as None instead.
        if raw is None or not raw.strip():
            return None
        return int(raw)

    parsed = ET.fromstring(xml_str)
    gms = parsed.find('gms')
    week = int(gms.get('w'))
    year = int(gms.get('y'))
    for gm in gms:
        yield {
            'home_team': gm.get('hnn'),
            'home_score': _score(gm.get('hs')),
            'away_team': gm.get('vnn'),
            'away_score': _score(gm.get('vs')),
            'week': week,
            'year': year
        }

In [28]:
def get_scores_params(week, season, season_type):
    """Validate the inputs and build the query-parameter dict for a scores request.

    Args:
        week: Week number as an int or numeric string; must lie in 1..18.
        season: Season year as an int or numeric string, or ``None`` to use
            the current calendar year; must lie in 1970..current year.
        season_type: Passed through unchanged as ``seasonType``.

    Returns:
        dict with string values under ``season`` and ``week`` plus the
        untouched ``seasonType``.

    Raises:
        ValueError: If ``week`` or ``season`` is outside its allowed range
            (or a string that cannot be converted to int).
    """
    week_no = int(week) if isinstance(week, str) else week
    if week_no > 18 or week_no < 1:
        raise ValueError('Week should be in the range: (1, 18)')

    now = datetime.datetime.now()

    if season is None:
        # No explicit season: default to the current calendar year.
        season_no = now.year
    else:
        season_no = int(season) if isinstance(season, str) else season
        if season_no > now.year or season_no < 1970:
            raise ValueError(f'Season should be in the range: (1970, {now.year})')

    return {
        'season': str(season_no),
        'seasonType': season_type,
        'week': str(week_no),
    }

In [29]:
def get_scores_sync(week, season=None, season_type='REG'):
    """Fetch one week of scores with a blocking HTTP request and parse them.

    Args:
        week: Week number (int or numeric string), validated by
            ``get_scores_params``.
        season: Season year, or ``None`` for the current year.
        season_type: Season type forwarded as the ``seasonType`` parameter.

    Returns:
        The generator produced by ``parse_xml`` for the response body.

    Raises:
        ValueError: If ``week`` or ``season`` fails validation.
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within 10 seconds.
    """
    params = get_scores_params(week, season, season_type)
    # Same 10 s budget as the async variant; without a timeout requests can block forever.
    r = requests.get('http://nfl.com/ajax/scorestrip/', params=params, timeout=10)

    # Fail loudly instead of implicitly returning None, which callers that
    # iterate over the result would turn into an unrelated TypeError.
    r.raise_for_status()
    return parse_xml(r.text)

In [30]:
async def get_scores_async(week, season=None, season_type='REG'):
    """Fetch the raw scorestrip XML for one week without blocking the event loop.

    Args:
        week: Week number (int or numeric string), validated by
            ``get_scores_params``.
        season: Season year, or ``None`` for the current year.
        season_type: Season type forwarded as the ``seasonType`` parameter.

    Returns:
        The response body as text (unparsed XML).

    Raises:
        ValueError: If ``week`` or ``season`` fails validation.
        aiohttp.ClientResponseError: If the server answers with an error status.
        asyncio.TimeoutError: If the request takes longer than 10 seconds.
    """
    params = get_scores_params(week, season, season_type)
    # A session is opened per call, so each request is self-contained.
    async with aiohttp.ClientSession() as session:
        async with async_timeout.timeout(10):
            async with session.get('http://nfl.com/ajax/scorestrip/', params=params) as resp:
                # Surface HTTP errors here rather than handing an error page to the XML parser.
                resp.raise_for_status()
                return await resp.text()

In [99]:
async def main(async_scores, season='2016', weeks=range(1, 18)):
    """Download the given weeks concurrently and append the parsed games.

    Args:
        async_scores: List that is extended in place with one dict per game.
        season: Season to request; defaults to ``'2016'``.
        weeks: Iterable of week numbers to request; defaults to weeks 1-17.

    Returns:
        None. Results are delivered through ``async_scores``.
    """
    # gather() returns the pages in the same order as `weeks`, so the
    # resulting list stays ordered by week.
    pages = await asyncio.gather(
        *(get_scores_async(week, season=season) for week in weeks)
    )
    for page in pages:
        # Materialise each week before extending so a parse error cannot
        # leave a half-appended week in the list.
        async_scores.extend(list(parse_xml(page)))

In [89]:
%%time
# Time the concurrent download: main() fills async_scores in place.
async_scores = []
loop = asyncio.get_event_loop()
# NOTE(review): run_until_complete() raises RuntimeError if this loop is already
# running, which may be the case on newer Jupyter kernels — confirm the target environment.
loop.run_until_complete(main(async_scores))

CPU times: user 156 ms, sys: 24 ms, total: 180 ms
Wall time: 5.94 s


In [90]:
# Number of games collected by the async download.
len(async_scores)

256

In [69]:
%%time
# Time the sequential download of weeks 1-17 for comparison with the async run above.
sync_scores = [game for week in range(1, 18) for game in get_scores_sync(week, season='2016')]

CPU times: user 504 ms, sys: 4 ms, total: 508 ms
Wall time: 14.5 s


In [94]:
# Number of games collected by the sequential download.
len(sync_scores)

256

In [8]:
# Relative path of the JSON file that the cells below write the scores to and read them from.
nfl_scores_2016 = './data/nfl_scores_2016.json'

In [101]:
# Save the downloaded games as JSON so later cells can work without re-fetching.
# NOTE(review): assumes the ./data directory already exists — open() does not create it.
with open(nfl_scores_2016, 'w') as f:
    json.dump(async_scores, f)

In [9]:
# Load the saved games back from disk into a DataFrame.
with open(nfl_scores_2016, 'r') as f:
    df = pd.DataFrame(json.load(f))

In [143]:
# Preview the first rows of the scores table.
df.head()

   away_score   away_team  home_score home_team  week  year
0          20    panthers          21   broncos     1  2016
1          31  buccaneers          24   falcons     1  2016
2           7       bills          13    ravens     1  2016
3          14       bears          23    texans     1  2016
4          27     packers          23   jaguars     1  2016

In [106]:
# Show the dtype pandas assigned to each column.
df.dtypes

away_score     int64
away_team     object
home_score     int64
home_team     object
week           int64
year           int64
dtype: object

In [142]:
def team_details(df, team):
    """Return the games involving ``team`` with that team's score margin added.

    Args:
        df: DataFrame with ``home_team``, ``away_team``, ``home_score`` and
            ``away_score`` columns.
        team: Team name to match against ``home_team`` / ``away_team``.

    Returns:
        A copy of the matching rows (original index kept) with an extra
        ``score_diff`` column: the team's points minus the opponent's points,
        so positive means a win and negative a loss. The input frame is not
        modified. An unknown team yields an empty frame that still has the
        ``score_diff`` column.
    """
    played_home = df.home_team == team
    played_away = df.away_team == team
    team_df = df[played_away | played_home].copy()

    # Margin from the home side's point of view; flip the sign for away games.
    # Vectorised so it also works when no rows match (row-wise apply returns
    # a DataFrame for an empty selection, which cannot be assigned to a column).
    home_margin = team_df.home_score - team_df.away_score
    team_df['score_diff'] = home_margin.where(team_df.home_team == team, -home_margin)
    return team_df

In [115]:
# Games involving the eagles, with their per-game score margin.
eagles = team_details(df, 'eagles')

In [116]:
# Display the eagles' games.
eagles

     away_score away_team  home_score home_team  week  year  score_diff
8            10    browns          29    eagles     1  2016          19
31           29    eagles          14     bears     2  2016          15
43            3  steelers          34    eagles     3  2016          31
65           23    eagles          24     lions     5  2016          -1
86           20    eagles          27  redskins     6  2016          -7
100          10   vikings          21    eagles     7  2016          11
118          23    eagles          29   cowboys     8  2016          -6
126          23    eagles          28    giants     9  2016          -5
137          15   falcons          24    eagles    10  2016           9
158          15    eagles          26  seahawks    11  2016         -11
176          27   packers          13    eagles    12  2016         -14
181          14    eagles          32   bengals    13  2016         -18
200          27  redskins          22    eagles    14  2016     

In [117]:
# Average margin across the eagles' games (positive means they outscored opponents overall).
eagles.score_diff.mean()

2.25

In [118]:
# Add the combined points of both teams per game; this adds a column to `eagles` in place.
eagles['total_game_score'] = eagles.away_score + eagles.home_score

In [119]:
# Average combined points per eagles game.
eagles.total_game_score.mean()

43.625

In [120]:
# Count wins and losses from the sign of the margin. Wins are counted
# explicitly (score_diff > 0) rather than as "everything that is not a loss",
# so a tied game (score_diff == 0) is not reported as a win.
games_lost = (eagles.score_diff < 0).sum()
games_won = (eagles.score_diff > 0).sum()
print(f'eagles won {games_won} games and lost {games_lost} games.')

eagles won 7 games and lost 9 games.


# Neo4j Experiment

In [2]:
# Connect to Neo4j over Bolt. The password comes from the NEO4J_PASSWORD
# environment variable and falls back to the literal 'neo4j' when it is unset.
graph = py2neo.Graph(bolt=True, password=os.environ.get('NEO4J_PASSWORD', 'neo4j'))

In [37]:
# Show the kernel version reported by the connected database.
graph.dbms.kernel_version

(3, 3, 0)

In [67]:
@lru_cache(maxsize=None)
def mkteam(name):
    """Create a ``Team`` node named ``name`` in the graph and return it.

    Results are memoised per ``name``, so repeated calls return the same node
    object without creating another one until the cache is cleared.
    """
    node = py2neo.Node('Team', name=name)
    graph.create(node)
    return node

In [59]:
def mkgame(week):
    """Create a new ``Game`` node carrying ``week`` in the graph and return it.

    Not cached: every call creates another node.
    """
    node = py2neo.Node('Game', week=week)
    graph.create(node)
    return node

In [60]:
@lru_cache(maxsize=None)
def mkweek(week):
    """Create a ``Week`` node for ``week`` in the graph and return it.

    Results are memoised on the week number alone, so the same node is
    returned for a given week regardless of season until the cache is cleared.
    """
    node = py2neo.Node('Week', week=week)
    graph.create(node)
    return node

In [61]:
@lru_cache(maxsize=None)
def mkseason(year):
    """Create a ``Season`` node for ``year`` in the graph and return it.

    Results are memoised per ``year``, so repeated calls return the same node
    object without creating another one until the cache is cleared.
    """
    node = py2neo.Node('Season', year=year)
    graph.create(node)
    return node

In [None]:
# DESTRUCTIVE: removes every node in the database together with its relationships.
# Run this only to reset the graph before reloading it.
result = graph.run("""
match(node)
detach delete node;
""")

In [69]:
%%time

mkseason.cache_clear()
mkweek.cache_clear()
mkteam.cache_clear()

tx = graph.begin()
for game in df.itertuples():
    season_node = mkseason(int(game.year))
    week_node = mkweek(int(game.week))
    home_team_node = mkteam(game.home_team)
    away_team_node = mkteam(game.away_team)
    game_node = mkgame(int(game.week))
    
    tx.create(py2neo.Relationship(home_team_node, 'PLAYED', game_node, points=int(game.home_score), was_home=True))
    tx.create(py2neo.Relationship(away_team_node, 'PLAYED', game_node, points=int(game.away_score), was_home=False))
    tx.create(py2neo.Relationship(game_node, 'PART_OF', week_node))
    tx.create(py2neo.Relationship(week_node, 'PART_OF', season_node))
tx.commit()

CPU times: user 4.52 s, sys: 32 ms, total: 4.56 s
Wall time: 19.9 s


## Get counts of all node labels

In [88]:
# Cypher: expand each node's labels into rows and count the nodes per label.
query = """
match (n)
unwind labels(n) as label
return label, count(*) as count
"""

In [89]:
# Run the label-count query and print all returned records.
result = graph.run(query)
pprint(list(result))

[('label': 'Team', 'count': 32),
 ('label': 'Season', 'count': 1),
 ('label': 'Game', 'count': 256),
 ('label': 'Week', 'count': 17)]


## Get summary of all games on paths of up to six `PLAYED` hops (three games) between two teams

In [90]:
# Cypher: find paths of 0 to 6 PLAYED hops between the teams named $t1 and $t2,
# keep the distinct Game nodes found on those paths, sum the points of the
# PLAYED relationships attached to each game, and return count/avg/min/max/stdev
# of those per-game totals.
# NOTE(review): filter() appears to be the older Cypher list-filter form; confirm
# it is still supported before running against a newer Neo4j release.
query = """
match p = (:Team {name: $t1})-[:PLAYED*0..6]-(:Team {name: $t2})
unwind filter(node in nodes(p) where node:Game) AS gameNode
with distinct gameNode AS gameNode
match ()-[r:PLAYED]-(gameNode)
with id(gameNode) AS gid,
     collect(r.points) AS pts
with reduce(x=0, i in pts | x + i) AS tot
return count(*) as n,
       avg(tot) AS avg,
       min(tot) AS min,
       max(tot) AS max,
       stdev(tot) AS stdev;
"""

In [91]:
# Run the query with eagles and giants bound to $t1 and $t2, then print the records.
result = graph.run(query, t1='eagles', t2='giants')
pprint(list(result))

[('n': 82, 'avg': 43.82926829268291, 'min': 12, 'max': 77, 'stdev': 12.235149605858835)]


## Get average total score and abs diff grouped by week

In [96]:
# Cypher: collect the points of the PLAYED relationships per game, compute each
# game's total and the absolute difference between its first two point values,
# then return the rounded averages of both grouped by week, ordered by week.
query = """
match ()-[r:PLAYED]-(g:Game)
with g.week as week,
     id(g) as gid,
     collect(r.points) as pts
with week,
     gid,
     reduce(x=0, i in pts | x + i) as tot,
     abs(pts[0] - pts[1]) as diff
return week,
       round(avg(tot)) as avg_tot,
       round(avg(diff)) as avg_abs_diff
order by week asc;  
"""

In [97]:
# Run the per-week total / absolute-difference query and print the records.
result = graph.run(query)
pprint(list(result))

[('week': 1, 'avg_tot': 45.0, 'avg_abs_diff': 8.0),
 ('week': 2, 'avg_tot': 45.0, 'avg_abs_diff': 10.0),
 ('week': 3, 'avg_tot': 47.0, 'avg_abs_diff': 12.0),
 ('week': 4, 'avg_tot': 45.0, 'avg_abs_diff': 10.0),
 ('week': 5, 'avg_tot': 45.0, 'avg_abs_diff': 10.0),
 ('week': 6, 'avg_tot': 48.0, 'avg_abs_diff': 10.0),
 ('week': 7, 'avg_tot': 42.0, 'avg_abs_diff': 9.0),
 ('week': 8, 'avg_tot': 50.0, 'avg_abs_diff': 8.0),
 ('week': 9, 'avg_tot': 50.0, 'avg_abs_diff': 9.0),
 ('week': 10, 'avg_tot': 46.0, 'avg_abs_diff': 8.0),
 ('week': 11, 'avg_tot': 42.0, 'avg_abs_diff': 8.0),
 ('week': 12, 'avg_tot': 46.0, 'avg_abs_diff': 10.0),
 ('week': 13, 'avg_tot': 44.0, 'avg_abs_diff': 15.0),
 ('week': 14, 'avg_tot': 40.0, 'avg_abs_diff': 9.0),
 ('week': 15, 'avg_tot': 44.0, 'avg_abs_diff': 11.0),
 ('week': 16, 'avg_tot': 52.0, 'avg_abs_diff': 12.0),
 ('week': 17, 'avg_tot': 46.0, 'avg_abs_diff': 13.0)]


## Get average total score and diff grouped by week

In [98]:
# Cypher: for each game pair the home PLAYED relationship (was_home: true, r1)
# with the game's other PLAYED relationship (r2), compute the total and the
# signed difference r1.points - r2.points (home minus the other side), then
# return the rounded averages grouped by week, ordered by week.
query = """
match ()-[r1:PLAYED {was_home: true}]-(g:Game)-[r2:PLAYED]-()
with g.week as week,
     id(g) as gid,
     [r1.points, r2.points] as pts
with week,
     gid,
     reduce(x=0, i in pts | x + i) as tot,
     pts[0] - pts[1] as diff
return week,
       round(avg(tot)) as avg_tot,
       round(avg(diff)) as avg_diff
order by week asc;
"""

In [99]:
# Run the per-week total / home-margin query and print the records.
result = graph.run(query)
pprint(list(result))

[('week': 1, 'avg_tot': 45.0, 'avg_diff': 1.0),
 ('week': 2, 'avg_tot': 45.0, 'avg_diff': 5.0),
 ('week': 3, 'avg_tot': 47.0, 'avg_diff': 6.0),
 ('week': 4, 'avg_tot': 45.0, 'avg_diff': 3.0),
 ('week': 5, 'avg_tot': 45.0, 'avg_diff': 0.0),
 ('week': 6, 'avg_tot': 48.0, 'avg_diff': 6.0),
 ('week': 7, 'avg_tot': 42.0, 'avg_diff': 1.0),
 ('week': 8, 'avg_tot': 50.0, 'avg_diff': 2.0),
 ('week': 9, 'avg_tot': 50.0, 'avg_diff': -2.0),
 ('week': 10, 'avg_tot': 46.0, 'avg_diff': 4.0),
 ('week': 11, 'avg_tot': 42.0, 'avg_diff': 3.0),
 ('week': 12, 'avg_tot': 46.0, 'avg_diff': 1.0),
 ('week': 13, 'avg_tot': 44.0, 'avg_diff': 6.0),
 ('week': 14, 'avg_tot': 40.0, 'avg_diff': 0.0),
 ('week': 15, 'avg_tot': 44.0, 'avg_diff': 0.0),
 ('week': 16, 'avg_tot': 52.0, 'avg_diff': 6.0),
 ('week': 17, 'avg_tot': 46.0, 'avg_diff': 2.0)]


## Get the avg total score and diff grouped by team

In [120]:
# Cypher: for every team, pair its PLAYED relationship (r) with the opposing
# team's PLAYED relationship (r2) on the same game, then return per team the
# rounded average combined points, the rounded average margin (r minus r2) and
# the number of pairings, ordered by team name.
query = """
match (t:Team)-[r:PLAYED]-(:Game)-[r2:PLAYED]-(:Team)
with t.name as team,
     round(avg(r.points + r2.points)) as avg_tot,
     round(avg(r.points - r2.points)) as avg_diff,
     count(*) as n
return team, avg_tot, avg_diff, n
order by team;
"""

In [121]:
# Run the per-team averages query and print the records.
result = graph.run(query)
pprint(list(result))

[('team': '49ers', 'avg_tot': 49.0, 'avg_diff': -11.0, 'n': 16),
 ('team': 'bears', 'avg_tot': 42.0, 'avg_diff': -7.0, 'n': 16),
 ('team': 'bengals', 'avg_tot': 40.0, 'avg_diff': 1.0, 'n': 16),
 ('team': 'bills', 'avg_tot': 49.0, 'avg_diff': 1.0, 'n': 16),
 ('team': 'broncos', 'avg_tot': 39.0, 'avg_diff': 2.0, 'n': 16),
 ('team': 'browns', 'avg_tot': 45.0, 'avg_diff': -12.0, 'n': 16),
 ('team': 'buccaneers', 'avg_tot': 45.0, 'avg_diff': -1.0, 'n': 16),
 ('team': 'cardinals', 'avg_tot': 49.0, 'avg_diff': 4.0, 'n': 16),
 ('team': 'chargers', 'avg_tot': 52.0, 'avg_diff': -1.0, 'n': 16),
 ('team': 'chiefs', 'avg_tot': 44.0, 'avg_diff': 5.0, 'n': 16),
 ('team': 'colts', 'avg_tot': 50.0, 'avg_diff': 1.0, 'n': 16),
 ('team': 'cowboys', 'avg_tot': 45.0, 'avg_diff': 7.0, 'n': 16),
 ('team': 'dolphins', 'avg_tot': 46.0, 'avg_diff': -1.0, 'n': 16),
 ('team': 'eagles', 'avg_tot': 44.0, 'avg_diff': 2.0, 'n': 16),
 ('team': 'falcons', 'avg_tot': 59.0, 'avg_diff': 8.0, 'n': 16),
 ('team': 'giants', '

## Get the avg/stdev of total score and avg/stdev of abs score diff for all games

In [108]:
# Cypher: collect the points of the PLAYED relationships per game, compute each
# game's total and the absolute difference between its first two point values,
# then return the game count plus avg/stdev of the totals and avg/stdev/max/min
# of the differences across all games.
query = """
match ()-[r:PLAYED]-(g:Game)
with id(g) as gid,
     collect(r.points) as pts
with gid,
     reduce(x=0, i in pts | x + i) as tot,
     abs(pts[0] - pts[1]) as diff
return count(gid) as n,
       avg(tot) as avg_tot,
       stdev(tot) as stdev_tot,
       avg(diff) as avg_diff,
       stdev(diff) as stdev_diff,
       max(diff) as max,
       min(diff) as min;
"""

In [109]:
# Run the all-games statistics query and print the records.
result = graph.run(query)
pprint(list(result))

[('n': 256, 'avg_tot': 45.550781249999964, 'stdev_tot': 13.354200315029034, 'avg_diff': 10.230468750000005, 'stdev_diff': 8.190238652882861, 'max': 38, 'min': 0)]


## Find all ties

In [157]:
# Cypher: match a home PLAYED relationship (r1) and another PLAYED relationship
# (r2) pointing at the same node, and return the paths where both carry the
# same number of points.
query = """
match p=()-[r1:PLAYED {was_home: true}]->()<-[r2:PLAYED]-()
where r1.points - r2.points = 0
return p;
"""

In [158]:
# Run the ties query and print the matching paths.
result = graph.run(query)
pprint(list(result))

[('p': (bengals)-[:PLAYED {points:27,was_home:true}]->(b9a1dea)<-[:PLAYED {points:27,was_home:false}]-(redskins)),
 ('p': (cardinals)-[:PLAYED {points:6,was_home:true}]->(aec3dcb)<-[:PLAYED {points:6,was_home:false}]-(seahawks))]


## Get all (1 to 4)-hop paths between two teams

In [162]:
# Cypher: return every path of 1 to 4 PLAYED hops (in either direction)
# between the teams named $t1 and $t2.
query = """
match path = (:Team {name: $t1})-[:PLAYED*1..4]-(:Team {name: $t2})
return path;
"""

In [170]:
# Run the path query for eagles and giants, then print each element that
# py2neo.walk() yields for a path together with its Python type, with a
# separator line after every path.
result = graph.run(query, t1='eagles', t2='giants')
for item in result:
    for segment in py2neo.walk(item.get('path')):
        print(segment, type(segment))
    print('-----------------------')

(eagles:Team {name:"eagles"}) <class 'py2neo.types.Node'>
(eagles)-[:PLAYED {points:13,was_home:true}]->(eace51d) <class 'py2neo.types.Relationship'>
(eace51d:Game {week:12}) <class 'py2neo.types.Node'>
(packers)-[:PLAYED {points:27,was_home:false}]->(eace51d) <class 'py2neo.types.Relationship'>
(packers:Team {name:"packers"}) <class 'py2neo.types.Node'>
(packers)-[:PLAYED {points:23,was_home:true}]->(c665a04) <class 'py2neo.types.Relationship'>
(c665a04:Game {week:5}) <class 'py2neo.types.Node'>
(giants)-[:PLAYED {points:16,was_home:false}]->(c665a04) <class 'py2neo.types.Relationship'>
(giants:Team {name:"giants"}) <class 'py2neo.types.Node'>
-----------------------
(eagles:Team {name:"eagles"}) <class 'py2neo.types.Node'>
(eagles)-[:PLAYED {points:26,was_home:false}]->(d87a6e5) <class 'py2neo.types.Relationship'>
(d87a6e5:Game {week:15}) <class 'py2neo.types.Node'>
(ravens)-[:PLAYED {points:27,was_home:true}]->(d87a6e5) <class 'py2neo.types.Relationship'>
(ravens:Team {name:"ravens"