In [None]:
from collections import defaultdict
from datetime import datetime
from dateutil import tz
import os
os.chdir("/home/jovyan/work")

import arrow

from src.utils import send_request
from src._generate_msf_play_by_play_file import _get_play_type, UNMARSHALL, PARSED_GAMES

In [None]:
# Season slugs substituted into the MySportsFeeds URLs below, listed in
# chronological order (each regular season followed by its playoffs).
seasons = [
    "2015-2016-regular", "2016-playoff",
    "2016-2017-regular", "2017-playoff",
    "2017-2018-regular",
]

In [None]:
def unmarshall_season_game(game):
    """Build the ``YYYYMMDD-AWAY-HOME`` identifier for one schedule entry.

    Args:
        game: Mapping with a ``"schedule"`` entry that holds ``"startTime"``
            plus ``"homeTeam"`` / ``"awayTeam"`` mappings, each carrying an
            ``"abbreviation"``.

    Returns:
        The start date (as resolved by ``get_datetime`` for the home team),
        the away abbreviation and the home abbreviation joined with ``-``.
    """
    schedule = game["schedule"]
    home_team = schedule["homeTeam"]["abbreviation"]
    # The calendar date is taken from the start time localized for the home team.
    local_start = get_datetime(home_team, schedule["startTime"])
    away_team = schedule["awayTeam"]["abbreviation"]
    return "-".join((local_start.strftime("%Y%m%d"), away_team, home_team))

In [None]:
def get_datetime(team_abbreviaion, dt):
    if team_abbreviaion in [
        "NYK", "BRO", "BOS", "PHI", 
        "WAS", "CHA", "ATL", "ORL", 
        "MIA", "TOR", "CLE", "DET", "IND"]:
        time_zone = tz.gettz("US/Eastern")
    elif team_abbreviaion in [
        "CHI", "MIN", "DAL", "SAS", 
        "OKL", "MEM", "HOU", "MIL", "NOP"]:
        time_zone = tz.gettz("US/Central")
    elif team_abbreviaion in ["DEN", "PHX", "UTA"]:
        time_zone = tz.gettz("US/Mountain")
    elif team_abbreviaion in [
        "POR", "GSW", "LAL", "LAC", "SAC"]:
        time_zone = tz.gettz("US/Pacific")
    else:
        print("Error with timezones")
    return datetime.fromtimestamp(arrow.get(dt).timestamp, time_zone)

In [None]:
# Collect one record per scheduled game (season slug, game key, feed id)
# across every configured season.
games = []
for season in seasons:
    print("Processing: {season}".format(season=season))
    fetched_season = send_request(
        "https://api.mysportsfeeds.com/v2.0/pull/nba/{season}/games.json".format(season=season)
    )
    for game in fetched_season.get("games"):
        games.append(
            {
                "season": season,
                "game": unmarshall_season_game(game),
                "id": game["schedule"]["id"],
            }
        )

In [None]:
# Request the play-by-play feed of every collected game and pass each play to
# the handler registered for its play type in UNMARSHALL.
for idx, g in enumerate(games, 1):
    # `g` supplies the {season} and {game} placeholders of the URL template.
    s = "https://api.mysportsfeeds.com/v2.0/pull/nba/{season}/games/{game}/playbyplay.json?force=false".format(**g)
    if idx % 200 == 0:
        print("Processed a batch of 200 games, sleeping for 5:30 minutes, get yourself a coffee ;-)")
        # NOTE(review): the pause below is commented out (and `sleep` is not
        # imported in the visible cells), so nothing actually sleeps despite
        # the message above -- confirm whether throttling is still needed.
        #sleep(5*60 + 30)
    try:
        pbp = send_request(s)
        print("[{idx:04d}/{total}]: fetched {game}({id})".format(idx=idx, total=len(games), **g))
        for play in pbp.get("plays"):
            play_type = _get_play_type(play)
            # Dispatch to the handler for this play type, passing the game id.
            UNMARSHALL[play_type](play, g["id"])
            
    except ValueError:
        # NOTE(review): the try body also covers the per-play handlers, so a
        # ValueError raised while handling a play is reported as "No data"
        # too -- presumably only a failed fetch was meant; confirm.
        print("No data for {game}({id})".format(**g))
print("Finished!")