In [79]:
import dateutil.parser
import os
os.chdir("/home/jovyan/work")

import pandas as pd
from neomodel import db
from src.utils import send_request
from src.utils import init_connection




## Create Season Files from MSF

### General Setup

In [10]:
columns=[
    "Date", 
    "Start (ET)", 
    "Visitor/Neutral", 
    "PTS",
    "Home/Neutral", 
    "PTS.1", 
    "OT", 
    "Attend.", 
    "game_type", 
    "game_name",
]

### Season 2017/18

### Regular Season

In [11]:
result1718_reg = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2017-2018-regular/games.json")

game_reg = result1718_reg.get("games")[0]

game_reg

{'schedule': {'attendance': None,
  'awayTeam': {'abbreviation': 'BOS', 'id': 82},
  'broadcasters': [],
  'delayedOrPostponedReason': None,
  'endedTime': None,
  'homeTeam': {'abbreviation': 'CLE', 'id': 86},
  'id': 42070,
  'officials': [],
  'originalStartTime': None,
  'playedStatus': 'COMPLETED',
  'scheduleStatus': 'NORMAL',
  'startTime': '2017-10-18T00:00:00.000Z',
  'venue': {'id': 80, 'name': 'Quicken Loans Arena'},
  'venueAllegiance': 'HOME',
  'weather': None},
 'score': {'awayScoreTotal': 99,
  'currentIntermission': None,
  'currentQuarter': None,
  'currentQuarterSecondsRemaining': None,
  'homeScoreTotal': 102,
  'quarters': [{'awayScore': 19, 'homeScore': 29, 'quarterNumber': 1},
   {'awayScore': 19, 'homeScore': 25, 'quarterNumber': 2},
   {'awayScore': 33, 'homeScore': 18, 'quarterNumber': 3},
   {'awayScore': 28, 'homeScore': 30, 'quarterNumber': 4}]}}

In [33]:
def unmarshall_game(game, game_type="regular_season"):
    row = {}
    row["game_id"] = game["schedule"]["id"]
    date = dateutil.parser.parse(game["schedule"]["startTime"])
    # Date
    row["Date"] = date.strftime("%Y-%m-%d")
    # Start (ET)
    # ! NO TIME
    row["Start (ET)"] = date.strftime("%I:%M %p")
    # Visitor/Neutral
    row["Visitor/Neutral"] = game["schedule"]["awayTeam"]["abbreviation"]
    # PTS
    row["PTS"] = game["score"]["awayScoreTotal"]
    # Home/Neutral
    row["Home/Neutral"] = game["schedule"]["homeTeam"]["abbreviation"]
    # PTS.1
    row["PTS.1"] = game["score"]["homeScoreTotal"]
    # OT
    row["OT"] = 1 if len(game["score"]["quarters"]) > 4 else 0
    # Attend.
    # ! NO INFO
    row["Attend."] = None
    # game_type
    row["game_type"] = game_type
    # game_name
    row["game_name"] = row["Visitor/Neutral"] + " at " + row["Home/Neutral"]
    return row
unmarshall_game(game_reg)

{'Attend.': None,
 'Date': '2015-10-28',
 'Home/Neutral': 'ATL',
 'OT': 0,
 'PTS': 106,
 'PTS.1': 94,
 'Start (ET)': '12:00 AM',
 'Visitor/Neutral': 'DET',
 'game_id': 31176,
 'game_name': 'DET at ATL',
 'game_type': 'regular_season'}

In [13]:
games_reg1718 = list(map(lambda game: unmarshall_game(game), result1718_reg.get("games")))

### Playoff

In [143]:
result1718_po = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2018-playoff/games.json")

game_po = result1718_po.get("games")[0]

game_po

{'schedule': {'id': 45890,
  'startTime': '2018-04-14T19:00:00.000Z',
  'endedTime': None,
  'awayTeam': {'id': 106, 'abbreviation': 'SAS'},
  'homeTeam': {'id': 101, 'abbreviation': 'GSW'},
  'venue': {'id': 95, 'name': 'Oracle Arena'},
  'venueAllegiance': 'NEUTRAL',
  'scheduleStatus': 'NORMAL',
  'originalStartTime': None,
  'delayedOrPostponedReason': None,
  'playedStatus': 'COMPLETED',
  'attendance': None,
  'officials': [],
  'broadcasters': [],
  'weather': None},
 'score': {'currentQuarter': None,
  'currentQuarterSecondsRemaining': None,
  'currentIntermission': None,
  'awayScoreTotal': 92,
  'homeScoreTotal': 113,
  'quarters': [{'quarterNumber': 1, 'awayScore': 17, 'homeScore': 28},
   {'quarterNumber': 2, 'awayScore': 24, 'homeScore': 29},
   {'quarterNumber': 3, 'awayScore': 22, 'homeScore': 29},
   {'quarterNumber': 4, 'awayScore': 29, 'homeScore': 27}]}}

In [144]:
unmarshall_game(game_po, game_type="playoff")

{'Date': '2018-04-14',
 'Start (ET)': '07:00 PM',
 'Visitor/Neutral': 'SAS',
 'PTS': 92,
 'Home/Neutral': 'GSW',
 'PTS.1': 113,
 'OT': 0,
 'Attend.': None,
 'game_type': 'playoff',
 'game_name': 'SAS at GSW'}

In [145]:
games_po1718 = list(map(lambda game: unmarshall_game(game), result1718_po.get("games")))

### Merge Season 2017/2018

In [151]:
df_reg = pd.DataFrame.from_records(games_reg1718)
df_po = pd.DataFrame.from_records(games_po1718)

In [152]:
df = df_reg.append(df_po)

In [155]:
df = df[columns]
df.reset_index(drop=True, inplace=True)

In [159]:
df.to_csv("./data/msf/season_files/season1718.csv", index=False)

# Season 16/17

### Regurlar Season 

In [44]:
result1617_reg = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2016-2017-regular/games.json")

game_reg = result1617_reg.get("games")[0]

game_reg

{'schedule': {'attendance': None,
  'awayTeam': {'abbreviation': 'NYK', 'id': 83},
  'broadcasters': [],
  'delayedOrPostponedReason': None,
  'endedTime': None,
  'homeTeam': {'abbreviation': 'CLE', 'id': 86},
  'id': 33941,
  'officials': [],
  'originalStartTime': None,
  'playedStatus': 'COMPLETED',
  'scheduleStatus': 'NORMAL',
  'startTime': '2016-10-25T23:30:00.000Z',
  'venue': {'id': 80, 'name': 'Quicken Loans Arena'},
  'venueAllegiance': 'HOME',
  'weather': None},
 'score': {'awayScoreTotal': 88,
  'currentIntermission': None,
  'currentQuarter': None,
  'currentQuarterSecondsRemaining': None,
  'homeScoreTotal': 117,
  'quarters': [{'awayScore': 18, 'homeScore': 28, 'quarterNumber': 1},
   {'awayScore': 27, 'homeScore': 20, 'quarterNumber': 2},
   {'awayScore': 19, 'homeScore': 34, 'quarterNumber': 3},
   {'awayScore': 24, 'homeScore': 35, 'quarterNumber': 4}]}}

In [14]:
unmarshall_game(game_reg, game_type="regular_season")

{'Attend.': None,
 'Date': '2017-10-18',
 'Home/Neutral': 'CLE',
 'OT': 0,
 'PTS': 99,
 'PTS.1': 102,
 'Start (ET)': '12:00 AM',
 'Visitor/Neutral': 'BOS',
 'game_name': 'BOS at CLE',
 'game_type': 'regular_season'}

In [15]:
games_reg1617 = list(map(lambda game: unmarshall_game(game), result1617_reg.get("games")))

### Playoffs 

In [16]:
result1617_po = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2017-playoff/games.json")

game_po = result1617_po.get("games")[0]

game_po

{'schedule': {'attendance': None,
  'awayTeam': {'abbreviation': 'IND', 'id': 87},
  'broadcasters': [],
  'delayedOrPostponedReason': None,
  'endedTime': None,
  'homeTeam': {'abbreviation': 'CLE', 'id': 86},
  'id': 40310,
  'officials': [],
  'originalStartTime': None,
  'playedStatus': 'COMPLETED',
  'scheduleStatus': 'NORMAL',
  'startTime': '2017-04-15T19:00:00.000Z',
  'venue': {'id': 80, 'name': 'Quicken Loans Arena'},
  'venueAllegiance': 'NEUTRAL',
  'weather': None},
 'score': {'awayScoreTotal': 108,
  'currentIntermission': None,
  'currentQuarter': None,
  'currentQuarterSecondsRemaining': None,
  'homeScoreTotal': 109,
  'quarters': [{'awayScore': 29, 'homeScore': 34, 'quarterNumber': 1},
   {'awayScore': 30, 'homeScore': 32, 'quarterNumber': 2},
   {'awayScore': 25, 'homeScore': 26, 'quarterNumber': 3},
   {'awayScore': 24, 'homeScore': 17, 'quarterNumber': 4}]}}

In [17]:
unmarshall_game(game_po, game_type="playoff")

{'Attend.': None,
 'Date': '2017-04-15',
 'Home/Neutral': 'CLE',
 'OT': 0,
 'PTS': 108,
 'PTS.1': 109,
 'Start (ET)': '07:00 PM',
 'Visitor/Neutral': 'IND',
 'game_name': 'IND at CLE',
 'game_type': 'playoff'}

In [18]:
games_po1617 = list(map(lambda game: unmarshall_game(game), result1617_po.get("games")))

### Merge Season 16/17

In [19]:
df_reg = pd.DataFrame.from_records(games_reg1617)
df_po = pd.DataFrame.from_records(games_po1617)

In [20]:
df = df_reg.append(df_po)

In [21]:
df = df[columns]
df.reset_index(drop=True, inplace=True)

In [None]:
df.to_csv("./data/msf/season_files/season1617.csv", index=False)

# Season 15/16

### Regular Season

In [34]:
result1516_reg = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2015-2016-regular/games.json")

game_reg = result1516_reg.get("games")[0]

game_reg

{'schedule': {'attendance': None,
  'awayTeam': {'abbreviation': 'DET', 'id': 88},
  'broadcasters': [],
  'delayedOrPostponedReason': None,
  'endedTime': None,
  'homeTeam': {'abbreviation': 'ATL', 'id': 91},
  'id': 31176,
  'officials': [],
  'originalStartTime': None,
  'playedStatus': 'COMPLETED',
  'scheduleStatus': 'NORMAL',
  'startTime': '2015-10-28T00:00:00.000Z',
  'venue': {'id': 20, 'name': 'Philips Arena'},
  'venueAllegiance': 'HOME',
  'weather': None},
 'score': {'awayScoreTotal': 106,
  'currentIntermission': None,
  'currentQuarter': None,
  'currentQuarterSecondsRemaining': None,
  'homeScoreTotal': 94,
  'quarters': [{'awayScore': 25, 'homeScore': 25, 'quarterNumber': 1},
   {'awayScore': 23, 'homeScore': 18, 'quarterNumber': 2},
   {'awayScore': 34, 'homeScore': 23, 'quarterNumber': 3},
   {'awayScore': 24, 'homeScore': 28, 'quarterNumber': 4}]}}

In [24]:
unmarshall_game(game_reg, game_type="regular_season")

{'Attend.': None,
 'Date': '2015-10-28',
 'Home/Neutral': 'ATL',
 'OT': 0,
 'PTS': 106,
 'PTS.1': 94,
 'Start (ET)': '12:00 AM',
 'Visitor/Neutral': 'DET',
 'game_name': 'DET at ATL',
 'game_type': 'regular_season'}

In [25]:
games_reg1516 = list(map(lambda game: unmarshall_game(game), result1516_reg.get("games")))

### Playoffs

In [26]:
result1516_po = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2016-playoff/games.json")

game_po = result1516_po.get("games")[0]

game_po

{'schedule': {'attendance': None,
  'awayTeam': {'abbreviation': 'IND', 'id': 87},
  'broadcasters': [],
  'delayedOrPostponedReason': None,
  'endedTime': None,
  'homeTeam': {'abbreviation': 'TOR', 'id': 81},
  'id': 32421,
  'officials': [],
  'originalStartTime': None,
  'playedStatus': 'COMPLETED',
  'scheduleStatus': 'NORMAL',
  'startTime': '2016-04-16T23:00:00.000Z',
  'venue': {'id': 1, 'name': 'Air Canada Center'},
  'venueAllegiance': 'NEUTRAL',
  'weather': None},
 'score': {'awayScoreTotal': 100,
  'currentIntermission': None,
  'currentQuarter': None,
  'currentQuarterSecondsRemaining': None,
  'homeScoreTotal': 90,
  'quarters': [{'awayScore': 19, 'homeScore': 24, 'quarterNumber': 1},
   {'awayScore': 24, 'homeScore': 21, 'quarterNumber': 2},
   {'awayScore': 27, 'homeScore': 22, 'quarterNumber': 3},
   {'awayScore': 30, 'homeScore': 23, 'quarterNumber': 4}]}}

In [27]:
unmarshall_game(game_po, game_type="playoff")

{'Attend.': None,
 'Date': '2016-04-16',
 'Home/Neutral': 'TOR',
 'OT': 0,
 'PTS': 100,
 'PTS.1': 90,
 'Start (ET)': '11:00 PM',
 'Visitor/Neutral': 'IND',
 'game_name': 'IND at TOR',
 'game_type': 'playoff'}

In [28]:
games_po1516 = list(map(lambda game: unmarshall_game(game), result1516_po.get("games")))

### Merge Season

In [29]:
df_reg = pd.DataFrame.from_records(games_reg1516)
df_po = pd.DataFrame.from_records(games_po1516)

In [30]:
df = df_reg.append(df_po)

In [31]:
df = df[columns]
df.reset_index(drop=True, inplace=True)

In [None]:
df.to_csv("./data/msf/season_files/season1516.csv", index=False)

# Play-by-Play data

In [69]:
pbp1718 = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2017-2018-regular/games/20180407-POR-SAS/playbyplay.json")

pbp = pbp1718.get("plays")

pbp

[{'description': 'Jump Ball Gasol vs. Nurkic: Tip to Murray',
  'jumpBall': {'awayPlayer': {'firstName': 'Jusuf',
    'id': 9192,
    'jerseyNumber': 27,
    'lastName': 'Nurkic',
    'position': 'C'},
   'homePlayer': {'firstName': 'Pau',
    'id': 9149,
    'jerseyNumber': 16,
    'lastName': 'Gasol',
    'position': 'C'},
   'tippedToPlayer': None,
   'wonBy': 'HOME'},
  'playStatus': {'awayPlayersOnCourt': [{'location': None,
     'player': {'firstName': 'Jusuf',
      'id': 9192,
      'jerseyNumber': 27,
      'lastName': 'Nurkic',
      'position': 'C'}}],
   'homePlayersOnCourt': [{'location': None,
     'player': {'firstName': 'Pau',
      'id': 9149,
      'jerseyNumber': 16,
      'lastName': 'Gasol',
      'position': 'C'}}],
   'quarter': 1,
   'secondsElapsed': 0}},
 {'description': "MISS Gasol 18' Jump Shot",
  'fieldGoalAttempt': {'assistingPlayer': None,
   'blockingPlayer': None,
   'distanceFeet': 0,
   'location': {'x': 227, 'y': 224},
   'points': 2,
   'result': '

**Open:**
- Wie bekommen wir alle Spiele wenn man die Spiele einzeln requesten muss? 
- Welche play types gibt es?  

### Which play types are in the play-by-play data? 

In [74]:
set([list(x.keys())[1] for x in pbp])

{'fieldGoalAttempt',
 'foul',
 'freeThrowAttempt',
 'jumpBall',
 'rebound',
 'substitution',
 'turnover',
 'violation'}

### Example how to request data with game_id

In [None]:
pbp1718_2 = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2017-2018-regular/games/43256/playbyplay.json")

pbp_2 = pbp1718_2.get("game")

pbp_2

### List of game_ids per season

In [80]:
init_connection()

In [81]:
game_ids = """
MATCH (g:Game)
RETURN g.game_id as game_id
"""

In [84]:
dta, hea = db.cypher_query(game_ids)

In [89]:
game_ids = pd.DataFrame(dta, columns=hea)

In [176]:
type(game_ids)

pandas.core.frame.DataFrame

In [231]:
pbpData = []

In [216]:
#create list of game_ids
game_ids2 = []
for x in game_ids['game_id'].tolist():
    game_ids2.append(x)

#### Test run with 2 games

In [222]:
game_ids3 = game_ids2[:2]

In [230]:
game_ids3

[45920, 45919]

In [232]:
pbpData = []
for x in game_ids3:
    temp = send_request("https://api.mysportsfeeds.com/v2.0/pull/nba/2017-2018-regular/games/{}/playbyplay.json".format(x))
    pbpData.append(temp)

In [233]:
pbpData

[{'game': {'attendance': None,
   'awayTeam': {'abbreviation': 'GSW', 'id': 101},
   'broadcasters': [],
   'delayedOrPostponedReason': None,
   'endedTime': None,
   'homeTeam': {'abbreviation': 'SAS', 'id': 106},
   'id': 45920,
   'officials': [],
   'originalStartTime': None,
   'playedStatus': 'COMPLETED',
   'scheduleStatus': 'NORMAL',
   'startTime': '2018-04-22T19:30:00.000Z',
   'venue': {'id': 99, 'name': 'AT&T Center'},
   'venueAllegiance': 'HOME',
   'weather': None},
  'lastUpdatedOn': '2018-08-21T09:50:01.114Z',
  'plays': [{'description': 'Jump Ball Aldridge vs. McGee: Tip to Green',
    'jumpBall': {'awayPlayer': {'firstName': 'JaVale',
      'id': 9173,
      'jerseyNumber': 1,
      'lastName': 'McGee',
      'position': 'C'},
     'homePlayer': {'firstName': 'LaMarcus',
      'id': 9480,
      'jerseyNumber': 12,
      'lastName': 'Aldridge',
      'position': 'PF'},
     'tippedToPlayer': None,
     'wonBy': 'HOME'},
    'playStatus': {'awayPlayersOnCourt': [{'loca