In [1]:
# Dependencies and Setup

import json
import pandas as pd
import requests
import numpy as py
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
from sqlalchemy import Column, Integer, String, Float, Date
import pymysql
pymysql.install_as_MySQLdb()
from sqlalchemy import create_engine, inspect, func, distinct
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
import sqlite3
from sqlite3 import Error

In [2]:
# Create engine and engine connection to SQLite Database

engine = create_engine("sqlite:///../SqLIte_files/NBAfantasyML.sqlite")
conn = engine.connect()

In [3]:
# Create function to try and connect to a sqlite database.  if it doesn't work, create one...

def create_connection(db_file):
    try:
        conn = sqlite3.connect(db_file)
        print(sqlite3.version)
    except Error as e:
        print(e)
    finally:
        conn.close()

In [4]:
# If connection succeeds

if __name__ == '__main__':
    create_connection("../NBAfantasyML.sqlite")

2.6.0


In [5]:
#Bring in data from internet as object then convert ot Json

schedule_response_obj=requests.get("http://data.nba.net/prod/v1/2018/schedule.json")
schedule_response=schedule_response_obj.json()

In [6]:
# Count the number of Games for the season 

games_response_count=schedule_response["league"]["standard"]
game_number=len(games_response_count)
# game_number

In [7]:
###    Elements for URL, append List, create DataFrame  ###

# Bring in Game Id and GameData ("startDataEastern") elements from NBA Schedule later used to create box_score URL
# Store elements in list
# Convert list to DataFrame and modify datatypes

# Get elements
NBA_Schedule=[]
for item in schedule_response["league"]["standard"]:
    item_dict={}
    item_dict["gameId"]=item["gameId"]
    item_dict["startDateEastern"]=item["startDateEastern"]

# Append List
    NBA_Schedule.append({"gameId":item_dict["gameId"],
                         "startDataEastern":item_dict["startDateEastern"]})

# Convert NBA_Schedule=[] list to DataFrame and modify Datatypes
NBA_Schedule_DF_initial=pd.DataFrame(NBA_Schedule)
NBA_Schedule_DF_initial["gameId"]=NBA_Schedule_DF_initial["gameId"].astype(int)
NBA_Schedule_DF_initial["startDataEastern"]=NBA_Schedule_DF_initial["startDataEastern"].astype(int)

NBA_Schedule_DF_initial.head()  

Unnamed: 0,gameId,startDataEastern
0,11800001,20180928
1,11800002,20180928
2,11800003,20180929
3,11800004,20180929
4,11800005,20180929


In [20]:
# count the number of gamesId records

game_count=len(NBA_Schedule_DF_initial["gameId"])


1309

In [8]:
# # Generate Box_Score_URL and append to List Box_Score_URL_List

Box_score_URL_List =[{}]

# Looping through gameids 
#for i in range (0,game_count):
    
chosen=0

    # Generate Box Score URL using GameId and GameData ("startDataEastern")
Box_score_URL=F"http://data.nba.net/prod/v1/{NBA_Schedule_DF_initial['startDataEastern'][chosen]}/00{NBA_Schedule_DF_initial['gameId'][chosen]}_boxscore.json"

NBA_Schedule_DF_initial["boxScoreURL"]=Box_score_URL
NBA_Schedule_DF_initial["boxScoreURL"]=NBA_Schedule_DF_initial["boxScoreURL"].astype(str)

# NBA_Schedule_DF_initial.dtypes


gameId               int32
startDataEastern     int32
boxScoreURL         object
dtype: object

In [11]:
# Box Score Request and request Object 

NBA_Schedule_DF_initial.boxScoreURL[chosen]
box_score_obj=requests.get(Box_score_URL)
box_score_response=box_score_obj.json()
# box_score_response

In [27]:
gameid=box_score_response["basicGameData"]["gameId"]
arenaname=box_score_response["basicGameData"]["arena"]["name"]
arenacity=box_score_response["basicGameData"]["arena"]["city"]
visitor_team=box_score_response["basicGameData"]["vTeam"]["teamId"]
home_team=box_score_response["basicGameData"]["hTeam"]["teamId"]

arena=[{"gameId":gameid,
      "arenaname":arenaname,
      "city":arenacity,
      "visitorteamId":visitorteam,  
      "hometeamId":hometeam}]
arena_df=pd.DataFrame(arena)
arena_df["gameId"]=arena_df["gameId"].astype(float)
arena_df["arenaname"]=arena_df["arenaname"].astype('str')
arena_df["city"]=arena_df["city"].astype('str')
arena_df["visitorteamId"]=arena_df["visitorteamId"].astype(float)
arena_df["hometeamId"]=arena_df["hometeamId"].astype(float)
arena_df.dtypes


arenaname         object
city              object
gameId           float64
hometeamId       float64
visitorteamId    float64
dtype: object

In [31]:
# Extract basic box score data EXCLUDING: INDIVIDUAL PLAYER STATS (two loops, vteam & hteam)
Team_boxscores=[]
# Vteam
gameid=box_score_response["basicGameData"]["gameId"]
teamId=box_score_response["basicGameData"]["vTeam"]["teamId"]
fastBreakPoints=box_score_response["stats"]["vTeam"]["fastBreakPoints"]
pointsInPaint=box_score_response["stats"]["vTeam"]["pointsInPaint"]
secondChancePoints=box_score_response["stats"]["vTeam"]["secondChancePoints"]
pointsOffTurnovers=box_score_response["stats"]["vTeam"]["pointsOffTurnovers"]
points=box_score_response["stats"]["vTeam"]["totals"]["points"]
fgm=box_score_response["stats"]["vTeam"]["totals"]["fgm"]
fga=box_score_response["stats"]["vTeam"]["totals"]["fga"]
fgp=box_score_response["stats"]["vTeam"]["totals"]["fgp"]
ftm=box_score_response["stats"]["vTeam"]["totals"]["ftm"]
fta=box_score_response["stats"]["vTeam"]["totals"]["fta"]
ftp=box_score_response["stats"]["vTeam"]["totals"]["ftp"]
ttm=box_score_response["stats"]["vTeam"]["totals"]["ttm"]
tta=box_score_response["stats"]["vTeam"]["totals"]["tta"]
ttp=box_score_response["stats"]["vTeam"]["totals"]["ttp"]
offReb=box_score_response["stats"]["vTeam"]["totals"]["offReb"]
defReb=box_score_response["stats"]["vTeam"]["totals"]["defReb"]
totReb=box_score_response["stats"]["vTeam"]["totals"]["totReb"]
assists=box_score_response["stats"]["vTeam"]["totals"]["assists"]
pFouls=box_score_response["stats"]["vTeam"]["totals"]["pFouls"]
totReb=box_score_response["stats"]["vTeam"]["totals"]["totReb"]
offReb=box_score_response["stats"]["vTeam"]["totals"]["offReb"]
defReb=box_score_response["stats"]["vTeam"]["totals"]["defReb"]
totReb=box_score_response["stats"]["vTeam"]["totals"]["totReb"]
# for non_player_stats in box_score_response["basicGameData"]


31


In [None]:
# Hteam
home_team=box_score_response["basicGameData"]["hTeam"]["teamId"]

In [14]:
# Extract ONLY player stats from box score
boxscore_player_list=[]
for boxscore_playerstats in box_score_response["stats"]["activePlayers"]:
    boxscore_player_dict={}
    boxscore_player_dict["personId"]=boxscore_playerstats["personId"]
    boxscore_player_dict["teamId"]=boxscore_playerstats["teamId"]
    boxscore_player_dict["points"]=boxscore_playerstats["points"]
    boxscore_player_dict["pos"]=boxscore_playerstats["pos"]
    boxscore_player_dict["min"]=boxscore_playerstats["min"]
    boxscore_player_dict["points"]=boxscore_playerstats["points"]
    boxscore_player_dict["fgm"]=boxscore_playerstats["fgm"]
    boxscore_player_dict["min"]=boxscore_playerstats["min"]
    boxscore_player_dict["points"]=boxscore_playerstats["points"]
    boxscore_player_dict["fgm"]=boxscore_playerstats["fgm"]
    boxscore_player_dict["fga"]=boxscore_playerstats["fga"]
    boxscore_player_dict["fgp"]=boxscore_playerstats["fgp"]
    boxscore_player_dict["ftm"]=boxscore_playerstats["ftm"]
    boxscore_player_dict["fta"]=boxscore_playerstats["fta"]
    boxscore_player_dict["ftp"]=boxscore_playerstats["ftp"]
    boxscore_player_dict["tpm"]=boxscore_playerstats["tpm"]
    boxscore_player_dict["tpa"]=boxscore_playerstats["tpa"]
    boxscore_player_dict["tpp"]=boxscore_playerstats["tpp"]
    boxscore_player_dict["offReb"]=boxscore_playerstats["offReb"]
    boxscore_player_dict["defReb"]=boxscore_playerstats["defReb"]
    boxscore_player_dict["totReb"]=boxscore_playerstats["totReb"]
    boxscore_player_dict["assists"]=boxscore_playerstats["assists"]
    boxscore_player_dict["pFouls"]=boxscore_playerstats["pFouls"]
    boxscore_player_dict["steals"]=boxscore_playerstats["steals"]
    boxscore_player_dict["turnovers"]=boxscore_playerstats["totReb"]
    boxscore_player_dict["blocks"]=boxscore_playerstats["blocks"]
    boxscore_player_dict["plusMinus"]=boxscore_playerstats["plusMinus"]
    boxscore_player_dict["dnp"]=boxscore_playerstats["dnp"]
    
    boxscore_player_list.append(boxscore_player_dict)
    
#     boxscore_player_list.append["personId"]=boxscore_player_list["personId"].astype("int")
    
# print(boxscore_player_list["personId"])

In [15]:
boxscore_player_df=pd.DataFrame(boxscore_player_list)
boxscore_player_df

Unnamed: 0,assists,blocks,defReb,dnp,fga,fgm,fgp,fta,ftm,ftp,...,plusMinus,points,pos,steals,teamId,totReb,tpa,tpm,tpp,turnovers
0,1,0,2,,8,1,12.5,3,2,66.7,...,-16,4,SF,4,15016,3,0,0,0.0,3
1,0,0,4,,9,4,44.4,2,2,100.0,...,-9,13,PF,1,15016,5,6,3,50.0,5
2,1,0,10,,13,8,61.5,3,3,100.0,...,-14,19,C,0,15016,13,0,0,0.0,13
3,2,0,1,,12,6,50.0,0,0,0.0,...,-12,13,SG,0,15016,1,6,1,16.7,1
4,5,0,3,,22,6,27.3,4,4,100.0,...,-12,19,PG,4,15016,3,12,3,25.0,3
5,4,0,3,,4,2,50.0,0,0,0.0,...,-5,5,,1,15016,3,3,1,33.3,3
6,0,0,1,,3,0,0.0,2,2,100.0,...,-9,2,,0,15016,2,1,0,0.0,2
7,0,1,2,,7,2,28.6,0,0,0.0,...,-16,5,,0,15016,3,4,1,25.0,3
8,2,0,0,,2,1,50.0,0,0,0.0,...,-11,2,,0,15016,0,0,0,0.0,0
9,0,0,3,,1,1,100.0,0,0,0.0,...,1,2,,0,15016,4,0,0,0.0,4


In [16]:
#Load NBA_TEAMS table
# NBA_Schedule_DF_GameId.to_sql('NBA_SCHEDULE_GAME_DATE',conn, if_exists='replace', index=False)