In [17]:
#Import dependencies
import pandas as pd
import requests
from pprint import pprint
from config import api_key

In [18]:
#Save API url
url = "https://api-nba-v1.p.rapidapi.com/players/"

#Query for players in the USA
querystring = {"country": "USA"}

#Request headers; the API key is imported from the local config module
#so that it is not hard-coded in the notebook
headers = {
    "X-RapidAPI-Key": api_key,
    "X-RapidAPI-Host": "api-nba-v1.p.rapidapi.com",
}

#Send the request; without a timeout requests waits indefinitely for a server that never answers
http_response = requests.get(url, headers=headers, params=querystring, timeout=30)

#Raise on a 4xx/5xx status instead of trying to parse an error body as player data
http_response.raise_for_status()

#Parse the JSON body into a Python dict; later cells read the player list from response['response']
response = http_response.json()

#pretty print response
pprint(response)

{'errors': [],
 'get': 'players/',
 'parameters': {'country': 'USA'},
 'response': [{'affiliation': 'Baylor/USA',
               'birth': {'country': 'USA', 'date': '1990-10-06'},
               'college': 'Baylor',
               'firstname': 'Quincy',
               'height': {'feets': '6', 'inches': '7', 'meters': '2.01'},
               'id': 2,
               'lastname': 'Acy',
               'leagues': {'standard': {'active': True,
                                        'jersey': 4,
                                        'pos': 'F'}},
               'nba': {'pro': 6, 'start': 2012},
               'weight': {'kilograms': '108.9', 'pounds': '240'}},
              {'affiliation': 'Texas-Austin/USA',
               'birth': {'country': 'USA', 'date': '1985-07-19'},
               'college': 'Texas-Austin',
               'firstname': 'LaMarcus',
               'height': {'feets': '6', 'inches': '11', 'meters': '2.11'},
               'id': 8,
               'lastname': 'Aldridge',

In [19]:
#pull the list of player records out of the API payload
players_data = response['response']

#copy every player dictionary, in order, into a new list
players = [record for record in players_data]

#Displays first player entry
players[0]

{'id': 2,
 'firstname': 'Quincy',
 'lastname': 'Acy',
 'birth': {'date': '1990-10-06', 'country': 'USA'},
 'nba': {'start': 2012, 'pro': 6},
 'height': {'feets': '6', 'inches': '7', 'meters': '2.01'},
 'weight': {'pounds': '240', 'kilograms': '108.9'},
 'college': 'Baylor',
 'affiliation': 'Baylor/USA',
 'leagues': {'standard': {'jersey': 4, 'active': True, 'pos': 'F'}}}

In [20]:
#Build a dataframe with one row per player dictionary;
#nested dictionaries (birth, height, ...) are kept as-is inside their cells
players_df = pd.DataFrame(data=players)

#Display first 5 rows of the df
players_df.head()

Unnamed: 0,id,firstname,lastname,birth,nba,height,weight,college,affiliation,leagues
0,2,Quincy,Acy,"{'date': '1990-10-06', 'country': 'USA'}","{'start': 2012, 'pro': 6}","{'feets': '6', 'inches': '7', 'meters': '2.01'}","{'pounds': '240', 'kilograms': '108.9'}",Baylor,Baylor/USA,"{'standard': {'jersey': 4, 'active': True, 'po..."
1,8,LaMarcus,Aldridge,"{'date': '1985-07-19', 'country': 'USA'}","{'start': 2006, 'pro': 15}","{'feets': '6', 'inches': '11', 'meters': '2.11'}","{'pounds': '250', 'kilograms': '113.4'}",Texas-Austin,Texas-Austin/USA,"{'standard': {'jersey': 21, 'active': True, 'p..."
2,11,Tony,Allen,"{'date': '1982-01-11', 'country': 'USA'}","{'start': 2004, 'pro': 12}","{'feets': '6', 'inches': '4', 'meters': '1.93'}","{'pounds': '213', 'kilograms': '96.6'}",Oklahoma State,Oklahoma State/USA,"{'standard': {'jersey': 9, 'active': False, 'p..."
3,13,Lou,Amundson,"{'date': '1982-12-07', 'country': 'USA'}","{'start': 2006, 'pro': 9}","{'feets': '6', 'inches': '9', 'meters': '2.06'}","{'pounds': '220', 'kilograms': '99.8'}",Nevada-Las Vegas,,"{'standard': {'jersey': 17, 'active': False, '..."
4,14,Chris,Andersen,"{'date': '1978-07-07', 'country': 'USA'}","{'start': 2001, 'pro': 13}","{'feets': '6', 'inches': '10', 'meters': '2.08'}","{'pounds': '245', 'kilograms': '111.1'}",Blinn,,"{'standard': {'jersey': None, 'active': False,..."


In [21]:
#Check column types: apart from the integer id, every column is reported as the
#generic object dtype, including the columns whose values are dictionaries
players_df.dtypes

id              int64
firstname      object
lastname       object
birth          object
nba            object
height         object
weight         object
college        object
affiliation    object
leagues        object
dtype: object

In [22]:
#let pandas choose the best supported dtype for each column (text becomes string,
#int64 becomes nullable Int64); columns holding dictionaries remain object
players_df = players_df.convert_dtypes()

#recheck column types
players_df.dtypes


id              Int64
firstname      string
lastname       string
birth          object
nba            object
height         object
weight         object
college        string
affiliation    string
leagues        object
dtype: object

In [23]:
#create a function to identify columns that are object type
def is_array_of_dicts(column_name):
    """Return the names of the object-dtype columns of a dataframe.

    Parameters
    ----------
    column_name : pandas.DataFrame or any other value
        When a DataFrame is passed, that frame is inspected.
        Any other value (for example a column Index or a single Series, as the
        existing notebook cells pass) is ignored and the module-level
        ``players_df`` is inspected instead, which is what this function has
        always done regardless of its argument.

    Returns
    -------
    list
        Column labels, in dataframe order, whose dtype equals 'object'.

    Notes
    -----
    Only the dtype is checked; the values are not verified to be dictionaries.
    """
    #honor an explicitly supplied dataframe, otherwise keep the legacy global lookup
    frame = column_name if isinstance(column_name, pd.DataFrame) else players_df

    #a distinct loop name is used so the parameter is no longer shadowed
    return [label for label in frame.columns if frame[label].dtype == 'object']

#create a function to split the dictionaries in the object columns
def split_and_concat_columns(df, columns_to_split):
    """Expand dictionary-valued columns into one column per dictionary key.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns to expand. It is not modified; a new
        frame is returned.
    columns_to_split : iterable
        Labels of the columns whose values should be expanded.

    Returns
    -------
    pandas.DataFrame
        ``df`` with each listed column removed and its expanded columns
        appended on the right, in the order the columns were listed.

    Notes
    -----
    Each value is expanded with ``pd.Series``, so a value that is not a
    dictionary (such as a missing entry) ends up in a column labelled ``0``.
    Keys shared by several expanded columns produce duplicate column labels.
    """
    #loop through the given columns
    for name in columns_to_split:

        #one new column per dictionary key, aligned on the original row index
        expanded = df[name].apply(pd.Series)

        #remove the source column BEFORE appending, so that an expanded column
        #whose key equals the source column's name is not dropped along with it
        df = pd.concat([df.drop(name, axis=1), expanded], axis=1)

    #returns the new dataframe
    return df

#find the object-typed columns and expand each of them into separate columns
players_df = split_and_concat_columns(
    df=players_df,
    columns_to_split=is_array_of_dicts(column_name=players_df.columns),
)

#display sample from dataframe
players_df.head()


Unnamed: 0,id,firstname,lastname,college,affiliation,date,country,start,pro,feets,inches,meters,pounds,kilograms,standard,vegas,africa,sacramento
0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,6,7,2.01,240,108.9,"{'jersey': 4, 'active': True, 'pos': 'F'}",,,
1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,6,11,2.11,250,113.4,"{'jersey': 21, 'active': True, 'pos': 'C-F'}",,,
2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,6,4,1.93,213,96.6,"{'jersey': 9, 'active': False, 'pos': 'G-F'}",,,
3,13,Lou,Amundson,Nevada-Las Vegas,,1982-12-07,USA,2006,9,6,9,2.06,220,99.8,"{'jersey': 17, 'active': False, 'pos': 'F'}",,,
4,14,Chris,Andersen,Blinn,,1978-07-07,USA,2001,13,6,10,2.08,245,111.1,"{'jersey': None, 'active': False, 'pos': 'F-C'}",,,


In [24]:
#remove the expanded columns that are not needed: the metric measurements
#and the league columns other than 'standard'
players_df = players_df.drop(
    columns=['meters', 'kilograms', 'vegas', 'africa', 'sacramento']
)

#recheck column types
print(players_df.dtypes)

#display new df
players_df.head()

id              Int64
firstname      string
lastname       string
college        string
affiliation    string
date           object
country        object
start           int64
pro             int64
feets          object
inches         object
pounds         object
standard       object
dtype: object


Unnamed: 0,id,firstname,lastname,college,affiliation,date,country,start,pro,feets,inches,pounds,standard
0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,6,7,240,"{'jersey': 4, 'active': True, 'pos': 'F'}"
1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,6,11,250,"{'jersey': 21, 'active': True, 'pos': 'C-F'}"
2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,6,4,213,"{'jersey': 9, 'active': False, 'pos': 'G-F'}"
3,13,Lou,Amundson,Nevada-Las Vegas,,1982-12-07,USA,2006,9,6,9,220,"{'jersey': 17, 'active': False, 'pos': 'F'}"
4,14,Chris,Andersen,Blinn,,1978-07-07,USA,2001,13,6,10,245,"{'jersey': None, 'active': False, 'pos': 'F-C'}"


In [25]:
#parse the 'date' strings into datetimes, then convert the remaining
#object columns to more specific dtypes (e.g. string) where possible
players_df = (
    players_df
    .assign(date=pd.to_datetime(players_df['date']))
    .convert_dtypes()
)

#recheck datatypes
print(players_df.dtypes)

#visualize dataframe
players_df.head()

id                      Int64
firstname              string
lastname               string
college                string
affiliation            string
date           datetime64[ns]
country                string
start                   Int64
pro                     Int64
feets                  string
inches                 string
pounds                 string
standard               object
dtype: object


Unnamed: 0,id,firstname,lastname,college,affiliation,date,country,start,pro,feets,inches,pounds,standard
0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,6,7,240,"{'jersey': 4, 'active': True, 'pos': 'F'}"
1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,6,11,250,"{'jersey': 21, 'active': True, 'pos': 'C-F'}"
2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,6,4,213,"{'jersey': 9, 'active': False, 'pos': 'G-F'}"
3,13,Lou,Amundson,Nevada-Las Vegas,,1982-12-07,USA,2006,9,6,9,220,"{'jersey': 17, 'active': False, 'pos': 'F'}"
4,14,Chris,Andersen,Blinn,,1978-07-07,USA,2001,13,6,10,245,"{'jersey': None, 'active': False, 'pos': 'F-C'}"


In [26]:
#split the 'standard' column; the column is named explicitly instead of relying on
#it being the only object-typed column left in the dataframe
players_df = split_and_concat_columns(players_df, ['standard'])

#display dataframe
players_df.head()

Unnamed: 0,id,firstname,lastname,college,affiliation,date,country,start,pro,feets,inches,pounds,jersey,active,pos,0
0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,6,7,240,4.0,True,F,
1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,6,11,250,21.0,True,C-F,
2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,6,4,213,9.0,False,G-F,
3,13,Lou,Amundson,Nevada-Las Vegas,,1982-12-07,USA,2006,9,6,9,220,17.0,False,F,
4,14,Chris,Andersen,Blinn,,1978-07-07,USA,2001,13,6,10,245,,False,F-C,


In [27]:
#drop the stray column labelled 0 left over from splitting 'standard';
#errors='ignore' keeps the cell from raising KeyError on a run where that column was never created
players_df = players_df.drop(columns=[0], errors='ignore')

#display dataframe
players_df.head()

Unnamed: 0,id,firstname,lastname,college,affiliation,date,country,start,pro,feets,inches,pounds,jersey,active,pos
0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,6,7,240,4.0,True,F
1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,6,11,250,21.0,True,C-F
2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,6,4,213,9.0,False,G-F
3,13,Lou,Amundson,Nevada-Las Vegas,,1982-12-07,USA,2006,9,6,9,220,17.0,False,F
4,14,Chris,Andersen,Blinn,,1978-07-07,USA,2001,13,6,10,245,,False,F-C


In [28]:
#keep only the rows with no missing value in any column, then renumber them;
#reset_index stores the previous row labels in a new 'index' column
players_df = players_df.dropna().reset_index()

#recheck column types
print(players_df.dtypes)

#display dataframe
players_df.head()

index                   int64
id                      Int64
firstname              string
lastname               string
college                string
affiliation            string
date           datetime64[ns]
country                string
start                   Int64
pro                     Int64
feets                  string
inches                 string
pounds                 string
jersey                float64
active                 object
pos                    object
dtype: object


Unnamed: 0,index,id,firstname,lastname,college,affiliation,date,country,start,pro,feets,inches,pounds,jersey,active,pos
0,0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,6,7,240,4.0,True,F
1,1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,6,11,250,21.0,True,C-F
2,2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,6,4,213,9.0,False,G-F
3,5,15,Alan,Anderson,Michigan State,Michigan State/USA,1982-10-16,USA,2005,7,6,6,220,9.0,False,F-G
4,7,17,Justin,Anderson,Virginia,Virginia/USA,1993-11-19,USA,2015,5,6,5,231,10.0,True,F-G


In [29]:
#Cast the height, weight and jersey columns to integers in a single pass
players_df = players_df.astype(
    {'feets': int, 'inches': int, 'pounds': int, 'jersey': int}
)

#recheck datatypes
print(players_df.dtypes)
#display sample of dataframe
players_df.head()

index                   int64
id                      Int64
firstname              string
lastname               string
college                string
affiliation            string
date           datetime64[ns]
country                string
start                   Int64
pro                     Int64
feets                   int32
inches                  int32
pounds                  int32
jersey                  int32
active                 object
pos                    object
dtype: object


Unnamed: 0,index,id,firstname,lastname,college,affiliation,date,country,start,pro,feets,inches,pounds,jersey,active,pos
0,0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,6,7,240,4,True,F
1,1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,6,11,250,21,True,C-F
2,2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,6,4,213,9,False,G-F
3,5,15,Alan,Anderson,Michigan State,Michigan State/USA,1982-10-16,USA,2005,7,6,6,220,9,False,F-G
4,7,17,Justin,Anderson,Virginia,Virginia/USA,1993-11-19,USA,2015,5,6,5,231,10,True,F-G


In [30]:
#total height in inches = feet * 12 + inches; the new column is appended on the right
#and the two source columns are removed
players_df = (
    players_df
    .assign(**{'height(in)': (players_df['feets'] * 12) + players_df['inches']})
    .drop(columns=['feets', 'inches'])
)

#display sample of df
players_df.head()

Unnamed: 0,index,id,firstname,lastname,college,affiliation,date,country,start,pro,pounds,jersey,active,pos,height(in)
0,0,2,Quincy,Acy,Baylor,Baylor/USA,1990-10-06,USA,2012,6,240,4,True,F,79
1,1,8,LaMarcus,Aldridge,Texas-Austin,Texas-Austin/USA,1985-07-19,USA,2006,15,250,21,True,C-F,83
2,2,11,Tony,Allen,Oklahoma State,Oklahoma State/USA,1982-01-11,USA,2004,12,213,9,False,G-F,76
3,5,15,Alan,Anderson,Michigan State,Michigan State/USA,1982-10-16,USA,2005,7,220,9,False,F-G,78
4,7,17,Justin,Anderson,Virginia,Virginia/USA,1993-11-19,USA,2015,5,231,10,True,F-G,77


In [31]:
#discard the leftover 'index' column, renumber the rows and use the player id as the index
players_df = (
    players_df
    .drop(columns=['index'])
    .reset_index(drop=True)
    .set_index('id')
)

#write the cleaned table to disk as CSV and as JSON
players_df.to_csv('Player_Personal_Data.csv')
players_df.to_json("Player_Personal_data.json")

In [32]:
#swap rows and columns of the cleaned table and export that layout to JSON
players_df_t = players_df.T
players_df_t.to_json("Player_Personal_data_t.json")