In [1]:
#Import Dependencies

import pandas as pd
import numpy as np
from datetime import datetime
import sqlalchemy
import requests
import json

from config import gkey


In [2]:
#Read in csvs

players = pd.read_csv('players.csv')
salaries = pd.read_csv('salaries.csv')

In [3]:
#Merge Players and Salaries on (_id and player_id)

players = pd.merge(players, salaries, left_on = '_id', right_on = 'player_id')

In [4]:
#Choose an appropriate format for useful fields

players['draft_year'] = players['draft_year'].astype(float)
players['position'] = players['position'].astype(str)
players['player_id'] = players['player_id'].astype(str)
players['college'] = players['college'].astype(str)

In [5]:
#limit data to 2000 and beyond

players = players[players.draft_year >= 2000]

In [6]:
# Select columns for dataframe

players_cols = ['player_id','birthDate','draft_year','name','position',
                'college','salary','season_end','season_start','team']

In [7]:
# Applied to dataframe

players = players[players_cols]

In [8]:
players.dtypes

player_id        object
birthDate        object
draft_year      float64
name             object
position         object
college          object
salary            int64
season_end        int64
season_start      int64
team             object
dtype: object

In [9]:
#Find max salary by id and save to idx

idx = players.groupby(['player_id'])['salary'].transform(max) == players['salary']

In [10]:
# Applied idx to dataframe

players = players[idx]
players.reset_index()

Unnamed: 0,index,player_id,birthDate,draft_year,name,position,college,salary,season_end,season_start,team
0,39,abrinal01,"August 1, 1993",2013.0,Alex Abrines,Shooting Guard,,5994764,2017,2016,Oklahoma City Thunder
1,42,ackeral01,"January 21, 1983",2005.0,Alex Acker,Shooting Guard,Pepperdine University,839209,2007,2006,Detroit Pistons
2,53,acyqu01,"October 6, 1990",2012.0,Quincy Acy,Power Forward and Small Forward,Baylor University,1914544,2017,2016,Brooklyn Nets
3,59,adamsha01,"June 20, 1984",2006.0,Hassan Adams,Shooting Guard,University of Arizona,412718,2007,2006,New Jersey Nets
4,63,adamsjo01,"July 8, 1994",2014.0,Jordan Adams,Shooting Guard,"University of California, Los Angeles",1465080,2017,2016,Memphis Grizzlies
...,...,...,...,...,...,...,...,...,...,...,...
978,14147,zellety01,"January 17, 1990",2012.0,Tyler Zeller,Center,University of North Carolina,8000000,2017,2016,Boston Celtics
979,14157,zimmest01,"September 9, 1996",2016.0,Stephen Zimmerman,Center,"University of Nevada, Las Vegas",950000,2017,2016,Orlando Magic
980,14159,zipsepa01,"February 18, 1994",2016.0,Paul Zipser,Small Forward,,1312611,2018,2017,Chicago Bulls
981,14160,zizican01,"January 4, 1997",2016.0,Ante Zizic,Center,,1645200,2018,2017,Cleveland Cavaliers


In [11]:
players[50:100]

Unnamed: 0,player_id,birthDate,draft_year,name,position,college,salary,season_end,season_start,team
713,balllo01,"October 27, 1997",2017.0,Lonzo Ball,Point Guard,"University of California, Los Angeles",6286560,2018,2017,Los Angeles Lakers
725,banksma01,"November 19, 1981",2003.0,Marcus Banks,Point Guard,"University of Nevada, Las Vegas",4847586,2011,2010,New Orleans Hornets
738,barbole01,"November 28, 1982",2003.0,Leandro Barbosa,Point Guard and Shooting Guard,,7600000,2012,2011,Toronto Raptors
765,bargnan01,"October 26, 1985",2006.0,Andrea Bargnani,Power Forward and Center,,11862500,2014,2013,New York Knicks
784,barkler01,"February 21, 1978",2000.0,Erick Barkley,Point Guard,St. John's University,712200,2001,2000,Portland Trail Blazers
792,barneha02,"May 30, 1992",2012.0,Harrison Barnes,Power Forward and Small Forward,University of North Carolina,23112004,2018,2017,Dallas Mavericks
806,barnema02,"March 9, 1980",2002.0,Matt Barnes,Small Forward and Power Forward,"University of California, Los Angeles",6125000,2017,2016,Sacramento Kings
865,bartowi01,"January 6, 1991",2012.0,Will Barton,Shooting Guard,University of Memphis,3533333,2017,2016,Denver Nuggets
866,bartowi01,"January 6, 1991",2012.0,Will Barton,Shooting Guard,University of Memphis,3533333,2018,2017,Denver Nuggets
877,bassbr01,"April 30, 1985",2005.0,Brandon Bass,Power Forward and Center,Louisiana State University,6900000,2015,2014,Boston Celtics


In [15]:
unique_college= []
for college1 in players.college:
    college1 = college1.split(",")
    if len(college1) > 1:
        college_new = college1[1]
        if len(college_new) > 1:
             print(college_new)
    else:
        college_new = college1[0]
    
    unique_college.append(college_new)

 Los Angeles
 Los Angeles
 California State University
 Oklahoma State University
 Los Angeles
 Los Angeles
 Los Angeles
 Reno
 Los Angeles
 Las Vegas
 Los Angeles
 University of North Dakota
 Las Vegas
 Villanova University
 University of Texas at El Paso
 University of Missouri
 University of Missouri
 Georgia Institute of Technology
 Los Angeles
 Xavier University
 Gonzaga University
 University of Idaho
 University of Houston
 Florida State University
 Fresno
 Long Beach
 Los Angeles
 Reno
 Manhattan College
 Los Angeles
 Syracuse University
 Fresno
 University of New Mexico
 Oklahoma State University
 University of New Mexico
 University of Louisiana at Lafayette
 Louisiana State University
 Brigham Young University
 University of Georgia
 Fresno
 Los Angeles
 Los Angeles
 Los Angeles
 Duke University
 University of Notre Dame
 California State University
 Reno
 University of California
 Syracuse University
 Duke University
 Los Angeles
 Los Angeles
 Los Angeles
 Los Angeles
 Los 

In [16]:
players['college'] = unique_college
merged_duplicates = players[players.duplicated()]
print("Duplicate Rows except first occurrence based on all columns are :")
print(merged_duplicates)

# Select duplicate rows except first occurrence based on all columns
merged_duplicates = players[players.duplicated()]

Duplicate Rows except first occurrence based on all columns are :
Empty DataFrame
Columns: [player_id, birthDate, draft_year, name, position, college, salary, season_end, season_start, team]
Index: []


In [17]:
players['college'] = players['college'].str.replace('Los Angeles','University of California')
players['college'] = players['college'].str.replace('Fresno','University of California')
players['college'] = players['college'].str.replace('Long Beach','University of California')
players['college'] = players['college'].str.replace('Fullerton','University of California')
players['college'] = players['college'].str.replace('Reno','University of Nevada')
players['college'] = players['college'].str.replace('Las Vegas','University of Nevada')
players['college'] = players['college'].str.replace('nan','')

In [18]:
players.drop(players.loc[players["college"] == ""].index, inplace=True)

In [19]:
players = players.dropna(subset=['college'])
players.head(1000)

Unnamed: 0,player_id,birthDate,draft_year,name,position,college,salary,season_end,season_start,team
42,ackeral01,"January 21, 1983",2005.0,Alex Acker,Shooting Guard,Pepperdine University,839209,2007,2006,Detroit Pistons
53,acyqu01,"October 6, 1990",2012.0,Quincy Acy,Power Forward and Small Forward,Baylor University,1914544,2017,2016,Brooklyn Nets
59,adamsha01,"June 20, 1984",2006.0,Hassan Adams,Shooting Guard,University of Arizona,412718,2007,2006,New Jersey Nets
63,adamsjo01,"July 8, 1994",2014.0,Jordan Adams,Shooting Guard,University of California,1465080,2017,2016,Memphis Grizzlies
76,adamsst01,"July 20, 1993",2013.0,Steven Adams,Center,University of Pittsburgh,22471910,2018,2017,Oklahoma City Thunder
...,...,...,...,...,...,...,...,...,...,...
14122,youngsa01,"June 1, 1985",2009.0,Sam Young,Shooting Guard and Small Forward,University of Pittsburgh,947800,2012,2011,Memphis Grizzlies
14134,youngth01,"June 21, 1988",2007.0,Thaddeus Young,Power Forward,Georgia Institute of Technology,14796348,2018,2017,Indiana Pacers
14141,zelleco01,"October 5, 1992",2013.0,Cody Zeller,Center,Indiana University,12584270,2018,2017,Charlotte Hornets
14147,zellety01,"January 17, 1990",2012.0,Tyler Zeller,Center,University of North Carolina,8000000,2017,2016,Boston Celtics


In [20]:
NBADF = pd.DataFrame(players)
NBADF['college']

42                 Pepperdine University
53                     Baylor University
59                 University of Arizona
63              University of California
76              University of Pittsburgh
                      ...               
14122           University of Pittsburgh
14134    Georgia Institute of Technology
14141                 Indiana University
14147       University of North Carolina
14157               University of Nevada
Name: college, Length: 762, dtype: object

In [21]:
NBADF['college'][50:100]

950                          University of Arizona
970                          University of Florida
972                     University of North Dakota
974                       Florida State University
978                        Kansas State University
1019                          University of Oregon
1034                                Boston College
1036                     Saint Joseph's University
1064                          University of Nevada
1085                            Oakland University
1090                            Providence College
1111                        University of Arkansas
1204                      University of Pittsburgh
1205                      University of Pittsburgh
1212                        University of Maryland
1219                         Iowa State University
1254                        University of Kentucky
1288                        University of Kentucky
1318                            University of Utah
1351                         Un

In [22]:
NBADF.head(1000)

Unnamed: 0,player_id,birthDate,draft_year,name,position,college,salary,season_end,season_start,team
42,ackeral01,"January 21, 1983",2005.0,Alex Acker,Shooting Guard,Pepperdine University,839209,2007,2006,Detroit Pistons
53,acyqu01,"October 6, 1990",2012.0,Quincy Acy,Power Forward and Small Forward,Baylor University,1914544,2017,2016,Brooklyn Nets
59,adamsha01,"June 20, 1984",2006.0,Hassan Adams,Shooting Guard,University of Arizona,412718,2007,2006,New Jersey Nets
63,adamsjo01,"July 8, 1994",2014.0,Jordan Adams,Shooting Guard,University of California,1465080,2017,2016,Memphis Grizzlies
76,adamsst01,"July 20, 1993",2013.0,Steven Adams,Center,University of Pittsburgh,22471910,2018,2017,Oklahoma City Thunder
...,...,...,...,...,...,...,...,...,...,...
14122,youngsa01,"June 1, 1985",2009.0,Sam Young,Shooting Guard and Small Forward,University of Pittsburgh,947800,2012,2011,Memphis Grizzlies
14134,youngth01,"June 21, 1988",2007.0,Thaddeus Young,Power Forward,Georgia Institute of Technology,14796348,2018,2017,Indiana Pacers
14141,zelleco01,"October 5, 1992",2013.0,Cody Zeller,Center,Indiana University,12584270,2018,2017,Charlotte Hornets
14147,zellety01,"January 17, 1990",2012.0,Tyler Zeller,Center,University of North Carolina,8000000,2017,2016,Boston Celtics


In [23]:
#list of colleges
colleges = NBADF["college"]

# Build the endpoint URL
NewDF = pd.DataFrame()
for college in colleges:
    target_url = ('https://maps.googleapis.com/maps/api/geocode/json?'
    'address={0}&key={1}').format(college, gkey)
    geo_data = requests.get(target_url).json()
    lat = geo_data["results"][0]["geometry"]["location"]["lat"]
    lng = geo_data["results"][0]["geometry"]["location"]["lng"]
    coll_coord = {"college": college, "lat": lat, "long": lng}
    print(coll_coord)
    NewDF = NewDF.append(coll_coord, ignore_index=True)

    

{'college': 'Pepperdine University', 'lat': 34.0414045, 'long': -118.7095814}
{'college': 'Baylor University', 'lat': 31.5469132, 'long': -97.1210998}
{'college': 'University of Arizona', 'lat': 32.2318851, 'long': -110.9501094}
{'college': ' University of California', 'lat': 37.8718992, 'long': -122.2585399}
{'college': 'University of Pittsburgh', 'lat': 40.4443533, 'long': -79.960835}
{'college': 'University of Kentucky', 'lat': 38.0306511, 'long': -84.5039697}
{'college': ' University of California', 'lat': 37.8718992, 'long': -122.2585399}
{'college': 'Michigan State University', 'lat': 42.701848, 'long': -84.4821719}
{'college': 'Florida State University', 'lat': 30.4418778, 'long': -84.2984889}
{'college': 'University of Kansas', 'lat': 38.9543439, 'long': -95.2557961}
{'college': 'University of Texas at Austin', 'lat': 30.2849185, 'long': -97.7340567}
{'college': ' California State University', 'lat': 34.0667698, 'long': -118.1684392}
{'college': 'West Virginia University', 'lat

{'college': 'University of Arizona', 'lat': 32.2318851, 'long': -110.9501094}
{'college': 'University of North Carolina', 'lat': 35.9049122, 'long': -79.0469134}
{'college': 'University of Michigan', 'lat': 42.2780436, 'long': -83.7382241}
{'college': 'University of Colorado', 'lat': 40.00758099999999, 'long': -105.2659417}
{'college': 'University of Memphis', 'lat': 35.1187498, 'long': -89.9374928}
{'college': 'University of Connecticut', 'lat': 41.8077414, 'long': -72.2539805}
{'college': 'Marquette University', 'lat': 43.0388337, 'long': -87.92856669999999}
{'college': 'La Salle University', 'lat': 40.0386037, 'long': -75.1566354}
{'college': 'University of Florida', 'lat': 29.6436325, 'long': -82.3549302}
{'college': 'University of Georgia', 'lat': 33.9480053, 'long': -83.3773221}
{'college': 'Murray State University', 'lat': 36.6163842, 'long': -88.3214979}
{'college': ' University of Texas at El Paso', 'lat': 31.7709368, 'long': -106.5046405}
{'college': 'Purdue University', 'lat

{'college': 'Western Kentucky University', 'lat': 36.983537, 'long': -86.4573752}
{'college': 'University of Memphis', 'lat': 35.1187498, 'long': -89.9374928}
{'college': 'Duke University', 'lat': 36.0014258, 'long': -78.9382286}
{'college': 'Vanderbilt University', 'lat': 36.1447034, 'long': -86.8026551}
{'college': 'Morehead State University', 'lat': 38.1886162, 'long': -83.4315122}
{'college': ' University of California', 'lat': 37.8718992, 'long': -122.2585399}
{'college': 'Georgia Institute of Technology', 'lat': 33.7756178, 'long': -84.39628499999999}
{'college': ' University of Nevada', 'lat': 39.5441917, 'long': -119.816397}
{'college': 'Arizona State University', 'lat': 33.4242399, 'long': -111.9280527}
{'college': 'University of North Carolina', 'lat': 35.9049122, 'long': -79.0469134}
{'college': 'Stanford University', 'lat': 37.4274745, 'long': -122.169719}
{'college': 'Stanford University', 'lat': 37.4274745, 'long': -122.169719}
{'college': 'Stanford University', 'lat': 37

{'college': 'Butler University', 'lat': 39.8405491, 'long': -86.1708927}
{'college': 'Marquette University', 'lat': 43.0388337, 'long': -87.92856669999999}
{'college': 'University of North Carolina', 'lat': 35.9049122, 'long': -79.0469134}
{'college': 'University of Illinois at Urbana-Champaign', 'lat': 40.1019523, 'long': -88.2271615}
{'college': 'Duke University', 'lat': 36.0014258, 'long': -78.9382286}
{'college': 'University of Kansas', 'lat': 38.9543439, 'long': -95.2557961}
{'college': 'University of North Carolina', 'lat': 35.9049122, 'long': -79.0469134}
{'college': 'Georgetown University', 'lat': 38.9076089, 'long': -77.07225849999999}
{'college': 'North Carolina State University', 'lat': 35.7846633, 'long': -78.6820946}
{'college': 'University of Oklahoma', 'lat': 35.2058936, 'long': -97.4457137}
{'college': 'Indiana University-Purdue University Indianapolis', 'lat': 39.7743174, 'long': -86.1764194}
{'college': 'University of Arizona', 'lat': 32.2318851, 'long': -110.9501094}

{'college': 'University of Kentucky', 'lat': 38.0306511, 'long': -84.5039697}
{'college': 'University of Connecticut', 'lat': 41.8077414, 'long': -72.2539805}
{'college': 'University of California', 'lat': 37.8718992, 'long': -122.2585399}
{'college': 'Purdue University', 'lat': 40.4237054, 'long': -86.92119459999999}
{'college': 'Vanderbilt University', 'lat': 36.1447034, 'long': -86.8026551}
{'college': 'University of Miami', 'lat': 25.7178924, 'long': -80.2746368}
{'college': 'University of Massachusetts Amherst', 'lat': 42.3867598, 'long': -72.5300515}
{'college': ' University of California', 'lat': 37.8718992, 'long': -122.2585399}
{'college': 'Texas A&M University', 'lat': 30.6090918, 'long': -96.3337124}
{'college': 'Georgia Institute of Technology', 'lat': 33.7756178, 'long': -84.39628499999999}
{'college': 'University of North Carolina', 'lat': 35.9049122, 'long': -79.0469134}
{'college': 'University of Maryland', 'lat': 38.9869183, 'long': -76.9425543}
{'college': ' Universit

{'college': ' Shaw University', 'lat': 35.771043, 'long': -78.6379262}
{'college': 'Bucknell University', 'lat': 40.9547722, 'long': -76.88507589999999}
{'college': 'Rutgers University', 'lat': 40.5008186, 'long': -74.44739910000001}
{'college': 'Auburn University', 'lat': 32.5933574, 'long': -85.4951663}
{'college': ' Iowa State University', 'lat': 42.0266187, 'long': -93.6464654}
{'college': 'University of Oklahoma', 'lat': 35.2058936, 'long': -97.4457137}
{'college': 'University of Wyoming', 'lat': 41.3148754, 'long': -105.5665744}
{'college': 'University of Connecticut', 'lat': 41.8077414, 'long': -72.2539805}
{'college': "Saint Joseph's University", 'lat': 39.9951217, 'long': -75.23993279999999}
{'college': "Saint Joseph's University", 'lat': 39.9951217, 'long': -75.23993279999999}
{'college': 'Syracuse University', 'lat': 43.0391534, 'long': -76.1351158}
{'college': 'St. Bonaventure University', 'lat': 42.0794875, 'long': -78.48427029999999}
{'college': 'University of Florida', '

{'college': 'New Mexico State University', 'lat': 32.2787745, 'long': -106.7479059}
{'college': 'University of Kansas', 'lat': 38.9543439, 'long': -95.2557961}
{'college': 'Louisiana State University', 'lat': 30.4132579, 'long': -91.1800023}
{'college': 'DePaul University', 'lat': 41.9247559, 'long': -87.6566465}
{'college': 'North Carolina State University', 'lat': 35.7846633, 'long': -78.6820946}
{'college': 'Duke University', 'lat': 36.0014258, 'long': -78.9382286}
{'college': 'University of Virginia', 'lat': 38.0335529, 'long': -78.5079772}
{'college': 'Florida State University', 'lat': 30.4418778, 'long': -84.2984889}
{'college': 'University of Louisville', 'lat': 38.2122761, 'long': -85.75850229999999}
{'college': 'Marshall University', 'lat': 38.4235252, 'long': -82.42641449999999}
{'college': 'Oklahoma State University', 'lat': 36.1270236, 'long': -97.07372219999999}
{'college': 'Boston College', 'lat': 42.3355488, 'long': -71.16849450000001}
{'college': 'Boston College', 'lat'

{'college': ' University of California', 'lat': 37.8718992, 'long': -122.2585399}
{'college': ' University of Cincinnati', 'lat': 39.1329219, 'long': -84.51495039999999}
{'college': 'Indiana University', 'lat': 39.1754487, 'long': -86.512627}
{'college': 'University of Colorado', 'lat': 40.00758099999999, 'long': -105.2659417}
{'college': ' University of Cincinnati', 'lat': 39.1329219, 'long': -84.51495039999999}
{'college': 'University of North Carolina at Charlotte', 'lat': 35.3070929, 'long': -80.735164}
{'college': 'Iowa State University', 'lat': 42.0266187, 'long': -93.6464654}
{'college': 'Seton Hall University', 'lat': 40.7433773, 'long': -74.2465446}
{'college': 'Marshall University', 'lat': 38.4235252, 'long': -82.42641449999999}
{'college': 'University of Kansas', 'lat': 38.9543439, 'long': -95.2557961}
{'college': 'University of Maryland', 'lat': 38.9869183, 'long': -76.9425543}
{'college': 'University of Illinois at Urbana-Champaign', 'lat': 40.1019523, 'long': -88.2271615}

In [24]:
NewDF['college'] = NewDF.replace(" ", "")

In [25]:
NewDF.head(1000)

Unnamed: 0,college,lat,long
0,Pepperdine University,34.041404,-118.709581
1,Baylor University,31.546913,-97.121100
2,University of Arizona,32.231885,-110.950109
3,University of California,37.871899,-122.258540
4,University of Pittsburgh,40.444353,-79.960835
...,...,...,...
757,University of Pittsburgh,40.444353,-79.960835
758,Georgia Institute of Technology,33.775618,-84.396285
759,Indiana University,39.175449,-86.512627
760,University of North Carolina,35.904912,-79.046913


In [26]:
merged = pd.merge(players, NewDF, left_on = 'college', right_on = 'college', how = 'left')

In [27]:
merged = merged.drop_duplicates(['player_id'], keep = "last")

In [28]:
new_position= []
for position in merged.position:
    position = position.split(" ")
    if len(position) > 1:
        position_clear = position[0] + " " + position[1]
        
    else:
        position_clear = position[0]
    new_position.append(position_clear)  

In [29]:
merged['position'] = new_position
merged_duplicates = merged[merged.duplicated()]
print("Duplicate Rows except first occurrence based on all columns are :")
print(merged_duplicates)

# Select duplicate rows except first occurrence based on all columns
merged_duplicates = merged[merged.duplicated()]

Duplicate Rows except first occurrence based on all columns are :
Empty DataFrame
Columns: [player_id, birthDate, draft_year, name, position, college, salary, season_end, season_start, team, lat, long]
Index: []


In [30]:
merged['position'].replace('Center and','Center',inplace=True)

In [31]:
college_sp = merged["college"]
college_stri = []
for college in college_sp:
    college_stri.append(college.strip())

In [32]:
merged["college"] = college_stri

In [33]:
merged = merged.set_index('player_id')

In [34]:
merged = merged.dropna()

In [35]:
# Export to csv

merged.to_csv('NBAdataset.csv')

In [None]:
# Export to json
merged.reset_index().to_json('NBAdataJSON.json', orient = 'records')

In [None]:
# SQL upload

database_username = input('ENTER USERNAME')
database_password = input('ENTER PASSWORD')
database_name     = 'NBAdataset'
database_connection = sqlalchemy.create_engine('postgresql://{0}:{1}@localhost:5432/{2}'.format(database_username, database_password, database_name))
merged.to_sql(con=database_connection, name='NBAdataset', if_exists='append')

**----------END OF CODE----------**