In [11]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import hvplot.pandas
from pathlib import Path

In [12]:
# Pull NBA draft CSV
draft_path = Path("Resources/cleaned_draft_data.csv")
draft_data = pd.read_csv(draft_path)
draft_data.head()

Unnamed: 0,_id,Name,Month,Day,Year,Position,College,Draft Year,Draft Round,Draft Pick,Career Games Played,Career Points Per Game,Total Career Points,Birth City,Birth State/Country
0,abdelal01,Alaa Abdelnaby,June,24,1968,Power Forward,Duke University,1990,1st round,25th overall,256,5.7,1459,Cairo,Egypt
1,abdulma02,Mahmoud Abdul-Rauf,March,9,1969,Point Guard,Louisiana State University,1990,1st round,3rd overall,586,14.6,8555,Gulfport,Mississippi
2,abdulta01,Tariq Abdul-Wahad,November,3,1974,Shooting Guard,"University of Michigan, San Jose State University",1997,1st round,11th overall,236,7.8,1840,Maisons Alfort,France
3,abdursh01,Shareef Abdur-Rahim,December,11,1976,Center and Small Forward and Power Forward,University of California,1996,1st round,3rd overall,830,18.1,15023,Marietta,Georgia
4,abrinal01,Alex Abrines,August,1,1993,Shooting Guard,No college,2013,2nd round,32nd overall,174,5.3,922,Palma de Mallorca,Spain


In [13]:
# Pull NBA salary CSV
salaries_path = Path("Resources/cleaned_salary_data.csv")
salaries = pd.read_csv(salaries_path)
salaries.head()

Unnamed: 0,_id,Total Earnings
0,abdelal01,2844000
1,abdulka01,8560000
2,abdulma02,19849500
3,abdulta01,37982800
4,abdursh01,95866000


In [14]:
# Merge data
full_data = pd.merge(draft_data, salaries, on="_id")
full_data.head()

Unnamed: 0,_id,Name,Month,Day,Year,Position,College,Draft Year,Draft Round,Draft Pick,Career Games Played,Career Points Per Game,Total Career Points,Birth City,Birth State/Country,Total Earnings
0,abdelal01,Alaa Abdelnaby,June,24,1968,Power Forward,Duke University,1990,1st round,25th overall,256,5.7,1459,Cairo,Egypt,2844000
1,abdulma02,Mahmoud Abdul-Rauf,March,9,1969,Point Guard,Louisiana State University,1990,1st round,3rd overall,586,14.6,8555,Gulfport,Mississippi,19849500
2,abdulta01,Tariq Abdul-Wahad,November,3,1974,Shooting Guard,"University of Michigan, San Jose State University",1997,1st round,11th overall,236,7.8,1840,Maisons Alfort,France,37982800
3,abdursh01,Shareef Abdur-Rahim,December,11,1976,Center and Small Forward and Power Forward,University of California,1996,1st round,3rd overall,830,18.1,15023,Marietta,Georgia,95866000
4,abrinal01,Alex Abrines,August,1,1993,Shooting Guard,No college,2013,2nd round,32nd overall,174,5.3,922,Palma de Mallorca,Spain,11719764


In [15]:
# Print total number of rows
full_data.count()

_id                       1535
Name                      1535
Month                     1535
Day                       1535
Year                      1535
Position                  1535
College                   1535
Draft Year                1535
Draft Round               1535
Draft Pick                1535
Career Games Played       1535
Career Points Per Game    1535
Total Career Points       1535
Birth City                1535
Birth State/Country       1532
Total Earnings            1535
dtype: int64

# What university produces the most NBA players? Highest career earnings? Most #1 draft picks

In [16]:
uni_data = full_data.groupby("College")
most_players = uni_data.size().sort_values(ascending=False).to_frame().rename(columns={0: "Number of players produced"})
display(most_players.head())

Unnamed: 0_level_0,Number of players produced
College,Unnamed: 1_level_1
No college,210
University of Kentucky,44
Duke University,42
"University of California, Los Angeles",40
University of North Carolina,38


# What birth month produces the most NBA players?  Most #1 draft picks?

In [17]:
birth_months_grouped = full_data.groupby(["Month"]).size()
birth_months_grouped

Month
April        120
August       110
December     108
February     156
January      118
July         126
June         128
March        144
May          143
November     119
October      128
September    135
dtype: int64

In [19]:
most_first_overall = full_data[full_data["Draft Pick"] == "1st overall"]
first_overall_grouped = most_first_overall.groupby(["Month"]).size()
first_overall_grouped

Month
April        3
August       1
December     3
February     1
January      2
July         2
June         2
March        9
May          2
November     2
October      3
September    2
dtype: int64