# How Player Height Affects the NBA
### Analysis Team
*Lindsay Reynolds, Nick Sheets*

In [1]:
%matplotlib notebook

In [2]:
# Set up dependencies and read in csv files needed
import os
import pandas as pd

import matplotlib.pyplot as plt
import numpy as np

from scipy.stats import stats
from scipy.stats import linregress

In [3]:
# Per-season breakout table written by the analysis notebook:
# one row for each season of each player.
cleaned_player_stats_breakout_path = os.path.join("Output_files", "cleaned_player_stats_breakout.csv")

# Show every float with two decimals so the rendered tables are consistent
pd.set_option("display.float_format", "{:.2f}".format)

# The file has an 'Unnamed: 0' column (presumably the index saved with the CSV);
# it is dropped right after loading.
cleaned_nba_stats_breakout_df = (
    pd.read_csv(cleaned_player_stats_breakout_path)
    .drop(columns=["Unnamed: 0"])
)
cleaned_nba_stats_breakout_df

Unnamed: 0,season,player_name,height (in),pts,reb,ast,college,country,team
0,2016-17,AJ Hammons,84.00,2.20,1.60,0.20,Purdue,USA,DAL
1,2017-18,Aaron Brooks,72.00,2.30,0.50,0.60,Oregon,USA,MIN
2,2016-17,Aaron Brooks,72.00,5.00,1.10,1.90,Oregon,USA,IND
3,2018-19,Aaron Gordon,81.00,16.00,7.40,3.70,Arizona,USA,ORL
4,2016-17,Aaron Gordon,81.00,12.70,5.10,1.90,Arizona,USA,ORL
...,...,...,...,...,...,...,...,...,...
1551,2017-18,Zaza Pachulia,83.00,5.40,4.70,1.60,,Georgia,GSW
1552,2018-19,Zaza Pachulia,83.00,3.90,3.90,1.30,,Georgia,DET
1553,2018-19,Zhaire Smith,76.00,6.70,2.20,1.70,Texas Tech,USA,PHI
1554,2017-18,Zhou Qi,85.00,1.20,1.20,0.10,,China,HOU


In [4]:
# Per-player table written by the analysis notebook:
# stats averaged over the seasons each player appears in.
cleaned_player_stats_path = os.path.join("Output_files", "cleaned_player_stats.csv")

# Load the file and discard its 'Unnamed: 0' column
cleaned_nba_stats_df = (
    pd.read_csv(cleaned_player_stats_path)
    .drop(columns=["Unnamed: 0"])
)
cleaned_nba_stats_df

Unnamed: 0,player_name,height (in),pts,reb,ast,college,country,team,Height Range
0,AJ Hammons,84.00,2.20,1.60,0.20,Purdue,USA,DAL,"6'10"" - 7'"
1,Aaron Brooks,72.00,3.65,0.80,1.25,Oregon,USA,MIN,<= 6ft
2,Aaron Gordon,81.00,15.43,6.80,2.63,Arizona,USA,ORL,"6'7"" - 6'9"""
3,Aaron Harrison,78.00,3.45,1.65,0.90,Kentucky,USA,CHA,"6'4"" - 6'6"""
4,Aaron Holiday,73.00,5.90,1.30,1.70,UCLA,USA,IND,"6'1"" - 6'3"""
...,...,...,...,...,...,...,...,...,...
743,Zach Lofton,76.00,0.00,0.00,0.00,New Mexico State,USA,DET,"6'4"" - 6'6"""
744,Zach Randolph,81.00,14.30,7.45,1.95,Michigan State,USA,SAC,"6'7"" - 6'9"""
745,Zaza Pachulia,83.00,5.13,4.83,1.60,,Georgia,GSW,"6'10"" - 7'"
746,Zhaire Smith,76.00,6.70,2.20,1.70,Texas Tech,USA,PHI,"6'4"" - 6'6"""


In [5]:
# Per-player table with salary columns, written by the analysis notebook.
cleaned_player_salaries_path = os.path.join("Output_files", "cleaned_player_salaries.csv")

# Load the file and discard its 'Unnamed: 0' column
cleaned_nba_stats_with_salaries_df = (
    pd.read_csv(cleaned_player_salaries_path)
    .drop(columns=["Unnamed: 0"])
)
cleaned_nba_stats_with_salaries_df

Unnamed: 0,player_name,height (in),pts,reb,ast,college,country,team,2020-21 Salary ($),2020-21 Salary (Millions),Salary Range
0,Aaron Gordon,81.00,15.43,6.80,2.63,Arizona,USA,ORL,18136364.00,18.14,15M to 19.9M
1,Aaron Holiday,73.00,5.90,1.30,1.70,UCLA,USA,IND,2345640.00,2.35,1M to 2.4M
2,Abdel Nader,78.00,3.50,1.70,0.40,Iowa State,Egypt,BOS,1752950.00,1.75,1M to 2.4M
3,Al Horford,82.00,13.50,6.97,4.63,Florida,Dominican Republic,BOS,27500000.00,27.50,> 20M
4,Al-Farouq Aminu,81.00,9.13,7.50,1.37,Wake Forest,USA,POR,9720900.00,9.72,7.5M to 9.9M
...,...,...,...,...,...,...,...,...,...,...,...
319,Willie Cauley-Stein,84.00,10.93,6.63,1.97,Kentucky,USA,SAC,4000000.00,4.00,2.5M to 4.9M
320,Yuta Watanabe,81.00,2.60,2.10,0.50,George Washington,Japan,MEM,321893.00,0.32,< 1M
321,Zach Collins,84.00,5.50,3.75,0.85,Gonzaga,USA,POR,5406255.00,5.41,5M to 7.4M
322,Zach LaVine,77.00,19.77,4.00,3.50,UCLA,USA,MIN,19500000.00,19.50,15M to 19.9M


In [6]:
# Per-player averages across the three seasons, written by the data cleanup notebook.
player_stats_path = os.path.join("Output_files_for_analysis", "player_stats.csv")

# Load the file and discard its 'Unnamed: 0' column
nba_stats_df = (
    pd.read_csv(player_stats_path)
    .drop(columns=["Unnamed: 0"])
)

# Show every float with two decimals so the rendered tables are consistent
pd.set_option("display.float_format", "{:.2f}".format)

nba_stats_df

Unnamed: 0,player_name,height (in),pts,reb,ast,college,country,team,Height Range
0,AJ Hammons,84.00,2.20,1.60,0.20,Purdue,USA,DAL,"6'10"" - 7'"
1,Aaron Brooks,72.00,3.65,0.80,1.25,Oregon,USA,MIN,<= 6ft
2,Aaron Gordon,81.00,15.43,6.80,2.63,Arizona,USA,ORL,"6'7"" - 6'9"""
3,Aaron Harrison,78.00,3.45,1.65,0.90,Kentucky,USA,CHA,"6'4"" - 6'6"""
4,Aaron Holiday,73.00,5.90,1.30,1.70,UCLA,USA,IND,"6'1"" - 6'3"""
...,...,...,...,...,...,...,...,...,...
743,Zach Lofton,76.00,0.00,0.00,0.00,New Mexico State,USA,DET,"6'4"" - 6'6"""
744,Zach Randolph,81.00,14.30,7.45,1.95,Michigan State,USA,SAC,"6'7"" - 6'9"""
745,Zaza Pachulia,83.00,5.13,4.83,1.60,,Georgia,GSW,"6'10"" - 7'"
746,Zhaire Smith,76.00,6.70,2.20,1.70,Texas Tech,USA,PHI,"6'4"" - 6'6"""


### Define a function to allow user input for searching for stats 
#### Search by Player, by College or by Country

### Allow user input for search by player, college or country
#### Player search can be for multi-year stats or for average  
  * Choose All or Average
    * All will return a row for each season of data from the analysis
    * Average will return the average across those seasons played
    * Seasons in the data frames include only 2016-17, 2017-18 and 2018-19
  
#### College search returns data frame rows for players who attended the college entered
  * Choose All or Average 
    * All will return a row for each player and each season of data from the stats analysis
    * Average will return the average for each player across those seasons played
    * Will return up to 50 rows of results
    
#### Country search returns data frame rows for players listed for the country entered
  * Choose All or Average
    * All will return a row for each player and each season of data from the stats analysis
    * Average will return the average for each player across those seasons played
    * Will return up to 50 rows of results
    * There are 50 total players not listed as USA in our data   

In [7]:
def choice_made(request=None):
    """Interactively look up player stats by player name, college, or country.

    Asks (via ``input``) for the kind of search, the value to search for, and
    whether to show per-season rows ("All") or per-player averages ("Average"),
    then filters the matching module-level data frame on an exact match.

    "All" reads ``cleaned_nba_stats_breakout_df`` and "Average" reads
    ``cleaned_nba_stats_df``; both must already be loaded when this is called.

    The search-type and All/Average answers ignore surrounding whitespace and
    letter case. The search value itself is stripped of surrounding whitespace
    but must otherwise match the stored text exactly.

    Args:
        request: Unused. Kept (and made optional) so existing calls such as
            ``choice_made("user_choice")`` keep working.

    Returns:
        A data frame with the matching rows (at most 3 for a player search,
        at most 50 for a college or country search). The frame is empty when
        nothing matches. Returns None, after printing a short notice, when the
        search type or the All/Average answer is not one of the expected
        options (this includes pressing Enter to back out).
    """
    stats_prompt = "Okay, would you like to see multi-year stats or an average across the three seasons analyzed (enter All or Average)? "

    # search type -> (prompt for the value, column to filter on, row cap,
    #                 {stats choice: message template})
    searches = {
        "Player": (
            "Great choice! What player are you searching for (enter first and last name please)? ",
            "player_name",
            3,
            {
                "All": "Here are the stats from our analysis for your player, {}.",
                "Average": "Here are the averaged stats from our analysis for your player, {}.",
            },
        ),
        "College": (
            "Great choice! What college would you like to search for? ",
            "college",
            50,
            {
                "All": "Here are the stats for players who went to the college from your search, {}. (up to 50 results)",
                "Average": "Here are the average stats for players who went to the college from your search, {}. (up to 50 results)",
            },
        ),
        "Country": (
            "Great choice! What country would you like to search for? ",
            "country",
            50,
            {
                "All": "Here are the stats for players from the country of your search, {}. (up to 50 results)",
                "Average": "Here are the average stats for players from the country of your search, {}. (up to 50 results)",
            },
        ),
    }

    user_choice = input("What type of search would you like to begin (enter TYPE of search: Player, College, or Country)? ")
    # Forgive stray whitespace and letter case ("player" -> "Player")
    user_choice = user_choice.strip().capitalize()
    if user_choice not in searches:
        print("Search type not recognized (expected Player, College, or Country); no search was run.")
        return None

    value_prompt, column, max_rows, messages = searches[user_choice]
    search_value = input(value_prompt).strip()
    type_of_stats = input(stats_prompt).strip().capitalize()
    if type_of_stats not in messages:
        print("Stats choice not recognized (expected All or Average); no search was run.")
        return None

    # The frames are looked up here, at call time, so a re-run of a load cell is picked up
    if type_of_stats == "All":
        source_df = cleaned_nba_stats_breakout_df
    else:
        source_df = cleaned_nba_stats_df

    result_of_search = source_df[source_df[column] == search_value]
    print(messages[type_of_stats].format(search_value))
    return result_of_search.head(max_rows)

# End of function set-up

In [9]:
# Run this cell to start an interactive search, then follow the prompts:
# enter Player, College, or Country, then the value to look for, then All or Average.
choice_made("user_choice")

# To back out of a search, press Enter at each input box; an entry that matches
# none of the expected options also ends the search without returning a table.

What type of search would you like to begin (enter TYPE of search: Player, College, or Country)? College
Great choice! What college would you like to search for? Butler
Okay, would you like to see multi-year stats or an average across the three seasons analyzed (enter All or Average)? All
Here are the stats for players who went to the college from your search, Butler. (up to 50 results)


Unnamed: 0,season,player_name,height (in),pts,reb,ast,college,country,team
522,2017-18,Gordon Hayward,80.0,2.0,1.0,0.0,Butler,USA,BOS
523,2016-17,Gordon Hayward,80.0,21.9,5.4,3.5,Butler,USA,UTA
524,2018-19,Gordon Hayward,80.0,11.5,4.5,3.4,Butler,USA,BOS
1326,2016-17,Shelvin Mack,75.0,7.8,2.3,2.8,Butler,USA,UTA
1327,2017-18,Shelvin Mack,75.0,6.9,2.4,3.9,Butler,USA,ORL
1328,2018-19,Shelvin Mack,75.0,7.5,1.8,3.2,Butler,USA,CHA
