# Goal:
Perform data carpentry on the NBA player career stats dataset and load it into the database.

Import libraries for data acquisition, manipulation, and database loading

In [3]:
import pandas as pd
import getpass
import numpy as np
import time
from datetime import datetime
import psycopg2
from psycopg2.extensions import adapt, register_adapter, AsIs
from sportsreference.nba.teams import Teams
from sportsreference.nba.roster import Roster, Player
from sportsreference.nba.boxscore import Boxscore, Boxscores, BoxscorePlayer, AbstractPlayer
from sportsreference.nba.schedule import Schedule, Game
pd.set_option('display.max_columns', 500)

Read data from csv into the dataframe

In [4]:
# Load the raw career stats CSV (relative path) into the working DataFrame.
player_stats = pd.read_csv('../Data/nba_player_career_stats.csv')

The three cells below give us a snapshot of the player stats dataframe

In [5]:
player_stats.shape

(22878, 91)

In [6]:
player_stats.head()

Unnamed: 0,season,and_ones,assist_percentage,assists,block_percentage,blocking_fouls,blocks,box_plus_minus,center_percentage,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,defensive_win_shares,dunks,effective_field_goal_percentage,field_goal_attempts,field_goal_perc_sixteen_foot_plus_two_pointers,field_goal_perc_ten_to_sixteen_feet,field_goal_perc_three_to_ten_feet,field_goal_perc_zero_to_three_feet,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,games_played,games_started,half_court_heaves,half_court_heaves_made,height,lost_ball_turnovers,minutes_played,nationality,net_plus_minus,offensive_box_plus_minus,offensive_fouls,offensive_rebound_percentage,offensive_rebounds,offensive_win_shares,on_court_plus_minus,other_turnovers,passing_turnovers,percentage_field_goals_as_dunks,percentage_of_three_pointers_from_corner,percentage_shots_three_pointers,percentage_shots_two_pointers,percentage_sixteen_foot_plus_two_pointers,percentage_ten_to_sixteen_footers,percentage_three_to_ten_footers,percentage_zero_to_three_footers,personal_fouls,player_efficiency_rating,player_id,point_guard_percentage,points,points_generated_by_assists,position,power_forward_percentage,salary,shooting_distance,shooting_fouls,shooting_fouls_drawn,shooting_guard_percentage,shots_blocked,small_forward_percentage,steal_percentage,steals,take_fouls,team_abbreviation,three_point_attempt_rate,three_point_attempts,three_point_percentage,three_point_shot_percentage_from_corner,three_pointers,three_pointers_assisted_percentage,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,two_pointers_assisted_percentage,usage_percentage,value_over_replacement_player,weight,win_shares,win_shares_per_48_minutes,player_name
0,1976,,10.4,4.0,0.0,,0.0,-12.2,0,-6.2,11.4,7.0,0.0,,,46.0,,,,,0.37,17.0,0.065,3.0,1.0,3.0,13.0,,,,6-2,,64.0,United States of America,,-6.0,,1.6,1.0,-0.2,,,,,,,,,,,,18.0,7.3,mayfike01,0,37.0,,SG,0,,,,,0,,0,0.0,0.0,,NYK,,,,,,,6.4,8.0,0.391,,,,,,,,-0.2,185,-0.2,-0.134,Ken Mayfield
1,2006,,5.2,63.0,0.9,,24.0,-3.7,0,-1.9,16.5,261.0,0.7,47.0,0.455,531.0,0.395,0.314,0.152,0.61,0.443,235.0,0.476,253.0,0.747,189.0,79.0,7.0,0.0,0.0,6-9,,1952.0,United States of America,,-1.7,,7.3,122.0,2.1,,,,0.1,0.208,0.1,0.9,0.386,0.066,0.062,0.386,227.0,12.2,willima02,0,672.0,,PF,0,3883560.0,11.3,,,0,,0,1.3,48.0,,ATL,0.1,53.0,0.245,0.0,13.0,0.923,11.8,383.0,0.523,11.4,83.0,478.0,0.464,222.0,0.725,16.9,-0.8,237,2.8,0.069,Marvin Williams
2,2007,,9.7,121.0,1.1,,30.0,-2.7,0,-0.7,14.2,253.0,1.4,25.0,0.441,706.0,0.427,0.37,0.484,0.485,0.433,306.0,0.375,265.0,0.815,216.0,64.0,63.0,0.0,0.0,6-9,,2179.0,United States of America,,-2.0,,4.6,84.0,0.9,,,,0.044,0.222,0.064,0.936,0.477,0.076,0.091,0.292,191.0,12.2,willima02,0,839.0,,PF,0,4174800.0,12.5,,,0,,0,1.3,52.0,,ATL,0.064,45.0,0.244,0.3,11.0,0.909,9.3,337.0,0.51,13.4,127.0,661.0,0.446,295.0,0.756,20.0,-0.4,237,2.3,0.051,Marvin Williams
3,2008,,8.3,137.0,0.9,,33.0,-1.6,0,-0.6,14.2,338.0,2.1,72.0,0.462,918.0,0.454,0.288,0.393,0.586,0.462,424.0,0.446,409.0,0.822,336.0,80.0,80.0,1.0,0.0,6-9,,2765.0,United States of America,,-1.0,,5.0,119.0,3.6,,,,0.087,0.5,0.011,0.989,0.475,0.113,0.117,0.284,225.0,14.5,willima02,0,1185.0,,PF,0,4466040.0,11.6,,,0,,0,1.5,81.0,,ATL,0.011,10.0,0.1,0.0,1.0,1.0,9.6,457.0,0.54,10.4,127.0,908.0,0.466,423.0,0.754,20.1,0.3,237,5.7,0.099,Marvin Williams
4,2009,,6.3,81.0,1.4,,39.0,0.9,0,0.1,15.2,272.0,2.2,33.0,0.502,622.0,0.443,0.311,0.504,0.58,0.458,285.0,0.439,273.0,0.806,220.0,61.0,59.0,1.0,0.0,6-9,,2093.0,United States of America,,0.8,,6.2,110.0,3.9,,,,0.058,0.49,0.249,0.751,0.185,0.098,0.188,0.28,127.0,16.0,willima02,0,845.0,,PF,0,5636143.0,12.4,,,0,,0,1.5,57.0,,ATL,0.249,155.0,0.355,0.368,55.0,0.964,10.7,382.0,0.569,8.6,70.0,467.0,0.493,230.0,0.613,18.2,1.5,237,6.1,0.14,Marvin Williams


In [7]:
player_stats.tail()

Unnamed: 0,season,and_ones,assist_percentage,assists,block_percentage,blocking_fouls,blocks,box_plus_minus,center_percentage,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,defensive_win_shares,dunks,effective_field_goal_percentage,field_goal_attempts,field_goal_perc_sixteen_foot_plus_two_pointers,field_goal_perc_ten_to_sixteen_feet,field_goal_perc_three_to_ten_feet,field_goal_perc_zero_to_three_feet,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,games_played,games_started,half_court_heaves,half_court_heaves_made,height,lost_ball_turnovers,minutes_played,nationality,net_plus_minus,offensive_box_plus_minus,offensive_fouls,offensive_rebound_percentage,offensive_rebounds,offensive_win_shares,on_court_plus_minus,other_turnovers,passing_turnovers,percentage_field_goals_as_dunks,percentage_of_three_pointers_from_corner,percentage_shots_three_pointers,percentage_shots_two_pointers,percentage_sixteen_foot_plus_two_pointers,percentage_ten_to_sixteen_footers,percentage_three_to_ten_footers,percentage_zero_to_three_footers,personal_fouls,player_efficiency_rating,player_id,point_guard_percentage,points,points_generated_by_assists,position,power_forward_percentage,salary,shooting_distance,shooting_fouls,shooting_fouls_drawn,shooting_guard_percentage,shots_blocked,small_forward_percentage,steal_percentage,steals,take_fouls,team_abbreviation,three_point_attempt_rate,three_point_attempts,three_point_percentage,three_point_shot_percentage_from_corner,three_pointers,three_pointers_assisted_percentage,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,two_pointers_assisted_percentage,usage_percentage,value_over_replacement_player,weight,win_shares,win_shares_per_48_minutes,player_name
22873,1978,,11.3,131.0,2.8,,81.0,1.9,0,1.9,21.0,312.0,2.0,,0.434,525.0,,,,,0.434,228.0,0.472,248.0,0.794,197.0,76.0,,,,7-2,,1525.0,United States of America,,-0.1,,11.4,170.0,2.2,,,,,,,,,,,,259.0,16.5,burleto01,0,653.0,,C,0,,,,,0,,0,1.8,62.0,,KCK,,,,,,,16.2,482.0,0.515,16.2,123.0,525.0,0.434,228.0,,18.9,1.5,225,4.3,0.134,Tom Burleson
22874,1979,,6.9,50.0,3.5,,58.0,-0.3,0,0.8,23.7,197.0,1.6,,0.459,342.0,,,,,0.459,157.0,0.494,169.0,0.716,121.0,56.0,,,,7-2,,927.0,United States of America,,-1.0,,9.6,84.0,1.1,,,,,,,,,,,,183.0,15.3,burleto01,0,435.0,,C,0,,,,,0,,0,1.3,26.0,,KCK,,,,,,,16.5,281.0,0.522,13.3,64.0,342.0,0.459,157.0,,19.9,0.4,225,2.7,0.14,Tom Burleson
22875,1980,,9.5,20.0,2.8,,13.0,-5.0,0,0.2,20.0,49.0,0.5,,0.346,104.0,,,,,0.346,36.0,0.385,40.0,0.575,23.0,37.0,,,,7-2,,272.0,United States of America,,-5.2,,8.7,23.0,-0.6,,,,,,,,,,,,49.0,7.8,burleto01,0,95.0,,C,0,,,,,0,,0,1.4,8.0,,KCK,0.029,3.0,0.0,,0.0,,14.2,72.0,0.391,17.1,25.0,101.0,0.356,36.0,,21.5,-0.2,225,-0.1,-0.012,Tom Burleson
22876,1981,,4.6,12.0,3.1,,19.0,-3.6,0,0.4,16.0,50.0,0.3,,0.414,99.0,,,,,0.414,41.0,0.414,41.0,0.488,20.0,31.0,,,,7-2,,363.0,United States of America,,-4.0,,13.7,44.0,-0.2,,,,,,,,,,,,73.0,8.5,burleto01,0,102.0,,C,0,,,,,0,,0,1.1,8.0,,ATL,0.0,0.0,,,0.0,,14.9,94.0,0.436,17.0,24.0,99.0,0.414,41.0,,16.1,-0.1,225,0.2,0.021,Tom Burleson
22877,1950,,,3.0,,,,,0,,,,0.0,,,6.0,,,,,0.167,1.0,0.333,2.0,1.0,2.0,2.0,,,,6-2,,,United States of America,,,,,,0.0,,,,,,,,,,,,3.0,,gratedo01,0,4.0,,,0,,,,,0,,0,,,,SHE,,,,,,,,,0.291,,,,,,,,,185,0.0,,Don Grate


Calculate a new column called `player_season_number` that tracks what season the player is playing in (with respect to their career)

In [8]:
# Number each player's rows 1, 2, 3, ... in the order they appear in the frame.
# NOTE(review): this equals the career season number only if every player's
# rows are already in chronological order - confirm against the source CSV.
rows_seen_before = player_stats.groupby('player_id').cumcount()
player_stats['player_season_number'] = rows_seen_before + 1

View the addition of the new column

In [9]:
player_stats.head()

Unnamed: 0,season,and_ones,assist_percentage,assists,block_percentage,blocking_fouls,blocks,box_plus_minus,center_percentage,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,defensive_win_shares,dunks,effective_field_goal_percentage,field_goal_attempts,field_goal_perc_sixteen_foot_plus_two_pointers,field_goal_perc_ten_to_sixteen_feet,field_goal_perc_three_to_ten_feet,field_goal_perc_zero_to_three_feet,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,games_played,games_started,half_court_heaves,half_court_heaves_made,height,lost_ball_turnovers,minutes_played,nationality,net_plus_minus,offensive_box_plus_minus,offensive_fouls,offensive_rebound_percentage,offensive_rebounds,offensive_win_shares,on_court_plus_minus,other_turnovers,passing_turnovers,percentage_field_goals_as_dunks,percentage_of_three_pointers_from_corner,percentage_shots_three_pointers,percentage_shots_two_pointers,percentage_sixteen_foot_plus_two_pointers,percentage_ten_to_sixteen_footers,percentage_three_to_ten_footers,percentage_zero_to_three_footers,personal_fouls,player_efficiency_rating,player_id,point_guard_percentage,points,points_generated_by_assists,position,power_forward_percentage,salary,shooting_distance,shooting_fouls,shooting_fouls_drawn,shooting_guard_percentage,shots_blocked,small_forward_percentage,steal_percentage,steals,take_fouls,team_abbreviation,three_point_attempt_rate,three_point_attempts,three_point_percentage,three_point_shot_percentage_from_corner,three_pointers,three_pointers_assisted_percentage,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,two_pointers_assisted_percentage,usage_percentage,value_over_replacement_player,weight,win_shares,win_shares_per_48_minutes,player_name,player_season_number
0,1976,,10.4,4.0,0.0,,0.0,-12.2,0,-6.2,11.4,7.0,0.0,,,46.0,,,,,0.37,17.0,0.065,3.0,1.0,3.0,13.0,,,,6-2,,64.0,United States of America,,-6.0,,1.6,1.0,-0.2,,,,,,,,,,,,18.0,7.3,mayfike01,0,37.0,,SG,0,,,,,0,,0,0.0,0.0,,NYK,,,,,,,6.4,8.0,0.391,,,,,,,,-0.2,185,-0.2,-0.134,Ken Mayfield,1
1,2006,,5.2,63.0,0.9,,24.0,-3.7,0,-1.9,16.5,261.0,0.7,47.0,0.455,531.0,0.395,0.314,0.152,0.61,0.443,235.0,0.476,253.0,0.747,189.0,79.0,7.0,0.0,0.0,6-9,,1952.0,United States of America,,-1.7,,7.3,122.0,2.1,,,,0.1,0.208,0.1,0.9,0.386,0.066,0.062,0.386,227.0,12.2,willima02,0,672.0,,PF,0,3883560.0,11.3,,,0,,0,1.3,48.0,,ATL,0.1,53.0,0.245,0.0,13.0,0.923,11.8,383.0,0.523,11.4,83.0,478.0,0.464,222.0,0.725,16.9,-0.8,237,2.8,0.069,Marvin Williams,1
2,2007,,9.7,121.0,1.1,,30.0,-2.7,0,-0.7,14.2,253.0,1.4,25.0,0.441,706.0,0.427,0.37,0.484,0.485,0.433,306.0,0.375,265.0,0.815,216.0,64.0,63.0,0.0,0.0,6-9,,2179.0,United States of America,,-2.0,,4.6,84.0,0.9,,,,0.044,0.222,0.064,0.936,0.477,0.076,0.091,0.292,191.0,12.2,willima02,0,839.0,,PF,0,4174800.0,12.5,,,0,,0,1.3,52.0,,ATL,0.064,45.0,0.244,0.3,11.0,0.909,9.3,337.0,0.51,13.4,127.0,661.0,0.446,295.0,0.756,20.0,-0.4,237,2.3,0.051,Marvin Williams,2
3,2008,,8.3,137.0,0.9,,33.0,-1.6,0,-0.6,14.2,338.0,2.1,72.0,0.462,918.0,0.454,0.288,0.393,0.586,0.462,424.0,0.446,409.0,0.822,336.0,80.0,80.0,1.0,0.0,6-9,,2765.0,United States of America,,-1.0,,5.0,119.0,3.6,,,,0.087,0.5,0.011,0.989,0.475,0.113,0.117,0.284,225.0,14.5,willima02,0,1185.0,,PF,0,4466040.0,11.6,,,0,,0,1.5,81.0,,ATL,0.011,10.0,0.1,0.0,1.0,1.0,9.6,457.0,0.54,10.4,127.0,908.0,0.466,423.0,0.754,20.1,0.3,237,5.7,0.099,Marvin Williams,3
4,2009,,6.3,81.0,1.4,,39.0,0.9,0,0.1,15.2,272.0,2.2,33.0,0.502,622.0,0.443,0.311,0.504,0.58,0.458,285.0,0.439,273.0,0.806,220.0,61.0,59.0,1.0,0.0,6-9,,2093.0,United States of America,,0.8,,6.2,110.0,3.9,,,,0.058,0.49,0.249,0.751,0.185,0.098,0.188,0.28,127.0,16.0,willima02,0,845.0,,PF,0,5636143.0,12.4,,,0,,0,1.5,57.0,,ATL,0.249,155.0,0.355,0.368,55.0,0.964,10.7,382.0,0.569,8.6,70.0,467.0,0.493,230.0,0.613,18.2,1.5,237,6.1,0.14,Marvin Williams,4


Convert player heights from the feet-inches format (e.g. `6-9`) to total inches.

In [10]:
def _height_to_inches(height):
    """Convert a 'feet-inches' string such as '6-9' into total inches.

    Values that are not strings (for example a missing height read as NaN,
    or a height that has already been converted to a number) are returned
    unchanged, so the cell tolerates gaps in the data and can be re-run
    without raising.
    """
    if not isinstance(height, str):
        return height
    parts = height.split('-')
    return int(parts[0]) * 12 + int(parts[1])


# One entry per row of player_stats, in row order.
player_heights = [_height_to_inches(height) for height in player_stats.height]

In [11]:
# Overwrite the existing height column with the values held in player_heights.
player_stats['height'] = player_heights

Convert percentages from a 0 to 100 range to a decimal format (0 to 1)

In [12]:
# Columns that are divided by 100 to move them onto a 0-1 scale.
percent_scale_columns = [
    'assist_percentage',
    'block_percentage',
    'center_percentage',
    'defensive_rebound_percentage',
    'offensive_rebound_percentage',
    'point_guard_percentage',
    'power_forward_percentage',
    'shooting_guard_percentage',
    'small_forward_percentage',
    'steal_percentage',
    'total_rebound_percentage',
    'turnover_percentage',
    'usage_percentage',
]

# NOTE: running this cell twice divides by 100 twice.
for column in percent_scale_columns:
    player_stats[column] = player_stats[column] / 100

In [13]:
player_stats.head()

Unnamed: 0,season,and_ones,assist_percentage,assists,block_percentage,blocking_fouls,blocks,box_plus_minus,center_percentage,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,defensive_win_shares,dunks,effective_field_goal_percentage,field_goal_attempts,field_goal_perc_sixteen_foot_plus_two_pointers,field_goal_perc_ten_to_sixteen_feet,field_goal_perc_three_to_ten_feet,field_goal_perc_zero_to_three_feet,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,games_played,games_started,half_court_heaves,half_court_heaves_made,height,lost_ball_turnovers,minutes_played,nationality,net_plus_minus,offensive_box_plus_minus,offensive_fouls,offensive_rebound_percentage,offensive_rebounds,offensive_win_shares,on_court_plus_minus,other_turnovers,passing_turnovers,percentage_field_goals_as_dunks,percentage_of_three_pointers_from_corner,percentage_shots_three_pointers,percentage_shots_two_pointers,percentage_sixteen_foot_plus_two_pointers,percentage_ten_to_sixteen_footers,percentage_three_to_ten_footers,percentage_zero_to_three_footers,personal_fouls,player_efficiency_rating,player_id,point_guard_percentage,points,points_generated_by_assists,position,power_forward_percentage,salary,shooting_distance,shooting_fouls,shooting_fouls_drawn,shooting_guard_percentage,shots_blocked,small_forward_percentage,steal_percentage,steals,take_fouls,team_abbreviation,three_point_attempt_rate,three_point_attempts,three_point_percentage,three_point_shot_percentage_from_corner,three_pointers,three_pointers_assisted_percentage,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,two_pointers_assisted_percentage,usage_percentage,value_over_replacement_player,weight,win_shares,win_shares_per_48_minutes,player_name,player_season_number
0,1976,,0.104,4.0,0.0,,0.0,-12.2,0.0,-6.2,0.114,7.0,0.0,,,46.0,,,,,0.37,17.0,0.065,3.0,1.0,3.0,13.0,,,,74,,64.0,United States of America,,-6.0,,0.016,1.0,-0.2,,,,,,,,,,,,18.0,7.3,mayfike01,0.0,37.0,,SG,0.0,,,,,0.0,,0.0,0.0,0.0,,NYK,,,,,,,0.064,8.0,0.391,,,,,,,,-0.2,185,-0.2,-0.134,Ken Mayfield,1
1,2006,,0.052,63.0,0.009,,24.0,-3.7,0.0,-1.9,0.165,261.0,0.7,47.0,0.455,531.0,0.395,0.314,0.152,0.61,0.443,235.0,0.476,253.0,0.747,189.0,79.0,7.0,0.0,0.0,81,,1952.0,United States of America,,-1.7,,0.073,122.0,2.1,,,,0.1,0.208,0.1,0.9,0.386,0.066,0.062,0.386,227.0,12.2,willima02,0.0,672.0,,PF,0.0,3883560.0,11.3,,,0.0,,0.0,0.013,48.0,,ATL,0.1,53.0,0.245,0.0,13.0,0.923,0.118,383.0,0.523,0.114,83.0,478.0,0.464,222.0,0.725,0.169,-0.8,237,2.8,0.069,Marvin Williams,1
2,2007,,0.097,121.0,0.011,,30.0,-2.7,0.0,-0.7,0.142,253.0,1.4,25.0,0.441,706.0,0.427,0.37,0.484,0.485,0.433,306.0,0.375,265.0,0.815,216.0,64.0,63.0,0.0,0.0,81,,2179.0,United States of America,,-2.0,,0.046,84.0,0.9,,,,0.044,0.222,0.064,0.936,0.477,0.076,0.091,0.292,191.0,12.2,willima02,0.0,839.0,,PF,0.0,4174800.0,12.5,,,0.0,,0.0,0.013,52.0,,ATL,0.064,45.0,0.244,0.3,11.0,0.909,0.093,337.0,0.51,0.134,127.0,661.0,0.446,295.0,0.756,0.2,-0.4,237,2.3,0.051,Marvin Williams,2
3,2008,,0.083,137.0,0.009,,33.0,-1.6,0.0,-0.6,0.142,338.0,2.1,72.0,0.462,918.0,0.454,0.288,0.393,0.586,0.462,424.0,0.446,409.0,0.822,336.0,80.0,80.0,1.0,0.0,81,,2765.0,United States of America,,-1.0,,0.05,119.0,3.6,,,,0.087,0.5,0.011,0.989,0.475,0.113,0.117,0.284,225.0,14.5,willima02,0.0,1185.0,,PF,0.0,4466040.0,11.6,,,0.0,,0.0,0.015,81.0,,ATL,0.011,10.0,0.1,0.0,1.0,1.0,0.096,457.0,0.54,0.104,127.0,908.0,0.466,423.0,0.754,0.201,0.3,237,5.7,0.099,Marvin Williams,3
4,2009,,0.063,81.0,0.014,,39.0,0.9,0.0,0.1,0.152,272.0,2.2,33.0,0.502,622.0,0.443,0.311,0.504,0.58,0.458,285.0,0.439,273.0,0.806,220.0,61.0,59.0,1.0,0.0,81,,2093.0,United States of America,,0.8,,0.062,110.0,3.9,,,,0.058,0.49,0.249,0.751,0.185,0.098,0.188,0.28,127.0,16.0,willima02,0.0,845.0,,PF,0.0,5636143.0,12.4,,,0.0,,0.0,0.015,57.0,,ATL,0.249,155.0,0.355,0.368,55.0,0.964,0.107,382.0,0.569,0.086,70.0,467.0,0.493,230.0,0.613,0.182,1.5,237,6.1,0.14,Marvin Williams,4


Create player id and birth date lists from Player objects. Capture missing data.

In [14]:
# Build a Player object for every distinct player_id and record its birth date.
# p_ids and birth_dates are appended together so they stay index-aligned;
# ids whose lookup fails are collected in missing_p_ids instead.
p_ids = []
birth_dates = []
missing_p_ids = []
for player in player_stats.player_id.unique():
    try:
        p = Player(player)
        birth_date = p.birth_date
    except Exception:
        # Deliberately broad: one failed lookup should not abort the whole loop.
        # Catching Exception (rather than a bare except) still lets
        # KeyboardInterrupt and SystemExit stop the cell.
        print('Missing data for {}'.format(player))
        missing_p_ids.append(player)
    else:
        birth_dates.append(birth_date)
        p_ids.append(player)

In [15]:
missing_p_ids

[]

Player IDs that do not have birth dates can be removed from the list here. The removals below are currently commented out, so no IDs are actually dropped.

In [16]:
# p_ids.remove('oneilmi01')
# p_ids.remove('mcbrike01')
# p_ids.remove('reddofr01')
# p_ids.remove('schafbo01')
# p_ids.remove('bielkdo01')

Create player birthday dataframe

In [17]:
# One row per looked-up player: the id alongside the birth date recorded for it.
bday_columns = dict(player_id=p_ids, birth_dates=birth_dates)
player_bdays = pd.DataFrame(bday_columns)

Merge player birthdays into main dataframe

In [18]:
# Left join on player_id so every stats row is kept, even when player_bdays
# has no entry for that player.
player_stats = pd.merge(player_stats, player_bdays, how='left', on='player_id')

Rename birth date for more clarity

In [19]:
# Singular column name: each row carries exactly one birth date.
column_renames = {'birth_dates': 'birth_date'}
player_stats.rename(columns=column_renames, inplace=True)

Calculate each player's age in years as of January 1 of the season year, using their birth date

In [20]:
# Length of one year for the age calculation: 365.2425 days, written in whole
# seconds. This is the same span np.timedelta64(1, 'Y') represents, so the
# resulting ages are unchanged, but it avoids dividing by a 'Y'-unit timedelta,
# which newer pandas releases reject as an ambiguous duration.
ONE_YEAR = pd.Timedelta(seconds=31556952)

# Age in fractional years on January 1 of each row's season year.
ages = []
for season, bday in zip(player_stats.season, player_stats.birth_date):
    if pd.isnull(bday):
        # No birth date for this player: record a missing age instead of failing.
        ages.append(np.nan)
        continue
    season_start = pd.Timestamp(year=int(season), month=1, day=1)
    ages.append((season_start - bday) / ONE_YEAR)

Add ages list to the dataframe as the column `age`

In [21]:
# ages holds one value per row of player_stats, so it is assigned as a new column.
player_stats['age'] = ages

View our newly edited dataframe

In [22]:
player_stats.head()

Unnamed: 0,season,and_ones,assist_percentage,assists,block_percentage,blocking_fouls,blocks,box_plus_minus,center_percentage,defensive_box_plus_minus,defensive_rebound_percentage,defensive_rebounds,defensive_win_shares,dunks,effective_field_goal_percentage,field_goal_attempts,field_goal_perc_sixteen_foot_plus_two_pointers,field_goal_perc_ten_to_sixteen_feet,field_goal_perc_three_to_ten_feet,field_goal_perc_zero_to_three_feet,field_goal_percentage,field_goals,free_throw_attempt_rate,free_throw_attempts,free_throw_percentage,free_throws,games_played,games_started,half_court_heaves,half_court_heaves_made,height,lost_ball_turnovers,minutes_played,nationality,net_plus_minus,offensive_box_plus_minus,offensive_fouls,offensive_rebound_percentage,offensive_rebounds,offensive_win_shares,on_court_plus_minus,other_turnovers,passing_turnovers,percentage_field_goals_as_dunks,percentage_of_three_pointers_from_corner,percentage_shots_three_pointers,percentage_shots_two_pointers,percentage_sixteen_foot_plus_two_pointers,percentage_ten_to_sixteen_footers,percentage_three_to_ten_footers,percentage_zero_to_three_footers,personal_fouls,player_efficiency_rating,player_id,point_guard_percentage,points,points_generated_by_assists,position,power_forward_percentage,salary,shooting_distance,shooting_fouls,shooting_fouls_drawn,shooting_guard_percentage,shots_blocked,small_forward_percentage,steal_percentage,steals,take_fouls,team_abbreviation,three_point_attempt_rate,three_point_attempts,three_point_percentage,three_point_shot_percentage_from_corner,three_pointers,three_pointers_assisted_percentage,total_rebound_percentage,total_rebounds,true_shooting_percentage,turnover_percentage,turnovers,two_point_attempts,two_point_percentage,two_pointers,two_pointers_assisted_percentage,usage_percentage,value_over_replacement_player,weight,win_shares,win_shares_per_48_minutes,player_name,player_season_number,birth_date,age
0,1976,,0.104,4.0,0.0,,0.0,-12.2,0.0,-6.2,0.114,7.0,0.0,,,46.0,,,,,0.37,17.0,0.065,3.0,1.0,3.0,13.0,,,,74,,64.0,United States of America,,-6.0,,0.016,1.0,-0.2,,,,,,,,,,,,18.0,7.3,mayfike01,0.0,37.0,,SG,0.0,,,,,0.0,,0.0,0.0,0.0,,NYK,,,,,,,0.064,8.0,0.391,,,,,,,,-0.2,185,-0.2,-0.134,Ken Mayfield,1,1948-05-11,27.641909
1,2006,,0.052,63.0,0.009,,24.0,-3.7,0.0,-1.9,0.165,261.0,0.7,47.0,0.455,531.0,0.395,0.314,0.152,0.61,0.443,235.0,0.476,253.0,0.747,189.0,79.0,7.0,0.0,0.0,81,,1952.0,United States of America,,-1.7,,0.073,122.0,2.1,,,,0.1,0.208,0.1,0.9,0.386,0.066,0.062,0.386,227.0,12.2,willima02,0.0,672.0,,PF,0.0,3883560.0,11.3,,,0.0,,0.0,0.013,48.0,,ATL,0.1,53.0,0.245,0.0,13.0,0.923,0.118,383.0,0.523,0.114,83.0,478.0,0.464,222.0,0.725,0.169,-0.8,237,2.8,0.069,Marvin Williams,1,1986-06-19,19.537704
2,2007,,0.097,121.0,0.011,,30.0,-2.7,0.0,-0.7,0.142,253.0,1.4,25.0,0.441,706.0,0.427,0.37,0.484,0.485,0.433,306.0,0.375,265.0,0.815,216.0,64.0,63.0,0.0,0.0,81,,2179.0,United States of America,,-2.0,,0.046,84.0,0.9,,,,0.044,0.222,0.064,0.936,0.477,0.076,0.091,0.292,191.0,12.2,willima02,0.0,839.0,,PF,0.0,4174800.0,12.5,,,0.0,,0.0,0.013,52.0,,ATL,0.064,45.0,0.244,0.3,11.0,0.909,0.093,337.0,0.51,0.134,127.0,661.0,0.446,295.0,0.756,0.2,-0.4,237,2.3,0.051,Marvin Williams,2,1986-06-19,20.53704
3,2008,,0.083,137.0,0.009,,33.0,-1.6,0.0,-0.6,0.142,338.0,2.1,72.0,0.462,918.0,0.454,0.288,0.393,0.586,0.462,424.0,0.446,409.0,0.822,336.0,80.0,80.0,1.0,0.0,81,,2765.0,United States of America,,-1.0,,0.05,119.0,3.6,,,,0.087,0.5,0.011,0.989,0.475,0.113,0.117,0.284,225.0,14.5,willima02,0.0,1185.0,,PF,0.0,4466040.0,11.6,,,0.0,,0.0,0.015,81.0,,ATL,0.011,10.0,0.1,0.0,1.0,1.0,0.096,457.0,0.54,0.104,127.0,908.0,0.466,423.0,0.754,0.201,0.3,237,5.7,0.099,Marvin Williams,3,1986-06-19,21.536377
4,2009,,0.063,81.0,0.014,,39.0,0.9,0.0,0.1,0.152,272.0,2.2,33.0,0.502,622.0,0.443,0.311,0.504,0.58,0.458,285.0,0.439,273.0,0.806,220.0,61.0,59.0,1.0,0.0,81,,2093.0,United States of America,,0.8,,0.062,110.0,3.9,,,,0.058,0.49,0.249,0.751,0.185,0.098,0.188,0.28,127.0,16.0,willima02,0.0,845.0,,PF,0.0,5636143.0,12.4,,,0.0,,0.0,0.015,57.0,,ATL,0.249,155.0,0.355,0.368,55.0,0.964,0.107,382.0,0.569,0.086,70.0,467.0,0.493,230.0,0.613,0.182,1.5,237,6.1,0.14,Marvin Williams,4,1986-06-19,22.53845


Write the cleaned dataset back to CSV

In [23]:
# Save the cleaned frame; index=False leaves the DataFrame row index out of the file.
player_stats.to_csv('../Data/player_career_stats.csv',index=False)

Connect to our database and load data

In [1]:
# Prompt for the database password and pass it straight to psycopg2, so it is
# never bound to a notebook-level variable.
conn = psycopg2.connect(
    database='cs20_group4',
    user='fhfrf',
    host='pgsql.dsa.lan',
    password=getpass.getpass(),
)

NameError: name 'getpass' is not defined

In [None]:
# Open a cursor on the connection for the statements that follow.
cursor = conn.cursor()

Create a table called `nba_player_season_totals` which will contain our data

In [None]:
# DDL for the season-totals table: one column per player_stats column, in the
# same order the INSERT statement later in this notebook lists them.
# WARNING: the script starts with DROP TABLE IF EXISTS, so executing it
# discards any rows already stored in nba_player_season_totals.
create_table = """
DROP TABLE IF EXISTS nba_player_season_totals;
CREATE TABLE nba_player_season_totals (
season numeric
, and_ones numeric
, assist_percentage numeric
, assists numeric
, block_percentage numeric
, blocking_fouls numeric
, blocks numeric
, box_plus_minus numeric
, center_percentage numeric
, defensive_box_plus_minus numeric
, defensive_rebound_percentage numeric
, defensive_rebounds numeric
, defensive_win_shares numeric
, dunks numeric
, effective_field_goal_percentage numeric
, field_goal_attempts numeric
, field_goal_perc_sixteen_foot_plus_two_pointers numeric
, field_goal_perc_ten_to_sixteen_feet numeric
, field_goal_perc_three_to_ten_feet numeric
, field_goal_perc_zero_to_three_feet numeric
, field_goal_percentage numeric
, field_goals numeric
, free_throw_attempt_rate numeric
, free_throw_attempts numeric
, free_throw_percentage numeric
, free_throws numeric
, games_played numeric
, games_started numeric
, half_court_heaves numeric
, half_court_heaves_made numeric
, height numeric
, lost_ball_turnovers numeric
, minutes_played numeric
, nationality varchar(100)
, net_plus_minus numeric
, offensive_box_plus_minus numeric
, offensive_fouls numeric
, offensive_rebound_percentage numeric
, offensive_rebounds numeric
, offensive_win_shares numeric
, on_court_plus_minus numeric
, other_turnovers numeric
, passing_turnovers numeric
, percentage_field_goals_as_dunks numeric
, percentage_of_three_pointers_from_corner numeric
, percentage_shots_three_pointers numeric
, percentage_shots_two_pointers numeric
, percentage_sixteen_foot_plus_two_pointers numeric
, percentage_ten_to_sixteen_footers numeric
, percentage_three_to_ten_footers numeric
, percentage_zero_to_three_footers numeric
, personal_fouls numeric
, player_efficiency_rating numeric
, player_id varchar(25)
, point_guard_percentage numeric
, points numeric
, points_generated_by_assists numeric
, position varchar(5)
, power_forward_percentage numeric
, salary numeric
, shooting_distance numeric
, shooting_fouls numeric
, shooting_fouls_drawn numeric
, shooting_guard_percentage numeric
, shots_blocked numeric
, small_forward_percentage numeric
, steal_percentage numeric
, steals numeric
, take_fouls numeric
, team_abbreviation varchar(10)
, three_point_attempt_rate numeric
, three_point_attempts numeric
, three_point_percentage numeric
, three_point_shot_percentage_from_corner numeric
, three_pointers numeric
, three_pointers_assisted_percentage numeric
, total_rebound_percentage numeric
, total_rebounds numeric
, true_shooting_percentage numeric
, turnover_percentage numeric
, turnovers numeric
, two_point_attempts numeric
, two_point_percentage numeric
, two_pointers numeric
, two_pointers_assisted_percentage numeric
, usage_percentage numeric
, value_over_replacement_player numeric
, weight numeric
, win_shares numeric
, win_shares_per_48_minutes numeric
, player_name varchar(250)
, player_season_number numeric
, birth_date TIMESTAMP
, age numeric
)
"""

In [None]:
# Run the DROP TABLE / CREATE TABLE script held in create_table.
cursor.execute(create_table)

In [None]:
# Commit the open transaction on this connection.
conn.commit()

In [None]:
# Open a database connection for the insert step. The password prompt result is
# passed straight to psycopg2 instead of being kept in a notebook variable.
# NOTE(review): an identical connection is opened earlier in this notebook;
# this cell looks necessary only after a kernel restart - confirm.
conn = psycopg2.connect(
    database='cs20_group4',
    user='fhfrf',
    host='pgsql.dsa.lan',
    password=getpass.getpass(),
)

In [None]:
# Open a cursor on the new connection.
# NOTE: the insert cell below rebinds `cursor` inside its own `with` block.
cursor = conn.cursor()

Insert data into our newly created table `nba_player_season_totals`

In [None]:
stats = player_stats.loc[player_stats.birth_date.notnull()]
stats = stats.where(pd.notnull(stats),None)

INSERT_SQL = 'INSERT INTO nba_player_season_totals'
INSERT_SQL += '(season'
INSERT_SQL += ', and_ones'
INSERT_SQL += ', assist_percentage'
INSERT_SQL += ', assists'
INSERT_SQL += ', block_percentage'
INSERT_SQL += ', blocking_fouls'
INSERT_SQL += ', blocks'
INSERT_SQL += ', box_plus_minus'
INSERT_SQL += ', center_percentage'
INSERT_SQL += ', defensive_box_plus_minus'
INSERT_SQL += ', defensive_rebound_percentage'
INSERT_SQL += ', defensive_rebounds'
INSERT_SQL += ', defensive_win_shares'
INSERT_SQL += ', dunks'
INSERT_SQL += ', effective_field_goal_percentage'
INSERT_SQL += ', field_goal_attempts'
INSERT_SQL += ', field_goal_perc_sixteen_foot_plus_two_pointers'
INSERT_SQL += ', field_goal_perc_ten_to_sixteen_feet'
INSERT_SQL += ', field_goal_perc_three_to_ten_feet'
INSERT_SQL += ', field_goal_perc_zero_to_three_feet'
INSERT_SQL += ', field_goal_percentage'
INSERT_SQL += ', field_goals'
INSERT_SQL += ', free_throw_attempt_rate'
INSERT_SQL += ', free_throw_attempts'
INSERT_SQL += ', free_throw_percentage'
INSERT_SQL += ', free_throws'
INSERT_SQL += ', games_played'
INSERT_SQL += ', games_started'
INSERT_SQL += ', half_court_heaves'
INSERT_SQL += ', half_court_heaves_made'
INSERT_SQL += ', height'
INSERT_SQL += ', lost_ball_turnovers'
INSERT_SQL += ', minutes_played'
INSERT_SQL += ', nationality'
INSERT_SQL += ', net_plus_minus'
INSERT_SQL += ', offensive_box_plus_minus'
INSERT_SQL += ', offensive_fouls'
INSERT_SQL += ', offensive_rebound_percentage'
INSERT_SQL += ', offensive_rebounds'
INSERT_SQL += ', offensive_win_shares'
INSERT_SQL += ', on_court_plus_minus'
INSERT_SQL += ', other_turnovers'
INSERT_SQL += ', passing_turnovers'
INSERT_SQL += ', percentage_field_goals_as_dunks'
INSERT_SQL += ', percentage_of_three_pointers_from_corner'
INSERT_SQL += ', percentage_shots_three_pointers'
INSERT_SQL += ', percentage_shots_two_pointers'
INSERT_SQL += ', percentage_sixteen_foot_plus_two_pointers'
INSERT_SQL += ', percentage_ten_to_sixteen_footers'
INSERT_SQL += ', percentage_three_to_ten_footers'
INSERT_SQL += ', percentage_zero_to_three_footers'
INSERT_SQL += ', personal_fouls'
INSERT_SQL += ', player_efficiency_rating'
INSERT_SQL += ', player_id'
INSERT_SQL += ', point_guard_percentage'
INSERT_SQL += ', points'
INSERT_SQL += ', points_generated_by_assists'
INSERT_SQL += ', position'
INSERT_SQL += ', power_forward_percentage'
INSERT_SQL += ', salary'
INSERT_SQL += ', shooting_distance'
INSERT_SQL += ', shooting_fouls'
INSERT_SQL += ', shooting_fouls_drawn'
INSERT_SQL += ', shooting_guard_percentage'
INSERT_SQL += ', shots_blocked'
INSERT_SQL += ', small_forward_percentage'
INSERT_SQL += ', steal_percentage'
INSERT_SQL += ', steals'
INSERT_SQL += ', take_fouls'
INSERT_SQL += ', team_abbreviation'
INSERT_SQL += ', three_point_attempt_rate'
INSERT_SQL += ', three_point_attempts'
INSERT_SQL += ', three_point_percentage'
INSERT_SQL += ', three_point_shot_percentage_from_corner'
INSERT_SQL += ', three_pointers'
INSERT_SQL += ', three_pointers_assisted_percentage'
INSERT_SQL += ', total_rebound_percentage'
INSERT_SQL += ', total_rebounds'
INSERT_SQL += ', true_shooting_percentage'
INSERT_SQL += ', turnover_percentage'
INSERT_SQL += ', turnovers'
INSERT_SQL += ', two_point_attempts'
INSERT_SQL += ', two_point_percentage'
INSERT_SQL += ', two_pointers'
INSERT_SQL += ', two_pointers_assisted_percentage'
INSERT_SQL += ', usage_percentage'
INSERT_SQL += ', value_over_replacement_player'
INSERT_SQL += ', weight'
INSERT_SQL += ', win_shares'
INSERT_SQL += ', win_shares_per_48_minutes'
INSERT_SQL += ', player_name'
INSERT_SQL += ', player_season_number'
INSERT_SQL += ', birth_date'
INSERT_SQL += ', age) VALUES'
INSERT_SQL += '(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,'
INSERT_SQL += '%s,%s,%s,%s)'

# Insert every row of `stats` with one parameterized INSERT per row.
# psycopg2's connection context manager wraps the block in a transaction
# (commit on success, rollback on exception) and leaves the connection open;
# the cursor context manager closes the cursor on exit.
with conn, conn.cursor() as cursor:
    # name=None yields plain tuples, index=False omits the dataframe index,
    # so each tuple is passed positionally to the %s placeholders.
    for row in stats.itertuples(index=False, name=None):
        cursor.execute(INSERT_SQL,row)

In [None]:
player_stats.head()

Create a player stats per minute dataset by first copying our player stats dataframe

In [None]:
player_stats_per_minute = player_stats.copy()

In [None]:
player_stats_per_minute.head()

Select the identifier and counting-stat columns we need (dropping the rest) and reorder them for easier usage and analysis

In [None]:
# Keep only the identifier columns followed by the season-total stats
# (plus minutes_played and salary), in the order listed here.
player_stats_per_minute = player_stats_per_minute[[
    'player_name',
    'player_id',
    'position',
    'team_abbreviation',
    'season',
    'player_season_number',
    'and_ones',
    'assists',
    'blocking_fouls',
    'blocks',
    'defensive_rebounds',
    'dunks',
    'field_goal_attempts',
    'field_goals',
    'free_throw_attempts',
    'free_throws',
    'lost_ball_turnovers',
    'minutes_played',
    'offensive_fouls',
    'offensive_rebounds',
    'other_turnovers',
    'passing_turnovers',
    'personal_fouls',
    'points',
    'points_generated_by_assists',
    'salary',
    'shooting_fouls',
    'shooting_fouls_drawn',
    'shots_blocked',
    'steals',
    'take_fouls',
    'three_point_attempts',
    'three_pointers',
    'total_rebounds',
    'turnovers',
    'two_point_attempts',
    'two_pointers',
    'win_shares',
]]

In [None]:
player_stats_per_minute.head()

Rename columns to a per minute basis

In [None]:
# Rename the stat columns in place.  Most simply gain a `_per_minute`
# suffix; the handful listed explicitly afterwards also get a clearer stem
# (e.g. `field_goals` -> `field_goals_made_per_minute`).
player_stats_per_minute.rename(
    columns={
        **{
            stat: stat + '_per_minute'
            for stat in (
                'and_ones',
                'assists',
                'blocking_fouls',
                'blocks',
                'defensive_rebounds',
                'dunks',
                'field_goal_attempts',
                'free_throw_attempts',
                'lost_ball_turnovers',
                'offensive_fouls',
                'offensive_rebounds',
                'other_turnovers',
                'passing_turnovers',
                'personal_fouls',
                'points',
                'points_generated_by_assists',
                'shooting_fouls_drawn',
                'steals',
                'take_fouls',
                'three_point_attempts',
                'total_rebounds',
                'turnovers',
                'two_point_attempts',
                'win_shares',
            )
        },
        'field_goals': 'field_goals_made_per_minute',
        'free_throws': 'free_throws_made_per_minute',
        'shooting_fouls': 'shooting_fouls_committed_per_minute',
        'shots_blocked': 'shot_attempts_blocked_per_minute',
        'three_pointers': 'three_pointers_made_per_minute',
        'two_pointers': 'two_pointers_made_per_minute',
    },
    inplace=True,
)

In [None]:
player_stats_per_minute.head()

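Override `minutes_played` to 1 for Damion James' 2013 season (presumably a data-quality fix so the per-minute division below has a usable denominator for that row)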

In [None]:
# Overwrite minutes_played with 1 for the row(s) matching this player and
# season.  NOTE(review): presumably the source value breaks the per-minute
# division (e.g. 0 minutes) -- confirm against the raw data.
player_stats_per_minute.loc[
    player_stats_per_minute['player_name'].eq('Damion James')
    & player_stats_per_minute['season'].eq(2013),
    'minutes_played'
] = 1

Calculate per-minute stats by dividing each stat by the number of minutes the player played, overwriting the renamed columns of the per-minute dataframe in place

In [2]:
# Every column carrying the `_per_minute` suffix still holds a season total
# at this point.  Divide them all by minutes_played in a single row-aligned
# operation instead of one hand-written attribute assignment per column:
# bracket indexing always targets a column, and a newly added per-minute
# column cannot be forgotten.
# NOTE: like the per-column version this replaces, rows with 0 minutes
# produce inf/NaN, and re-running the cell divides a second time.
per_minute_columns = [
    column
    for column in player_stats_per_minute.columns
    if column.endswith('_per_minute')
]
player_stats_per_minute[per_minute_columns] = (
    player_stats_per_minute[per_minute_columns]
    .div(player_stats_per_minute['minutes_played'], axis=0)
)


NameError: name 'player_stats_per_minute' is not defined

View our results in the `player_stats_per_minute` dataframe

In [None]:
player_stats_per_minute.head()

Create a table called `nba_player_per_minute_stats_season`

In [None]:
# Prompt for the password and hand it straight to psycopg2 so it is never
# bound to a notebook variable.  (Binding it first and `del`-ing it after
# the connect left the plaintext password in the namespace whenever
# connect() raised, and `del` only unbinds the name anyway.)
conn = psycopg2.connect(
    database='cs20_group4',
    user='fhfrf',
    host='pgsql.dsa.lan',
    password=getpass.getpass(),
)

In [None]:
cursor = conn.cursor()

In [None]:
# DDL for the per-minute stats table.  The statement is destructive: it
# drops any existing nba_player_per_minute_stats_season table before
# recreating it.  The table has 38 columns: four varchar identifier columns
# (player_name, player_id, position, team_abbreviation) followed by numeric
# columns for season, player_season_number, minutes_played, salary and the
# per-minute stats.  The column order here is the same order used by the
# dataframe column selection and the INSERT statement in this notebook.
create_table = """
DROP TABLE IF EXISTS nba_player_per_minute_stats_season;
CREATE TABLE nba_player_per_minute_stats_season (
player_name varchar(250)
, player_id varchar(50)
, position varchar(5)
, team_abbreviation varchar(10)
, season numeric
, player_season_number numeric
, and_ones_per_minute numeric
, assists_per_minute numeric
, blocking_fouls_per_minute numeric
, blocks_per_minute numeric
, defensive_rebounds_per_minute numeric
, dunks_per_minute numeric
, field_goal_attempts_per_minute numeric
, field_goals_made_per_minute numeric
, free_throw_attempts_per_minute numeric
, free_throws_made_per_minute numeric
, lost_ball_turnovers_per_minute numeric
, minutes_played numeric
, offensive_fouls_per_minute numeric
, offensive_rebounds_per_minute numeric
, other_turnovers_per_minute numeric
, passing_turnovers_per_minute numeric
, personal_fouls_per_minute numeric
, points_per_minute numeric
, points_generated_by_assists_per_minute numeric
, salary numeric
, shooting_fouls_committed_per_minute numeric
, shooting_fouls_drawn_per_minute numeric
, shot_attempts_blocked_per_minute numeric
, steals_per_minute numeric
, take_fouls_per_minute numeric
, three_point_attempts_per_minute numeric
, three_pointers_made_per_minute numeric
, total_rebounds_per_minute numeric
, turnovers_per_minute numeric
, two_point_attempts_per_minute numeric
, two_pointers_made_per_minute numeric
, win_shares_per_minute numeric)
"""

In [None]:
cursor.execute(create_table)

In [None]:
conn.commit()

Insert the `player_stats_per_minute` dataframe into the newly created table

In [None]:
# Prompt for the password and hand it straight to psycopg2 so it is never
# bound to a notebook variable.  (Binding it first and `del`-ing it after
# the connect left the plaintext password in the namespace whenever
# connect() raised, and `del` only unbinds the name anyway.)
conn = psycopg2.connect(
    database='cs20_group4',
    user='fhfrf',
    host='pgsql.dsa.lan',
    password=getpass.getpass(),
)

In [None]:
cursor = conn.cursor()

In [None]:
# Target columns, in the same order as the CREATE TABLE statement.  The
# column list and the placeholder list of the INSERT are both derived from
# this tuple, so they cannot drift out of sync.
INSERT_COLUMNS = (
    'player_name',
    'player_id',
    'position',
    'team_abbreviation',
    'season',
    'player_season_number',
    'and_ones_per_minute',
    'assists_per_minute',
    'blocking_fouls_per_minute',
    'blocks_per_minute',
    'defensive_rebounds_per_minute',
    'dunks_per_minute',
    'field_goal_attempts_per_minute',
    'field_goals_made_per_minute',
    'free_throw_attempts_per_minute',
    'free_throws_made_per_minute',
    'lost_ball_turnovers_per_minute',
    'minutes_played',
    'offensive_fouls_per_minute',
    'offensive_rebounds_per_minute',
    'other_turnovers_per_minute',
    'passing_turnovers_per_minute',
    'personal_fouls_per_minute',
    'points_per_minute',
    'points_generated_by_assists_per_minute',
    'salary',
    'shooting_fouls_committed_per_minute',
    'shooting_fouls_drawn_per_minute',
    'shot_attempts_blocked_per_minute',
    'steals_per_minute',
    'take_fouls_per_minute',
    'three_point_attempts_per_minute',
    'three_pointers_made_per_minute',
    'total_rebounds_per_minute',
    'turnovers_per_minute',
    'two_point_attempts_per_minute',
    'two_pointers_made_per_minute',
    'win_shares_per_minute',
)

# Select the columns explicitly so each row tuple lines up with
# INSERT_COLUMNS (a missing column raises KeyError instead of silently
# shifting values), then cast to object before swapping NaN for None:
# a float64 column cannot hold None, so without the cast recent pandas
# versions leave NaN in place and it would be sent to the database as NaN
# rather than NULL.
pstats = player_stats_per_minute[list(INSERT_COLUMNS)].astype(object)
pstats = pstats.where(pd.notnull(pstats), None)

# Parameterized statement: one %s placeholder per target column.
INSERT_SQL = 'INSERT INTO nba_player_per_minute_stats_season({}) VALUES({})'.format(
    ', '.join(INSERT_COLUMNS),
    ','.join(['%s'] * len(INSERT_COLUMNS)),
)

# The connection context manager commits if every insert succeeds and rolls
# back if any raises; the cursor is closed when the block exits.
with conn, conn.cursor() as cursor:
    for row in pstats.itertuples(index=False, name=None):
        cursor.execute(INSERT_SQL, row)