### Project ETL - NBA Data

In [67]:
### Libraries:
import datetime
import getpass
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import pymysql
from scipy import stats
from sqlalchemy import create_engine
from sqlalchemy import inspect
from statsmodels.stats.proportion import proportions_ztest
from zodiac_sign import get_zodiac_sign

# Register PyMySQL under the MySQLdb name so the 'mysql://' SQLAlchemy URL can load a driver.
pymysql.install_as_MySQLdb()

### Extract CSVs into DataFrames

### Player Data File

In [38]:
# Path of the raw player CSV, relative to the notebook's working directory.
NBA_player_file = "Resources/player_data.csv"

# Parse the CSV into a DataFrame and preview its first five rows.
NBA_player_data_df = pd.read_csv(filepath_or_buffer=NBA_player_file)
NBA_player_data_df.head(n=5)

Unnamed: 0,name,year_start,year_end,position,height,weight,birth_date,college
0,Alaa Abdelnaby,1991,1995,F-C,6-10,240.0,"June 24, 1968",Duke University
1,Zaid Abdul-Aziz,1969,1978,C-F,6-9,235.0,"April 7, 1946",Iowa State University
2,Kareem Abdul-Jabbar,1970,1989,C,7-2,225.0,"April 16, 1947","University of California, Los Angeles"
3,Mahmoud Abdul-Rauf,1991,2001,G,6-1,162.0,"March 9, 1969",Louisiana State University
4,Tariq Abdul-Wahad,1998,2003,F,6-6,223.0,"November 3, 1974",San Jose State University


### Player Week File

In [39]:
# Path of the raw "player of the week" CSV, relative to the notebook's working directory.
NBA_player_week_file = "Resources/NBA_player_of_the_week.csv"

# Parse the CSV into a DataFrame and preview its first five rows.
NBA_player_week_df = pd.read_csv(filepath_or_buffer=NBA_player_week_file)
NBA_player_week_df.head(n=5)

Unnamed: 0,Active season,Player,Team,Conference,Date,Position,Height,Weight,Age,Draft Year,Seasons in league,Season,Season short,Real_value
0,0,Micheal Ray Richardson,New Jersey Nets,,"Apr 14, 1985",PG,6-5,189,29,1978,6,1984-1985,1985,1.0
1,0,Derek Smith,Los Angeles Clippers,,"Apr 7, 1985",SG,6-6,205,23,1982,2,1984-1985,1985,1.0
2,0,Calvin Natt,Denver Nuggets,,"Apr 1, 1985",F,6-6,220,28,1979,5,1984-1985,1985,1.0
3,0,Kareem Abdul-Jabbar,Los Angeles Lakers,,"Mar 24, 1985",C,7-2,225,37,1969,15,1984-1985,1985,1.0
4,0,Larry Bird,Boston Celtics,,"Mar 17, 1985",SF,6-9,220,28,1978,5,1984-1985,1985,1.0


In [40]:
### Checking Columns NBA Data
# Display the column labels of the raw player DataFrame so the names
# selected from it later can be confirmed against the file.

NBA_player_data_df.columns

Index(['name', 'year_start', 'year_end', 'position', 'height', 'weight',
       'birth_date', 'college'],
      dtype='object')

In [41]:
### Checking Columns NBA Week
# Display the column labels of the raw player-of-the-week DataFrame so the
# names selected from it later can be confirmed against the file.

NBA_player_week_df.columns

Index(['Active season', 'Player', 'Team', 'Conference', 'Date', 'Position',
       'Height', 'Weight', 'Age', 'Draft Year', 'Seasons in league', 'Season',
       'Season short', 'Real_value'],
      dtype='object')

In [42]:
###  selecting columns - NBA Data
# Keep only the columns needed downstream, rename 'name' to 'player_name',
# and derive two helper columns:
#   id          - the row label of the source frame
#   name_height - '<player_name>_<height>' as a composite key
# The original cell read the index from `new_player_data_df`, a name that is
# never defined, so it raised NameError on a fresh kernel; the frame being
# built here is the one whose index is wanted.
new_NBA_player_data_df = (
    NBA_player_data_df[['name', 'birth_date', 'height']]
    .rename(columns={'name': 'player_name'})
    .assign(
        id=lambda frame: frame.index,
        name_height=lambda frame: frame['player_name'] + '_' + frame['height'],
    )
)
new_NBA_player_data_df.head()

Unnamed: 0,player_name,birth_date,height,id,name_height
0,Alaa Abdelnaby,"June 24, 1968",6-10,0,Alaa Abdelnaby_6-10
1,Zaid Abdul-Aziz,"April 7, 1946",6-9,1,Zaid Abdul-Aziz_6-9
2,Kareem Abdul-Jabbar,"April 16, 1947",7-2,2,Kareem Abdul-Jabbar_7-2
3,Mahmoud Abdul-Rauf,"March 9, 1969",6-1,3,Mahmoud Abdul-Rauf_6-1
4,Tariq Abdul-Wahad,"November 3, 1974",6-6,4,Tariq Abdul-Wahad_6-6


In [44]:
###  including zodiac sign column - NBA Data
# Three steps, written as one chain:
#   1. parse 'birth_date' into datetimes,
#   2. drop every row that has a missing value in any column,
#   3. map each birth date to its zodiac sign with get_zodiac_sign.
new_NBA_player_data_df = (
    new_NBA_player_data_df
    .assign(birth_date=lambda frame: pd.to_datetime(frame['birth_date']))
    .dropna()
    .assign(zodiac=lambda frame: frame['birth_date'].apply(get_zodiac_sign))
)
new_NBA_player_data_df.head()


Unnamed: 0,player_name,birth_date,height,id,name_height,zodiac
0,Alaa Abdelnaby,1968-06-24,6-10,0,Alaa Abdelnaby_6-10,Cancer
1,Zaid Abdul-Aziz,1946-04-07,6-9,1,Zaid Abdul-Aziz_6-9,Aries
2,Kareem Abdul-Jabbar,1947-04-16,7-2,2,Kareem Abdul-Jabbar_7-2,Aries
3,Mahmoud Abdul-Rauf,1969-03-09,6-1,3,Mahmoud Abdul-Rauf_6-1,Pisces
4,Tariq Abdul-Wahad,1974-11-03,6-6,4,Tariq Abdul-Wahad_6-6,Scorpio


In [43]:
###  selecting columns - NBA Week
# Source column -> database column name. The dict order fixes the column
# order of the resulting frame.
week_column_names = {
    "Player": "player_name",
    "Age": "age",
    "Height": "height",
    "Real_value": "real_value",
    "Team": "team",
}

# Selecting with a list yields a new frame, so the raw frame is left untouched.
new_NBA_player_week_df = (
    NBA_player_week_df[list(week_column_names)]
    .rename(columns=week_column_names)
)

new_NBA_player_week_df.head()

Unnamed: 0,player_name,age,height,real_value,team
0,Micheal Ray Richardson,29,6-5,1.0,New Jersey Nets
1,Derek Smith,23,6-6,1.0,Los Angeles Clippers
2,Calvin Natt,28,6-6,1.0,Denver Nuggets
3,Kareem Abdul-Jabbar,37,7-2,1.0,Los Angeles Lakers
4,Larry Bird,28,6-9,1.0,Boston Celtics


### Creating database connection

In [47]:
# Connection settings are read from the environment so that no credential is
# stored in the notebook. Recognised variables (all optional except the
# password, which is prompted for when unset):
#   NBA_DB_USER      - MySQL user      (default: root)
#   NBA_DB_PASSWORD  - MySQL password  (prompted via getpass when missing)
#   NBA_DB_HOST      - MySQL host      (default: 127.0.0.1)
#   NBA_DB_NAME      - schema name     (default: nba_players)
db_user = os.environ.get("NBA_DB_USER", "root")
db_password = os.environ.get("NBA_DB_PASSWORD") or getpass.getpass("MySQL password: ")
db_host = os.environ.get("NBA_DB_HOST", "127.0.0.1")
db_name = os.environ.get("NBA_DB_NAME", "nba_players")

# Percent-encode the password so characters such as '@', ':' or '/' cannot be
# mistaken for URL delimiters when SQLAlchemy parses the connection string.
rds_connection_string = f"{db_user}:{quote(db_password, safe='')}@{db_host}/{db_name}"
engine = create_engine(f'mysql://{rds_connection_string}')

In [48]:
# Confirm tables
# Engine.table_names() is deprecated in SQLAlchemy 1.4 and removed in 2.0;
# the Inspector API returns the same list of table names on old and new versions.
inspect(engine).get_table_names()

  result = self._query(query)


['player_birth_date', 'player_week']

In [50]:
### Loading DataFrames into database

# Player birth data: rows are appended to the existing 'player_birth_date' table.
# NOTE(review): with 'append', re-running this cell inserts the same rows again —
# confirm the table is emptied (or constrained) before a second run.
new_NBA_player_data_df.to_sql(
    name='player_birth_date',
    con=engine,
    if_exists='append',
    index=False,
)

# Player of the week: the 'player_week' table is dropped and rebuilt on every run.
new_NBA_player_week_df.to_sql(
    name='player_week',
    con=engine,
    if_exists='replace',
    index=False,
)

In [51]:
### read sql table - player of the week
# Read the loaded table back from MySQL and preview it to confirm the load.
player_week_query = 'select * from player_week'
pd.read_sql_query(player_week_query, con=engine).head()

Unnamed: 0,player_name,age,height,real_value,team
0,Micheal Ray Richardson,29,6-5,1.0,New Jersey Nets
1,Derek Smith,23,6-6,1.0,Los Angeles Clippers
2,Calvin Natt,28,6-6,1.0,Denver Nuggets
3,Kareem Abdul-Jabbar,37,7-2,1.0,Los Angeles Lakers
4,Larry Bird,28,6-9,1.0,Boston Celtics


In [52]:
### read sql table - player data
# Read the loaded table back from MySQL and preview it to confirm the load.
player_birth_date_query = 'select * from player_birth_date'
pd.read_sql_query(player_birth_date_query, con=engine).head()

Unnamed: 0,id,player_name,birth_date,height,name_height,zodiac
0,0,Alaa Abdelnaby,1968-06-24,6-10,Alaa Abdelnaby_6-10,Cancer
1,1,Zaid Abdul-Aziz,1946-04-07,6-9,Zaid Abdul-Aziz_6-9,Aries
2,2,Kareem Abdul-Jabbar,1947-04-16,7-2,Kareem Abdul-Jabbar_7-2,Aries
3,3,Mahmoud Abdul-Rauf,1969-03-09,6-1,Mahmoud Abdul-Rauf_6-1,Pisces
4,4,Tariq Abdul-Wahad,1974-11-03,6-6,Tariq Abdul-Wahad_6-6,Scorpio


In [55]:
### read sql table - Zodiac Player of the week
# Zodiac_Player_of_the_Week is not created anywhere in this notebook.
# NOTE(review): presumably a view/table built in MySQL from the two loaded tables — confirm.
zodiac_awards_query = 'select * from Zodiac_Player_of_the_Week'
pd.read_sql_query(zodiac_awards_query, con=engine).head(12)

Unnamed: 0,zodiac,# of Player of the Week Award
0,Leo,97.5
1,Pisces,94.5
2,Aquarius,83.0
3,Taurus,79.5
4,Capricorn,78.5
5,Virgo,66.5
6,Cancer,64.0
7,Aries,56.0
8,Gemini,54.5
9,Libra,46.5


In [57]:
### transforming to df in Pandas
# Keep the per-sign award counts in a DataFrame for the calculations that follow;
# head(12) is enough to show one row per zodiac sign.
zodiac_awards_query = 'select * from Zodiac_Player_of_the_Week'
NBA_zodiac_player_week_df = pd.read_sql_query(zodiac_awards_query, con=engine)
NBA_zodiac_player_week_df.head(12)

Unnamed: 0,zodiac,# of Player of the Week Award
0,Leo,97.5
1,Pisces,94.5
2,Aquarius,83.0
3,Taurus,79.5
4,Capricorn,78.5
5,Virgo,66.5
6,Cancer,64.0
7,Aries,56.0
8,Gemini,54.5
9,Libra,46.5


In [60]:
# Share of all Player of the Week awards held by each zodiac sign, in percent.
award_counts = NBA_zodiac_player_week_df["# of Player of the Week Award"]
NBA_zodiac_player_week_df["Percentage"] = award_counts.div(award_counts.sum()).mul(100)

NBA_zodiac_player_week_df.head(12)

Unnamed: 0,zodiac,# of Player of the Week Award,Percentage
0,Leo,97.5,12.126866
1,Pisces,94.5,11.753731
2,Aquarius,83.0,10.323383
3,Taurus,79.5,9.88806
4,Capricorn,78.5,9.763682
5,Virgo,66.5,8.271144
6,Cancer,64.0,7.960199
7,Aries,56.0,6.965174
8,Gemini,54.5,6.778607
9,Libra,46.5,5.783582


In [63]:
### Defining highest zodiac weekly awards
# Look the count up by sign name instead of by row position 0: the position
# only matched Leo because of the row order the query happened to return,
# whereas the name-based lookup keeps this variable truthful if that order changes.
awards_by_sign = NBA_zodiac_player_week_df.set_index("zodiac")["# of Player of the Week Award"]
Leo_week_awards = awards_by_sign.loc["Leo"]

Leo_week_awards

97.5

In [64]:
### Defining lowest zodiac weekly awards
# Look the count up by sign name instead of by the hard-coded row position 11,
# which silently depended on the row order returned by the query.
awards_by_sign = NBA_zodiac_player_week_df.set_index("zodiac")["# of Player of the Week Award"]
Scorpio_week_awards = awards_by_sign.loc["Scorpio"]

Scorpio_week_awards

38.0

In [61]:
### Defining total weekly awards
# Sum of the award counts over every zodiac sign; used as the number of
# observations in the Z-test.
award_counts = NBA_zodiac_player_week_df["# of Player of the Week Award"]
Total_player_weeks = award_counts.sum()
print(Total_player_weeks)

804.0


In [68]:
### Creating a Z-Test
# Two-sample proportions Z-test comparing the share of awards won by Leo
# players with the share won by Scorpio players.
#   count - awards per sign
#   nobs  - total awards, used as the number of observations for both signs
#   value - significance level the p-value is compared against
# NOTE(review): both proportions come from the same pool of awards, so the
# independence assumption of a two-sample test may not hold — confirm the
# choice of test.
count = np.array([Leo_week_awards, Scorpio_week_awards])
nobs = np.array([Total_player_weeks, Total_player_weeks])
value = .05
stat, pval = proportions_ztest(count, nobs)
print('{0:0.3f}'.format(pval))

# The significance level was declared but never used; make the decision explicit.
print('Reject null hypothesis at alpha = {0}: {1}'.format(value, pval < value))

0.000


### Conclusion

As shown in the table above, since 1985 NBA players born under the Leo zodiac sign have received roughly 2.6 times as many Player of the Week awards as Scorpio players (97.5 vs. 38.0).

After running a Z-test, the null hypothesis is rejected (the p-value prints as 0.000, below the 0.05 significance level), so we infer that a Leo NBA player has a greater chance of being named Player of the Week than a Scorpio NBA player.