<a href="https://colab.research.google.com/github/dimitramuni/NBA-Player-Profile-analysis/blob/main/Script/NBA_Hypothesis_Test_(Chi2_test).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from datetime import datetime
import calendar as cal
import scipy.stats 


class NBA_HypothesisChi2Test:
    """Chi-square goodness-of-fit test of a team's wins per day of the week.

    The constructor takes a team game log as a pandas DataFrame laid out
    like the basketball-reference.com game-log tables, e.g.
    https://www.basketball-reference.com/teams/POR/1999/gamelog/

    Columns listed by the original notebook:
        ['Date', 'Sep', 'Opp', 'Result', 'TeamScore', 'OpponentScore',
         'FG', 'FGA', 'FG%', '3P', '3PA', '3P%', 'FT', 'FTA', 'FT%',
         'ORB', 'TRB', 'AST', 'STL', 'BLK', 'TOV', 'PF']

    Only 'Date', 'Sep', 'Opp', 'Result', 'TeamScore' and 'OpponentScore'
    are actually read by this class.
    """

    # Opponents used when the caller does not pass ``opponent_teams``.
    DEFAULT_OPPONENTS = ('LAL', 'LAC', 'GSW', 'DEN', 'SAS', 'PHO')

    # Categories of the test, in calendar order (7 outcomes -> 6 degrees
    # of freedom).
    DAYS_OF_WEEK = ('Monday', 'Tuesday', 'Wednesday', 'Thursday',
                    'Friday', 'Saturday', 'Sunday')

    def __init__(self, df_gamelog):
        """Add the derived columns to ``df_gamelog`` and keep a reference.

        NOTE: ``df_gamelog`` is modified in place. 'Date' is converted to
        datetime and the columns 'HomeAway', 'Weekday' and 'DiffScore'
        are added, so the caller's frame sees these changes too.

        Args:
            df_gamelog: game log DataFrame (see the class docstring).
        """
        # '@' in the 'Sep' column marks an away game; anything else is
        # treated as a home game.
        df_gamelog['HomeAway'] = np.where(
            df_gamelog['Sep'] == '@', 'Away', 'Home')

        # Day of the week derived from the game date.
        # ref: https://stackoverflow.com/questions/9847213/
        #      how-do-i-get-the-day-of-week-given-a-date
        df_gamelog['Date'] = pd.to_datetime(df_gamelog['Date'])
        df_gamelog['Weekday'] = df_gamelog['Date'].apply(
            lambda t: cal.day_name[t.dayofweek])

        # Score margin of the team against its opponent.
        df_gamelog['DiffScore'] = (
            df_gamelog['TeamScore'] - df_gamelog['OpponentScore'])

        # Kept on the instance for use by the other methods.
        self.df_gamelog = df_gamelog

    def hypothesis_chisqr(self, opponent_teams=None,
                          significance_level=0.95):
        """Test whether wins are spread evenly over the days of the week.

        Restricts the game log to games against ``opponent_teams``,
        counts wins per weekday and compares them with a uniform
        expectation (total wins / 7) using the chi-square statistic
        sum((observed - expected)**2 / expected). All results are
        printed; nothing is returned.

        NOTE(review): the expected count is the same for every weekday,
        so the test does not account for how many games were scheduled
        on each day.

        Args:
            opponent_teams: iterable of opponent codes as they appear in
                the 'Opp' column. Defaults to ``DEFAULT_OPPONENTS``.
            significance_level: cumulative probability passed to
                ``chi2.ppf`` to obtain the critical value, i.e. the
                confidence level (0.95 corresponds to alpha = 0.05).

        Raises:
            ValueError: if ``significance_level`` is not strictly
                between 0 and 1.
        """
        if opponent_teams is None:
            opponent_teams = self.DEFAULT_OPPONENTS
        if not 0 < significance_level < 1:
            raise ValueError(
                'significance_level must be strictly between 0 and 1')

        # Games against the opponents of interest.
        games = self.df_gamelog[self.df_gamelog['Opp'].isin(opponent_teams)]
        print('\nNo. of games on certain day',
              games['Weekday'].value_counts())

        # Observed wins / losses per weekday. Weekdays without any
        # win (or loss) are simply absent from these tables.
        is_win = games['Result'] == 'W'
        wins_per_day = games.loc[is_win, 'Weekday'].value_counts()
        print(wins_per_day)
        losses_per_day = games.loc[
            games['Result'] == 'L', 'Weekday'].value_counts()
        print(losses_per_day)

        total_wins = int(is_win.sum())
        if total_wins == 0:
            # The expected count would be 0, so the statistic is undefined.
            print('No wins against the selected opponents; '
                  'chi-square test skipped.')
            return

        # Under H0 the wins are divided equally across all weekdays.
        expected_wins = total_wins / len(self.DAYS_OF_WEEK)

        # .get(day, 0): a weekday with no wins contributes an observed
        # count of 0 instead of raising KeyError.
        chi_sq2_stat = sum(
            (float(wins_per_day.get(day, 0)) - expected_wins) ** 2
            / expected_wins
            for day in self.DAYS_OF_WEEK)
        print('Chi square stat', chi_sq2_stat)

        # Critical value for degrees of freedom = outcomes - 1 = 7 - 1 = 6.
        chi_val = scipy.stats.chi2.ppf(
            significance_level, df=len(self.DAYS_OF_WEEK) - 1)
        print('Chi square value at',
              significance_level*100,
              '%', chi_val)

        if chi_sq2_stat > chi_val:
            print('Reject null hypothesis.')
        else:
            print('Can not reject null hypothesis.')


# Hypothesis test: does the day of the week affect winning against high-performing Western Conference teams?

* $H_0$: The day of the week on which the Trail Blazers play has no effect on their chance of winning the game.
* $H_1$: Playing on certain days of the week gives a statistically significant advantage.

In [None]:

# Load the team game log (the path is the one used in the Colab runtime).
df = pd.read_csv('/content/homeaway_updated.csv')

# Ignoring the matches from season 1998-99: drop the rows labelled 0 and 1.
df = df.drop([0, 1])

# High-performing Western Conference opponents to test against.
# A comma was missing between 'DEN' and 'SAS', which merged them into the
# single string 'DENSAS' and left both teams out of the test. Outputs
# recorded before this fix must be regenerated by re-running the cell.
team_list = ['LAL', 'LAC', 'GSW', 'DEN', 'SAS', 'PHO']

ob = NBA_HypothesisChi2Test(df)
ob.hypothesis_chisqr(significance_level=0.995, opponent_teams=team_list)


No. of games on certain day Wednesday    17
Tuesday      15
Saturday     13
Friday       13
Sunday       11
Thursday      8
Monday        3
Name: Weekday, dtype: int64
Wednesday    13
Saturday      8
Friday        8
Tuesday       7
Thursday      6
Sunday        4
Monday        3
Name: Weekday, dtype: int64
Tuesday      8
Sunday       7
Saturday     5
Friday       5
Wednesday    4
Thursday     2
Name: Weekday, dtype: int64
Chi square stat 9.142857142857144
Chi square value at 99.5 % 18.547584178511087
Can not reject null hypothesis.
