# In [None]:
import numpy as np
import pandas as pd

# Seasons to load; each label is also the base name of that season's workbook.
seasons = ['2007-2008', '2008-2009', '2009-2010', '2010-2011', '2011-2012', '2012-2013', '2013-2014', '2014-2015',
           '2015-2016', '2016-2017', '2017-2018', '2018-2019', '2019-2020', '2020-2021', '2021-2022']

# Read every season's workbook, tag its rows with the season, save a CSV copy,
# and keep the frame so all seasons can be stacked in one go afterwards.
season_frames = []
for season in seasons:
  temp = pd.read_excel(f'/content/drive/MyDrive/Sports Data Analysis/NBA/Possession Data/Excel Files/{season}.xlsx')
  # 'Season' holds the second year of the label, e.g. 2008 for '2007-2008'.
  temp['Season'] = int(season.split('-')[1])
  temp.to_csv(f'/content/drive/MyDrive/Sports Data Analysis/NBA/Possession Data/CSV Files/{season}.csv')
  season_frames.append(temp)

# A single concat replaces the per-season DataFrame.append, which was removed
# in pandas 2.0 and re-copied the whole accumulated frame on every iteration.
# ignore_index=True gives the same fresh 0..n-1 index as reset_index(drop=True).
game_data = pd.concat(season_frames, ignore_index=True)

class CalcTeamRatings:
  """Pre-game ratings for one matchup, computed from strictly earlier games.

  The home team is rated on its most recent home games and the away team on
  its most recent away games (at most ``_MAX_GAMES`` each). Every rating is a
  blend of a baseline average and the average of the latest games; see
  ``_blended_mean`` for the exact rule.

  Attributes:
    home, away, date: the constructor arguments, stored unchanged.
    df: rows of the input dated before ``date``, sorted by 'Date', with any
      row containing a missing value removed.
    home_df: last ``_MAX_GAMES`` rows of ``df`` where 'Home Team' == ``home``.
    away_df: last ``_MAX_GAMES`` rows of ``df`` where 'Away Team' == ``away``.
  """

  _MAX_GAMES = 30       # games kept per team
  _BASELINE_GAMES = 20  # size of the baseline window once enough games exist
  _RECENT_GAMES = 10    # size of the recent-form window

  def __init__(self, home, away, date, df):
    """Select the history used to rate ``home`` (at home) and ``away`` (away).

    Args:
      home: value matched against the 'Home Team' column.
      away: value matched against the 'Away Team' column.
      date: cutoff; only rows with 'Date' strictly less than it are used.
      df: game log with 'Date', 'Home Team', 'Away Team', 'Home Eff',
        'Away Eff', 'Home Possessions' and 'Away Possessions' columns.
    """
    self.home = home
    self.date = date
    self.away = away
    # Strict '<' keeps the game being rated (and anything later) out of its
    # own history.
    self.df = df[df['Date'] < self.date].sort_values(['Date'])
    # dropna() removes a row if ANY column is missing, not only rating inputs.
    self.df = self.df.dropna()
    self.home_df = self.df[self.df['Home Team'] == self.home].tail(self._MAX_GAMES)
    self.away_df = self.df[self.df['Away Team'] == self.away].tail(self._MAX_GAMES)

  def _blended_mean(self, games, column):
    """Blend the baseline and recent averages of ``games[column]``.

    * fewer than ``_RECENT_GAMES`` rows: plain mean (NaN when there are none);
    * fewer than ``_BASELINE_GAMES`` rows: average of the overall mean and the
      mean of the last ``_RECENT_GAMES`` rows;
    * otherwise: average of the mean of the first ``_BASELINE_GAMES`` rows and
      the mean of the last ``_RECENT_GAMES`` rows.

    NOTE(review): with 20-29 rows the first-20 and last-10 windows overlap, so
    the middle games are counted in both halves. This matches the previous
    behavior; confirm it is intended.
    """
    values = games[column]
    if len(values) < self._RECENT_GAMES:
      return values.mean()
    recent = values.tail(self._RECENT_GAMES).mean()
    if len(values) < self._BASELINE_GAMES:
      baseline = values.mean()
    else:
      baseline = values.head(self._BASELINE_GAMES).mean()
    return (baseline + recent) / 2

  def rollingAvgEff(self):
    """Blended 'Home Eff' over the home team's recent home games."""
    return self._blended_mean(self.home_df, 'Home Eff')

  def rollingAvgEffA(self):
    """Blended 'Away Eff' (the opponents' figure) over the home team's recent home games."""
    return self._blended_mean(self.home_df, 'Away Eff')

  def away_rollingAvgEff(self):
    """Blended 'Away Eff' over the away team's recent away games."""
    return self._blended_mean(self.away_df, 'Away Eff')

  def away_rollingAvgEffA(self):
    """Blended 'Home Eff' (the opponents' figure) over the away team's recent away games."""
    return self._blended_mean(self.away_df, 'Home Eff')

  def home_pace(self):
    """Blended 'Home Possessions' over the home team's recent home games."""
    return self._blended_mean(self.home_df, 'Home Possessions')

  def away_pace(self):
    """Blended 'Away Possessions' over the away team's recent away games."""
    return self._blended_mean(self.away_df, 'Away Possessions')

# Efficiency = points scored per possession, for each side of every game.
game_data['Home Eff'] = game_data['Home Points'] / game_data['Home Possessions']
game_data['Away Eff'] = game_data['Away Points'] / game_data['Away Possessions']

# Map old team codes onto the current ones so a team's history is continuous.
# One pass replaces three; no replacement value is itself a key, so the result
# is the same as applying them one after another.
game_data = game_data.replace({'NOH': 'NOP', 'SEA': 'OKC', 'NJN': 'BKN'})

# Output column -> CalcTeamRatings method that produces it.
RATING_METHODS = {
  'Home Atk': CalcTeamRatings.rollingAvgEff,
  'Home Def': CalcTeamRatings.rollingAvgEffA,
  'Away Atk': CalcTeamRatings.away_rollingAvgEff,
  'Away Def': CalcTeamRatings.away_rollingAvgEffA,
  'Home Pace': CalcTeamRatings.home_pace,
  'Away Pace': CalcTeamRatings.away_pace,
}


def _rate_games(history):
  """Return one list of rating values per row of ``history``, in row order.

  Each game is rated against ``history`` itself. The CalcTeamRatings object is
  dropped as soon as its six values are read, so only one filtered copy of the
  history is alive at a time instead of one per game.
  """
  rows = []
  for home, away, date in zip(history['Home Team'], history['Away Team'], history['Date']):
    rater = CalcTeamRatings(home, away, date, history)
    rows.append([method(rater) for method in RATING_METHODS.values()])
  return rows


# The data is rated in two independent eras (Season < 2016 and Season >= 2016),
# exactly as before, so the output is unchanged.
# NOTE(review): this split was introduced as a memory workaround. It means the
# first games of Season 2016 see no earlier history. Now that objects are not
# retained it may be possible to rate against the full data set; confirm
# before changing, since that alters the resulting ratings.
rating_parts = []
for in_era in (game_data['Season'] < 2016, game_data['Season'] >= 2016):
  era = game_data[in_era]
  rating_parts.append(
    pd.DataFrame(_rate_games(era), index=era.index, columns=list(RATING_METHODS), dtype=float)
  )

# The rating columns are attached only after every game has been rated:
# CalcTeamRatings drops rows containing any NaN, so half-filled rating columns
# in the history would wipe it out.
ratings = pd.concat(rating_parts)
for column in RATING_METHODS:
  # Series assignment aligns on the index, so each value lands on its own game.
  game_data[column] = ratings[column]

game_data.to_csv('/content/drive/MyDrive/Sports Data Analysis/NBA/Possession Data/CSV Files/2007-2022.csv')
