In [53]:
import pandas as pd
import numpy as np
import re

In [41]:
# Lineup table and raw play-by-play log used for ad-hoc inspection in this notebook.
lineups = pd.read_csv('../data/lineups.csv')
pbp = pd.read_csv('../data/PlayByPlay.csv')
# Some rows label the first period as the bare string "1"; give them the same
# label the replacement maps to so quarter comparisons see one value.
pbp["quarter"] = pbp["quarter"].replace({"1": "1st quarter"})

In [116]:
#We are going to add a "per 36" +/- column to the lineups dataframe

class lineup_minutes:
    """Replay the play-by-play log and append one stat line per player per stint.

    A stint ends whenever the game changes, the quarter changes, or an
    "Enters" event appears in the play-by-play.  At each boundary the counters
    accumulated since the previous boundary are appended to the stints CSV and
    every player's counters are reset to zero.

    The per-stint dictionaries hold every player the box score lists for the
    current game; ``write_csv`` then keeps only the players who recorded at
    least one counting stat during the stint (see ``played``).
    """

    # Counters that mark a player as having taken part in a stint.
    _ACTIVITY_COLUMNS = ("FGA", "rebounds", "assists", "points", "turnovers",
                         "steals", "blocks", "fouls", "fouls drawn", "FTA")

    def __init__(self, data_dir='../data', output_path=None):
        """Load the schedule, play-by-play and box-score tables.

        Parameters
        ----------
        data_dir : str or path-like, default '../data'
            Directory containing ``ScheduleData.csv``, ``PlayByPlay.csv`` and
            ``BoxScoreData.csv``.
        output_path : str or path-like, optional
            CSV file that stints are appended to.  Defaults to
            ``<data_dir>/pbp_stints.csv``.
        """
        self.cur_home_lineup = -1
        self.cur_away_lineup = -1
        self.cur_game_id = -1          # -1 means "no game seen yet"
        self.stint_index = 0
        self.cur_quarter = -1
        self.current_time = pd.to_timedelta("00:12:00")
        self.cur_away_dict = {}
        self.cur_home_dict = {}

        self.sched = pd.read_csv(f'{data_dir}/ScheduleData.csv')

        self.pbp = pd.read_csv(f'{data_dir}/PlayByPlay.csv')
        # Normalise the bare "1" label so quarter comparisons see one value.
        self.pbp["quarter"] = self.pbp["quarter"].replace({"1": "1st quarter"})

        self.boxscore = pd.read_csv(f'{data_dir}/BoxScoreData.csv')

        self.output_path = (output_path if output_path is not None
                            else f'{data_dir}/pbp_stints.csv')

        self.columns = ["points", "rebounds", "assists",
                        "2pm", "2pa", "3pm", "3pa", "FTM", "FTA", "FGM depth", "FGA depth",
                        "FGM", "FGA", "offensive rebounds", "defensive rebounds",
                        "fouls", "fouls drawn", "turnovers", "steals", "blocks", "+/-"]

    def start_player(self, player_id, team):
        """Give ``player_id`` a zeroed stat line unless one already exists.

        ``team`` is treated as a truthiness flag: truthy selects the home
        dictionary, falsy the away dictionary.
        """
        side = self.cur_home_dict if team else self.cur_away_dict
        if player_id not in side:
            side[player_id] = dict.fromkeys(self.columns, 0)

    def start_players(self):
        """Reset both stat dictionaries from the current game's box-score rows.

        ``check_for_update`` calls this at every stint boundary, so each stint
        starts with all counters at zero.
        """
        self.cur_home_dict = {}
        self.cur_away_dict = {}
        players = self.boxscore.loc[self.boxscore.Game_ID == self.cur_game_id]
        for _, row in players.iterrows():
            self.start_player(row["Player_ID"], row["Home"])

    def played(self, row):
        """Return the sum of a player's counting stats for the stint.

        The working assumption is that a player with any recorded stat was
        part of the stint and a player with none was not; callers test the
        result with ``> 0``.
        """
        return sum(row[col] for col in self._ACTIVITY_COLUMNS)

    def _team_frame(self, stats, is_home, team_name):
        """Turn one side's stat dictionary into a frame with a fixed column order."""
        frame = (pd.DataFrame.from_dict(stats, orient='index')
                 .reset_index()
                 .rename(columns={"index": 'player_id'}))
        # Pin the column order so an empty side cannot reshuffle the
        # header-less CSV layout when the two sides are concatenated.
        frame = frame.reindex(columns=["player_id", *self.columns])
        frame["home"] = is_home
        frame["team"] = team_name
        return frame

    def write_csv(self, time):
        """Append the stint that ends at clock value ``time`` to the stints CSV.

        Parameters
        ----------
        time : pandas.Timedelta
            Game-clock reading at the end of the stint; the stint length is
            ``self.current_time - time``.

        Nothing is written (and ``stint_index`` is not advanced) when the
        elapsed time is not positive, which is what happens when several
        substitutions share one timestamp.  Rows are appended without a
        header and without the index.
        """
        # total_seconds() keeps the sign; Timedelta.seconds would turn a
        # negative span into a large positive number and defeat the guard.
        time_passed = int((self.current_time - time).total_seconds())
        if time_passed <= 0:
            return
        if not self.cur_home_dict and not self.cur_away_dict:
            return  # no players registered, nothing to report

        game = self.sched.loc[self.sched['game_id'] == self.cur_game_id]
        home_df = self._team_frame(self.cur_home_dict, True, game['Home Team'].values[0])
        away_df = self._team_frame(self.cur_away_dict, False, game['Away Team'].values[0])
        combined = pd.concat([home_df, away_df], ignore_index=True)

        combined["game_id"] = self.cur_game_id
        combined["seconds"] = time_passed
        # The depth columns hold summed distances up to here; convert them to
        # per-shot averages (NaN when there were no makes / attempts).
        combined["FGM depth"] = combined["FGM depth"] / combined["FGM"].replace({0: np.nan})
        combined["FGA depth"] = combined["FGA depth"] / combined["FGA"].replace({0: np.nan})

        combined["stint_id"] = self.stint_index
        combined = combined.loc[combined.apply(self.played, axis=1) > 0]

        combined.to_csv(self.output_path, mode='a', index=False, header=False)
        self.stint_index += 1

    def get_distance(self, distance):
        """Return the first integer found in ``distance`` (e.g. ``"24 ft"`` -> 24).

        Falls back to 1 when ``distance`` is not a string (such as NaN) or
        contains no digits.
        """
        try:
            return int(re.findall("[0-9]+", distance)[0])
        except (TypeError, IndexError):
            return 1

    def update_plus_minus(self, point, team):
        """Add ``point`` to every stat line on ``team`` and subtract it from the other side.

        ``team`` must be ``"home"`` or ``"away"``; any other value is ignored.
        """
        if team == "home":
            gaining, losing = self.cur_home_dict, self.cur_away_dict
        elif team == "away":
            gaining, losing = self.cur_away_dict, self.cur_home_dict
        else:
            return
        for line in gaining.values():
            line["+/-"] += point
        for line in losing.values():
            line["+/-"] -= point

    def _credit_secondary(self, team_stats, player_id, stat):
        """Increment ``stat`` for a secondary player, tolerating unknown ids.

        Mirrors the handling of the primary player: an id that is missing
        from ``team_stats`` is reported and skipped instead of raising.
        """
        if player_id is None:
            return
        if player_id not in team_stats:
            if player_id != "Team":
                print("PLAYER NOT FOUND IN LINEUP", player_id)
            return
        team_stats[player_id][stat] += 1

    def _record_field_goal(self, line, row, attempt_key, make_key, made):
        """Book one field-goal attempt (and make, if ``made``) on ``line``."""
        depth = self.get_distance(row["distance"])
        line["FGA"] += 1
        line[attempt_key] += 1
        line["FGA depth"] += depth
        if made:
            line["FGM"] += 1
            line[make_key] += 1
            line["FGM depth"] += depth

    def update_stats(self, row):
        """Apply one play-by-play row to the current stint's counters.

        Rows whose ``player_id`` is in neither dictionary are skipped, with a
        printed notice unless the id is the literal ``"Team"``.  Play types
        other than shots, rebounds, fouls and turnovers change nothing.
        """
        play = row["play_type"]
        player = row["player_id"]

        # Away is checked first so that, as before, it wins if an id ever
        # appears on both sides.
        if player in self.cur_away_dict:
            team, own, opp = "away", self.cur_away_dict, self.cur_home_dict
        elif player in self.cur_home_dict:
            team, own, opp = "home", self.cur_home_dict, self.cur_away_dict
        else:
            if player != "Team":
                print("PLAYER NOT FOUND IN LINEUP", player)
            return

        sec_player = None if pd.isna(row["secondary_player_id"]) else row["secondary_player_id"]
        line = own[player]

        if play in ("Shot Made", "Shot Missed"):
            made = play == "Shot Made"
            # Secondary player: assist (same side) on a make, block (other
            # side) on a miss.
            if made:
                self._credit_secondary(own, sec_player, "assists")
            else:
                self._credit_secondary(opp, sec_player, "blocks")

            shot = row["shot_score"]
            point = 0
            if shot == "2-pt":
                point = 2
                self._record_field_goal(line, row, "2pa", "2pm", made)
            elif shot == "3-pt":
                point = 3
                self._record_field_goal(line, row, "3pa", "3pm", made)
            elif shot == "free":
                point = 1
                line["FTA"] += 1
                if made:
                    line["FTM"] += 1

            if made:
                line["points"] += point
                self.update_plus_minus(point, team)

        elif play == "Rebound":
            line["rebounds"] += 1
            if row["rebound"] == "Offensive":
                line["offensive rebounds"] += 1
            elif row["rebound"] == "Defensive":
                line["defensive rebounds"] += 1

        elif play == "Foul":
            line["fouls"] += 1
            self._credit_secondary(opp, sec_player, "fouls drawn")

        elif play == "Turnover":
            line["turnovers"] += 1
            self._credit_secondary(opp, sec_player, "steals")

    @staticmethod
    def _clock(row):
        """Parse the row's ``time`` string into a Timedelta by prefixing ``"00:"``."""
        return pd.to_timedelta("00:" + row["time"])

    def check_for_update(self, row):
        """Close the running stint and open a new one if ``row`` marks a boundary.

        A boundary is a change of game, a change of quarter, or an "Enters"
        play.  When the game or quarter changed, the outgoing stint is closed
        at 0:00; otherwise it is closed at the row's own clock value.
        """
        game_changed = row["game_id"] != self.cur_game_id
        quarter_changed = row["quarter"] != self.cur_quarter
        if not (game_changed or quarter_changed or row["play_type"] == "Enters"):
            return

        if self.cur_game_id != -1:  # nothing to flush before the first row
            if game_changed or quarter_changed:
                # The previous period ran out; the new row's clock belongs to
                # a different period (or game) and must not be used here.
                self.write_csv(pd.to_timedelta("00:00:00"))
            else:
                self.write_csv(self._clock(row))

        self.cur_game_id = row["game_id"]
        self.start_players()

        self.cur_quarter = row["quarter"]
        # NOTE(review): after a quarter change the new stint starts at the
        # first logged event's clock, not at the period's opening clock, so
        # any time before that event is not counted - confirm this is intended.
        self.current_time = self._clock(row)

    def pbp_loop(self):
        """Replay every play-by-play row, then flush the final open stint."""
        for _, row in self.pbp.iterrows():
            # Roll over to a new stint first so the row's stats land in it.
            self.check_for_update(row)
            self.update_stats(row)

        # The loop only writes a stint when a later row opens the next one, so
        # the last stint of the last game has to be closed explicitly.
        if self.cur_game_id != -1:
            self.write_csv(pd.to_timedelta("00:00:00"))


In [51]:
# Distinct event labels present in the play-by-play log.
pbp["play_type"].unique()

array(['Shot Made', 'Shot Missed', 'Rebound', 'Foul', 'Turnover',
       'Enters', 'Timeout'], dtype=object)

In [None]:
# Build the stint tracker (its constructor reads the schedule, play-by-play and
# box-score CSVs) and replay every play-by-play row. Stints are appended to
# ../data/pbp_stints.csv without a header row, so re-running this cell adds
# the same rows again unless that file is cleared first.
lineup_minutes().pbp_loop()

PLAYER NOT FOUND IN LINEUP jacksis01
PLAYER NOT FOUND IN LINEUP youngtr01
PLAYER NOT FOUND IN LINEUP smartma01
PLAYER NOT FOUND IN LINEUP craigto01
PLAYER NOT FOUND IN LINEUP hylanbo01
PLAYER NOT FOUND IN LINEUP davised01
PLAYER NOT FOUND IN LINEUP antetgi01
PLAYER NOT FOUND IN LINEUP metuch01
PLAYER NOT FOUND IN LINEUP butleji01
PLAYER NOT FOUND IN LINEUP davised01
PLAYER NOT FOUND IN LINEUP nunnke01
PLAYER NOT FOUND IN LINEUP pinsoth01
PLAYER NOT FOUND IN LINEUP pinsoth01
PLAYER NOT FOUND IN LINEUP antetth01
PLAYER NOT FOUND IN LINEUP jacksis01
PLAYER NOT FOUND IN LINEUP youngtr01
PLAYER NOT FOUND IN LINEUP smartma01
PLAYER NOT FOUND IN LINEUP craigto01
PLAYER NOT FOUND IN LINEUP hylanbo01
