In [110]:
#Taking only the parts of the game that are "in the clutch" (end of a close 4th or overtime)
import pandas as pd
import numpy as np
import re
import math

In [132]:
class clutch_data:
    """Accumulate per-player box-score stats over "clutch" plays only.

    A play is clutch when it happens late in the 4th quarter (or in any
    overtime period, which is treated like a 4th quarter) while the score is
    close; see ``is_clutch`` for the exact windows.

    ``loop`` walks the play-by-play rows in file order.  Stats are kept in
    ``self.players`` (player_id -> stat dict) for one game at a time and are
    appended to ``../data/ClutchPlayerData.csv`` by ``reset_dicts`` whenever
    the game changes and once more when the loop finishes.
    """

    # Clutch windows: (minutes-remaining upper bound, score-margin upper bound).
    # Both bounds are exclusive.
    FINAL_STRETCH_MINUTES = 2   # under 2:00 left ...
    FINAL_STRETCH_MARGIN = 6    # ... and the margin is 5 or less
    CLUTCH_WINDOW_MINUTES = 4   # 2:00-3:59 left ...
    CLUTCH_WINDOW_MARGIN = 9    # ... and the margin is 8 or less

    # Stat credited to the secondary player of a play, keyed by play type.
    _SECONDARY_STAT = {
        "Foul": "fouls drawn",
        "Turnover": "steals",
        "Shot Missed": "blocks",
        "Shot Made": "assists",
    }

    # shot_score value -> (attempt column, make column, points for a make).
    _FIELD_GOALS = {
        "2-pt": ("2pa", "2pm", 2),
        "3-pt": ("3pa", "3pm", 3),
    }

    def __init__(self):
        """Load the play-by-play and box-score CSVs and start with no game."""
        self.game_id = -1   # sentinel: no game seen yet
        self.players = {}   # player_id -> stat dict for the current game
        self.teams = {}     # reserved for team-level stats (not populated yet)
        self.pbp = pd.read_csv("../data/PlayByPlay.csv")
        self.boxscore = pd.read_csv("../data/BoxScoreData.csv")

        # All the stat columns each clutch row carries.
        self.columns = ["points", "rebounds", "assists",
                        "2pm", "2pa", "3pm", "3pa", "FTM", "FTA", "FGM depth", "FGA depth",
                        "FGM", "FGA", "offensive rebounds", "defensive rebounds",
                        "fouls", "fouls drawn", "turnovers", "steals", "blocks"]

    def is_clutch(self, row):
        """Return True when the play in ``row`` happened in clutch time.

        ``row`` must provide "time" (formatted like "M:SS.s"), "quarter",
        "home score" and "away score".  Clutch means 4th quarter or overtime
        and either
          * under 2 minutes left with a margin of 5 or less, or
          * 2:00-3:59 left with a margin of 8 or less.
        """
        quarter = row["quarter"]
        # A missing/non-text quarter cannot be classified; treat as not clutch.
        if not isinstance(quarter, str):
            return False
        # Every overtime period counts the same as the 4th quarter.
        if quarter != "4th quarter" and not quarter.endswith("overtime"):
            return False

        # Only whole minutes are needed for the current cutoffs.
        minutes = int(row["time"].split(":")[0])
        margin = abs(row["home score"] - row["away score"])

        if minutes < self.FINAL_STRETCH_MINUTES:
            return bool(margin < self.FINAL_STRETCH_MARGIN)
        # Includes minute 2 itself, so 2:00-2:59 is no longer a gap between
        # the two windows.
        if minutes < self.CLUTCH_WINDOW_MINUTES:
            return bool(margin < self.CLUTCH_WINDOW_MARGIN)
        return False

    def _blank_stat_line(self):
        """Return a fresh stat dict: the current game_id plus every column at 0."""
        stat_line = {"game_id": self.game_id}
        stat_line.update((column, 0) for column in self.columns)
        return stat_line

    def start_team(self):
        """Return a zeroed stat dict for a team in the current game."""
        return self._blank_stat_line()

    def reset_dicts(self):
        """Append the current game's player stats to the CSV and clear them.

        Nothing is written when no players have been recorded (e.g. before the
        first game, or for a game without clutch plays).  The file is opened
        in append mode without a header row, so re-running the notebook adds
        duplicate rows unless the file is removed first.
        """
        if self.players:
            player_df = (
                pd.DataFrame.from_dict(self.players, orient="index")
                .reset_index()
                .rename(columns={"index": "player_id"})
            )
            player_df["game_id"] = self.game_id
            player_df.to_csv("../data/ClutchPlayerData.csv", mode='a', index=False, header=False)

        # TODO: team-level output (ClutchTeamData.csv) is not implemented yet.
        self.players = {}

    def add_player(self):
        """Return a zeroed stat dict for a player in the current game."""
        return self._blank_stat_line()

    def get_distance(self, distance):
        """Parse a shot distance such as "21 ft" into an integer number of feet.

        Returns 1 when the distance is missing (NaN / not a string) or has no
        digits in it.
        """
        try:
            # Drop the "ft" suffix: keep only the first run of digits.
            return int(re.findall("[0-9]+", distance)[0])
        except (TypeError, IndexError):
            return 1

    def _record_field_goal(self, stats, distance, made):
        """Fold one field-goal attempt into the running average shot depths."""
        attempts = stats["FGA"]
        stats["FGA depth"] = (stats["FGA depth"] * attempts + distance) / (attempts + 1)
        stats["FGA"] = attempts + 1
        if made:
            makes = stats["FGM"]
            stats["FGM depth"] = (stats["FGM depth"] * makes + distance) / (makes + 1)
            stats["FGM"] = makes + 1

    def get_primary_stat(self, row):
        """Credit the play in ``row`` to its primary and secondary players.

        Both players must already have an entry in ``self.players``.  Play
        types other than Rebound, Foul, Turnover, Shot Missed and Shot Made
        change nothing.  The secondary player, when present, is credited with
        a foul drawn, steal, block or assist respectively.
        """
        play = row["play_type"]
        if play != "Rebound" and play not in self._SECONDARY_STAT:
            return

        stats = self.players[row["player_id"]]

        if play == "Rebound":
            stats["rebounds"] += 1
            if row["rebound"] == "Offensive":
                stats["offensive rebounds"] += 1
            else:
                stats["defensive rebounds"] += 1
            return  # rebounds have no secondary player

        if play == "Foul":
            stats["fouls"] += 1
        elif play == "Turnover":
            stats["turnovers"] += 1
        else:
            made = play == "Shot Made"
            shot_score = row["shot_score"]
            if shot_score == "free":
                stats["FTA"] += 1
                if made:
                    stats["FTM"] += 1
                    stats["points"] += 1
            else:
                self._record_field_goal(stats, self.get_distance(row["distance"]), made)
                if shot_score in self._FIELD_GOALS:
                    attempt_col, make_col, value = self._FIELD_GOALS[shot_score]
                    stats[attempt_col] += 1
                    if made:
                        stats[make_col] += 1
                        stats["points"] += value

        secondary_id = row["secondary_player_id"]
        if not pd.isna(secondary_id):
            self.players[secondary_id][self._SECONDARY_STAT[play]] += 1

    def loop(self):
        """Walk every play-by-play row and accumulate clutch stats per game.

        Finished games are flushed through ``reset_dicts`` when the game_id
        changes, and the last game is flushed when the loop ends, so no
        separate call is needed afterwards (an extra call is harmless).
        """
        for _, row in self.pbp.iterrows():
            if row["game_id"] != self.game_id:
                self.reset_dicts()
                self.game_id = row["game_id"]

            if not self.is_clutch(row):  # Only count plays within our clutch metric.
                continue

            # NOTE(review): a row whose player_id is NaN still gets an entry
            # here (keyed by NaN) and ends up as a blank-id row in the CSV -
            # confirm whether such rows should be skipped instead.
            primary_id = row["player_id"]
            if primary_id not in self.players:
                self.players[primary_id] = self.add_player()

            secondary_id = row["secondary_player_id"]
            if not pd.isna(secondary_id) and secondary_id not in self.players:
                self.players[secondary_id] = self.add_player()

            self.get_primary_stat(row)

        # Flush the final game; it has no following game_id change to trigger it.
        self.reset_dicts()
        


In [133]:
# Build the accumulator (its constructor reads the play-by-play and box-score
# CSVs) and walk every play-by-play row, collecting clutch-time player stats.
clutch = clutch_data()
clutch.loop()

In [134]:
# Write out any player stats still held in memory after the loop;
# reset_dicts() writes nothing when no players are pending.
clutch.reset_dicts()

In [33]:
# Scratch check: reload the raw play-by-play file and look at its last rows
# to confirm the column layout.
pbp = pd.read_csv("../data/PlayByPlay.csv")
pbp.tail()


Unnamed: 0,game_id,time,quarter,player_id,play_type,secondary_player_id,away score,home score,rebound,shot_type,distance,shot_score,away_lineup_id,home_lineup_id
1323495,14384,0:10.0,4th quarter,agbajoc01,Enters,clarkjo01,112,114,,,,,12338.0,11700.0
1323496,14384,0:05.0,4th quarter,ingrabr01,Shot Missed,,112,114,,jump,21 ft,2-pt,12338.0,11700.0
1323497,14384,0:00.0,4th quarter,marshna01,Rebound,,112,114,Offensive,,,,12338.0,11700.0
1323498,14384,0:00.0,4th quarter,marshna01,Shot Missed,,112,114,,layup,,2-pt,12338.0,11700.0
1323499,14384,0:00.0,4th quarter,georgke01,Rebound,,112,114,Defensive,,,,12338.0,11700.0


In [32]:
# Scratch check: a dict of dicts with orient="index" yields one row per outer
# key; reset_index + rename then turn that key into a named column.
test_dict1 = {"Hello": 1, "bad": 2}
test_dict2 = {"Hello": 2, "bad": 3}

dictionary = {"row1": test_dict1, "row2": test_dict2}

pd.DataFrame.from_dict(dictionary, orient="index").reset_index().rename(columns={"index": "row"})


Unnamed: 0,row,Hello,bad
0,row1,1,2
1,row2,2,3


In [34]:
# List the distinct play_type values present in the play-by-play data.
pbp.play_type.unique()

array(['Shot Made', 'Shot Missed', 'Rebound', 'Foul', 'Turnover',
       'Enters', 'Timeout'], dtype=object)

In [46]:
# Scratch check: the first run of digits in a string parses to an int
# (used to strip the unit suffix from shot distances).
int(re.findall("[0-9]+", "34hje")[0])

34