In [2]:
import pandas as pd
import numpy as np

pd.options.mode.chained_assignment = None
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [4]:
uchicago = pd.read_csv("w_uchicago_pbp.csv").iloc[:, 1:]

# uchicago = uchicago[uchicago.Opponent == 'Carthage']

In [5]:
def points_scored(action):
    if "3-pt" in action:
        return 3
    elif "free throw" in action:
        return 1
    else:
        return 2
    
def calculate_scores(df, lineup):
    current = df[df.Lineup == lineup]
    
    makes = current[current.Action.str.contains('made')]
    makes["Points"] = makes.Action.apply(points_scored)

    chicago_scores = makes[makes.Chicago == makes.Team].Points.sum()
    other_scores = makes[makes.Chicago != makes.Team].Points.sum()

    return (chicago_scores, other_scores)

In [6]:
def calculate_play(df, lineup, play):
    current = df[df.Lineup == lineup]
    plays = current[(current.Action.str.contains(play))]

    chicago = plays[(plays.Chicago == plays.Team)].shape[0]
    other = plays[(plays.Chicago != plays.Team)].shape[0]
    
    return (chicago, other)

In [7]:
def calculate_percentage(df, lineup, three=False, ft=False, att=False):
    current = df[df.Lineup == lineup]
    misses = current[(current.Action.str.contains("missed"))]
    makes = current[(current.Action.str.contains("made"))]
    
    if three:
        misses = misses[misses.Action.str.contains("3-pt")]
        makes = makes[makes.Action.str.contains("3-pt")]
    elif ft:
        misses = misses[misses.Action.str.contains("free throw")]
        makes = makes[makes.Action.str.contains("free throw")]
        
    chicago_misses = misses[(misses.Chicago == misses.Team)].shape[0]
    chicago_makes = makes[(makes.Chicago == makes.Team)].shape[0]
    other_misses = misses[(misses.Chicago != misses.Team)].shape[0]
    other_makes = makes[(makes.Chicago != makes.Team)].shape[0]
    
    chicago_attempts = chicago_misses + chicago_makes
    other_attempts = other_misses + other_makes
    
    if att:
        return (chicago_attempts, other_attempts)
    
    if (chicago_attempts == 0) and (other_attempts == 0):
        return (0, 0)
    elif chicago_attempts == 0:
        return (0, round(other_makes / (other_misses + other_makes), 2))
    elif other_attempts == 0:
        return (round(chicago_makes / (chicago_misses + chicago_makes), 2), 0)
    else:
        return (round(chicago_makes / (chicago_misses + chicago_makes), 2), 
                round(other_makes / (other_misses + other_makes), 2))

In [8]:
def sec_to_time(seconds):
    minutes = str(seconds // 60)
    
    if (seconds % 60) < 10:
        return minutes + ":0" + str(seconds % 60)
    else:
        return minutes + ":" + str(seconds % 60)

def time_passed(start, end):
    if start == '30300':
        start = "00:30"
    elif end == '30300':
        end = "00:30"
    
    start_time = int(start.split(":")[0])*60 + int(start.split(":")[1])
    end_time = int(end.split(":")[0])*60 + int(end.split(":")[1])
    
    if start_time > end_time:
        return start_time - end_time
    else:
        return 0

def calculate_time_played(df, lineup):
    total_time = 0
    for x in range(len(df[df.Lineup == lineup])-1):
        current_play = df[df.Lineup == lineup].reset_index(drop=False).values[x]
        next_play = df[df.Lineup == lineup].reset_index(drop=False).values[x+1]

        if current_play[0] != next_play[0] - 1:
            continue

        if current_play[-1] == 1:
            start = "20:00"
            end = next_play[5]
        elif next_play[-1] == -1:
            start = current_play[5]
            end = "00:00"
        else:
            start = current_play[5]
            end = next_play[5]

        total_time += time_passed(start, end)

    return round(total_time / 60, 2)

In [9]:
def calculate_gp(df, lineup):
    return len(pd.unique(df[df.Lineup == lineup].Game))

In [10]:
def analyze_lineup(df, lineup):
    return [calculate_gp(df, lineup), # GP
            calculate_time_played(df, lineup), # MP
            
            calculate_scores(df, lineup)[0], # PTS
            calculate_scores(df, lineup)[1], # OPP PTS
            
            calculate_scores(df, lineup)[0] - calculate_scores(df, lineup)[1], # +/-
            
            calculate_play(df, lineup, "Assist")[0], # AST
            
            calculate_play(df, lineup, "offensive rebound")[0], # OREB
            calculate_play(df, lineup, "offensive rebound")[1], # OPP OREB
            calculate_play(df, lineup, "defensive rebound")[0], # DREB
            
            calculate_percentage(df, lineup)[0], # FG%
            calculate_percentage(df, lineup, att=True)[0], # FGA
            calculate_percentage(df, lineup)[1], # OPP FG%
            calculate_percentage(df, lineup, att=True)[1], # OPP FGA
            
            calculate_percentage(df, lineup, three=True)[0], # 3P%
            calculate_percentage(df, lineup, three=True, att=True)[0], # 3PA
            calculate_percentage(df, lineup, three=True)[1], # OPP 3P%
            calculate_percentage(df, lineup, three=True, att=True)[1], # OPP 3PA
            
            calculate_percentage(df, lineup, ft=True, att=True)[0], # FTA
            calculate_percentage(df, lineup, ft=True)[0], # FT%
            
            calculate_play(df, lineup, "Turnover")[0], # TO
            calculate_play(df, lineup, "Turnover")[1],# OPP TO
            
            calculate_play(df, lineup, "Steal")[0], # STL
            calculate_play(df, lineup, "Block")[0], # BLK
            
            calculate_play(df, lineup, "Foul")[0], # PF
            calculate_play(df, lineup, "Foul")[1]] # OPP PF

In [11]:
lineups = [l for l in pd.unique(uchicago.Lineup) if len(l.split(", ")) == 5]

lineup_data = []
for l in lineups:
    lineup_data.append( [l] + analyze_lineup(uchicago, l) )

In [12]:
lineup_analysis = pd.DataFrame(lineup_data, 
                               columns = ['Lineup',
                                          'GP', 'MP', 
                                          'PTS', 'OPP PTS', 
                                          '+/-', 
                                          'AST',
                                          'OREB', 'OPP OREB', 'DREB', 
                                          'FG%', 'FGA', 'OPP FG%', 'OPP FGA',
                                          '3P%', '3PA', 'OPP 3P%', '3PA',
                                          'FTA', 'FT%', 
                                          'TO', 'OPP TO', 
                                          'STL', 'BLK',
                                          'PF', 'OPP PF'])

In [21]:
lineup_analysis = lineup_analysis.sort_values(by=['MP'], ascending=False).reset_index(drop=True)
# lineup_analysis.to_csv("w_lineups.csv")

lineup_analysis

Unnamed: 0,Lineup,GP,MP,PTS,OPP PTS,+/-,AST,OREB,OPP OREB,DREB,FG%,FGA,OPP FG%,OPP FGA,3P%,3PA,OPP 3P%,3PA.1,FTA,FT%,TO,OPP TO,STL,BLK,PF,OPP PF
0,"Elizabeth Shaw, Grace Hynes, Klaire Steffens, Mallory Brodnik, Peyton Van Soest",13,212.9,140,133,7,31,20,11,54,0.43,152,0.49,142,0.44,41,0.43,30,19,0.53,43,43,16,5,27,26
1,"Ellie Gross, Grace Hynes, Klaire Steffens, Mallory Brodnik, Peyton Van Soest",17,184.17,196,143,53,42,47,23,68,0.47,229,0.45,180,0.25,81,0.31,29,48,0.79,39,51,27,10,36,50
2,"Bella Alfaro, Grace Hynes, Klaire Steffens, Mallory Brodnik, Peyton Van Soest",8,47.03,57,36,21,7,5,12,36,0.49,59,0.28,67,0.29,14,0.17,18,10,0.5,17,10,5,3,9,12
3,"Ashley Gao, Ellie Gross, Grace Hynes, Klaire Steffens, Peyton Van Soest",7,29.28,49,41,8,9,9,6,17,0.49,63,0.5,50,0.2,10,0.5,4,20,0.75,8,18,7,1,13,15
4,"Ellie Gross, Grace Hynes, Klaire Steffens, Mallory Brodnik, Marissa Powe",13,26.0,42,39,3,9,8,4,27,0.52,50,0.39,51,0.43,7,0.19,16,15,0.87,12,12,7,5,4,14
5,"Ashley Gao, Ellie Gross, Grace Hynes, Klaire Steffens, Marissa Powe",11,22.78,49,41,8,13,12,2,9,0.47,55,0.65,34,0.4,15,0.5,10,15,0.6,5,12,8,1,9,14
6,"Elizabeth Shaw, Grace Hynes, Klaire Steffens, Mallory Brodnik, Marissa Powe",5,22.65,11,19,-8,3,4,2,6,0.29,21,0.53,17,0.0,1,0.43,7,2,0.5,8,8,5,0,5,5
7,"Elizabeth Shaw, Klaire Steffens, Mallory Brodnik, Marissa Powe, Peyton Van Soest",4,20.08,13,11,2,3,2,7,6,0.46,13,0.3,20,0.5,4,0.0,3,2,0.5,4,5,2,1,6,3
8,"Ellie Gross, Klaire Steffens, Mallory Brodnik, Marissa Powe, Peyton Van Soest",8,18.25,33,30,3,8,6,3,10,0.5,36,0.59,32,0.6,5,0.29,7,10,0.6,7,10,6,0,13,12
9,"Ashley Gao, Ellie Gross, Klaire Steffens, Marissa Powe, Peyton Van Soest",9,17.63,28,19,9,4,5,4,10,0.47,38,0.42,26,0.25,4,0.5,2,14,0.64,5,11,7,1,8,9
