In [31]:
import math
import pandas as pd
import numpy as np
# Load one game's raw CSV into the module-level frame `df`; every later cell reads and
# mutates this global. The path is relative to the notebook's working directory.
df = pd.read_csv("../data/raw/playoff/round_3/KHIvsCSK.csv")

In [32]:
def adjustments():
    """Clean the module-level frame ``df`` in place and add derived columns.

    Reads the columns DORSAL, COMMENT, TYPE, MARKERTIME, MINUTE, CODETEAM,
    POINTS_A and POINTS_B of the global ``df`` and:

    * renames DORSAL to JERSEY and drops COMMENT and TYPE;
    * adds Q_MIN / Q_SEC, the integer parts of the ``"MM:SS"`` MARKERTIME string
      (0 when MARKERTIME is not a string);
    * adds SECOND, GAME_SECONDS and SECONDS_TO_NEXT_PLAY;
    * truncates CODETEAM to its first three characters;
    * forward-fills POINTS_A / POINTS_B (remaining gaps become 0) and adds
      SCORE_A / SCORE_B, the row-to-row difference of those running totals.

    Returns nothing; all work is done by mutating ``df``. Every derived column is
    assigned back with ``df[col] = ...`` instead of ``df[col].fillna(..., inplace=True)``,
    because mutating a column selection in place is deprecated in pandas and has no
    effect under Copy-on-Write. The function can be called repeatedly on the same frame.
    """
    # change col name to Jersey
    df.rename(columns={"DORSAL": "JERSEY"}, inplace=True)

    # delete empty columns (errors="ignore" so re-running the cell does not raise KeyError)
    df.drop(columns=["COMMENT", "TYPE"], errors="ignore", inplace=True)

    def clock_part(value, position):
        # MARKERTIME looks like "MM:SS"; anything that is not a string counts as 0
        return int(value.split(":")[position]) if isinstance(value, str) else 0

    # get the int(min), int(sec) from time
    marker = df["MARKERTIME"]
    df["Q_MIN"] = marker.apply(lambda value: clock_part(value, 0))
    df["Q_SEC"] = marker.apply(lambda value: clock_part(value, 1))

    # obtain the game seconds columns and the seconds to next play column (mainly used in lineup analysis)
    # SECOND is 60 minus the clock seconds, with a result of 60 (clock at ":00") mapped to 0;
    # rows without a MARKERTIME stay missing
    elapsed = 60 - df["Q_SEC"]
    elapsed = elapsed.where(elapsed != 60, 0)
    df["SECOND"] = elapsed.where(marker.notna())

    raw_seconds = (df["MINUTE"] - 1) * 60 + df["SECOND"]
    # A row whose value is lower than the previous row's is lifted to that previous value.
    # NOTE(review): as in the original loop, the comparison uses the previous row's *raw*
    # value, not its corrected one, so two consecutive out-of-order rows can still leave a
    # decrease. Confirm whether a running maximum was intended.
    previous = raw_seconds.shift(fill_value=0)
    df["GAME_SECONDS"] = raw_seconds.where(~(raw_seconds < previous), previous)
    df["SECONDS_TO_NEXT_PLAY"] = df["GAME_SECONDS"].diff().fillna(0)

    # remove blanks from CODETEAM; missing codes are passed through instead of raising
    df["CODETEAM"] = df["CODETEAM"].apply(
        lambda code: code[0:3] if isinstance(code, str) else code
    )

    # get the points scored at the play from the cumulative totals
    for points_col, score_col in (("POINTS_A", "SCORE_A"), ("POINTS_B", "SCORE_B")):
        df[points_col] = df[points_col].ffill().fillna(0)
        df[score_col] = df[points_col].diff().fillna(0)
    
# Run the cleaning step defined above; it mutates the global `df` in place and returns nothing.
adjustments()

In [33]:
# store the team code names, looked up by column name instead of by position so that
# adding, dropping or reordering columns upstream cannot silently select another field
# NOTE(review): this replaces df.iloc[0, 13] / df.iloc[0, 15]; it assumes those positions
# held home_team_code / away_team_code, which the CODETEAM comparison below requires -- confirm
team_a_code = df["home_team_code"].iloc[0]
team_b_code = df["away_team_code"].iloc[0]

# store the separate dfs, the respective indices and the respective last indices of each team
team_a_df = df[df["CODETEAM"]==team_a_code]
team_a_indices = team_a_df.index
last_team_a_index = team_a_indices[-1]
team_b_df = df[df["CODETEAM"]==team_b_code]
team_b_indices = team_b_df.index
last_team_b_index = team_b_indices[-1]

# store the sub keywords and create empty lists of the substitution points
sub_words = ["IN","OUT"]
team_a_sub_points, team_b_sub_points = [], []

def find_sub_points(team_df, sub_words):
    """Return the index labels of one team's substitution points.

    A substitution point is a row of ``team_df`` whose PLAYTYPE is one of
    ``sub_words`` and whose next row *within team_df* has a PLAYTYPE that is not,
    i.e. the last row of each run of consecutive substitution rows. The team's
    final row is never reported, because no play follows it.

    Parameters
    ----------
    team_df : DataFrame holding a single team's rows, with a PLAYTYPE column.
    sub_words : collection of PLAYTYPE values that mark a substitution.

    Returns
    -------
    list of index labels of ``team_df``, in row order.
    """
    is_sub = team_df["PLAYTYPE"].isin(sub_words)
    # shift(-1) lines every row up with the team's next row; filling the last slot with
    # True keeps the final row from being reported
    next_is_sub = is_sub.shift(-1, fill_value=True)
    return team_df.index[(is_sub & ~next_is_sub).values].tolist()


# go through the dataframes one by one and find each substitution point by flagging each point
# where there is no substitution keyword after a substitution keyword
team_a_sub_points = find_sub_points(team_a_df, sub_words)
team_b_sub_points = find_sub_points(team_b_df, sub_words)

def _complete_lineup(team_df, later_rows, lineup, excluded, lineup_size):
    """Append to ``lineup`` (in place) players shown by later events to be on court.

    Walks ``later_rows`` in order until ``lineup`` holds ``lineup_size`` names. A
    player is appended when a row credits them with a play or an "OUT" before any
    "IN" has been seen for them. Players in ``excluded``, and players whose first
    later event is an "IN", are skipped: they entered after the segment in question.
    """
    entered_later = set(excluded)
    for row in later_rows:
        if len(lineup) >= lineup_size:
            break
        player = team_df.loc[row, "PLAYER"]
        if not isinstance(player, str):
            continue
        if team_df.loc[row, "PLAYTYPE"] == "IN":
            entered_later.add(player)
        elif player not in lineup and player not in entered_later:
            lineup.append(player)


def build_lineups(team_df, sub_points, final_index=None, lineup_size=5):
    """Map each substitution point to the lineup that was on court up to that point.

    The team's rows are cut into segments that each end at a substitution point.
    Within a segment every player credited with a play or an "OUT" is added to the
    running lineup; players coming "IN" are held back and only join the lineup of
    the next segment, replacing the ones that went "OUT".

    Parameters
    ----------
    team_df : DataFrame with one team's rows and PLAYER / PLAYTYPE columns.
        The integer index labels are used as row positions in the game.
    sub_points : iterable of index labels, ascending, that end each segment.
    final_index : optional label under which the lineup that follows the last
        substitution is stored. It is only stored when it holds at least
        ``lineup_size`` players, because the cell that writes the lineups into
        ``df`` indexes the first five entries of every stored lineup.
    lineup_size : number of players expected on court.

    Returns
    -------
    dict mapping index label -> list of player names, in insertion order.

    Differences from the two copy-pasted loops this replaces:

    * both teams use the same logic. The team A loop used to ``continue`` on an
      "IN" row and thereby skipped the fallback search whenever a segment ended
      on an "IN" row;
    * the fallback search no longer picks a player who only entered through a
      later substitution, and it treats a later "OUT" as evidence of being on court;
    * the lineup after the final substitution is kept (see ``final_index``).
      It used to be computed and then discarded.
    """
    lineups = {}
    team_rows = list(team_df.index)
    on_court = []
    start = 0
    for sub_point in sub_points:
        coming_in, going_out = [], []
        for row in (r for r in team_rows if start <= r <= sub_point):
            player = team_df.loc[row, "PLAYER"]
            if not isinstance(player, str):
                continue
            play_type = team_df.loc[row, "PLAYTYPE"]
            if play_type == "IN":
                # joins the next segment's lineup, not this one
                coming_in.append(player)
                continue
            if play_type == "OUT":
                going_out.append(player)
            if player not in on_court:
                on_court.append(player)

        # in the case that not all players have been in any plays until this sub, keep looking
        # in the team's later rows
        if len(on_court) < lineup_size:
            later_rows = [r for r in team_rows if r > sub_point]
            _complete_lineup(team_df, later_rows, on_court, coming_in, lineup_size)

        lineups[sub_point] = on_court

        # pass a clean version of the lineup along to the next segment
        # (rebinding, so the list stored above is not modified)
        on_court = [player for player in on_court if player not in going_out] + coming_in
        start = sub_point + 1

    if final_index is not None and final_index not in lineups:
        remaining_rows = [r for r in team_rows if r >= start]
        _complete_lineup(team_df, remaining_rows, on_court, (), lineup_size)
        if len(on_court) >= lineup_size:
            lineups[final_index] = on_court

    return lineups


# create a dictionary per team to store the lineups in, with each key being a substitution
# point as found before; the last key is the final row of df and holds the lineup that
# finished the game
team_a_lineups = build_lineups(team_a_df, team_a_sub_points, final_index=df.index[-1])
team_b_lineups = build_lineups(team_b_df, team_b_sub_points, final_index=df.index[-1])
    
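# TODO: at this point, iterate over the sub points and create a column that indicates how long a
# lineup stayed on court (the original note was left unfinished after "HOW LONG A" -- confirm intent)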

In [34]:
home_lineup_cols = ["home_team_player_{}".format(slot) for slot in range(1, 6)]
away_lineup_cols = ["away_team_player_{}".format(slot) for slot in range(1, 6)]

# create new columns to store the lineups in; they are created with object dtype because
# they will hold player-name strings (writing strings into a float NaN column relies on
# silent upcasting, which pandas has deprecated)
for lineup_col in home_lineup_cols + away_lineup_cols:
    df[lineup_col] = pd.Series(np.nan, index=df.index, dtype=object)


def write_lineups(frame, lineups, columns):
    """Write each stored lineup into ``columns`` of ``frame`` for the rows it covers.

    ``lineups`` maps an end row label to a list of player names. Each lineup is
    written to the rows from the previous end label (0 for the first one) through
    its own end label, both inclusive, so the row at a boundary ends up with the
    later lineup. Raises IndexError when a lineup has fewer names than ``columns``.
    """
    start_index = 0
    for end_index, lineup in lineups.items():
        for slot, column in enumerate(columns):
            frame.loc[start_index:end_index, column] = lineup[slot]
        start_index = end_index


# add home team lineups to main dataframe
write_lineups(df, team_a_lineups, home_lineup_cols)

# add away team lineups to main dataframe
write_lineups(df, team_b_lineups, away_lineup_cols)

# forward fill the rows after the last stored lineup until the end of the game; the result
# is assigned back because filling a column selection in place is deprecated in pandas
all_lineup_cols = home_lineup_cols + away_lineup_cols
df[all_lineup_cols] = df[all_lineup_cols].ffill()

In [35]:
# Raise the pandas row-display cap to 999 so the frame below is rendered without truncation
# (as long as it has at most 999 rows).
# NOTE(review): this changes a global option for every later cell, and the bare `df` below
# renders the whole frame into the saved notebook; consider df.head() instead.
pd.options.display.max_rows = 999
df

Unnamed: 0,CODETEAM,JERSEY,MARKERTIME,MINUTE,NUMBEROFPLAY,PLAYER,PLAYER_ID,PLAYINFO,PLAYTYPE,POINTS_A,POINTS_B,TEAM,home_team,home_team_code,away_team,away_team_code,Q_MIN,Q_SEC,SECOND,GAME_SECONDS,SECONDS_TO_NEXT_PLAY,SCORE_A,SCORE_B,home_team_player_1,home_team_player_2,home_team_player_3,home_team_player_4,home_team_player_5,away_team_player_1,away_team_player_2,away_team_player_3,away_team_player_4,away_team_player_5
0,,,,1,2,,,Begin Period,BP,0.0,0.0,,Khimki Moscow Region,KHI,CSKA Moscow,CSK,0,0,,,0.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
1,KHI,23.0,10:00,1,3,"THOMAS, MALCOLM",P004251,,TPOFF,0.0,0.0,Khimki Moscow Region,Khimki Moscow Region,KHI,CSKA Moscow,CSK,10,0,0.0,0.0,0.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
2,CSK,44.0,10:00,1,4,"HUNTER, OTHELLO",P005160,,TPOFF,0.0,0.0,CSKA Moscow,Khimki Moscow Region,KHI,CSKA Moscow,CSK,10,0,0.0,0.0,0.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
3,KHI,21.0,09:33,1,5,"ANDERSON, JAMES",P005938,Missed Three Pointer (0/1 - 0 pt),3FGA,0.0,0.0,Khimki Moscow Region,Khimki Moscow Region,KHI,CSKA Moscow,CSK,9,33,27.0,27.0,27.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
4,CSK,13.0,09:31,1,6,"RODRIGUEZ, SERGIO",PCVM,Def Rebound (1),D,0.0,0.0,CSKA Moscow,Khimki Moscow Region,KHI,CSKA Moscow,CSK,9,31,29.0,29.0,2.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
5,CSK,41.0,09:19,1,7,"KURBANOV, NIKITA",PJLX,Missed Two Pointer (0/1 - 0 pt),2FGA,0.0,0.0,CSKA Moscow,Khimki Moscow Region,KHI,CSKA Moscow,CSK,9,19,41.0,41.0,12.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
6,KHI,1.0,09:16,1,8,"SHVED, ALEXEY",PKVZ,Def Rebound (1),D,0.0,0.0,Khimki Moscow Region,Khimki Moscow Region,KHI,CSKA Moscow,CSK,9,16,44.0,44.0,3.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
7,KHI,1.0,09:08,1,9,"SHVED, ALEXEY",PKVZ,Foul Drawn (1),RV,0.0,0.0,Khimki Moscow Region,Khimki Moscow Region,KHI,CSKA Moscow,CSK,9,8,52.0,52.0,8.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
8,CSK,41.0,09:08,1,10,"KURBANOV, NIKITA",PJLX,Foul (1),CM,0.0,0.0,CSKA Moscow,Khimki Moscow Region,KHI,CSKA Moscow,CSK,9,8,52.0,52.0,0.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"
9,CSK,11.0,09:08,1,11,"ANTONOV, SEMEN",P004941,Out,OUT,0.0,0.0,CSKA Moscow,Khimki Moscow Region,KHI,CSKA Moscow,CSK,9,8,52.0,52.0,0.0,0.0,0.0,"THOMAS, MALCOLM","ANDERSON, JAMES","SHVED, ALEXEY","JENKINS, CHARLES","GILL, ANTHONY","HUNTER, OTHELLO","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","ANTONOV, SEMEN","HIGGINS, CORY"


In [28]:
# NOTE(review): rebinds the global `df` to a different CSV than the one loaded at the top.
# The execution counts of this cell and the next (In [28], In [29]) are lower than those of
# the cells above, so they were run out of order; check Restart & Run All.
df = pd.read_csv("../data/adjusted_with_lineups/f4/CSKvsMAD.csv")

In [29]:
# Display whatever `df` is currently bound to.
df

Unnamed: 0,CODETEAM,JERSEY,MARKERTIME,MINUTE,NUMBEROFPLAY,PLAYER,PLAYER_ID,PLAYINFO,PLAYTYPE,POINTS_A,POINTS_B,TEAM,home_team,home_team_code,away_team,away_team_code,Q_MIN,Q_SEC,SECOND,GAME_SECONDS,SECONDS_TO_NEXT_PLAY,SCORE_A,SCORE_B,home_team_player_1,home_team_player_2,home_team_player_3,home_team_player_4,home_team_player_5,away_team_player_1,away_team_player_2,away_team_player_3,away_team_player_4,away_team_player_5
0,,,,1,2,,,Begin Period,BP,0.0,0.0,,CSKA Moscow,CSK,Real Madrid,MAD,0,0,,,,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
1,CSK,44.0,10:00,1,3,"HUNTER, OTHELLO",P005160,,TPOFF,0.0,0.0,CSKA Moscow,CSKA Moscow,CSK,Real Madrid,MAD,10,0,0.0,0.0,,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
2,MAD,14.0,10:00,1,4,"AYON, GUSTAVO",P005927,,TPOFF,0.0,0.0,Real Madrid,CSKA Moscow,CSK,Real Madrid,MAD,10,0,0.0,0.0,0.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
3,CSK,11.0,09:45,1,5,"ANTONOV, SEMEN",P004941,Missed Two Pointer (0/1 - 0 pt),2FGA,0.0,0.0,CSKA Moscow,CSKA Moscow,CSK,Real Madrid,MAD,9,45,15.0,15.0,15.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
4,MAD,7.0,09:45,1,6,"DONCIC, LUKA",P005929,Def Rebound (1),D,0.0,0.0,Real Madrid,CSKA Moscow,CSK,Real Madrid,MAD,9,45,15.0,15.0,0.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
5,CSK,22.0,09:44,1,7,"HIGGINS, CORY",P006450,Foul (1),CM,0.0,0.0,CSKA Moscow,CSKA Moscow,CSK,Real Madrid,MAD,9,44,16.0,16.0,1.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
6,MAD,7.0,09:44,1,8,"DONCIC, LUKA",P005929,Foul Drawn (1),RV,0.0,0.0,Real Madrid,CSKA Moscow,CSK,Real Madrid,MAD,9,44,16.0,16.0,0.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
7,MAD,9.0,09:34,1,9,"REYES, FELIPE",PAAX,Offensive Foul (1),OF,0.0,0.0,Real Madrid,CSKA Moscow,CSK,Real Madrid,MAD,9,34,26.0,26.0,10.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
8,MAD,9.0,09:34,1,10,"REYES, FELIPE",PAAX,Turnover (1),TO,0.0,0.0,Real Madrid,CSKA Moscow,CSK,Real Madrid,MAD,9,34,26.0,26.0,0.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
9,CSK,11.0,09:34,1,11,"ANTONOV, SEMEN",P004941,Foul Drawn (1),RV,0.0,0.0,CSKA Moscow,CSKA Moscow,CSK,Real Madrid,MAD,9,34,26.0,26.0,0.0,0.0,0.0,"HUNTER, OTHELLO","ANTONOV, SEMEN","HIGGINS, CORY","RODRIGUEZ, SERGIO","KURBANOV, NIKITA","AYON, GUSTAVO","DONCIC, LUKA","REYES, FELIPE","CAMPAZZO, FACUNDO","TAYLOR, JEFFERY"
