In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os
import sys
# Put the project's src/ directory (relative to this notebook) first on the
# import path so that the local `data_loader` module below can be imported.
sys.path.insert(0, '../../src/')
from data_loader import load_data, load_game_data
# Show every column when a DataFrame is displayed (no truncation of wide frames).
pd.set_option('display.max_columns', None)

In [2]:
# Root of the project's data directory. The default is the original author's
# location; set the NBA_DATA_DIR environment variable to run the notebook on
# another machine without editing this cell.
DATA_DIR = os.environ.get(
    "NBA_DATA_DIR",
    "/home/matej/Documents/Projects/nba-data-mining/data",
)

path_lineups = os.path.join(DATA_DIR, "processed", "lineups-all-seasons.csv")
path_game_data = os.path.join(DATA_DIR, "processed", "game-data-extended.csv")
path_players = os.path.join(DATA_DIR, "raw", "player-data", "player_info.csv")

lineups = pd.read_csv(path_lineups)
game_data = pd.read_csv(path_game_data)
players = pd.read_csv(path_players)
# Exclude the 2019-20 season rows from the player table.
players = players[players["Season"] != "2019-20"]

* * *
### Most common starting lineups
Binary feature: 1 = the team started the game with its most common starting lineup of that season, 0 = it started with any other lineup.

In [3]:
# Keep only the part of the season label before the first "-" and store it as
# an integer (presumably "2018-19" -> 2018; confirm against the CSV).
# Casting to str first makes the cell safe to re-run: once the column is
# already integer, "2018" has no "-" and simply round-trips to 2018 instead of
# failing on the .str accessor.
game_data["season_name"] = (
    game_data["season_name"]
    .astype(str)
    .str.split("-")
    .str[0]
    .astype(int)
)

In [4]:
def _join_row_as_text(row):
    """Render every value of one row as text and glue them with commas."""
    return ",".join(row.astype(str))


# Positional column slices that make up each starting five.
# NOTE(review): columns 63-67 are presumably the home starters and 68-72 the
# visitor starters; confirm against the CSV header, since these are magic indices.
# NOTE(review): the joined string is order-sensitive, so the same five players
# listed in a different column order count as a different lineup.
home_starter_cols = game_data.columns[63:68]
visitor_starter_cols = game_data.columns[68:73]

game_data["home_team_lineup"] = game_data[home_starter_cols].apply(_join_row_as_text, axis=1)
game_data["visitor_team_lineup"] = game_data[visitor_starter_cols].apply(_join_row_as_text, axis=1)

# Placeholder columns for the "started with most common lineup" flags.
for placeholder_col in ("home_team_common_lineup", "visitor_team_common_lineup"):
    game_data[placeholder_col] = np.nan

In [5]:
def _most_common_lineup(lineup_strings):
    """Return the single most frequent lineup string within one group.

    ``Series.mode`` returns *all* modes when several lineups are tied. Passing
    it straight to ``agg`` would therefore yield an array for tied groups, and
    the following ``astype(str)`` would turn that array into a string such as
    "['a' 'b']" that can never equal a real lineup. Taking the first mode
    (``Series.mode`` returns them sorted) gives one deterministic lineup per
    group instead.
    """
    return lineup_strings.mode().iloc[0]


# Most common starting lineup per (season, team), indexed by
# ("season_name", "<side>_team_id").
df_home_lineups = (
    game_data.groupby(["season_name", "home_team_id"])["home_team_lineup"]
    .agg(_most_common_lineup)
    .astype(str)
)
df_visitor_lineups = (
    game_data.groupby(["season_name", "visitor_team_id"])["visitor_team_lineup"]
    .agg(_most_common_lineup)
    .astype(str)
)


In [6]:
def _started_with_common_lineup(games, common_lineups, team_col, lineup_col):
    """Flag, per game, whether a team's starting lineup is its most common one.

    Parameters
    ----------
    games : pandas.DataFrame
        Must contain "season_name", ``team_col`` and ``lineup_col``.
    common_lineups : pandas.Series
        Most common lineup string per group, indexed by
        ("season_name", ``team_col``).
    team_col : str
        Column holding the team id for the side being evaluated.
    lineup_col : str
        Column holding that side's comma-joined starting lineup string.

    Returns
    -------
    pandas.Series
        Boolean Series aligned to ``games.index``; True where the game's
        lineup string equals the group's most common lineup string.
    """
    keys = ["season_name", team_col]
    # One left join on (season, team) replaces the per-row scan over the whole
    # grouped index; row order and index of `games` are preserved.
    expected = games[keys].join(
        common_lineups.rename("expected_lineup"), on=keys
    )["expected_lineup"]
    # A missing group (NaN expected lineup) compares as False.
    return expected.eq(games[lineup_col])


game_data["home_team_common_lineup"] = _started_with_common_lineup(
    game_data, df_home_lineups, "home_team_id", "home_team_lineup"
)
game_data["visitor_team_common_lineup"] = _started_with_common_lineup(
    game_data, df_visitor_lineups, "visitor_team_id", "visitor_team_lineup"
)

In [7]:
# The comma-joined lineup strings were only needed to derive the flags.
game_data.drop(["home_team_lineup", "visitor_team_lineup"], axis=1, inplace=True)

# Store both flags as integers (1 = most common lineup started, 0 = otherwise).
for flag_col in ("home_team_common_lineup", "visitor_team_common_lineup"):
    game_data[flag_col] = game_data[flag_col].astype(int)

* * *
### Win/Loss ratio - Most common starting lineups

In [27]:
# Home results split by whether the home team started its most common lineup.
# Summing "home_win" counts home wins; the remaining games of each group are
# home losses.
home_results_by_flag = game_data.groupby(["home_team_common_lineup"])["home_win"]
home_common_won = home_results_by_flag.sum()
home_common_lost = home_results_by_flag.count() - home_common_won

In [23]:
# Visitor results split by whether the visitor started its most common lineup.
# A home win is a visitor loss, so the sum of "home_win" counts visitor losses
# and the remaining games of each group are visitor wins.
visitor_results_by_flag = game_data.groupby(["visitor_team_common_lineup"])["home_win"]
visitor_common_lost = visitor_results_by_flag.sum()
visitor_common_won = visitor_results_by_flag.count() - visitor_common_lost

In [32]:
# Side-by-side pies: home win/loss share with the most common lineup (left)
# versus any other lineup (right).
labels = ["Won", "Lost"]
titles = ["Home team most common lineup win/loss ratio", "Home team different lineup win/loss ratio"]
colors = ['lightgreen', 'crimson']

fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'domain'}, {'type': 'domain'}]],
    subplot_titles=titles,
)

# (flag value, trace name) per subplot column, left to right.
home_panels = [
    (1, "Home team most common lineup"),
    (0, "Home team different lineup"),
]
for panel_col, (lineup_flag, trace_name) in enumerate(home_panels, start=1):
    pie = go.Pie(
        labels=labels,
        values=[home_common_won[lineup_flag], home_common_lost[lineup_flag]],
        name=trace_name,
    )
    fig.add_trace(pie, 1, panel_col)

fig.update_layout(width=1500, height=500)
fig.update_traces(marker=dict(colors=colors))
fig.show()

In [33]:
# Side-by-side pies: visitor win/loss share with the most common lineup (left)
# versus any other lineup (right).
labels = ["Won", "Lost"]
titles = ["Visitor team most common lineup win/loss ratio", "Visitor team different lineup win/loss ratio"]
colors = ['lightgreen', 'crimson']

fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type': 'domain'}, {'type': 'domain'}]],
    subplot_titles=titles,
)

# (flag value, trace name) per subplot column, left to right.
visitor_panels = [
    (1, "Visitor team most common lineup"),
    (0, "Visitor team different lineup"),
]
for panel_col, (lineup_flag, trace_name) in enumerate(visitor_panels, start=1):
    pie = go.Pie(
        labels=labels,
        values=[visitor_common_won[lineup_flag], visitor_common_lost[lineup_flag]],
        name=trace_name,
    )
    fig.add_trace(pie, 1, panel_col)

fig.update_layout(width=1500, height=500)
fig.update_traces(marker=dict(colors=colors))
fig.show()