In [None]:
import os
import sys
import pandas as pd

from matplotlib import pyplot as plt

# Add the sibling utils folder to the import search path so the local plotting
# helper can be imported below.
# NOTE(review): the path is relative, so it resolves against the kernel's
# working directory; presumably the notebook is launched from its own folder — confirm.
sys.path.append("../utils/")
from shot_chart_plots import plot_scatter
# A bare `%matplotlib` (no backend argument) enables matplotlib's default
# interactive GUI backend rather than inline rendering.
%matplotlib

# Close every open matplotlib figure before the notebook creates new ones.
plt.close("all")

In [None]:
# Folder holding the CSV inputs read by this notebook.
# The location can be overridden per machine through the EUROLEAGUE_DATA_DIR
# environment variable; when it is unset the original default path is used.
# "~" is expanded here so the value is also usable by APIs that do not expand it.
data_dir = os.path.expanduser(
    os.environ.get(
        "EUROLEAGUE_DATA_DIR",
        "~/Documents/DataScience/Projects/euroleague_api/notebooks/data",
    )
)

In [None]:
# Read the 2023 shot data CSV from the data folder.
shot_csv_path = os.path.join(data_dir, "shot_data_2023.csv")
shot_df = pd.read_csv(shot_csv_path)

# Shot Data

In [None]:
# Show the distinct labels found in the ZONE column.
shot_df.ZONE.unique()

In [None]:
# Zone labels that this notebook treats as mid-range.
mid_range_zones = list("DEFG")

In [None]:
# Every zone label present in the data that is neither mid-range nor the
# blank " " label.
excluded_zones = [*mid_range_zones, " "]
non_mid_range_zones = [
    zone_label
    for zone_label in shot_df["ZONE"].unique()
    if zone_label not in excluded_zones
]

In [None]:
# Count of ID_ACTION entries per team (rows) and zone (columns).
team_shots_by_zone_df = pd.pivot_table(
    shot_df,
    index="TEAM",
    columns="ZONE",
    values="ID_ACTION",
    aggfunc="count",
)

In [None]:
# Share of each team's counted shots that fall in the mid-range zones,
# sorted ascending. The blank " " zone is left out of the denominator because
# it is excluded from non_mid_range_zones.
midrange_attempts = team_shots_by_zone_df[mid_range_zones].sum(axis=1)
all_zone_attempts = team_shots_by_zone_df[non_mid_range_zones + mid_range_zones].sum(axis=1)
team_perc_midrange_df = (midrange_attempts / all_zone_attempts).sort_values()
team_perc_midrange_df

In [None]:
# Draw one made-vs-missed scatter per zone label so the zone geometry can be
# inspected visually.
for zone_label in shot_df["ZONE"].unique():
    shots_in_zone = shot_df[shot_df.ZONE == zone_label]
    is_made = shots_in_zone["ID_ACTION"].isin(['2FGM', '3FGM'])
    is_missed = shots_in_zone["ID_ACTION"].isin(['2FGA', '3FGA'])
    plot_scatter(shots_in_zone[is_made], shots_in_zone[is_missed], title=zone_label)

In [None]:
# Keep only two-point attempts taken from the mid-range zones. The data has
# some irregularities where a 3PT shot is registered in a mid-range zone, so
# the action type is checked as well as the zone.
in_midrange_zone = shot_df["ZONE"].isin(mid_range_zones)
is_two_point_attempt = shot_df["ID_ACTION"].isin(['2FGM', '2FGA'])
midrange_df = shot_df[in_midrange_zone & is_two_point_attempt]

In [None]:
# Per (Gamecode, TEAM) counts of non-null "Season" values: once over every
# row of the shot data, once over the mid-range two-point attempts only.
game_team_keys = ["Gamecode", "TEAM"]
n_shots_df = shot_df.groupby(game_team_keys)[["Season"]].count()
n_midrange_df = midrange_df.groupby(game_team_keys)[["Season"]].count()

In [None]:
# Some teams register no mid-range shots in a game. Outer-merging the
# mid-range counts with the total counts keeps every (Gamecode, TEAM) pair,
# and the gaps this creates are filled with 0.
n_midrange_total_df = (
    n_midrange_df
    .merge(n_shots_df, left_index=True, right_index=True, how="outer")
    .set_axis(["midrange", "total"], axis=1)
    .fillna(0)
)
n_midrange_total_df.head()

In [None]:
# Field-goal attempts (made or missed, 2PT or 3PT) per (Gamecode, TEAM).
is_field_goal = shot_df["ID_ACTION"].isin(['2FGM', '3FGM', '2FGA', '3FGA'])
n_fg_shots_df = shot_df[is_field_goal].groupby(["Gamecode", "TEAM"])["Season"].count()

In [None]:
# Attach the field-goal counts and compute the share of field-goal attempts
# that were mid-range shots ("perc") for every (Gamecode, TEAM).
merged_df = (
    n_midrange_total_df
    .merge(n_fg_shots_df, left_index=True, right_index=True)
    .set_axis(["midrange", "total", "fg"], axis=1)
    .assign(perc=lambda counts: counts["midrange"] / counts["fg"])
)
merged_df

In [None]:
# Same as above: some teams took no midrange shots, so also count the total 2PT shots (i.e. layups too) and merge so that there are no missing teams after the groupby
# n_made_missed_midrange_df = midrange_df.groupby(["Gamecode", "TEAM", "ID_ACTION"])[["Season"]].count()
# n_made_missed_shots_df = shot_df[shot_df["ID_ACTION"].isin(['2FGM', '2FGA'])].groupby(["Gamecode", "TEAM", "ID_ACTION"])[["Season"]].count()

In [None]:
# n_made_missed_midrange_total2pt_df = n_made_missed_midrange_df.merge(n_made_missed_shots_df, left_index=True, right_index=True, how="outer")
# n_made_missed_midrange_total2pt_df.columns = ["midrange", "total"]
# n_made_missed_midrange_total2pt_df.fillna(0, inplace=True)
# n_made_missed_midrange_total2pt_df.head()

In [None]:
# Reshape from one row per (Gamecode, TEAM) to one row per game with the two
# teams side by side.
#
# The wide layout needs exactly two teams per game, so check that up front
# instead of relying on the total row count being even.
teams_per_game = merged_df.groupby(level="Gamecode").size()
if not teams_per_game.eq(2).all():
    offending_games = teams_per_game[teams_per_game.ne(2)].index.tolist()
    raise ValueError(
        f"Expected exactly two teams per game; offending Gamecodes: {offending_games}"
    )

# Label the teams "A"/"B" by their position *within* each game. This does not
# depend on the global row order, so one irregular game cannot shift the
# labels of every game that follows it.
merged_df["t"] = merged_df.groupby(level="Gamecode").cumcount().map({0: "A", 1: "B"})

merged_reshaped_df = merged_df.reset_index().pivot(index="Gamecode", columns="t")[["TEAM", "perc"]]
# Flatten the (value, slot) column MultiIndex into plain names.
merged_reshaped_df.columns = ["Team_A", "Team_B", "Midrange_perc_A", "Midrange_perc_B"]
merged_reshaped_df.head()

# Game Report

In [None]:
# Read the 2023 game report CSV from the data folder.
game_report_csv_path = os.path.join(data_dir, "game_report_2023.csv")
game_report_df = pd.read_csv(game_report_csv_path)

In [None]:
# Preview the first rows of the game report.
game_report_df.head(n=5)

In [None]:
# Map club code -> club name, built from the road-club columns of the game report.
road_clubs = game_report_df[["road.club.code", "road.club.name"]].drop_duplicates()
abbr_to_name_dict = road_clubs.set_index("road.club.code")["road.club.name"].to_dict()

In [None]:
# Slim view of the game report: game id, round, club codes and final scores.
game_report_keep_cols = [
    "Gamecode",
    "Round",
    "local.club.code",
    "road.club.code",
    "local.score",
    "road.score",
]
game_report_trunc_df = game_report_df[game_report_keep_cols]

# Merge Game Report with Shot data analysis

In [None]:
# Join the per-game mid-range shares with the game report on Gamecode.
# NOTE(review): the full game_report_df is merged here although
# game_report_trunc_df is defined just above — confirm which one is intended.
master_table = pd.merge(
    merged_reshaped_df.reset_index(),
    game_report_df,
    on="Gamecode",
)

In [None]:
# True when the local (home) club scored more than the road club.
master_table["home_win"] = master_table["local.score"].gt(master_table["road.score"])

In [None]:
# Preview the merged table.
master_table.head(n=5)

In [None]:
# Orient every game so that Team_A / Midrange_perc_A belong to the local (home)
# club and Team_B / Midrange_perc_B to the road club. The A/B labels assigned
# during the reshape carry no home/away meaning, so any row whose (Team_A, Team_B)
# pair does not already match (local.club.code, road.club.code) is swapped.
#
# This is done with one boolean mask instead of a Python-level iterrows loop
# that writes into the frame it is iterating over.
# NOTE(review): a row is swapped whenever the codes do not match exactly, so this
# assumes the shot data and the game report use identical club codes — confirm.
already_home_first = (
    (master_table["Team_A"] == master_table["local.club.code"])
    & (master_table["Team_B"] == master_table["road.club.code"])
)
needs_swap = ~already_home_first

# .to_numpy() drops the column labels; without it pandas would align the
# right-hand side by column name and the swap would be a no-op.
master_table.loc[needs_swap, ["Team_A", "Team_B"]] = (
    master_table.loc[needs_swap, ["Team_B", "Team_A"]].to_numpy()
)
master_table.loc[needs_swap, ["Midrange_perc_A", "Midrange_perc_B"]] = (
    master_table.loc[needs_swap, ["Midrange_perc_B", "Midrange_perc_A"]].to_numpy()
)

In [None]:
# True when the Team_A mid-range share is strictly greater than the Team_B one
# (equal shares therefore count as False).
master_table["home_greater_midrange_volume"] = master_table["Midrange_perc_A"].gt(
    master_table["Midrange_perc_B"]
)
master_table.head()

In [None]:
# Count the games in which the two flags disagree: home_win is True while
# home_greater_midrange_volume is False, or the other way round. Then express
# that count as a fraction of all games.
flags_disagree = master_table["home_win"] != master_table["home_greater_midrange_volume"]
n_wins_with_fewer_mid_range = flags_disagree.sum()
n_games = len(master_table)
ratio = n_wins_with_fewer_mid_range / n_games
print(ratio)

In [None]:
# master_table.pivot_table(index="home_greater_midrange_volume", columns="home_win", values="Gamecode", aggfunc="count")