In this notebook, we analyse the historical trend of mid-range, paint and 3pt shot attempts in the Euroleague.

In [None]:
import os
import sys
import glob
import pandas as pd
import numpy as np

import plotly.express as px
from plotly import graph_objs as go
from matplotlib import pyplot as plt

# Add the project's utils folder to the import path so that the local
# `shot_chart_plots` module can be imported below.
sys.path.append("../utils/")
from shot_chart_plots import plot_scatter_single_df
# %matplotlib

# Close every matplotlib figure that is currently open.
plt.close("all")

In [None]:
# Bounds of the paint used by mid-range definition 1 further down: a shot counts as
# outside the paint when COORD_X falls outside `mid_range_x` or COORD_Y exceeds
# `mid_range_y`.
# NOTE(review): the values look like centimetres measured from the basket (245 would
# be half of a 4.9 m wide lane) — confirm against the euroleague-api coordinates.
mid_range_x = (-245, 245)
mid_range_y = 422.5

The data is collected via the [euroleague-api](https://pypi.org/project/euroleague-api/) package and stored in the `data` folder of the project directory.

In [None]:
# Load every shot-data CSV found in the project's data folder (collected with the
# euroleague-api package) and stack them into a single DataFrame.
shot_files = sorted(glob.glob("../data/*.csv"))  # sorted: glob order is OS-dependent
if not shot_files:
    # pd.concat on an empty list fails with an opaque "No objects to concatenate".
    raise FileNotFoundError("No shot data CSV files found in ../data/")
shot_df = pd.concat([pd.read_csv(file) for file in shot_files], ignore_index=True)

In [None]:
# Order the rows by season, then by game code, and renumber them from 0.
shot_df = shot_df.sort_values(by=["Season", "Gamecode"], ignore_index=True)

In [None]:
# Euclidean distance of every shot from the coordinate origin (0, 0).
dist = np.sqrt(shot_df["COORD_X"].pow(2).add(shot_df["COORD_Y"].pow(2)))

In [None]:
# Which mid-range definition the rest of the notebook uses (see the branches below).
mid_range_definition = 1  # or 2.

if mid_range_definition == 1:
    # Definition 1: any 2FG taken outside the paint.
    mid_range_def_mask = (
        (shot_df["COORD_X"] < mid_range_x[0])
        | (shot_df["COORD_X"] > mid_range_x[1])
        | (shot_df["COORD_Y"] > mid_range_y)
    )
elif mid_range_definition == 2:
    # Definition 2: any 2FG taken at least 8 ft (244 in the data's units) from the basket.
    mid_range_def_mask = dist >= 244
else:
    # Fail fast: only printing a hint would leave `mid_range_def_mask` undefined (or
    # stale from an earlier run of this cell), so later cells would crash or silently
    # use the wrong mask.
    raise ValueError(
        "Unknown mid_range_definition={!r}. Choose definition 1 or 2".format(
            mid_range_definition
        )
    )

In [None]:
# Zone labels treated as mid-range; every other label present in the data (apart from
# the blank " " label) is collected as a non-mid-range zone.
mid_range_zones = ["D", "E", "F", "G"]
non_mid_range_zones = [
    zone
    for zone in shot_df["ZONE"].unique()
    if zone not in (*mid_range_zones, " ")
]
fg_zones = [*mid_range_zones, *non_mid_range_zones]

# A mid-range attempt must also be a two-point action: the data contains irregular
# rows where a 3PT shot is registered in one of the mid-range zones.
mid_range_mask = (
    shot_df["ZONE"].isin(mid_range_zones)
    & shot_df["ID_ACTION"].isin(["2FGM", "2FGA", "2FGAB", "LAYUPATT", "LAYUPMD", "DUNK"])
    & mid_range_def_mask
    & shot_df["COORD_X"].gt(-679)  # drops outliers/irregular coordinates
    & dist.le(679)  # drops outliers/irregular coordinates
)

In [None]:
# Paint attempts: two-point action codes that the active mid-range definition does
# not classify as mid-range.
rim_mask = (
    shot_df["ID_ACTION"].isin(["2FGM", "2FGA", "LAYUPATT", "LAYUPMD", "DUNK", "2FGAB"])
    & ~mid_range_def_mask
)

In [None]:
# Three-point attempts: 3FG action codes recorded in zones H, I or J.
three_mask = (
    shot_df["ZONE"].isin(["H", "I", "J"])
    & shot_df["ID_ACTION"].isin(["3FGM", "3FGA", "3FGAB"])
    & dist.gt(600)  # drops outliers/irregular coordinates
)

In [None]:
# Split the shots into one frame per shot type using the boolean masks built above.
midrange_df = shot_df.loc[mid_range_mask]
rim_df = shot_df.loc[rim_mask]
three_df = shot_df.loc[three_mask]

In [None]:
# Visual sanity check of the three shot categories, one titled plot per category.
# `plot_scatter_single_df` is the project helper imported from
# ../utils/shot_chart_plots.py; presumably it scatter-plots the shot coordinates of
# the frame it receives — see that module for details.
plot_scatter_single_df(midrange_df, title="Mid range attempts")
plot_scatter_single_df(rim_df, title="Paint attempts")
plot_scatter_single_df(three_df, title="3pt attempts")

In [None]:
# Seasons before this one are left out of the per-game trends.
FIRST_SEASON = 2010

# Attempts per season for each shot type. Selecting the column before aggregating
# avoids counting every column of the frame only to keep one.
midrange_byseason_df = midrange_df.groupby("Season")["ID_ACTION"].count()
rim_byseason_df = rim_df.groupby("Season")["ID_ACTION"].count()
three_byseason_df = three_df.groupby("Season")["ID_ACTION"].count()

# Distinct games per season. The column is selected first because
# `GroupBy.nunique` takes no column argument: its only parameter is `dropna`, so a
# column name passed positionally would be interpreted as that flag.
n_games_df = shot_df.groupby("Season")["Gamecode"].nunique()

# Attempts per game: season totals divided by the games played in that season
# (aligned on the Season index), restricted to FIRST_SEASON onwards.
midrange_vol_df = (midrange_byseason_df / n_games_df).loc[lambda s: s.index >= FIRST_SEASON]

rim_vol_df = (rim_byseason_df / n_games_df).loc[lambda s: s.index >= FIRST_SEASON]

three_vol_df = (three_byseason_df / n_games_df).loc[lambda s: s.index >= FIRST_SEASON]

In [None]:
# Per-season attempt counts side by side, one column per shot type.
shot_type_df = pd.concat(
    [rim_byseason_df, midrange_byseason_df, three_byseason_df],
    axis=1,
    keys=["paint", "mid-range", "3pt"],
)

# Share of each shot type within a season, as a percentage of that season's row total,
# keeping seasons from 2010 onwards.
perc_shot_type_df = shot_type_df.div(shot_type_df.sum(axis=1), axis="index").mul(100)
perc_shot_type_df = perc_shot_type_df.loc[perc_shot_type_df.index >= 2010]

In [None]:
# One line per shot type showing its percentage share of attempts by season.
data = [
    go.Scatter(x=share.index, y=share, name=shot_type)
    for shot_type, share in perc_shot_type_df.items()
]
layout = go.Layout(
    title="Euroleague historic trend in % of 3PT vs mid-range vs paint attempts per game",
    xaxis=dict(title="Season"),
    yaxis=dict(title="% Attempts"),
)
fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# One line per shot type showing attempts per game by season.
data = [
    go.Scatter(x=per_game.index, y=per_game, name=label)
    for label, per_game in (
        ("mid-range", midrange_vol_df),
        ("paint", rim_vol_df),
        ("3pt", three_vol_df),
    )
]
layout = go.Layout(
    title="Euroleague historic trend in 3PT vs mid-range vs paint attempts",
    xaxis=dict(title="Season"),
    yaxis=dict(title="Attempts per game"),
)
fig = go.Figure(data=data, layout=layout)
fig.show()