In [None]:
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
# Show up to 999 columns when a DataFrame is displayed.
pd.set_option("display.max_columns", 999)

In [None]:
# Seasons to load: 2013 through 2022 inclusive (range's stop is exclusive).
years = list(range(2013, 2023))

In [None]:
# Four Factors: read one CSV per season and stack them into a single frame.
# Every season's rows get a string `year` column so they stay identifiable
# after stacking.
# NOTE: the 'Average' row is left in; per the original author's note it has nulls.
ff_dfs = [
    pd.read_csv(f'./CSVs/four factors/league_four_factors_{year}.csv').assign(year=str(year))
    for year in years
]

# ignore_index=True gives the combined frame one continuous 0..n-1 index.
# Without it each season reuses the same row labels, so any label-based
# lookup on the combined frame would match one row per season.
ff_df = pd.concat(ff_dfs, ignore_index=True)

percent_cols = ['OFFENSE: eFG%','OFFENSE: TOV%', 'OFFENSE: ORB%', 'DEFENSE: eFG%', 'DEFENSE: TOV%', 'DEFENSE: ORB%']

# These columns are handled as strings: drop the trailing '%' and divide by
# 100 to get a float fraction.
for col in percent_cols:
    ff_df[col] = ff_df[col].str.rstrip('%').astype('float') / 100.0

ff_df.head()

In [None]:
# Halfcourt / putback play context: read one CSV per season, tagging each
# season's rows with a string `year` column.
play_context_halfcourt = [
    pd.read_csv(f'./CSVs/play context/league_offense_halfcourt_and_putbacks_{year}.csv').assign(year=str(year))
    for year in years
]

# ignore_index=True avoids repeated row labels across seasons in the
# combined frame.
playcontext_halfcourt_df = pd.concat(play_context_halfcourt, ignore_index=True)

# These columns are handled as strings: drop the trailing '%' and divide by
# 100 to get a float fraction.
percent_cols = ['HALFCOURT: OREB%', 'HALFCOURT: % of Plays']
for col in percent_cols:
    playcontext_halfcourt_df[col] = playcontext_halfcourt_df[col].str.rstrip('%').astype('float') / 100.0

playcontext_halfcourt_df.head()

In [None]:
# Transition play context: read one CSV per season, tagging each season's
# rows with a string `year` column.
play_context_transition = [
    pd.read_csv(f'./CSVs/play context/league_offense_transition_{year}.csv').assign(year=str(year))
    for year in years
]

# ignore_index=True avoids repeated row labels across seasons in the
# combined frame. (The double underscore in the variable name is kept as-is
# so any existing references to it still resolve.)
playcontext__transition_df = pd.concat(play_context_transition, ignore_index=True)

# These columns are handled as strings: drop the trailing '%' and divide by
# 100 to get a float fraction.
percent_cols = ['ALL TRANSITION: Freq', 'OFF STEALS: Freq', 'OFF LIVE REBOUNDS: Freq']
for col in percent_cols:
    playcontext__transition_df[col] = playcontext__transition_df[col].str.rstrip('%').astype('float') / 100.0

playcontext__transition_df.head()

In [None]:
# Shooting accuracy: read one CSV per season, tagging each season's rows
# with a string `year` column.
shooting_accuracy = [
    pd.read_csv(f'./CSVs/shooting/league_offense_shooting_accuracy_{year}.csv').assign(year=str(year))
    for year in years
]

# ignore_index=True avoids repeated row labels across seasons in the
# combined frame.
shooting_accuracy_df = pd.concat(shooting_accuracy, ignore_index=True)

# These columns are handled as strings: drop the trailing '%' and divide by
# 100 to get a float fraction.
percent_cols = ['eFG%', 'Loc eFG%', 'Rim', 'Short Mid', 'Long Mid', 'All Mid', 'Corner Three', 'Non Corner', 'All Three']
for col in percent_cols:
    shooting_accuracy_df[col] = shooting_accuracy_df[col].str.rstrip('%').astype('float') / 100.0

shooting_accuracy_df.head()

In [None]:
# Shooting frequency: read one CSV per season, tagging each season's rows
# with a string `year` column.
shooting_frequency = [
    pd.read_csv(f'./CSVs/shooting/league_offense_shooting_frequency_{year}.csv').assign(year=str(year))
    for year in years
]

# ignore_index=True avoids repeated row labels across seasons in the
# combined frame.
shooting_frequency_df = pd.concat(shooting_frequency, ignore_index=True)

# These columns are handled as strings: drop the trailing '%' and divide by
# 100 to get a float fraction.
percent_cols = ['eFG%', 'Loc eFG%', 'Rim', 'Short Mid', 'Long Mid', 'All Mid', 'Corner Three', 'Non Corner', 'All Three']
for col in percent_cols:
    shooting_frequency_df[col] = shooting_frequency_df[col].str.rstrip('%').astype('float') / 100.0

# Only the cell's last expression is displayed, so preview once, after the conversion.
shooting_frequency_df.head()

In [None]:
# Per-season mean of every numeric shooting-accuracy column.
# numeric_only=True skips non-numeric columns (e.g. 'Team', presumably text);
# on pandas >= 2.0 a bare mean() raises TypeError when it meets them.
shooting_accuracy_df.groupby('year').mean(numeric_only=True)

In [None]:
# Per-season mean accuracy at the rim.
# The column is selected before aggregating so only 'Rim' is averaged;
# non-numeric columns (e.g. 'Team', presumably text) never reach mean(),
# which would raise TypeError on pandas >= 2.0.
fig, ax = plt.subplots(figsize=(7, 5))
shooting_accuracy_df.groupby('year')['Rim'].mean().plot(ax=ax)
ax.set_title('Accuracy at the Rim')
ax.set_ylabel('Shooting Percentage')
ax.set_xlabel('Year');

In [None]:
# Per-season mean accuracy on all mid-range shots.
# The column is selected before aggregating so only 'All Mid' is averaged;
# non-numeric columns (e.g. 'Team', presumably text) never reach mean(),
# which would raise TypeError on pandas >= 2.0.
fig, ax = plt.subplots(figsize=(7, 5))
shooting_accuracy_df.groupby('year')['All Mid'].mean().plot(ax=ax)
ax.set_title('Accuracy from Mid-Range')
ax.set_ylabel('Shooting Percentage')
ax.set_xlabel('Year');

In [None]:
# Per-season mean accuracy on all three-point shots.
# The column is selected before aggregating so only 'All Three' is averaged;
# non-numeric columns (e.g. 'Team', presumably text) never reach mean(),
# which would raise TypeError on pandas >= 2.0.
fig, ax = plt.subplots(figsize=(7, 5))
shooting_accuracy_df.groupby('year')['All Three'].mean().plot(ax=ax)
ax.set_title('Accuracy on Three Pointers')
ax.set_ylabel('Shooting Percentage')
ax.set_xlabel('Year');

In [None]:
# Histogram of the 'All Three' accuracy values across every row of the frame.
# Follow-up question: which teams shot above 40% from three?
three_point_accuracy = shooting_accuracy_df['All Three']
three_point_accuracy.hist()

In [None]:
# "Sniper" teams: every row whose three-point accuracy is above 40%,
# listed from most to least accurate.
top_shooting_teams = shooting_accuracy_df.loc[
    shooting_accuracy_df['All Three'] > .4,
    ['Team', 'All Three', 'year'],
]
top_shooting_teams.sort_values(by='All Three', ascending=False)

In [None]:
# Highest-volume three-point teams: rows whose three-point frequency is
# above 40%, showing the ten highest.
# NOTE: this rebinds `top_shooting_teams`, now built from the frequency frame.
top_shooting_teams = shooting_frequency_df.loc[
    shooting_frequency_df['All Three'] > .4,
    ['Team', 'All Three', 'year'],
]
top_shooting_teams.sort_values(by='All Three', ascending=False).head(10)

In [None]:
# Ridge plot of three-point shot frequency: one density curve per season,
# stacked vertically.
# source: https://python.plainenglish.io/ridge-plots-with-pythons-seaborn-4de5725881af

# Fully transparent axes backgrounds so overlapping rows don't hide each other.
sns.set_theme(style="white", rc={"axes.facecolor": (0, 0, 0, 0)})
g = sns.FacetGrid(shooting_frequency_df, row='year', aspect=9, height=1)
g.map_dataframe(sns.kdeplot, x='All Three', fill=True, alpha=1)  # filled density
g.map_dataframe(sns.kdeplot, x='All Three', color='black')  # black outline drawn on top
g.fig.subplots_adjust(hspace=-.5)  # negative spacing makes the rows overlap
g.set_titles("")
g.despine(left=True)

# Create the output folder first: savefig raises FileNotFoundError when the
# target directory does not exist.
ridge_plot_path = './Visuals/Ridge Plot NBA Frequency.jpg'
Path(ridge_plot_path).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(ridge_plot_path, bbox_inches="tight", dpi=100)

# Interesting to look at .4 and beyond on the x axis: as the years go on,
# teams are shooting more and more frequently from three.