# NBA Team Performance-Based Rewards
##### Identifies NBA teams that could be rewarded or punished for long-term performance.

### Interface Options

In [52]:
#Each pair below is (streak, window):
#  streak = number of qualifying seasons required
#  window = span of years in which those seasons must fall for the "window" outputs

#Input Threshold for Playoff Streak & Window
playoff_streak_threshold, playoff_window_threshold = 8, 10

#Input Threshold for Consecutive Championships Streak & Window
champ_streak_threshold, champ_window_threshold = 3, 5

#Input Threshold for Consecutive Conference Championships Streak & Window
conference_champ_streak_threshold, conference_champ_window_threshold = 4, 6

#Input Threshold for Non-Playoff Streak & Window
non_playoff_streak_threshold, non_playoff_window_threshold = 5, 7

#Input Threshold for Lowest Win Streak & Window
#count_lowest_win_teams = how many of the worst-record teams are kept per year
count_lowest_win_teams = 3
lowest_win_streak_threshold, lowest_win_window_threshold = 2, 3

---

### Import Libraries

In [53]:
#Import Libraries
import pandas as pd
import numpy as np
import string
import os
import warnings
warnings.filterwarnings("ignore", category=pd.errors.SettingWithCopyWarning)

---

### Load Data

In [54]:
#Identify Working Directory
#print(os.getcwd())

In [55]:
#Read the Standings and Playoffs Data Files (in that order); paths are relative to the working directory
standings_df, playoffs_df = (
    pd.read_csv(csv_name)
    for csv_name in ('nba_standings.csv', 'nba_playoffs.csv')
)

---

### Clean & Prepare Data

In [56]:
#Historical franchise names, grouped under the current team name they are rolled up into
franchise_aliases = {
    'New Orleans Pelicans': ['New Orleans Hornets', 'New Orleans/Oklahoma City Hornets'],
    'Brooklyn Nets': ['New Jersey Nets', 'New York Nets'],
    'Memphis Grizzlies': ['Vancouver Grizzlies'],
    'Charlotte Hornets': ['Charlotte Bobcats'],
    'Washington Wizards': [
        'Washington Bullets',
        'Baltimore Bullets',
        'Capital Bullets',
        'Chicago Packers',
        'Chicago Zephyrs',
    ],
    'Sacramento Kings': ['Kansas City Kings', 'Kansas City-Omaha Kings', 'Cincinnati Royals'],
    'Los Angeles Clippers': ['San Diego Clippers', 'Buffalo Braves'],
    'Golden State Warriors': ['San Francisco Warriors', 'Philadelphia Warriors'],
    'Houston Rockets': ['San Diego Rockets'],
    'Utah Jazz': ['New Orleans Jazz'],
    'Philadelphia 76ers': ['Syracuse Nationals'],
    'Atlanta Hawks': ['St. Louis Hawks'],
    'Los Angeles Lakers': ['Minneapolis Lakers'],
}

#Flatten into the direct old-name -> current-name lookup
team_mapping = {
    old_name: current_name
    for current_name, old_names in franchise_aliases.items()
    for old_name in old_names
}

#Year-based changes: (name in the data, last year the rule applies, name to assign).
#I am assuming Seattle will receive another team in the future, so only SuperSonics
#seasons through 2008 are folded into Oklahoma City. Charlotte Hornets seasons through
#2002 are folded into New Orleans.
year_based_renames = [
    ('Charlotte Hornets', 2002, 'New Orleans Pelicans'),
    ('Seattle SuperSonics', 2008, 'Oklahoma City Thunder'),
]

#Apply the direct mapping first, then the year-based rules, to both data sets
for frame in (standings_df, playoffs_df):
    frame['Team'] = frame['Team'].replace(team_mapping)
    for listed_name, last_year, assigned_name in year_based_renames:
        affected = (frame['Team'] == listed_name) & (frame['Year'] <= last_year)
        frame.loc[affected, 'Team'] = assigned_name

In [57]:
#Create Playoff Teams Dataframe (only the columns used downstream)
playoff_columns = ['Year','Team','Wins','Champion','Conference_Champion']
playoff_teams = playoffs_df[playoff_columns]

#Create Champion Teams Dataframe (rows flagged Champion == 1)
champ_teams = playoff_teams[playoff_teams['Champion'].eq(1)]

#Create Conference Champion Teams Dataframe (rows flagged Conference_Champion == 1)
conference_champ_teams = playoff_teams[playoff_teams['Conference_Champion'].eq(1)]

#Create Non-Playoff Teams Dataframe: an anti-join that keeps standings rows
#with no matching (Team, Year) row in the playoff data
standings_columns = ['Year','Team','Conference','W','L','WL_pct','GB','PPG','OPPG','SRS']
non_playoff_teams = (
    standings_df
    .merge(playoff_teams, on=['Team','Year'], how='left', indicator=True)
    .loc[lambda merged: merged['_merge'] == 'left_only', standings_columns]
)

#Create Lowest Win Teams Dataframe: the count_lowest_win_teams lowest WL_pct rows per year.
#Note these are drawn from the non-playoff teams only.
lowest_win_teams = (
    non_playoff_teams
    .sort_values(['Year', 'WL_pct'])
    .groupby('Year')
    .head(count_lowest_win_teams)
)

---

### Create Functions to Identify Teams Meeting Streak Requirements

In [58]:
def build_streaks_non_overlapping(data, streak_length):
    """
    Build non-overlapping streaks of consecutive years for each team.

    Each row of `data` marks one year in which a team qualified (e.g. made the
    playoffs). The function finds every run of consecutive years per team and
    cuts each run, from its first year onward, into chunks of exactly
    `streak_length` years. Leftover years at the end of a run that do not fill
    a whole chunk are discarded.

    Rows with a missing Team or Year are ignored, and repeated (Team, Year)
    rows are counted once, so a duplicated row cannot inflate a streak.

    Parameters
    ----------
    data : pandas.DataFrame
        A DataFrame containing at least:
        - 'Team' (team name or identifier)
        - 'Year' (year of appearance, integer)
        Other columns are ignored. `data` is not modified.
    streak_length : int
        The required number of consecutive years to count as a streak.
        Must be at least 1.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with columns:
        - 'Team': team name/identifier
        - 'Streak_Begin': first year of the streak
        - 'Streak_End': last year of the streak
        Sorted by Team and Streak_Begin. When no streak qualifies, an empty
        DataFrame with these same columns is returned.

    Raises
    ------
    ValueError
        If `streak_length` is less than 1.
    """
    #A non-positive length would never advance through a run
    if streak_length < 1:
        raise ValueError("streak_length must be a positive integer")

    output_columns = ["Team", "Streak_Begin", "Streak_End"]

    #Keep only the needed columns, clean them, and sort so consecutive years are adjacent
    df = data[["Team", "Year"]].dropna().copy()
    df["Year"] = df["Year"].astype(int)
    df = df.drop_duplicates().sort_values(["Team", "Year"]).reset_index(drop=True)

    #Find breaks in consecutive years for each team
    #Year_Diff > 1 indicates a gap; the first row of each team has no predecessor,
    #so its NaN diff is filled with 1 (i.e. "not a break")
    df["Year_Diff"] = df.groupby("Team")["Year"].diff().fillna(1)
    df["Break"] = df["Year_Diff"] > 1

    #Running count of breaks per team = identifier of the continuous run a row belongs to
    df["Run_ID"] = df.groupby("Team")["Break"].cumsum()

    results = []

    #Process each continuous run separately
    for (team, _run_id), group in df.groupby(["Team", "Run_ID"]):
        years = group["Year"].tolist()

        #Step through the run in fixed, non-overlapping chunks of streak_length years
        for start in range(0, len(years) - streak_length + 1, streak_length):
            results.append({
                "Team": team,
                "Streak_Begin": years[start],
                "Streak_End": years[start + streak_length - 1]
            })

    #Passing explicit columns keeps the sort below valid even when nothing qualified
    return (
        pd.DataFrame(results, columns=output_columns)
        .sort_values(["Team", "Streak_Begin"])
        .reset_index(drop=True)
    )


def find_window_nonoverlap(data, streak, window):
    """
    Identify non-overlapping groups of `streak` observations that fall inside
    a span of `window` years, for each team.

    For every team the distinct years are scanned in ascending order. Starting
    at the current position, the next `streak` years are taken as a candidate;
    it qualifies when last year - first year <= window - 1. After a qualifying
    group the scan skips past all of its `streak` observations, so no year is
    used in two groups. Otherwise the scan advances by one observation and
    tries again.

    Parameters
    ----------
    data : pandas.DataFrame
        DataFrame containing at least:
        - 'Team' (team name or identifier)
        - 'Year' (year of appearance, integer)
        Rows with a missing Team or Year are dropped and repeated
        (Team, Year) rows are counted once. `data` is not modified.
    streak : int
        Number of observations required within the window. Must be at least 1.
    window : int
        Length of the time window in years (inclusive) the observations must fit in.

    Returns
    -------
    pandas.DataFrame
        Columns:
        - 'Team': team name or identifier
        - 'Window_Start': first year of the qualifying group
        - 'Window_End': last year of the qualifying group
        - 'Count': number of observations in the group (always equal to `streak`)
        - 'Years_In_Window': list of the years in the group
        Sorted by Team and Window_Start. When nothing qualifies, an empty
        DataFrame with these same columns is returned.

    Raises
    ------
    ValueError
        If `streak` is less than 1.
    """
    #A non-positive streak would produce an empty candidate group
    if streak < 1:
        raise ValueError("streak must be a positive integer")

    output_columns = ['Team', 'Window_Start', 'Window_End', 'Count', 'Years_In_Window']

    #Keep only required columns and clean data
    df = data[['Team', 'Year']].dropna().copy()
    df['Year'] = df['Year'].astype(int)
    df = df.drop_duplicates().sort_values(['Team', 'Year'])

    results = []

    #Process each team separately
    for team, g in df.groupby('Team', sort=False):
        years = g['Year'].tolist()
        idx = 0

        #Stop as soon as fewer than `streak` observations remain
        while idx + streak <= len(years):
            candidate_years = years[idx: idx + streak]

            #Check if these observations fit within the time window
            if candidate_years[-1] - candidate_years[0] <= window - 1:
                results.append({
                    'Team': team,
                    'Window_Start': candidate_years[0],
                    'Window_End': candidate_years[-1],
                    'Count': streak,
                    'Years_In_Window': candidate_years
                })
                #Skip every observation just used (non-overlapping)
                idx += streak
            else:
                #Not qualifying: slide forward by one observation and re-check
                idx += 1

    #Passing explicit columns keeps the sort below valid even when nothing qualified
    return (
        pd.DataFrame(results, columns=output_columns)
        .sort_values(['Team', 'Window_Start'])
        .reset_index(drop=True)
    )

---

### Run Functions to Identify Teams Meeting Streak Requirements

In [59]:
#A window must be strictly longer than its streak. Any window threshold that is
#not greater than its streak threshold is raised to streak + 1; others are kept as entered.

#Playoff window
playoff_window_threshold = (
    playoff_streak_threshold + 1
    if playoff_window_threshold <= playoff_streak_threshold
    else playoff_window_threshold
)

#Championship window
champ_window_threshold = (
    champ_streak_threshold + 1
    if champ_window_threshold <= champ_streak_threshold
    else champ_window_threshold
)

#Conference championship window
conference_champ_window_threshold = (
    conference_champ_streak_threshold + 1
    if conference_champ_window_threshold <= conference_champ_streak_threshold
    else conference_champ_window_threshold
)

#Non-playoff window
non_playoff_window_threshold = (
    non_playoff_streak_threshold + 1
    if non_playoff_window_threshold <= non_playoff_streak_threshold
    else non_playoff_window_threshold
)

#Lowest win window
lowest_win_window_threshold = (
    lowest_win_streak_threshold + 1
    if lowest_win_window_threshold <= lowest_win_streak_threshold
    else lowest_win_window_threshold
)

In [60]:
#For each category: *_streaks holds runs of consecutive qualifying years,
#*_windows holds groups of qualifying years that fit inside the window threshold.

#Playoff appearances
playoff_streaks = build_streaks_non_overlapping(
    data=playoff_teams,
    streak_length=playoff_streak_threshold,
)
playoff_windows = find_window_nonoverlap(
    data=playoff_teams,
    streak=playoff_streak_threshold,
    window=playoff_window_threshold,
)

#Championships
champ_streaks = build_streaks_non_overlapping(
    data=champ_teams,
    streak_length=champ_streak_threshold,
)
champ_windows = find_window_nonoverlap(
    data=champ_teams,
    streak=champ_streak_threshold,
    window=champ_window_threshold,
)

#Conference Championships
conference_champ_streaks = build_streaks_non_overlapping(
    data=conference_champ_teams,
    streak_length=conference_champ_streak_threshold,
)
conference_champ_windows = find_window_nonoverlap(
    data=conference_champ_teams,
    streak=conference_champ_streak_threshold,
    window=conference_champ_window_threshold,
)

#Non-Playoff seasons
non_playoff_streaks = build_streaks_non_overlapping(
    data=non_playoff_teams,
    streak_length=non_playoff_streak_threshold,
)
non_playoff_windows = find_window_nonoverlap(
    data=non_playoff_teams,
    streak=non_playoff_streak_threshold,
    window=non_playoff_window_threshold,
)

#Lowest Win seasons (team was among the lowest-win teams of the year)
lowest_win_streaks = build_streaks_non_overlapping(
    data=lowest_win_teams,
    streak_length=lowest_win_streak_threshold,
)
lowest_win_windows = find_window_nonoverlap(
    data=lowest_win_teams,
    streak=lowest_win_streak_threshold,
    window=lowest_win_window_threshold,
)

---

### Save Outputs to csv

In [71]:
#Output file name -> result table; files are written to the working directory without the index column
output_tables = {
    "Output_Playoff_Streaks.csv": playoff_streaks,
    "Output_Playoff_Windows.csv": playoff_windows,
    "Output_Championship_Streaks.csv": champ_streaks,
    "Output_Championship_Windows.csv": champ_windows,
    "Output_Conference_Championship_Streaks.csv": conference_champ_streaks,
    "Output_Conference_Championship_Windows.csv": conference_champ_windows,
    "Output_Non_Playoff_Streaks.csv": non_playoff_streaks,
    "Output_Non_Playoff_Windows.csv": non_playoff_windows,
    "Output_Lowest_Win_Streaks.csv": lowest_win_streaks,
    "Output_Lowest_Win_Windows.csv": lowest_win_windows,
}

for csv_name, table in output_tables.items():
    table.to_csv(csv_name, index=False)

In [None]:
#The End