In [16]:
import pandas as pd
import numpy as np
import os

# Dataset Preparation

In [17]:
# Read the per-team tables for the 2020-21 and 2021-22 seasons,
# tagging every row with the year in which that season ended
teams21 = pd.read_csv("team-data/teams21.csv").assign(Season=2021)
teams22 = pd.read_csv("team-data/teams22.csv").assign(Season=2022)

# Join the two seasons side by side on the team name. Columns that exist in
# both tables receive a '21' / '22' suffix (e.g. W becomes W21 and W22).
# The default inner join keeps only teams present in both files.
teams = pd.merge(teams21, teams22, on='TEAM', suffixes=['21', '22'])

# Win Differential

In [18]:
# Make sure the export folder is present before any csv is written;
# exist_ok=True turns this into a no-op when the folder already exists
os.makedirs(name='team-data/', exist_ok=True)

In [19]:
# Win differential: 2021-22 wins minus 2020-21 wins (positive = more wins)
teams['d_W'] = teams['W22'] - teams['W21']

# Order teams from largest to smallest differential and renumber rows from 0
teams_d = (
    teams
    .sort_values(by='d_W', ascending=False)
    .reset_index(drop=True)
)

# Show only the columns needed to compare the two seasons
display(teams_d.loc[:, ['TEAM', 'W21', 'W22', 'd_W']])

# Write the full ranked table to disk (the row index is written as the first column)
teams_d.to_csv("team-data/teams_d.csv")

Unnamed: 0,TEAM,W21,W22,d_W
0,Minnesota Timberwolves,23,46,23
1,Cleveland Cavaliers,22,44,22
2,Toronto Raptors,27,48,21
3,Memphis Grizzlies,38,56,18
4,Boston Celtics,36,51,15
5,Chicago Bulls,31,46,15
6,Golden State Warriors,39,53,14
7,Phoenix Suns,51,64,13
8,Miami Heat,40,53,13
9,Dallas Mavericks,42,52,10


# Images

In [20]:
# Stack both seasons into one long table (useful for visualization later).
# Presumably one row per team per season -- verify against the input files.
# Row labels from each input are kept as-is, so index values repeat.
teams_logo = pd.concat([teams21, teams22])

# Mascot = last space-separated word of the team name, lower-cased.
# The vectorized .str accessor replaces a per-row lambda. Taking the last
# piece ([-1]) gives the same result for any name containing a space, while a
# one-word name yields that word instead of raising IndexError and a missing
# name stays missing instead of raising AttributeError.
teams_logo['MASCOT'] = teams_logo['TEAM'].str.rsplit(" ", n=1).str[-1].str.lower()

# Build the link to each logo: <prefix><mascot>.png
url_prefix = "https://raw.githubusercontent.com/jeremydumalig/Most-Improved-Player/main/logos/"
teams_logo['URL'] = url_prefix + teams_logo['MASCOT'] + ".png"

# Export dataframe as csv (the row index is written as the first column)
teams_logo.to_csv("team-data/teams_logo.csv")