# Data preparation

## Database Connection

We used a free service to host our database. The Database is in PostgreSQL.

In [1]:
import json
import psycopg2
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
import warnings
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', None)

In [2]:
# DB Credentials
with open("../config.json") as config_file:
    config = json.load(config_file)

host = config["db_host"]
user = config["db_user"]
password = config["db_password"]
database = config["db_database"]
schema = config["db_schema"]

In [3]:
connection = psycopg2.connect(
    host=host,
    user=user,
    password=password,
    database=database
)

cursor = connection.cursor()

def execute(query):
    cursor.execute(query)
    connection.commit()
    return cursor.fetchall()

def fetch(query):
    cursor.execute(query)
    return cursor.fetchall()

SELECT = "SELECT * FROM " + schema + "." # + table_name 
INSERT = "INSERT INTO " + schema + "." # + table_name + " VALUES " + values
UPDATE = "UPDATE " + schema + "." # + table_name + " SET " + column_name + " = " + value
DELETE = "DELETE FROM " + schema + "."  # + table_name + " WHERE " + column_name + " = " + value

In [4]:
awards_players = fetch(SELECT + "awards_players") # awards and prizes received by players across 10 seasons,
coaches = fetch(SELECT + "coaches") # all coaches who've managed the teams during the time period,
players = fetch(SELECT + "players") # details of all players,
players_teams = fetch(SELECT + "players_teams") # performance of each player for each team they played,
series_post = fetch(SELECT + "series_post") # series' results,
teams = fetch(SELECT + "teams") # performance of the teams for each season,
teams_post = fetch(SELECT + "teams_post") # results of each team at the post-season.

In [5]:
#save the data in a dataframe
awards_players_df = pd.DataFrame(awards_players, columns=['playerID', 'award', 'year', 'lgID'])
coaches_df = pd.DataFrame(coaches, columns=['coachID', 'year', 'tmID', 'lgID', 'stint', 'won', 'lost', 'post_wins', 'post_losses'])
players_df = pd.DataFrame(players, columns=['bioID', 'pos', 'firstseason', 'lastseason', 'height', 'weight', 'college', 'collegeOther', 'birthDate', 'deathDate'])
players_teams_df = pd.DataFrame(players_teams, columns=['playerID', 'year', 'stint', 'tmID', 'lgID', 'GP', 'GS', 'minutes', 'points', 'oRebounds', 'dRebounds', 'rebounds', 'assists', 'steals', 'blocks', 'turnovers', 'PF', 'fgAttempted', 'fgMade', 'ftAttempted', 'ftMade', 'threeAttempted', 'threeMade', 'dq', 'PostGP', 'PostGS', 'PostMinutes', 'PostPoints', 'PostoRebounds', 'PostdRebounds', 'PostRebounds', 'PostAssists', 'PostSteals', 'PostBlocks', 'PostTurnovers', 'PostPF', 'PostfgAttempted', 'PostfgMade', 'PostftAttempted', 'PostftMade', 'PostthreeAttempted', 'PostthreeMade', 'PostDQ'])
series_post_df = pd.DataFrame(series_post, columns=['year', 'round', 'series', 'tmIDWinner', 'lgIDWinner', 'tmIDLoser', 'lgIDLoser', 'W', 'L'])
teams_df = pd.DataFrame(teams, columns=['year', 'lgID', 'tmID', 'franchID', 'confID', 'divID', 'rank', 'playoff', 'seeded', 'firstRound', 'semis', 'finals', 'name', 'o_fgm', 'o_fga', 'o_ftm', 'o_fta', 'o_3pm', 'o_3pa', 'o_oreb', 'o_dreb', 'o_reb', 'o_asts', 'o_pf', 'o_stl', 'o_to', 'o_blk', 'o_pts', 'd_fgm', 'd_fga', 'd_ftm', 'd_fta', 'd_3pm', 'd_3pa', 'd_oreb', 'd_dreb', 'd_reb', 'd_asts', 'd_pf', 'd_stl', 'd_to', 'd_blk', 'd_pts', 'tmORB', 'tmDRB', 'tmTRB', 'opptmORB', 'opptmDRB', 'opptmTRB', 'won', 'lost', 'GP', 'homeW', 'homeL', 'awayW', 'awayL', 'confW', 'confL', 'min', 'attend', 'arena'])
teams_post_df = pd.DataFrame(teams_post, columns=['year', 'tmID', 'lgID', 'W', 'L'])

#make a dictionary with all the dataframes
dfs = {'awards_players_df': awards_players_df, 'coaches_df': coaches_df, 'players_df': players_df, 'players_teams_df': players_teams_df, 'series_post_df': series_post_df, 'teams_df': teams_df, 'teams_post_df': teams_post_df}

So with that we end our understanding phase.
Our main takeaways are:
- There are dead players in the players table. We should take that into account when doing the analysis.
- There are players that have not played any season of the seasons given. We should take that into account when doing the analysis. There are 338 players that have not played any season.
- There are no Null entries (although there values that are simply an empty string)
- There are some columns with the DataType "object", most of them being strings.
- There are binary objects (like confID and playoff, in the 'teams' table, with the values "Y" or "N") that could be substituted by a binary, as well as ternary objects (like the firstRound, semis and finals in the 'teams' table, with the values "W", "L" or "") that could also be transformed.
- There are players with no position and no college assigned ("").
- There are players with no date of birth in the record (0000-00-00).
- There is the need to do null value uniformization, as there are some columns with empty strings, others with default 0 values and other values that represent null.
- The height and weight variables have default 0 values and should be treated as null values.
- The number of games played by each team differs (there may be teams that are no longer playing), so we can't compare the number of wins and losses directly. Win percentage should be used.
- In terms of win percentage, it seems like a competitive league, with more than half of the teams having a win percentage of 50% or more, taking advantage of the worst teams. There is also just one team below 40% of wins.
- There are teams that are no longer playing.
- There are a lot of highly correlated variables.

## Preparing the data for the model

In this notebook, we will prepare the data for the model. Having done the understanding in the [previous notebook](understanding.ipynb), we will now prepare the data for the model. From the understanding we came to the following conclusions:

So with that we end our understanding phase.
Our main takeaways are:
- There are dead players in the players table. We should take that into account when doing the analysis.
- There are players that have not played any season of the seasons given. We should take that into account when doing the analysis. There are 338 players that have not played any season.
- There are no Null entries (although there values that are simply an empty string)
- There are some columns with the DataType "object", most of them being strings.
- There are binary objects (like confID and playoff, in the 'teams' table, with the values "Y" or "N") that could be substituted by a binary, as well as ternary objects (like the firstRound, semis and finals in the 'teams' table, with the values "W", "L" or "") that could also be transformed.
- There are players with no position and no college assigned ("").
- There are players with no date of birth in the record (0000-00-00).
- There is the need to do null value uniformization, as there are some columns with empty strings, others with default 0 values and other values that represent null.
- The height and weight variables have default 0 values and should be treated as null values.
- The number of games played by each team differs (there may be teams that are no longer playing), so we can't compare the number of wins and losses directly. Win percentage should be used.
- In terms of win percentage, it seems like a competitive league, with more than half of the teams having a win percentage of 50% or more, taking advantage of the worst teams. There is also just one team below 40% of wins.
- There are teams that are no longer playing.
- There are a lot of highly correlated variables.

After considering our takeaways, we will now prepare the data for the model. We will do the following:
- Remove the players that have not played any season, and, if a player died, remove the seasons after the death.
- Transform the binary objects into binary values.
- Transform the ternary objects into binary values. (where the third value is a null value - after the null uniformization these are considered as binary objects too)
- Null uniformization: transform the empty strings and default 0 values into null values.
- Analysis null values: analyze the null values and decide what to do with them.
- Calculate win percentage for each team and add it to the teams table.

We begin by excluding columns that consistently have identical values since they do not contribute any valuable information to the model. However, we will retain the 'first season' and 'last season' of a player, as we intend to populate them with data.

In [6]:
# Drop columns whose values are always the same
for df in dfs:
    for col in dfs[df].columns:
        if len(dfs[df][col].unique()) == 1 and col not in ['firstseason', 'lastseason'] :
            print(df, col)
            dfs[df].drop(col, inplace=True, axis=1)

awards_players_df lgID
coaches_df lgID
players_teams_df lgID
series_post_df lgIDWinner
series_post_df lgIDLoser
teams_df lgID
teams_df divID
teams_df seeded
teams_df tmORB
teams_df tmDRB
teams_df tmTRB
teams_df opptmORB
teams_df opptmDRB
teams_df opptmTRB
teams_post_df lgID


### Null uniformization

We identified the following columns that have null values, but are not identified as such:
- players: height, weight, birthDate, position, college, deathDate
- teams: firstRound, semis, finals

In [7]:
#If date == 00-00-00, replace with null (birthDate and deathDate)

dfs["players_df"]["birthDate"] = dfs["players_df"]["birthDate"].replace('00-00-00', None)
dfs["players_df"]["birthDate"] = dfs["players_df"]["birthDate"].replace('0000-00-00', None)
dfs["players_df"]["deathDate"] = dfs["players_df"]["deathDate"].replace('00-00-00', None)
dfs["players_df"]["deathDate"] = dfs["players_df"]["deathDate"].replace('0000-00-00', None)

# If value == 0, replace with median (height, weight)

dfs["players_df"]["height"].fillna(dfs["players_df"]["height"].mean(), inplace=True)
dfs["players_df"]["weight"].fillna(dfs["players_df"]["weight"].mean(), inplace=True)

# If value == "", replace with null (college, collegeOther, firstRound, semis, finals)

dfs["players_df"]["college"] = dfs["players_df"]["college"].replace('', None)
dfs["players_df"]["collegeOther"] = dfs["players_df"]["collegeOther"].replace('', None)
dfs["teams_df"]["firstRound"] = dfs["teams_df"]["firstRound"].replace('', 'NQ')
dfs["teams_df"]["semis"] = dfs["teams_df"]["semis"].replace('', 'NQ')
dfs["teams_df"]["finals"] = dfs["teams_df"]["finals"].replace('', 'NQ')

dfs["players_df"].head()

Unnamed: 0,bioID,pos,firstseason,lastseason,height,weight,college,collegeOther,birthDate,deathDate
0,abrahta01w,C,0,0,74.0,190,George Washington,,1975-09-27,
1,abrossv01w,F,0,0,74.0,169,Connecticut,,1980-07-09,
2,adairje01w,C,0,0,76.0,197,George Washington,,1986-12-19,
3,adamsda01w,F-C,0,0,73.0,239,Texas A&M,Jefferson College (JC),1989-02-19,
4,adamsjo01w,C,0,0,75.0,180,New Mexico,,1981-05-24,


### Remove the players that have not played any season

In [8]:
#players that have not played in the last 10 years
players_not_played = fetch("SELECT p.bioid FROM wnba.players p WHERE p.bioid not in (select pt.playerid  from wnba.players_teams pt)")
print("Number of players that haven't played: " + 
      str(len(players_not_played)))

players_not_played_df = pd.DataFrame(players_not_played, columns=['bioID'])

players_not_played_df.head()

Number of players that haven't played: 338


Unnamed: 0,bioID
0,abrahta01w
1,adairje01w
2,adamsda01w
3,adamsmi01w
4,adubari99w


In [9]:
#Print the number of players that have not played in the last 10 years, and the lenght of the 3 dataframes that contain the playerID
print("Number of players that have not played: ", len(players_not_played_df['bioID'].unique()))
print("-------------------------------------------")
print("Number of values in the players_team_df: ", len(dfs['players_teams_df']['playerID'].unique()))
print("Number of values in the awards_players_df: ", len(dfs['awards_players_df']['playerID'].unique()))
print("Number of values in the players_df: ", len(dfs['players_df']['bioID'].unique()))

#Remove the players that have not played in the last 10 years
for df in dfs:
    if(df == 'players_teams_df' or df == 'awards_players_df'):
        dfs[df] = dfs[df][~dfs[df]['playerID'].isin(players_not_played_df['bioID'])]
    if(df == 'players_df'):
        dfs[df] = dfs[df][~dfs[df]['bioID'].isin(players_not_played_df['bioID'])]

#Print the number of players that have not played in the last 10 years, and the lenght of the 3 dataframes that contain the playerID
print('\n')
print("Number of values in the players_team_df: ", len(dfs['players_teams_df']['playerID'].unique()))
print("Number of values in the awards_players_df: ", len(dfs['awards_players_df']['playerID'].unique()))
print("Number of values in the players_df: ", len(dfs['players_df']['bioID'].unique()))

Number of players that have not played:  338
-------------------------------------------
Number of values in the players_team_df:  555
Number of values in the awards_players_df:  58
Number of values in the players_df:  893


Number of values in the players_team_df:  555
Number of values in the awards_players_df:  51
Number of values in the players_df:  555


Drop Death date as only 4 players have it and it is not relevant for the analysis, as we are not interested in the death date of the players.

In [10]:
dfs["players_df"] = dfs["players_df"].drop('deathDate', axis=1)

Trasnform birthDate into birthYear, so the model can use it for analysis.

In [11]:
# Convert 'birthDate' to datetime if it's not already
dfs["players_df"]['birthDate'] = pd.to_datetime(dfs["players_df"]['birthDate'])

# Create a new 'birthYear' column
dfs["players_df"]['birthYear'] = dfs["players_df"]['birthDate'].dt.year
dfs["players_df"] = dfs["players_df"].drop('birthDate', axis=1)

### Populate first and last seasons of a player in the wnba

As we mentioned before we will populate first and last season of a player in the wnba. We will do this by looking at the seasons table and finding the first and last season of a player. We will then populate the first and last season of a player in the players table.

In [12]:
# Group the players_teams_df by 'playerID' to find the first and last seasons.
first_seasons = dfs['players_teams_df'].groupby('playerID')['year'].min()
last_seasons = dfs['players_teams_df'].groupby('playerID')['year'].max()

# Use .loc to set the values in players_df without the warning.
dfs['players_df'].loc[:, 'firstseason'] = dfs['players_df']['bioID'].map(first_seasons)
dfs['players_df'].loc[:, 'lastseason'] = dfs['players_df']['bioID'].map(last_seasons)

dfs['players_df'].head()

Unnamed: 0,bioID,pos,firstseason,lastseason,height,weight,college,collegeOther,birthYear
1,abrossv01w,F,2,9,74.0,169,Connecticut,,1980
4,adamsjo01w,C,4,4,75.0,180,New Mexico,,1981
8,aguilel01w,G,3,3,67.0,165,George Washington,,1976
9,ajavoma01w,G,9,10,68.0,160,Rutgers,,1986
11,aldrima01w,G,1,2,71.0,153,UNC Charlotte,,1973


### Transform the binary objects into binary values

List the columns with binary objects:

In [13]:
# Get all the binary columns from all the dataframes
binary_columns = []
for df in dfs:
    binary_columns = binary_columns + [(df, list(dfs[df].columns[(dfs[df].nunique() == 2) | (dfs[df].nunique() == 3)]))]
# Print the binary columns uniques values
for i in binary_columns:
    if(len(i[1]) < 0):
        continue

    for j in i[1]:
        print("-------")
        print(i[0], j)
        print(dfs[i[0]][j].unique())

-------
coaches_df stint
[0 1 2]
-------
players_teams_df PostDQ
[0 1 2]
-------
series_post_df round
['FR' 'CF' 'F']
-------
series_post_df W
[2 3]
-------
series_post_df L
[1 0 2]
-------
teams_df confID
['EA' 'WE']
-------
teams_df playoff
['N' 'Y']
-------
teams_df firstRound
['NQ' 'L' 'W']
-------
teams_df semis
['NQ' 'W' 'L']
-------
teams_df finals
['NQ' 'L' 'W']
-------
teams_df GP
[34 32]


GP is the number of games played and should also not be converted to binary, as we will need this value to calculate the win percentage. (it is only binary because seasons have been of 32 games or 34 games). The W value in the series_post represents the number of wins a team winned in the playoffs. All the playoffs games are in the best of 3 or 5, so the winning team wins 2 or 3 games.
The other binary values are binary and should be converted to binary.

In [14]:
# Convert the binary columns to 0 and 1 (confID, playoff, firstRound, semis, finals)
binary_columns = ["confID", "playoff"]
trenary_columns = ["firstRound", "semis", "finals"]

for col in binary_columns + trenary_columns:
    dfs["teams_df"][col] = dfs["teams_df"][col].replace('EA', 0)
    dfs["teams_df"][col] = dfs["teams_df"][col].replace('WE', 1)
    dfs["teams_df"][col] = dfs["teams_df"][col].replace('W', 1)
    dfs["teams_df"][col] = dfs["teams_df"][col].replace('L', 0.5)
    dfs["teams_df"][col] = dfs["teams_df"][col].replace('NQ', 0)
    dfs["teams_df"][col] = dfs["teams_df"][col].replace('N', 0)
    dfs["teams_df"][col] = dfs["teams_df"][col].replace('Y', 1)

# Change the type of the column to int
for col in binary_columns:
    dfs["teams_df"][col] = dfs["teams_df"][col].astype("Int64")

# Change the type of the column to float
for col in trenary_columns:
    dfs["teams_df"][col] = dfs["teams_df"][col].astype("float")

dfs["teams_df"].head()

Unnamed: 0,year,tmID,franchID,confID,rank,playoff,firstRound,semis,finals,name,o_fgm,o_fga,o_ftm,o_fta,o_3pm,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,o_pts,d_fgm,d_fga,d_ftm,d_fta,d_3pm,d_3pa,d_oreb,d_dreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,d_pts,won,lost,GP,homeW,homeL,awayW,awayL,confW,confL,min,attend,arena
0,9,ATL,ATL,0,7,0,0.0,0.0,0.0,Atlanta Dream,895,2258,542,725,202,598,340,737,1077,492,796,285,593,142,2534,1014,2254,679,918,172,502,401,864,1265,684,726,310,561,134,2879,4,30,34,1,16,3,14,2,18,6825,141379,Philips Arena
1,10,ATL,ATL,0,2,1,0.5,0.0,0.0,Atlanta Dream,1089,2428,569,755,114,374,404,855,1259,547,741,329,590,121,2861,996,2363,624,807,181,530,353,821,1174,615,700,347,601,133,2797,18,16,34,12,5,6,11,10,12,6950,120737,Philips Arena
2,1,CHA,CHA,0,8,0,0.0,0.0,0.0,Charlotte Sting,812,1903,431,577,131,386,305,630,935,551,713,222,496,90,2186,879,1930,533,716,138,423,326,664,990,596,596,259,426,123,2429,8,24,32,5,11,3,13,5,16,6475,90963,Charlotte Coliseum
3,2,CHA,CHA,0,4,1,1.0,1.0,0.5,Charlotte Sting,746,1780,410,528,153,428,309,639,948,467,605,217,474,114,2055,732,1846,431,562,114,369,344,567,911,443,579,257,447,124,2009,18,14,32,11,5,7,9,15,6,6500,105525,Charlotte Coliseum
4,3,CHA,CHA,0,2,1,0.5,0.0,0.0,Charlotte Sting,770,1790,490,663,211,527,302,653,955,496,647,241,408,105,2241,778,1807,444,598,133,372,295,620,915,489,600,208,424,103,2133,18,14,32,11,5,7,9,12,9,6450,106670,Charlotte Coliseum


## Feature Engineering

### Calculate win percentage

For each team, we want to calculate the following:
- Win percentage
- Loss percentage
- Wins at home percentage
- Losses at home percentage
- Wins away percentage
- Losses away percentage
- Conference wins percentage
- Conference losses percentage

In [15]:
# Calculate win percentage, loss percentage, wins at home percentage, losses at home percentage, wins away percentage, losses away percentage, wins at conference percentage, losses at conference percentage

dfs["teams_df"]["win_percentage"] = dfs["teams_df"]["won"] / (dfs["teams_df"]["won"] + dfs["teams_df"]["lost"])
dfs["teams_df"]["loss_percentage"] = dfs["teams_df"]["lost"] / (dfs["teams_df"]["won"] + dfs["teams_df"]["lost"])
dfs["teams_df"]["home_win_percentage"] = dfs["teams_df"]["homeW"] / (dfs["teams_df"]["homeW"] + dfs["teams_df"]["homeL"])
dfs["teams_df"]["home_loss_percentage"] = dfs["teams_df"]["homeL"] / (dfs["teams_df"]["homeW"] + dfs["teams_df"]["homeL"])
dfs["teams_df"]["away_win_percentage"] = dfs["teams_df"]["awayW"] / (dfs["teams_df"]["awayW"] + dfs["teams_df"]["awayL"])
dfs["teams_df"]["away_loss_percentage"] = dfs["teams_df"]["awayL"] / (dfs["teams_df"]["awayW"] + dfs["teams_df"]["awayL"])
dfs["teams_df"]["conference_win_percentage"] = dfs["teams_df"]["confW"] / (dfs["teams_df"]["confW"] + dfs["teams_df"]["confL"])
dfs["teams_df"]["conference_loss_percentage"] = dfs["teams_df"]["confL"] / (dfs["teams_df"]["confW"] + dfs["teams_df"]["confL"])

# Drop the columns that are not needed anymore
dfs["teams_df"] = dfs["teams_df"].drop(columns=['won', 'lost', 'homeW', 'homeL', 'awayW', 'awayL', 'confW', 'confL'])

## Data Preparation on players

### Position Uniformization

From the list below we can see that there are 7 different positions. We will uniformize the positions to the following:
- Guard (G)
- Forward (F)
- Center (C)
- Guard-Forward (G-F)
- Forward-Center (F-C)

But, as we can see from the distinct positions, we have 2 more positions that are not in the list above. These are:
(C-F) and (F-G). We will uniformize these positions to the ones above.

In [16]:
unique_positions = dfs['players_df']['pos'].unique()
print(unique_positions)

# Define specific_position_mapping
specific_position_mapping = {
    'F-G': 'G-F',
    'C-F': 'F-C'
}

# Use .loc to update the 'pos' column in players_df
dfs['players_df'].loc[:, 'pos'] = dfs['players_df'].loc[:, 'pos'].replace(specific_position_mapping)

# Check the unique values after mapping
print("After mapping")
unique_positions = dfs['players_df']['pos'].unique()
print(unique_positions)

['F' 'C' 'G' 'F-G' 'G-F' 'F-C' 'C-F']
After mapping
['F' 'C' 'G' 'G-F' 'F-C']


Create a column with the number of seasons a player played in the wnba.

In [17]:
dfs['players_df'].loc[:, 'num_seasons'] = dfs['players_df']['lastseason'] - dfs['players_df']['firstseason'] + 1
dfs['players_df'] = dfs['players_df'].drop(columns=['firstseason', 'lastseason'])

dfs['players_df'].head()

Unnamed: 0,bioID,pos,height,weight,college,collegeOther,birthYear,num_seasons
1,abrossv01w,F,74.0,169,Connecticut,,1980,8
4,adamsjo01w,C,75.0,180,New Mexico,,1981,1
8,aguilel01w,G,67.0,165,George Washington,,1976,1
9,ajavoma01w,G,68.0,160,Rutgers,,1986,2
11,aldrima01w,G,71.0,153,UNC Charlotte,,1973,2


Create:
- Total Points in the season
- Total Rebounds in the season
- Total Assists in the season
- Total Steals in the season
- Total Turnovers in the season
- Total Goal Percentage in the season
- Total Three Point Percentage in the season
- Total Free Throw Percentage in the season
- Won award in the season
- Metric by position

Group by 'playerID', 'year' and add the stats from the many stints, keep the last team played for in the season.

In [18]:
# Group by 'playerID' and 'year', sum the stats from the stints, and keep the last 'teamID'
dfs["players_teams_df"] = dfs["players_teams_df"].groupby(['playerID', 'year']).agg({**{col: 'sum' for col in dfs["players_teams_df"].columns if col not in ['playerID', 'year', 'tmID']}, **{'tmID': 'last'}}).reset_index()
dfs["players_teams_df"].drop(columns=['stint'], inplace=True)

In [19]:
dfs["players_teams_df"]["total_points"] = (
    dfs["players_teams_df"]["points"] + dfs["players_teams_df"]["PostPoints"]
)
dfs["players_teams_df"]["total_rebounds"] = (
    dfs["players_teams_df"]["rebounds"] + dfs["players_teams_df"]["PostRebounds"]
)
dfs["players_teams_df"]["total_assists"] = (
    dfs["players_teams_df"]["assists"] + dfs["players_teams_df"]["PostAssists"]
)
dfs["players_teams_df"]["total_blocks"] = (
    dfs["players_teams_df"]["blocks"] + dfs["players_teams_df"]["PostBlocks"]
)
dfs["players_teams_df"]["total_steals"] = (
    dfs["players_teams_df"]["steals"] + dfs["players_teams_df"]["PostSteals"]
)
dfs["players_teams_df"]["total_turnovers"] = (
    dfs["players_teams_df"]["turnovers"] + dfs["players_teams_df"]["PostTurnovers"]
)
dfs["players_teams_df"]["FG%"] = (
    dfs["players_teams_df"]["fgMade"] + dfs["players_teams_df"]["PostfgMade"]
) / (
    dfs["players_teams_df"]["fgAttempted"] + dfs["players_teams_df"]["PostfgAttempted"]
)
dfs["players_teams_df"]["FT%"] = (
    dfs["players_teams_df"]["ftMade"] + dfs["players_teams_df"]["PostftMade"]
) / (
    dfs["players_teams_df"]["ftAttempted"] + dfs["players_teams_df"]["PostftAttempted"]
)
dfs["players_teams_df"]["FG%"].fillna(0, inplace=True)
dfs["players_teams_df"]["FT%"].fillna(0, inplace=True)

# Create a new 'award' column
dfs["players_teams_df"]['award'] = dfs["players_teams_df"].set_index(['playerID', 'year']).index.isin(dfs["awards_players_df"].groupby(['playerID', 'year']).any().index).astype(int)

# Average Stats
dfs["players_teams_df"]["Avg_Points_Per_Game"] = (
    dfs["players_teams_df"]["points"] / dfs["players_teams_df"]["GP"]
)
dfs["players_teams_df"]["Avg_Rebounds_Per_Game"] = (
    dfs["players_teams_df"]["rebounds"] / dfs["players_teams_df"]["GP"]
)
dfs["players_teams_df"]["Avg_Assists_Per_Game"] = (
    dfs["players_teams_df"]["assists"] / dfs["players_teams_df"]["GP"]
)
dfs["players_teams_df"]["Avg_Blocks_Per_Game"] = (
    dfs["players_teams_df"]["blocks"] / dfs["players_teams_df"]["GP"]
)
dfs["players_teams_df"]["Avg_Steals_Per_Game"] = (
    dfs["players_teams_df"]["steals"] / dfs["players_teams_df"]["GP"]
)
dfs["players_teams_df"]["Avg_Turnovers_Per_Game"] = (
    dfs["players_teams_df"]["turnovers"] / dfs["players_teams_df"]["GP"]
)

# Usability Ratio (Check if this is correct)
dfs["players_teams_df"]["Usability"] = (
    dfs["players_teams_df"]["minutes"]
) / (dfs["players_teams_df"]["GP"] * 40)

# Get the player position from dfs['players']['pos'], match ['bioid'] to ['playerID'] in dfs['players_teams_df']
position_mapping = dfs["players_df"].set_index("bioID")["pos"]
dfs["players_teams_df"]["pos"] = dfs["players_teams_df"]["playerID"].map(
    position_mapping
)


# Define a dictionary of position-specific metrics
position_metrics = {
    "G": "AST_TO_RATIO",
    "F": "REBOUND_EFFICIENCY",
    "G-F": "3P_SHOOTING_PERCENT",
    "C": "BLOCK_EFFICIENCY",
    "F-C": "SCORING_EFFICIENCY",
}

# Calculate position-specific metrics and update the DataFrame
for position, metric in position_metrics.items():
    position_df = dfs["players_teams_df"][dfs["players_teams_df"]["pos"] == position]
    dfs["players_teams_df"][metric] = position_df.apply(
        lambda row: row["assists"] / row["turnovers"]
        if metric == "AST_TO_RATIO" and row["turnovers"] != 0
        else (row["oRebounds"] + row["dRebounds"]) / row["GP"]
        if metric == "REBOUND_EFFICIENCY" and row["GP"] != 0
        else row["threeMade"] / row["threeAttempted"]
        if metric == "3P_SHOOTING_PERCENT" and row["threeAttempted"] != 0
        else row["blocks"] / row["GP"]
        if metric == "BLOCK_EFFICIENCY" and row["GP"] != 0
        else row["points"] / row["fgAttempted"]
        if metric == "SCORING_EFFICIENCY" and row["fgAttempted"] != 0
        else None,
        axis=1,
    )

    # Normalize the metric ( so that it's between 0 and 1 and can be compared to other positions)
    scaler = MinMaxScaler()
    dfs["players_teams_df"][[metric]] = scaler.fit_transform(dfs["players_teams_df"][[metric]])


# Join the position-specific metrics to the one column in dfs['players_teams_df']
def extract_first_non_null(row):
    for column in list(position_metrics.values()):
        if not pd.isnull(row[column]):
            return row[column]
    return None
dfs["players_teams_df"]["POSITION_METRIC"] = dfs["players_teams_df"].apply(
    extract_first_non_null, axis=1
)
dfs["players_teams_df"].drop(columns=list(position_metrics.values()), inplace=True)


nulls_in_position_metric = dfs["players_teams_df"]["POSITION_METRIC"].isnull().sum()
print(f"Number of null values in POSITION_METRIC due to having 0 at position caracteristic: {nulls_in_position_metric}")
dfs['players_teams_df']['POSITION_METRIC'].fillna(0, inplace=True)

display(dfs["players_teams_df"].head())
display(dfs["teams_df"].head())

Number of null values in POSITION_METRIC due to having 0 at position caracteristic: 30


Unnamed: 0,playerID,year,GP,GS,minutes,points,oRebounds,dRebounds,rebounds,assists,steals,blocks,turnovers,PF,fgAttempted,fgMade,ftAttempted,ftMade,threeAttempted,threeMade,dq,PostGP,PostGS,PostMinutes,PostPoints,PostoRebounds,PostdRebounds,PostRebounds,PostAssists,PostSteals,PostBlocks,PostTurnovers,PostPF,PostfgAttempted,PostfgMade,PostftAttempted,PostftMade,PostthreeAttempted,PostthreeMade,PostDQ,tmID,total_points,total_rebounds,total_assists,total_blocks,total_steals,total_turnovers,FG%,FT%,award,Avg_Points_Per_Game,Avg_Rebounds_Per_Game,Avg_Assists_Per_Game,Avg_Blocks_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,Usability,pos,POSITION_METRIC
0,abrossv01w,2,26,23,846,343,43,131,174,53,42,9,85,70,293,114,132,96,76,19,2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,MIN,343,174,53,9,42,85,0.389078,0.727273,0,13.192308,6.692308,2.038462,0.346154,1.615385,3.269231,0.813462,F,0.576923
1,abrossv01w,3,27,27,805,314,45,101,146,60,42,10,92,73,316,119,116,56,60,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,MIN,314,146,60,10,42,92,0.376582,0.482759,0,11.62963,5.407407,2.222222,0.37037,1.555556,3.407407,0.74537,F,0.466156
2,abrossv01w,4,30,25,792,318,44,97,141,82,44,11,90,79,285,112,98,69,82,25,0,3,3,69,23,1,4,5,4,4,1,8,8,22,6,8,8,7,3,0,MIN,341,146,86,12,48,98,0.384365,0.726415,0,10.6,4.7,2.733333,0.366667,1.466667,3.0,0.66,F,0.405172
3,abrossv01w,5,22,11,462,146,17,57,74,45,30,2,43,42,139,49,46,28,53,20,0,2,2,67,20,3,6,9,3,1,2,3,7,23,8,4,2,8,2,0,MIN,166,83,48,4,31,46,0.351852,0.6,0,6.636364,3.363636,2.045455,0.090909,1.363636,1.954545,0.525,F,0.289969
4,abrossv01w,6,31,31,777,304,29,78,107,60,48,6,80,86,276,109,73,53,82,33,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,MIN,304,107,60,6,48,80,0.394928,0.726027,0,9.806452,3.451613,1.935484,0.193548,1.548387,2.580645,0.626613,F,0.297553


Unnamed: 0,year,tmID,franchID,confID,rank,playoff,firstRound,semis,finals,name,o_fgm,o_fga,o_ftm,o_fta,o_3pm,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,o_pts,d_fgm,d_fga,d_ftm,d_fta,d_3pm,d_3pa,d_oreb,d_dreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,d_pts,GP,min,attend,arena,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage
0,9,ATL,ATL,0,7,0,0.0,0.0,0.0,Atlanta Dream,895,2258,542,725,202,598,340,737,1077,492,796,285,593,142,2534,1014,2254,679,918,172,502,401,864,1265,684,726,310,561,134,2879,34,6825,141379,Philips Arena,0.117647,0.882353,0.058824,0.941176,0.176471,0.823529,0.1,0.9
1,10,ATL,ATL,0,2,1,0.5,0.0,0.0,Atlanta Dream,1089,2428,569,755,114,374,404,855,1259,547,741,329,590,121,2861,996,2363,624,807,181,530,353,821,1174,615,700,347,601,133,2797,34,6950,120737,Philips Arena,0.529412,0.470588,0.705882,0.294118,0.352941,0.647059,0.454545,0.545455
2,1,CHA,CHA,0,8,0,0.0,0.0,0.0,Charlotte Sting,812,1903,431,577,131,386,305,630,935,551,713,222,496,90,2186,879,1930,533,716,138,423,326,664,990,596,596,259,426,123,2429,32,6475,90963,Charlotte Coliseum,0.25,0.75,0.3125,0.6875,0.1875,0.8125,0.238095,0.761905
3,2,CHA,CHA,0,4,1,1.0,1.0,0.5,Charlotte Sting,746,1780,410,528,153,428,309,639,948,467,605,217,474,114,2055,732,1846,431,562,114,369,344,567,911,443,579,257,447,124,2009,32,6500,105525,Charlotte Coliseum,0.5625,0.4375,0.6875,0.3125,0.4375,0.5625,0.714286,0.285714
4,3,CHA,CHA,0,2,1,0.5,0.0,0.0,Charlotte Sting,770,1790,490,663,211,527,302,653,955,496,647,241,408,105,2241,778,1807,444,598,133,372,295,620,915,489,600,208,424,103,2133,32,6450,106670,Charlotte Coliseum,0.5625,0.4375,0.6875,0.3125,0.4375,0.5625,0.571429,0.428571


### Check correlation

We will remove the most correlated variables, as they do not add any value to the model.

In [20]:
MAX_CORRELATION = 0.95

In [21]:
# Add column to indicate if the player went to the playoffs to look for correlations
def went_to_playoff(df, dfs):
    returned_df = df.copy()
    
    playoff_data = dfs['teams_df'][['tmID', 'year', 'playoff']]

    returned_df = returned_df.merge(playoff_data, on=['tmID', 'year'], how='left')
    
    return returned_df

def delete_most_correlated(df):
    df_copy = df.copy()

    correlation_matrix = df_copy.corr()

    sorted_correlations = correlation_matrix.unstack().sort_values(ascending=False)

    # Get the pairs of attributes with the highest correlation values
    most_correlated_pairs = sorted_correlations[sorted_correlations > MAX_CORRELATION]
    most_correlated_pairs = most_correlated_pairs[most_correlated_pairs < 1.0]

    # Delete repeated pairs (e.g. (a,b) and (b,a))
    most_correlated_pairs = most_correlated_pairs[::2]
    print(most_correlated_pairs)

    # Drop the attributes with the highest correlation values
    for pair in most_correlated_pairs.index:
        if 'Usability' not in pair and 'year' not in pair and 'playoff' not in pair and 'confID' not in pair:
            if pair[0] in df_copy.columns:
                df_copy.drop(pair[0], inplace=True, axis=1)

    return df_copy

In [22]:
for df in dfs:
    print(df)
    if 'players_teams_df' in df:
        dfs[df] = went_to_playoff(dfs[df], dfs)

    # Select only the numerical columns
    new_df = delete_most_correlated(dfs[df].select_dtypes(include=np.number))

    # Merge new_df with the categorical columns
    dfs[df] = new_df.merge(dfs[df].select_dtypes(exclude=np.number), left_index=True, right_index=True)

awards_players_df
Series([], dtype: float64)
coaches_df
Series([], dtype: float64)
players_df
Series([], dtype: float64)
players_teams_df
PostPoints            PostfgMade               0.992340
blocks                total_blocks             0.991821
total_assists         assists                  0.990934
steals                total_steals             0.990340
points                fgMade                   0.990305
total_points          points                   0.989701
rebounds              total_rebounds           0.989526
turnovers             total_turnovers          0.989048
PostftMade            PostftAttempted          0.987631
PostdRebounds         PostRebounds             0.986612
points                fgAttempted              0.986184
ftAttempted           ftMade                   0.985914
fgMade                fgAttempted              0.985844
threeMade             threeAttempted           0.984584
PostfgMade            PostfgAttempted          0.984323
PostfgAttempted       

### Remove categorical variables 

There are several categorical variables that we will remove from the model, as they do not add any value to the model.

In [23]:
# Remove categorical columns from team_df that are not needed 
# name, arena

dfs['teams_df'] = dfs['teams_df'].drop(columns=['name', 'arena'])

Some teams changed their names, but they maintained the same Franchise. We will map the teams to their Franchise and replace the team ID with the Franchise ID.

In [24]:
# Iterate over each row in the DataFrame
def franchise_mapping(teams_df, dfs):
    team_franchise_mapping = {}
    for _, row in teams_df.iterrows():
        # Extract team and franchise IDs from the current row
        team_id = row['tmID']
        franchise_id = row['franchID']

        # Check if the team ID is not already in the mapping dictionary
        if team_id not in team_franchise_mapping:
            # Add the team ID and its corresponding franchise ID to the mapping
            team_franchise_mapping[team_id] = franchise_id

    # Now, team_franchise_mapping contains the mapping between team IDs and franchise IDs
    print(team_franchise_mapping)

    # Replace team IDs with franchise IDs in all the DataFrames
    for _, df in dfs.items():
        # Check if 'tmID' is a column in the current DataFrame
        if 'tmID' in df.columns:
            # Replace team IDs with franchise IDs using the mapping
            df['tmID'] = df['tmID'].map(team_franchise_mapping)
        if 'franchID' in df.columns:
            # Drop the 'franchID' column
            df.drop(columns=['franchID'], inplace=True)
        
from sklearn.preprocessing import LabelEncoder

def one_hot_encode_team_id(df):
    le = LabelEncoder()

    # Encode the 'tmID' column in teams_df
    encoded = le.fit_transform(df['tmID'])

    # Add the encoded column to teams_df
    df['tmID_encoded'] = encoded

    return df
        
franchise_mapping(dfs['teams_df'], dfs)
dfs['teams_df'] = one_hot_encode_team_id(dfs['teams_df'])

{'ATL': 'ATL', 'CHA': 'CHA', 'CHI': 'CHI', 'CLE': 'CLE', 'CON': 'CON', 'DET': 'DET', 'HOU': 'HOU', 'IND': 'IND', 'LAS': 'LAS', 'MIA': 'MIA', 'MIN': 'MIN', 'NYL': 'NYL', 'ORL': 'CON', 'PHO': 'PHO', 'POR': 'POR', 'SAC': 'SAC', 'SAS': 'SAS', 'SEA': 'SEA', 'UTA': 'SAS', 'WAS': 'WAS'}


### Saving the data

In [25]:
# Saving the tables in a csv file, inside the prep_data folder
for df in dfs:
    dfs[df].to_csv('../prep_data/dfs/' + df + '.csv', index=False)

## Merge information into a single table


The next step is to merge all the information into a single table, so it can be used for the model.

Insights on merging tables:
- Players with players_teams.
- Combine players stastics with teams statistics.
- Create a rolling window for the combined statistics.

In [26]:
# join players (bioID) with players_teams (playerID)
players_df  = dfs['players_df'].rename(columns={'bioID': 'playerID'})
dfs['players_teams_df'] = dfs['players_teams_df'].merge(players_df, on=['playerID'], how='left')
dfs['players_teams_df'].head()

# to csv
# players_teams_df.to_csv('../prep_data/players_df_merge_with_players_teams.csv', index=False)

Unnamed: 0,year,GP,GS,minutes,oRebounds,PF,fgAttempted,ftMade,threeAttempted,dq,PostGP,PostGS,PostoRebounds,PostRebounds,PostAssists,PostSteals,PostBlocks,PostTurnovers,PostPF,PostftAttempted,PostthreeAttempted,PostDQ,total_rebounds,total_blocks,total_steals,total_turnovers,FG%,FT%,award,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,Usability,POSITION_METRIC,playoff,playerID,tmID,pos_x,height,weight,birthYear,num_seasons,pos_y,college,collegeOther
0,2,26,23,846,43,70,293,96,76,2,0,0,0,0,0,0,0,0,0,0,0,0,174,9,42,85,0.389078,0.727273,0,6.692308,1.615385,3.269231,0.813462,0.576923,0,abrossv01w,MIN,F,74.0,169,1980,8,F,Connecticut,
1,3,27,27,805,45,73,316,56,60,0,0,0,0,0,0,0,0,0,0,0,0,0,146,10,42,92,0.376582,0.482759,0,5.407407,1.555556,3.407407,0.74537,0.466156,0,abrossv01w,MIN,F,74.0,169,1980,8,F,Connecticut,
2,4,30,25,792,44,79,285,69,82,0,3,3,1,5,4,4,1,8,8,8,7,0,146,12,48,98,0.384365,0.726415,0,4.7,1.466667,3.0,0.66,0.405172,1,abrossv01w,MIN,F,74.0,169,1980,8,F,Connecticut,
3,5,22,11,462,17,42,139,28,53,0,2,2,3,9,3,1,2,3,7,4,8,0,83,4,31,46,0.351852,0.6,0,3.363636,1.363636,1.954545,0.525,0.289969,1,abrossv01w,MIN,F,74.0,169,1980,8,F,Connecticut,
4,6,31,31,777,29,86,276,53,82,0,0,0,0,0,0,0,0,0,0,0,0,0,107,6,48,80,0.394928,0.726027,0,3.451613,1.548387,2.580645,0.626613,0.297553,0,abrossv01w,MIN,F,74.0,169,1980,8,F,Connecticut,


In [27]:
# Join players statistics with teams statistics by doing a weighted mean based on players usability (minutes played)
teams_and_players_df = dfs['teams_df'].copy()

# Iterate over each column in players_df
numeric_columns = dfs['players_teams_df'].select_dtypes(include=np.number).columns
exclude_columns = {'year', 'tmID', 'playerID', 'firstseason', 'lastseason', 'playoff'}
numeric_columns = [col for col in numeric_columns if col not in exclude_columns]

for col in numeric_columns:
    # Calculate the weighted mean using 'minutes' as weights within the groupby operation
    weighted_mean = dfs['players_teams_df'].groupby(['year', 'tmID', 'playoff'])[col].apply(lambda x: np.average(x, weights=players_teams_df.loc[x.index, 'minutes'])).reset_index(name=col)

    # Merge the weighted mean into teams_df2
    teams_and_players_df = teams_and_players_df.merge(weighted_mean, on=['year', 'tmID', 'playoff'], how='left')

# to csv
# teams_and_players_df.to_csv('../prep_data/teams_and_players_df.csv', index=False)

teams_and_players_df.head()

Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP_x,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID,tmID_encoded,GP_y,GS,minutes,oRebounds,PF,fgAttempted,ftMade,threeAttempted,dq,PostGP,PostGS,PostoRebounds,PostRebounds,PostAssists,PostSteals,PostBlocks,PostTurnovers,PostPF,PostftAttempted,PostthreeAttempted,PostDQ,total_rebounds,total_blocks,total_steals,total_turnovers,FG%,FT%,award,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,Usability,POSITION_METRIC,height,weight,birthYear,num_seasons
0,9,0,7,0,0.0,0.0,0.0,895,2258,542,725,598,340,737,1077,492,796,285,593,142,1014,2254,918,172,502,401,1265,684,726,310,561,134,34,141379,0.117647,0.882353,0.058824,0.941176,0.176471,0.823529,0.1,0.9,ATL,0,27.274687,14.437036,561.78728,24.271607,58.224316,197.614966,43.3131,56.01522,0.649393,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.815546,7.432506,25.12466,47.181555,0.384856,0.728837,0.0,3.11739,0.935894,1.641947,0.502972,0.37411,72.427795,158.826055,1982.416923,5.699402
1,10,0,2,1,0.5,0.0,0.0,1089,2428,569,755,374,404,855,1259,547,741,329,590,121,996,2363,807,181,530,353,1174,615,700,347,601,133,34,120737,0.529412,0.470588,0.705882,0.294118,0.352941,0.647059,0.454545,0.545455,ATL,0,30.844431,9.966259,543.728874,23.507017,52.44461,158.381905,39.892804,38.127799,0.226038,1.51269,0.903553,1.20424,3.424306,2.666468,0.636907,0.147507,2.581069,3.256494,4.881159,3.825022,0.0,77.269931,6.147507,22.113168,45.418334,0.40329,0.774866,0.076142,2.263962,0.683185,1.365744,0.434415,0.304803,70.392356,153.024485,1983.388474,3.929531
2,1,0,8,0,0.0,0.0,0.0,812,1903,431,577,386,305,630,935,551,713,222,496,90,879,1930,716,138,423,326,990,596,596,259,426,123,32,90963,0.25,0.75,0.3125,0.6875,0.1875,0.8125,0.238095,0.761905,CHA,1,23.833001,15.626952,571.975241,28.422399,57.381688,162.028581,40.717182,30.078099,0.94317,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,84.671818,7.158691,18.18993,42.973247,0.424436,0.747583,0.0,3.226214,0.711357,1.651841,0.55788,0.32566,71.228481,165.4332,1973.19126,3.904121
3,2,0,4,1,1.0,1.0,0.5,746,1780,410,528,428,309,639,948,467,605,217,474,114,732,1846,562,114,369,344,911,443,579,257,447,124,32,105525,0.5625,0.4375,0.6875,0.3125,0.4375,0.5625,0.714286,0.285714,CHA,1,25.795107,14.207187,564.738723,31.667049,57.995222,159.345183,38.382645,23.251529,0.596713,6.349006,3.259939,6.465596,20.629205,7.828937,3.950879,3.14526,8.296827,13.158448,9.669916,4.685589,0.0,116.319572,17.618119,22.823968,47.561544,0.391101,0.668004,0.0,3.396549,0.622552,1.416535,0.479169,0.368094,72.927561,172.179664,1974.413035,4.857416
4,3,0,2,1,0.5,0.0,0.0,770,1790,490,663,527,302,653,955,496,647,241,408,105,778,1807,598,133,372,295,915,489,600,208,424,103,32,106670,0.5625,0.4375,0.6875,0.3125,0.4375,0.5625,0.571429,0.428571,CHA,1,22.150164,14.148974,520.29438,25.296908,49.196402,152.843146,46.099911,38.021707,0.615522,1.490633,0.896521,2.201754,5.467142,2.66518,1.498365,0.240559,2.355189,2.293934,1.881951,3.34047,0.0,86.824859,9.169789,21.828427,32.497472,0.344791,0.699543,0.0,3.006011,0.812454,0.992726,0.475065,0.281135,72.370503,176.037913,1972.943503,5.584002


In [28]:
new_df = delete_most_correlated(teams_and_players_df.select_dtypes(include=np.number))
teams_and_players_df = new_df.merge(teams_and_players_df.select_dtypes(exclude=np.number), left_index=True, right_index=True)

# Update dfs['teams_df'] and save to csv
# dfs['teams_df'] = teams_and_players_df.copy()
teams_and_players_df.to_csv('../prep_data/prepared_dataset.csv', index=False)

teams_and_players_df.head()

PostoRebounds  PostRebounds       0.983814
PostGP         PostPF             0.974419
               PostTurnovers      0.973205
PostRebounds   PostGP             0.972877
PostPF         PostTurnovers      0.970144
PostRebounds   PostPF             0.968746
PostTurnovers  PostRebounds       0.956189
PostRebounds   PostftAttempted    0.954327
PostPF         PostGS             0.950757
PostAssists    PostGP             0.950383
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP_x,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,GP_y,GS,minutes,oRebounds,PF,fgAttempted,ftMade,threeAttempted,dq,PostGS,PostSteals,PostBlocks,PostftAttempted,PostthreeAttempted,PostDQ,total_rebounds,total_blocks,total_steals,total_turnovers,FG%,FT%,award,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,Usability,POSITION_METRIC,height,weight,birthYear,num_seasons,tmID
0,9,0,7,0,0.0,0.0,0.0,895,2258,542,725,598,340,737,1077,492,796,285,593,142,1014,2254,918,172,502,401,1265,684,726,310,561,134,34,141379,0.117647,0.882353,0.058824,0.941176,0.176471,0.823529,0.1,0.9,0,27.274687,14.437036,561.78728,24.271607,58.224316,197.614966,43.3131,56.01522,0.649393,0.0,0.0,0.0,0.0,0.0,0.0,78.815546,7.432506,25.12466,47.181555,0.384856,0.728837,0.0,3.11739,0.935894,1.641947,0.502972,0.37411,72.427795,158.826055,1982.416923,5.699402,ATL
1,10,0,2,1,0.5,0.0,0.0,1089,2428,569,755,374,404,855,1259,547,741,329,590,121,996,2363,807,181,530,353,1174,615,700,347,601,133,34,120737,0.529412,0.470588,0.705882,0.294118,0.352941,0.647059,0.454545,0.545455,0,30.844431,9.966259,543.728874,23.507017,52.44461,158.381905,39.892804,38.127799,0.226038,0.903553,0.636907,0.147507,4.881159,3.825022,0.0,77.269931,6.147507,22.113168,45.418334,0.40329,0.774866,0.076142,2.263962,0.683185,1.365744,0.434415,0.304803,70.392356,153.024485,1983.388474,3.929531,ATL
2,1,0,8,0,0.0,0.0,0.0,812,1903,431,577,386,305,630,935,551,713,222,496,90,879,1930,716,138,423,326,990,596,596,259,426,123,32,90963,0.25,0.75,0.3125,0.6875,0.1875,0.8125,0.238095,0.761905,1,23.833001,15.626952,571.975241,28.422399,57.381688,162.028581,40.717182,30.078099,0.94317,0.0,0.0,0.0,0.0,0.0,0.0,84.671818,7.158691,18.18993,42.973247,0.424436,0.747583,0.0,3.226214,0.711357,1.651841,0.55788,0.32566,71.228481,165.4332,1973.19126,3.904121,CHA
3,2,0,4,1,1.0,1.0,0.5,746,1780,410,528,428,309,639,948,467,605,217,474,114,732,1846,562,114,369,344,911,443,579,257,447,124,32,105525,0.5625,0.4375,0.6875,0.3125,0.4375,0.5625,0.714286,0.285714,1,25.795107,14.207187,564.738723,31.667049,57.995222,159.345183,38.382645,23.251529,0.596713,3.259939,3.950879,3.14526,9.669916,4.685589,0.0,116.319572,17.618119,22.823968,47.561544,0.391101,0.668004,0.0,3.396549,0.622552,1.416535,0.479169,0.368094,72.927561,172.179664,1974.413035,4.857416,CHA
4,3,0,2,1,0.5,0.0,0.0,770,1790,490,663,527,302,653,955,496,647,241,408,105,778,1807,598,133,372,295,915,489,600,208,424,103,32,106670,0.5625,0.4375,0.6875,0.3125,0.4375,0.5625,0.571429,0.428571,1,22.150164,14.148974,520.29438,25.296908,49.196402,152.843146,46.099911,38.021707,0.615522,0.896521,1.498365,0.240559,1.881951,3.34047,0.0,86.824859,9.169789,21.828427,32.497472,0.344791,0.699543,0.0,3.006011,0.812454,0.992726,0.475065,0.281135,72.370503,176.037913,1972.943503,5.584002,CHA


### Create rolling window (Get players from team and merge players stats from previous seasons)

Retrieve data from competition year

In [29]:
schema = config["db_11_schema"]

SELECT = "SELECT * FROM " + schema + "."
coaches = fetch(SELECT + "coaches")
players_teams = fetch(SELECT + "players_teams")
teams = fetch(SELECT + "teams")

players_teams_11_df = pd.DataFrame(players_teams, columns=['playerID', 'year', 'stint', 'tmID', 'lgID'])
teams_11_df = pd.DataFrame(teams, columns=['year', 'lgID', 'tmID', 'franchID', 'confID', 'name', 'arena', 'playoff'])
coaches_11_df = pd.DataFrame(coaches, columns=['coachID', 'year', 'tmID', 'lgID', 'stint'])

Make preprocessing for the competition year

In [30]:
binary_columns = ['confID', 'playoff']

for col in binary_columns:
    teams_11_df[col] = teams_11_df[col].replace('EA', 0)
    teams_11_df[col] = teams_11_df[col].replace('WE', 1)
    teams_11_df[col] = teams_11_df[col].replace('N', 0)
    teams_11_df[col] = teams_11_df[col].replace('Y',1)

franchise_mapping(teams_11_df, {'teams': teams_11_df, 'players_teams': players_teams_11_df, 'coaches':coaches_11_df})

players_teams_11_df = players_teams_11_df.reindex(columns=dfs['players_teams_df'].columns)
dfs["players_teams_df"] = pd.concat([dfs['players_teams_df'], players_teams_11_df])

teams_11_df = teams_11_df.reindex(columns=dfs["teams_df"].columns)
dfs["teams_df"] = pd.concat([dfs["teams_df"], teams_11_df])

coaches_11_df = coaches_11_df.reindex(columns=dfs["coaches_df"].columns)
dfs["coaches_df"] = pd.concat([dfs["coaches_df"], coaches_11_df])

{'SEA': 'SEA', 'WAS': 'WAS', 'LAS': 'LAS', 'MIN': 'MIN', 'PHO': 'PHO', 'SAS': 'SAS', 'TUL': 'DET', 'ATL': 'ATL', 'CHI': 'CHI', 'CON': 'CON', 'IND': 'IND', 'NYL': 'NYL'}


In [31]:
test_year = 11
num_previous_years = test_year - 1  # Number of previous years to consider

def weighted_mean(arr):
    num_years = len(arr)
    weights = np.arange(1, num_years + 1 ) * 1.0 / 10
    return np.sum(arr * weights) / np.sum(weights)

def weighted_average(arr, weights):
    if len(arr) != len(weights):
        raise ValueError("Array and weights must have the same length")
    return np.sum(arr * weights) / np.sum(weights)

for i in range(1, num_previous_years + 1):

    # Sort the players_teams DataFrame by player ID and season
    players_teams_sorted = dfs["players_teams_df"] .sort_values(by=['playerID', 'year'], ascending=True)
    # players_teams_sorted.to_csv('../prep_data/rolling_window/debug.csv', index=False)

    numeric_columns = players_teams_sorted.select_dtypes(include=np.number).columns.difference(['year', 'stint', 'playoff'])

    for col in numeric_columns:
        players_teams_sorted[col] = (
            players_teams_sorted.groupby('playerID')[col]
            .rolling(window=i, min_periods=1, closed='left')
            .apply(weighted_mean, raw=False)
            .reset_index(0, drop=True)
        )

    # Group by 'teamId' and 'year' and aggregate the cumulative statistics (Weighted average by time of play of previous years)
    team_players_year_stats = (
        players_teams_sorted.groupby(['tmID', 'year'])
        .apply(lambda x: pd.Series({
            col: weighted_average(x[col], x['minutes']) for col in numeric_columns
        })).reset_index()
    )

    # Apply the same to team stats
    teams_sorted = dfs["teams_df"].sort_values(by=['tmID', 'year'], ascending=True)

    numeric_columns = teams_sorted.select_dtypes(include=np.number).columns.difference(['year', 'confID', 'playoff', 'tmID_encoded'])

    for col in numeric_columns:
        teams_sorted[col] = (
            teams_sorted.groupby('tmID')[col]
            .rolling(window=i, min_periods=1, closed='left')
            .apply(weighted_mean, raw=False)
            .reset_index(0, drop=True)
        )

    df = teams_sorted.merge(team_players_year_stats, on=['tmID', 'year'], how='left', suffixes=('', f'_{i}'))

    # If first appearance of team/player on wnba, fill the stats with the average of the other rows
    # Calculate the mean of each numeric column
    average_numeric = df.select_dtypes(include=np.number).mean()

    # Fill NaN values in numeric columns with their means
    df.loc[:, df.dtypes == np.number] = df.loc[:, df.dtypes == np.number].fillna(average_numeric)


    df = df[df['year'] != 1]
    df = df.round(2)

    new_df = delete_most_correlated(df.select_dtypes(include=np.number))
    df = new_df.merge(df.select_dtypes(exclude=np.number), left_index=True, right_index=True)


    df.to_csv('../prep_data/rolling_window/data_with_' + str(i) + '_years_in_the_past.csv', index=False)
    display(df.head())

PostRebounds   PostoRebounds    0.982210
               PostPF           0.977278
PostPF         PostGP           0.974453
PostGS         PostGP           0.971195
PostPF         PostTurnovers    0.969652
PostRebounds   PostGP           0.969410
PostGS         PostRebounds     0.966571
PostRebounds   PostTurnovers    0.964973
PostGS         PostPF           0.964796
PostGP         PostTurnovers    0.963950
PostGS         PostTurnovers    0.961992
PostAssists    PostPF           0.960372
PostTurnovers  PostAssists      0.955397
PostGS         PostAssists      0.953431
PostPF         PostoRebounds    0.951237
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_1,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostSteals,PostftAttempted,PostoRebounds,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.03,0,0.44,0.21,0.11,860.47,2038.74,488.24,650.74,464.18,329.89,731.95,1061.84,520.96,653.28,262.68,509.8,122.5,860.24,2040.76,650.5,156.9,463.34,330.71,1061.6,520.77,653.22,263.12,509.15,121.46,33.32,141785.94,0.5,0.5,0.61,0.39,0.39,0.61,0.5,0.5,0.0,2.85,0.75,1.87,0.42,0.78,32.67,20.71,59.23,0.34,0.47,0.29,0.91,2.26,1.62,9.61,0.54,0.0,1980.61,0.0,249.16,59.34,70.68,719.78,7.51,25.9,86.85,7.79,101.46,26.39,65.99,149.91,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,3.91,0.96,1.76,0.43,0.73,30.22,20.61,74.39,0.34,0.0,0.0,0.0,0.0,0.0,0.0,0.58,0.0,1981.52,1.0,231.87,50.79,72.39,702.22,6.37,36.74,57.05,12.33,112.77,27.82,50.71,155.94,ATL
2,11,0,2.0,1,0.5,0.0,0.0,1089.0,2428.0,569.0,755.0,374.0,404.0,855.0,1259.0,547.0,741.0,329.0,590.0,121.0,996.0,2363.0,807.0,181.0,530.0,353.0,1174.0,615.0,700.0,347.0,601.0,133.0,34.0,120737.0,0.53,0.47,0.71,0.29,0.35,0.65,0.45,0.55,9.42,4.16,1.05,1.74,0.44,0.74,33.33,21.03,72.1,0.38,0.25,0.0,1.83,4.43,1.51,2.3,0.58,0.09,1983.61,0.7,264.77,56.53,72.7,779.36,4.38,42.36,41.02,15.77,145.29,37.4,64.04,165.18,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.28,0.9,1.0,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,4.0,1,1.0,1.0,0.5,746.0,1780.0,410.0,528.0,428.0,309.0,639.0,948.0,467.0,605.0,217.0,474.0,114.0,732.0,1846.0,562.0,114.0,369.0,344.0,911.0,443.0,579.0,257.0,447.0,124.0,32.0,105525.0,0.56,0.44,0.69,0.31,0.44,0.56,0.71,0.29,1.0,3.28,0.84,1.82,0.4,0.77,30.58,21.98,66.15,0.33,3.03,0.0,5.98,10.29,6.63,13.96,0.62,0.0,1972.98,0.68,220.32,48.14,70.89,777.66,6.17,34.2,58.42,15.54,124.92,32.49,69.57,163.45,CHA


PostoRebounds  PostRebounds     0.984165
PostRebounds   PostPF           0.978623
PostTurnovers  PostPF           0.977064
PostGP         PostPF           0.973681
PostRebounds   PostTurnovers    0.967500
PostTurnovers  PostGP           0.967288
PostRebounds   PostGP           0.966226
PostPF         PostGS           0.962983
PostAssists    PostPF           0.962861
PostGS         PostTurnovers    0.962427
PostRebounds   PostGS           0.962153
PostGP         PostGS           0.960967
PostTurnovers  PostAssists      0.958953
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_2,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostSteals,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.06,0,0.44,0.21,0.11,856.13,2027.43,485.95,648.79,460.58,329.05,726.56,1055.61,519.57,653.02,262.06,508.68,122.0,856.14,2029.57,648.8,155.27,459.01,330.0,1055.77,519.95,652.69,262.5,507.98,120.54,33.26,141962.82,0.5,0.5,0.61,0.39,0.39,0.61,0.5,0.5,0.0,2.69,0.73,1.78,0.42,0.74,31.82,19.36,59.42,0.32,0.39,0.2,1.8,4.24,10.36,0.49,0.0,1980.62,0.22,236.11,50.38,70.6,661.35,7.34,24.12,74.41,5.77,96.62,26.25,63.56,149.84,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,3.9,0.87,1.78,0.44,0.72,29.23,18.15,66.34,0.35,0.11,0.0,0.13,0.38,1.56,0.54,0.03,1981.21,0.74,210.28,46.66,72.82,634.41,6.8,36.31,46.71,12.97,110.98,24.46,48.83,156.89,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.73,1.02,1.62,0.44,0.74,31.51,16.34,64.33,0.35,0.15,0.0,1.22,3.1,1.76,0.54,0.1,1983.58,0.75,225.58,48.67,72.43,688.71,4.35,36.31,40.26,12.87,120.78,33.66,55.41,163.46,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.28,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,4.16,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostoRebounds  PostRebounds     0.983933
PostPF         PostTurnovers    0.978750
PostRebounds   PostPF           0.976851
PostGP         PostPF           0.973785
               PostTurnovers    0.967311
PostRebounds   PostTurnovers    0.965824
PostGS         PostTurnovers    0.964326
               PostPF           0.963623
PostAssists    PostPF           0.960622
PostGP         PostRebounds     0.959959
PostGS         PostRebounds     0.958693
PostAssists    PostTurnovers    0.957304
PostGS         PostGP           0.955831
PostSteals     PostGS           0.954206
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_3,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostTurnovers,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.08,0,0.43,0.21,0.11,851.95,2017.58,482.96,645.41,457.05,328.2,722.16,1050.37,518.04,651.21,261.62,506.94,121.64,852.03,2019.48,645.55,153.98,455.31,329.14,1050.69,518.85,650.77,262.05,506.36,120.05,33.2,142144.47,0.5,0.5,0.61,0.39,0.39,0.61,0.5,0.5,0.0,2.7,0.73,1.75,0.41,0.74,31.33,18.76,59.74,0.31,0.32,0.15,4.25,4.97,9.84,0.49,0.0,1980.63,0.22,228.33,47.83,70.62,645.58,7.33,23.29,68.92,5.57,95.56,26.06,61.63,149.96,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,3.95,0.87,1.83,0.45,0.73,29.37,17.99,67.54,0.36,0.35,0.0,0.89,1.44,1.85,0.55,0.04,1981.03,0.72,214.36,48.83,72.83,645.11,6.93,37.16,44.25,12.76,115.33,25.06,50.79,157.08,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.59,1.0,1.56,0.44,0.73,31.22,15.51,61.62,0.34,0.12,0.0,3.02,2.54,2.27,0.53,0.12,1983.61,0.65,214.24,46.77,72.35,663.74,4.31,34.84,40.86,12.07,114.36,32.32,52.33,162.92,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.94,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,8.87,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostoRebounds  PostRebounds     0.983755
PostPF         PostTurnovers    0.979946
PostRebounds   PostPF           0.974871
PostGP         PostPF           0.973215
PostTurnovers  PostGP           0.965955
               PostGS           0.964153
PostGS         PostPF           0.963745
PostRebounds   PostTurnovers    0.962896
PostAssists    PostPF           0.957269
PostRebounds   PostGS           0.956149
PostAssists    PostTurnovers    0.955887
PostRebounds   PostGP           0.954712
PostSteals     PostGS           0.954208
PostGP         PostGS           0.950779
PostSteals     PostTurnovers    0.950495
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_4,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.11,0,0.43,0.21,0.11,847.58,2007.6,480.2,642.4,453.48,327.34,718.05,1045.39,516.29,649.32,261.1,505.02,121.38,847.88,2009.59,642.4,152.8,451.95,328.21,1045.67,517.42,648.88,261.57,504.32,119.75,33.13,142427.59,0.5,0.5,0.61,0.39,0.39,0.61,0.5,0.5,0.0,2.67,0.71,1.71,0.41,0.74,30.66,17.47,58.45,0.3,0.3,0.12,5.5,9.53,0.48,0.03,1980.6,0.21,218.62,44.39,70.59,620.31,7.31,22.39,63.93,5.48,93.67,25.25,60.01,150.21,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,4.01,0.86,1.82,0.45,0.73,29.54,18.24,68.62,0.37,0.44,0.0,2.21,1.93,0.55,0.04,1980.97,0.75,218.31,50.49,72.87,656.34,6.98,37.5,42.68,12.85,118.52,25.35,51.5,157.32,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.43,0.98,1.53,0.43,0.73,31.14,14.75,59.96,0.33,0.11,0.0,2.5,2.48,0.52,0.12,1983.62,0.61,207.05,45.62,72.26,652.27,4.27,33.09,41.49,11.18,109.48,31.79,51.22,162.46,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostoRebounds  PostRebounds     0.983468
PostPF         PostTurnovers    0.980058
PostRebounds   PostPF           0.973640
PostPF         PostGP           0.972199
PostGS         PostTurnovers    0.964581
PostPF         PostGS           0.964460
PostGP         PostTurnovers    0.964237
PostTurnovers  PostRebounds     0.960795
PostPF         PostAssists      0.956723
PostAssists    PostTurnovers    0.955104
PostGS         PostRebounds     0.954996
               PostSteals       0.953617
PostTurnovers  PostSteals       0.951998
PostGP         PostRebounds     0.950766
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_5,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostSteals,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.13,0,0.43,0.21,0.11,843.68,1998.93,477.74,639.69,450.39,326.65,714.47,1041.12,514.77,647.69,260.62,503.33,121.28,844.23,2001.14,639.57,151.81,449.2,327.49,1041.45,516.15,647.28,261.17,502.46,119.58,33.08,142793.66,0.5,0.5,0.61,0.39,0.39,0.61,0.5,0.5,0.0,2.58,0.68,1.65,0.41,0.73,30.2,15.65,57.01,0.29,0.28,0.1,2.36,5.46,9.03,0.46,0.04,1980.61,0.19,208.62,40.83,70.58,590.21,7.27,21.48,60.29,5.4,90.16,24.08,57.83,150.51,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,4.1,0.87,1.8,0.45,0.72,29.41,18.18,68.66,0.37,0.45,0.0,0.66,2.38,1.84,0.56,0.04,1980.92,0.73,219.65,51.27,72.9,657.62,7.01,37.99,40.66,13.06,121.21,25.48,51.2,157.63,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.39,0.97,1.52,0.43,0.73,31.13,14.52,59.5,0.33,0.11,0.0,0.97,2.47,2.48,0.51,0.12,1983.63,0.59,203.33,45.2,72.24,648.29,4.26,32.84,40.56,10.96,108.53,31.38,50.84,162.4,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.28,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,4.16,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostoRebounds  PostRebounds     0.983163
PostPF         PostTurnovers    0.980153
PostRebounds   PostPF           0.973019
PostPF         PostGP           0.971590
PostGS         PostTurnovers    0.964973
PostPF         PostGS           0.964490
PostGP         PostTurnovers    0.963608
PostTurnovers  PostRebounds     0.959776
year           GP               0.959701
PostAssists    PostPF           0.955867
PostRebounds   PostGS           0.954549
PostTurnovers  PostAssists      0.954348
PostSteals     PostGS           0.953806
               PostTurnovers    0.953150
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_6,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.16,0,0.42,0.21,0.11,840.46,1991.98,475.7,637.46,447.87,326.15,711.52,1037.67,513.61,646.37,260.3,501.96,121.24,841.27,1994.32,637.27,151.04,446.98,326.97,1038.15,515.19,645.93,260.89,500.97,119.5,33.04,143149.86,0.5,0.5,0.61,0.39,0.39,0.61,0.5,0.5,0.0,2.53,0.67,1.64,0.41,0.73,30.15,14.87,57.12,0.29,0.28,0.09,5.35,8.71,0.46,0.04,1980.64,0.21,206.27,39.46,70.59,582.78,7.25,20.91,61.0,5.39,88.17,23.68,57.09,150.56,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,4.19,0.87,1.77,0.45,0.72,29.3,18.16,68.88,0.38,0.49,0.0,2.63,1.79,0.56,0.04,1980.87,0.73,220.43,51.35,72.9,658.48,7.03,38.54,39.28,13.45,124.04,25.68,50.77,157.79,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.39,0.96,1.5,0.43,0.73,31.04,14.32,59.22,0.33,0.11,0.0,2.39,2.39,0.51,0.12,1983.63,0.59,200.35,44.81,72.23,643.45,4.25,32.95,38.79,10.99,108.41,31.21,50.37,162.5,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostoRebounds  PostRebounds     0.983123
PostPF         PostTurnovers    0.980169
               PostRebounds     0.972743
PostGP         PostPF           0.971077
PostGS         PostTurnovers    0.965097
PostPF         PostGS           0.964204
PostTurnovers  PostGP           0.963333
year           GP               0.960999
PostRebounds   PostTurnovers    0.959481
PostPF         PostAssists      0.954900
PostRebounds   PostGS           0.954197
PostGS         PostSteals       0.954144
PostTurnovers  PostSteals       0.954122
PostAssists    PostTurnovers    0.953661
minutes        Usability        0.950845
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_7,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostSteals,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.17,0,0.42,0.21,0.11,837.84,1986.32,474.07,635.71,445.78,325.74,709.09,1034.83,512.61,645.38,260.04,500.87,121.21,838.89,1988.76,635.47,150.41,445.12,326.55,1035.5,514.4,644.92,260.64,499.8,119.46,33.02,143429.9,0.5,0.5,0.6,0.4,0.39,0.61,0.5,0.5,0.0,2.5,0.66,1.62,0.41,0.72,29.89,14.15,56.39,0.29,0.28,0.07,2.25,5.16,8.36,0.45,0.04,1980.69,0.21,201.36,37.95,70.62,570.49,7.22,20.32,60.67,5.38,85.61,23.09,55.74,150.65,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,4.23,0.87,1.75,0.45,0.72,29.34,18.24,68.66,0.39,0.5,0.0,0.83,2.82,1.87,0.56,0.05,1980.84,0.69,222.4,51.5,72.88,662.02,7.05,38.54,39.78,13.48,125.39,25.99,50.69,157.78,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.39,0.96,1.49,0.43,0.72,30.94,14.12,59.09,0.33,0.11,0.0,1.01,2.3,2.31,0.51,0.12,1983.62,0.6,198.2,44.34,72.23,639.91,4.24,33.06,37.71,10.99,108.27,31.09,49.86,162.55,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.28,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,4.16,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostRebounds   PostoRebounds    0.983227
PostTurnovers  PostPF           0.980152
PostPF         PostRebounds     0.972537
PostGP         PostPF           0.970696
PostGS         PostTurnovers    0.965523
PostPF         PostGS           0.964222
PostTurnovers  PostGP           0.963194
PostRebounds   PostTurnovers    0.959430
year           GP               0.955635
PostSteals     PostTurnovers    0.954711
               PostGS           0.954434
PostRebounds   PostGS           0.954204
PostAssists    PostPF           0.954155
PostTurnovers  PostAssists      0.953135
Usability      minutes          0.951741
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_8,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostftAttempted,PostoRebounds,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.19,0,0.42,0.2,0.11,835.93,1982.07,472.89,634.48,444.17,325.42,707.28,1032.7,511.85,644.65,259.84,500.07,121.16,837.15,1984.59,634.14,149.91,443.7,326.2,1033.44,513.79,644.2,260.45,498.94,119.41,32.99,143601.36,0.5,0.5,0.6,0.4,0.39,0.61,0.5,0.5,0.0,2.5,0.66,1.61,0.4,0.72,29.74,13.8,56.25,0.29,0.28,0.06,4.99,1.29,8.07,0.45,0.05,1980.72,0.24,199.81,37.26,70.64,565.23,7.21,20.18,60.89,5.41,84.5,22.91,55.09,150.68,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,4.27,0.88,1.75,0.45,0.72,29.35,18.51,68.69,0.39,0.51,0.0,2.9,0.91,1.91,0.56,0.05,1980.82,0.69,225.07,52.09,72.89,665.95,7.06,38.76,39.96,13.52,126.98,26.17,51.01,157.87,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.39,0.96,1.49,0.43,0.72,30.95,14.21,59.25,0.33,0.11,0.0,2.32,0.81,2.33,0.51,0.12,1983.6,0.6,198.76,44.34,72.21,641.6,4.26,33.14,37.84,10.97,108.41,31.14,49.89,162.48,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.9,1.0,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,7.53,4.8,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostoRebounds  PostRebounds     0.983243
PostPF         PostTurnovers    0.980192
               PostRebounds     0.972551
               PostGP           0.970607
PostGS         PostTurnovers    0.965571
               PostPF           0.964043
PostTurnovers  PostGP           0.963206
PostRebounds   PostTurnovers    0.959471
PostSteals     PostTurnovers    0.955018
               PostGS           0.954583
PostRebounds   PostGS           0.954204
PostPF         PostAssists      0.954021
PostAssists    PostTurnovers    0.952874
Usability      minutes          0.951126
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_9,GS,PF,POSITION_METRIC,PostBlocks,PostDQ,PostGP,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.19,0,0.42,0.2,0.11,834.76,1979.44,472.16,633.71,443.1,325.21,706.14,1031.35,511.37,644.2,259.73,499.64,121.11,836.1,1982.01,633.32,149.61,442.81,325.98,1032.15,513.46,643.75,260.35,498.46,119.37,32.98,143677.89,0.5,0.5,0.6,0.4,0.39,0.61,0.5,0.5,0.0,2.5,0.66,1.61,0.4,0.72,29.74,13.8,56.25,0.29,0.28,0.06,3.41,4.99,8.07,0.45,0.05,1980.72,0.24,199.81,37.26,70.64,565.23,7.21,20.18,60.89,5.41,84.5,22.91,55.09,150.68,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,4.27,0.88,1.75,0.45,0.72,29.35,18.51,68.69,0.39,0.51,0.0,1.44,2.9,1.91,0.56,0.05,1980.82,0.69,225.07,52.09,72.89,665.95,7.06,38.76,39.96,13.52,126.98,26.17,51.01,157.87,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.39,0.96,1.49,0.43,0.72,30.92,14.27,59.27,0.33,0.11,0.0,1.22,2.33,2.34,0.51,0.12,1983.6,0.6,198.9,44.35,72.21,641.92,4.26,33.14,37.87,10.96,108.44,31.15,49.88,162.46,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.19,0.0,0.92,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,2.3,0.0,4.92,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


PostoRebounds  PostRebounds     0.983246
PostPF         PostTurnovers    0.980212
               PostRebounds     0.972609
               PostGP           0.970532
PostGS         PostTurnovers    0.965537
PostPF         PostGS           0.964116
PostGP         PostTurnovers    0.963191
PostTurnovers  PostRebounds     0.959518
PostSteals     PostTurnovers    0.955090
               PostGS           0.954734
PostRebounds   PostGS           0.954410
PostPF         PostAssists      0.953957
PostTurnovers  PostAssists      0.952816
Usability      minutes          0.951039
dtype: float64


Unnamed: 0,year,confID,rank,playoff,firstRound,semis,finals,o_fgm,o_fga,o_ftm,o_fta,o_3pa,o_oreb,o_dreb,o_reb,o_asts,o_pf,o_stl,o_to,o_blk,d_fgm,d_fga,d_fta,d_3pm,d_3pa,d_oreb,d_reb,d_asts,d_pf,d_stl,d_to,d_blk,GP,attend,win_percentage,loss_percentage,home_win_percentage,home_loss_percentage,away_win_percentage,away_loss_percentage,conference_win_percentage,conference_loss_percentage,tmID_encoded,Avg_Rebounds_Per_Game,Avg_Steals_Per_Game,Avg_Turnovers_Per_Game,FG%,FT%,GP_10,GS,PF,POSITION_METRIC,PostAssists,PostBlocks,PostDQ,PostftAttempted,PostthreeAttempted,Usability,award,birthYear,dq,fgAttempted,ftMade,height,minutes,num_seasons,oRebounds,threeAttempted,total_blocks,total_rebounds,total_steals,total_turnovers,weight,tmID
0,9,0,4.2,0,0.42,0.2,0.11,834.23,1978.22,471.85,633.4,442.57,325.12,705.6,1030.73,511.15,644.02,259.68,499.47,121.07,835.63,1980.82,632.97,149.47,442.4,325.87,1031.54,513.32,643.59,260.3,498.27,119.34,32.97,143710.73,0.5,0.5,0.6,0.4,0.39,0.61,0.5,0.5,0.0,2.5,0.66,1.61,0.4,0.72,29.74,13.8,56.25,0.29,5.68,0.28,0.06,4.99,8.07,0.45,0.05,1980.72,0.24,199.81,37.26,70.64,565.23,7.21,20.18,60.89,5.41,84.5,22.91,55.09,150.68,ATL
1,10,0,7.0,1,0.0,0.0,0.0,895.0,2258.0,542.0,725.0,598.0,340.0,737.0,1077.0,492.0,796.0,285.0,593.0,142.0,1014.0,2254.0,918.0,172.0,502.0,401.0,1265.0,684.0,726.0,310.0,561.0,134.0,34.0,141379.0,0.12,0.88,0.06,0.94,0.18,0.82,0.1,0.9,0.0,4.27,0.88,1.75,0.45,0.72,29.35,18.51,68.69,0.39,1.47,0.51,0.0,2.9,1.91,0.56,0.05,1980.82,0.69,225.07,52.09,72.89,665.95,7.06,38.76,39.96,13.52,126.98,26.17,51.01,157.87,ATL
2,11,0,3.67,1,0.33,0.0,0.0,1024.33,2371.33,560.0,745.0,448.67,382.67,815.67,1198.33,528.67,759.33,314.33,591.0,128.0,1002.0,2326.67,844.0,178.0,520.67,369.0,1204.33,638.0,708.67,334.67,587.67,133.33,34.0,127617.67,0.39,0.61,0.49,0.51,0.29,0.71,0.34,0.66,9.42,3.39,0.96,1.49,0.43,0.72,30.92,14.27,59.27,0.33,2.25,0.11,0.0,2.33,2.34,0.51,0.12,1983.6,0.6,198.9,44.35,72.21,641.92,4.26,33.14,37.87,10.96,108.44,31.15,49.88,162.46,ATL
4,2,0,8.0,1,0.0,0.0,0.0,812.0,1903.0,431.0,577.0,386.0,305.0,630.0,935.0,551.0,713.0,222.0,496.0,90.0,879.0,1930.0,716.0,138.0,423.0,326.0,990.0,596.0,596.0,259.0,426.0,123.0,32.0,90963.0,0.25,0.75,0.31,0.69,0.19,0.81,0.24,0.76,1.0,3.01,0.99,1.92,0.42,0.77,29.94,17.27,69.46,0.35,0.74,0.19,0.0,0.9,1.27,0.63,0.0,1972.13,0.59,228.86,53.81,70.18,763.84,5.82,26.22,66.67,9.35,91.66,30.44,59.83,157.16,CHA
5,3,0,5.33,1,0.67,0.67,0.33,768.0,1821.0,417.0,544.33,414.0,307.67,636.0,943.67,495.0,641.0,218.67,481.33,106.0,781.0,1874.0,613.33,122.0,387.0,338.0,937.33,494.0,584.67,257.67,440.0,123.67,32.0,100671.0,0.46,0.54,0.56,0.44,0.35,0.65,0.56,0.44,1.0,3.15,0.86,1.82,0.4,0.77,29.9,20.23,66.29,0.32,10.18,2.3,0.0,7.53,9.68,0.61,0.0,1972.82,0.75,214.73,50.13,70.82,746.93,6.01,31.66,57.33,14.76,111.7,30.66,64.84,162.81,CHA


In [32]:
connection.close()