In [2]:
import pandas as pd
# Load the season and playoff player tables from CSV files in the working directory.
season_csv, playoffs_csv = 'players_season.csv', 'players_playoffs.csv'
df_season = pd.read_csv(season_csv)
df_playoffs = pd.read_csv(playoffs_csv)

Do these things:
* Split the `Unnamed: 0` column into 'id' and 'Year' columns
* Move the new columns to the start and delete the `Unnamed: 0` column
* Delete all the players who didn't play in the NBA
* Convert height to cm and weight to kg

In [3]:
def get_id(s):
    """Return the id part of ``s``: everything before its first space.

    When ``s`` contains no space the whole string is returned.
    """
    player_id, _, _ = s.partition(" ")
    return player_id


def get_year(s):
    """Return the year part of ``s``: its second space-separated token, as a string.

    Raises IndexError when ``s`` contains no space.
    """
    tokens = s.split(" ")
    return tokens[1]


def lbs_to_kg(weight):
    """Convert a weight in pounds to kilograms.

    Returns None when ``weight`` is None or the empty string; otherwise the
    value multiplied by the pound-to-kilogram factor.
    """
    if weight is None or weight == "":
        return None
    kg_per_lb = 0.45359237
    return kg_per_lb * weight


def feet_to_cm(height):
    """Convert a ``"feet-inches"`` string such as ``"6-10"`` to centimetres.

    Parameters
    ----------
    height : str or missing value
        Text whose first two ``-``-separated tokens are feet and inches.

    Returns
    -------
    float or None
        ``30.48 * feet + 2.54 * inches``, or None when ``height`` is missing
        (None, NaN/NA, or the empty string).

    Raises
    ------
    IndexError
        If a non-missing ``height`` contains no ``-``.
    ValueError
        If either token cannot be parsed as a number.
    """
    # pandas represents empty CSV cells as NaN, which has no .split();
    # treat it like the other "missing" markers instead of crashing.
    if pd.isna(height) or height == "":
        return None
    tokens = height.split("-")
    return 30.48 * float(tokens[0]) + 2.54 * float(tokens[1])

In [4]:
def format_df(df):
    """Return a cleaned copy of a raw player table; the input frame is left untouched.

    Steps:
      1. Split the 'Unnamed: 0' column into 'id' and 'Year' (both strings).
      2. Reorder columns positionally: 'id' first, then original columns 1-5,
         then 'Year', then the remaining original columns. Original column 0
         is left out, so this relies on the raw column order.
      3. Keep only rows whose 'Lg' equals 'NBA'.
      4. Convert 'Height' with feet_to_cm and 'Weight' with lbs_to_kg.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw table containing at least 'Unnamed: 0', 'Lg', 'Height' and 'Weight'.

    Returns
    -------
    pandas.DataFrame
        A new, independent frame.
    """
    # Work on a copy so the caller's frame does not silently gain 'id'/'Year'.
    df = df.copy()
    # get id and year from the combined 'Unnamed: 0' column
    df['id'] = df['Unnamed: 0'].apply(get_id)
    df['Year'] = df['Unnamed: 0'].apply(get_year)
    # move id and year towards the start of the table (drops original column 0)
    cols = df.columns.tolist()
    cols = [cols[-2]] + cols[1:6] + [cols[-1]] + cols[6:-2]
    # delete everyone that is not in the NBA; .copy() makes the filtered result
    # an independent frame so the assignments below are not chained assignments
    df = df.loc[df['Lg'] == 'NBA', cols].copy()
    # convert height and weight to cm and kg (respectively)
    df['Height'] = df['Height'].apply(feet_to_cm)
    df['Weight'] = df['Weight'].apply(lbs_to_kg)
    return df

In [5]:
# Clean both player tables with format_df. format_df reads the 'Unnamed: 0' column and
# its column reordering leaves out the first column, so (assuming 'Unnamed: 0' is that
# first column) re-running this cell on the already-formatted frames is expected to
# raise KeyError; reload the CSVs before running it again.
df_season = format_df(df_season)
df_playoffs = format_df(df_playoffs)

Read Teams dataframe

In [6]:
# Load the team table from the working directory.
teams_csv = 'Teams.csv'
df_teams = pd.read_csv(teams_csv)

* Rename Unnamed column to Tm
* keep only teams with a positive Win%

In [7]:
# Give the unnamed first CSV column its real meaning: the team code.
# NOTE(review): the text above also asks to keep only teams with a positive Win%;
# no such filter is applied in this cell. Confirm whether it is still wanted.
team_column_names = {"Unnamed: 0": "Tm"}
df_teams = df_teams.rename(columns=team_column_names)

Add win rate to df_season 

In [8]:
def get_win_rate(flag):
    """Look up the 'WR' value for team code ``flag`` in the module-level ``df_teams``.

    The code "TOT" is special-cased to a fixed 48 without consulting ``df_teams``.
    For any other code the first row whose 'Tm' equals ``flag`` is used.

    Raises IndexError when no row of ``df_teams`` matches ``flag``.
    """
    if flag == "TOT":
        return 48
    team_rows = df_teams[df_teams['Tm'] == flag]
    first_match = team_rows.iloc[0]
    return first_match['WR']

# Attach each row's team win rate, looked up from its team code via get_win_rate.
team_codes = df_season['Tm']
df_season['TmWin%'] = team_codes.apply(get_win_rate)

Convert all numeric cols to float

In [9]:
# Columns parsed as numbers in the season table.
cols_to_convert = [
    "From", "To", "Height", "Weight", "Year", "Age", "G", "GS", "MP",
    "FG", "FGA", "FG%", "3P", "3PA", "3P%", "2P", "2PA", "2P%", "eFG%",
    "FT", "FTA", "FT%", "ORB", "DRB", "TRB", "AST", "STL", "BLK", "TOV", "PF", "PTS",
    "TmWin%",
]
# "TmWin%" is only added to the season table, so the playoff table converts
# every listed column except that one.
playoff_cols_to_convert = [col for col in cols_to_convert if col != "TmWin%"]

season_numeric = df_season[cols_to_convert].apply(pd.to_numeric)
df_season[cols_to_convert] = season_numeric
playoffs_numeric = df_playoffs[playoff_cols_to_convert].apply(pd.to_numeric)
df_playoffs[playoff_cols_to_convert] = playoffs_numeric

Filter players by these criteria:
#### Season:
Games >= 40

Team win rate >= 48%

year >= 1978

#### Playoffs:
Games >= 3

year >= 1978

# Season rows: at least 40 games, year 1978 or later, team win rate of at least 48.
season_keep = df_season['G'].ge(40) & df_season['Year'].ge(1978) & df_season['TmWin%'].ge(48)
df_season = df_season[season_keep]
# Playoff rows: at least 3 games, year 1978 or later.
playoffs_keep = df_playoffs['G'].ge(3) & df_playoffs['Year'].ge(1978)
df_playoffs = df_playoffs[playoffs_keep]

If the 2P stats are NaN, replace them with the FG stats.

fill NaN with zeros

# Replace every NaN in both tables with 0.
df_season = df_season.fillna(value=0)
df_playoffs = df_playoffs.fillna(value=0)

In [10]:
# Replace every NaN in both tables with 0.
# NOTE(review): the same two statements also appear directly above and in the next
# cell; on an already-filled frame they change nothing, so the repeats look redundant.
df_season = df_season.fillna(value=0)
df_playoffs = df_playoffs.fillna(value=0)

In [11]:
# Replace every NaN in both tables with 0.
# NOTE(review): this repeats the fill already done in the preceding cell; on an
# already-filled frame it changes nothing, so this cell looks redundant.
df_season = df_season.fillna(value=0)
df_playoffs = df_playoffs.fillna(value=0)

In [12]:
### Constants

### Formula

In [13]:
def _league_shot_value(p3_bonus=0, p2_bonus=0):
    """Evaluate the league-level expression shared by the steal, turnover and rebound weights.

    Computes, from the constants on ``LeagueStats``::

        3 * a3 * (r3 + p3_bonus)**2 + 2 * a2 * (r2 + p2_bonus)**2 + 2 * rf**2 * af
        - block_chance * (3 * a3 * r3**2 + 2 * a2 * r2**2)

    where ``a3/a2/af`` are ``p3/p2/ft_league_attack_ratio`` and ``r3/r2/rf`` are
    ``p3/p2/ft_league_ratio``. The bonuses are added to the 3P / 2P ratios
    before squaring; the subtracted block term never includes them.
    """
    gained = (
        3 * LeagueStats.p3_league_attack_ratio * ((LeagueStats.p3_league_ratio + p3_bonus) ** 2)
        + 2 * LeagueStats.p2_league_attack_ratio * ((LeagueStats.p2_league_ratio + p2_bonus) ** 2)
        + 2 * (LeagueStats.ft_league_ratio ** 2) * LeagueStats.ft_league_attack_ratio
    )
    blocked = LeagueStats.block_chance * (
        3 * LeagueStats.p3_league_attack_ratio * (LeagueStats.p3_league_ratio ** 2)
        + 2 * LeagueStats.p2_league_attack_ratio * (LeagueStats.p2_league_ratio ** 2)
    )
    return gained - blocked


def calc_score(player_stats):
    """Compute the notebook's composite score for one player row.

    Parameters
    ----------
    player_stats : mapping
        Anything indexable by the keys '3P', '3P%', '2P', '2P%', 'FT', 'FT%',
        'AST', 'DRB', 'ORB', 'STL', 'BLK' and 'TOV' (for example a DataFrame
        row passed by ``DataFrame.apply(..., axis=1)``).

    Returns
    -------
    number
        Weighted sum of the made-shot terms and the assist / rebound / steal /
        block terms, minus the turnover term.

    Notes
    -----
    Every weight is derived from attributes of a module-level ``LeagueStats``
    object, which must be defined before this function is called.
    """
    p3_in = player_stats['3P']
    p3_ratio = player_stats['3P%']
    p2_in = player_stats['2P']
    p2_ratio = player_stats['2P%']
    ft_in = player_stats['FT']
    ft_ratio = player_stats['FT%']

    # Shots scored against the player: all hard-coded to zero, so the
    # "points allowed" term below currently contributes nothing.
    p3_on_me = 0
    p3_ratio_on_me = 0
    p2_on_me = 0
    p2_ratio_on_me = 0
    ft_on_me = 0
    ft_ratio_on_me = 0

    assists = player_stats['AST']
    d_rebounds = player_stats['DRB']
    off_rebound = player_stats['ORB']
    steals = player_stats['STL']
    blocks = player_stats['BLK']
    turnovers = player_stats['TOV']

    # Assists are floored at LeagueStats.ast_min_val.
    if assists <= LeagueStats.ast_min_val:
        assists = LeagueStats.ast_min_val

    # Made threes count with a special multiplier only when both the 3P% and
    # the 3P count reach the "good shooter" thresholds; otherwise with 3.
    if p3_ratio >= LeagueStats.good_shooter_minimum_ratio and p3_in >= LeagueStats.good_shooter_minimum_3p:
        p3_multiplier = LeagueStats.good_shooter_3p_multiplier
    else:
        p3_multiplier = 3

    # The same league expression, shifted by the steal / turnover ratio bonuses.
    z1 = _league_shot_value(LeagueStats.stl_p3, LeagueStats.stl_p2)
    z2 = _league_shot_value(LeagueStats.tov_p3, LeagueStats.tov_p2)

    tov_value = (z2 - LeagueStats.stl_chance * z1) / (1 + LeagueStats.tov_chance * LeagueStats.stl_chance)
    stl_value = z1 - LeagueStats.tov_chance * tov_value

    # The 0.66 weight is hard-coded; its derivation is not shown in this notebook.
    assist_val = 0.66 * (
        3 * LeagueStats.p3_league_ratio * LeagueStats.p3_league_attack_from_assist_ratio
        + 2 * LeagueStats.p2_league_ratio * LeagueStats.p2_league_attack_from_assist_ratio
    )

    # The defensive-rebound weight uses the unshifted league expression. Its
    # free-throw term is 2 * ft_ratio**2 * ft_attack_ratio, the same as in the
    # steal, turnover and offensive-rebound weights (it used to square the
    # attack ratio instead, disagreeing with the other three).
    d_rebound_val = _league_shot_value() - LeagueStats.tov_chance * tov_value
    off_rebound_val = (
        _league_shot_value(LeagueStats.orb_p3, LeagueStats.orb_p2)
        - LeagueStats.tov_chance * tov_value
    )
    # The 0.57 weight is hard-coded; its derivation is not shown in this notebook.
    block_val = 0.57 * d_rebound_val

    points_allowed = 3 * p3_on_me * p3_ratio_on_me + 2 * p2_on_me * p2_ratio_on_me + 1 * ft_on_me * ft_ratio_on_me

    total = (
        p3_multiplier * p3_in * p3_ratio
        + 2 * p2_in * p2_ratio
        + 1 * ft_in * ft_ratio
        + assist_val * assists
        + d_rebound_val * d_rebounds
        + off_rebound_val * off_rebound
        + stl_value * steals
        + block_val * blocks
        - tov_value * (turnovers - LeagueStats.ast_tov_ratio * assists)
        - points_allowed
    )

    return total

Add scores to tables

In [14]:
# Score every row of both tables with calc_score and store the result in 'score2'.
season_scores = df_season.apply(calc_score, axis='columns')
df_season['score2'] = season_scores
playoff_scores = df_playoffs.apply(calc_score, axis='columns')
df_playoffs['score2'] = playoff_scores