In [None]:
#importing modules

import pandas as pd
import numpy as np
from bs4 import BeautifulSoup as bs
import requests
from csv import writer


In [None]:
#Load the per-season player stats; the first CSV column is used as the index
players = pd.read_csv("Seasons_Stats.csv", sep = ',', index_col = [0])


#Discard every column in which more than 50% of the entries are NaN
sparse_columns = []
for column in players.columns:
    nan_share = round(players[column].isna().sum() / len(players) * 100, 2)
    if nan_share > 50:
        sparse_columns.append(column)
players = players.drop(columns = sparse_columns)

#Keep only the rows that have fewer than 40 missing values
missing_per_row = players.isnull().sum(axis = 1)
players = players[missing_per_row < 40]

#Subset of columns used by the rest of the notebook
stats = players[['Player','G','PER','TS%','WS/48','TRB','AST','PTS']]




In [None]:
#Declaring NBA League Criteria for All-time Greats
def league_criteria(x, min_games=400):
    """Drop, in place, every row of ``x`` whose games played is below a cutoff.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame with a numeric ``'G'`` column. It is modified in place.
    min_games : int, default 400
        Rows with ``x['G'] < min_games`` are removed. The default keeps the
        cutoff this notebook has always used.

    Returns
    -------
    None
        The result is delivered by mutating ``x``.
    """
    too_few_games = x.index[x['G'] < min_games]
    x.drop(too_few_games, inplace = True)

In [None]:
#Collapse the per-season rows into one row per player:
#counting stats are summed, rate stats are averaged
career_aggregations = {
    'G': 'sum',
    'PER': 'mean',
    'TS%': 'mean',
    'WS/48': 'mean',
    'TRB': 'sum',
    'AST': 'sum',
    'PTS': 'sum',
}
per_player = stats.groupby(['Player'])
stats = per_player.agg(career_aggregations).reset_index()

#Removes, in place, the players that do not meet the games-played criterion
league_criteria(stats)


    

In [None]:
# Take an explicit copy: the new columns below are then added to an independent
# frame instead of a slice of `stats` (which raises SettingWithCopyWarning).
points = stats[['Player','G','TRB','AST','PTS','WS/48','TS%','PER']].copy()


# Per-game averages, computed column-wise rather than row by row with apply().
# The three columns are appended in this order; later cells rely on it.
points['Points Per Game'] = points['PTS'] / points['G']
points['Assists Per Game'] = points['AST'] / points['G']
points['Rebounds Per Game'] = points['TRB'] / points['G']



In [None]:
#Point scoring function
def scores(rank):
    """Convert a rank into points.

    Ranks below 21 earn ``20 - (rank - 1)`` points (rank 1 -> 20, rank 20 -> 1);
    every other value, including NaN, earns 0.
    """
    if rank < 21:
        return 20 - (rank - 1)
    return 0

    
#Rank function        
def tally(col_source,col_target):
    """Rank ``points[col_source]`` and store the resulting score in ``points[col_target]``.

    Ranking is descending (largest value gets rank 1) with ties sharing their
    average rank; each rank is then converted to points by ``scores``.
    Operates on the notebook-level ``points`` frame and returns nothing.
    """
    global points

    descending_rank = points[col_source].rank(ascending = False, method = 'average')
    points[col_target] = descending_rank.apply(scores)
    
        



# Source statistic -> score column; the order matches the original sequence of
# tally() calls so the score columns are appended in the same positions.
score_columns = {
    'Points Per Game': 'PPG_Scores',
    'Assists Per Game': 'APG_Scores',
    'Rebounds Per Game': 'RPG_Scores',
    'WS/48': 'WS48_Scores',
    'TS%': 'TS_Scores',
    'PER': 'PER_Scores',
}
for source_col, target_col in score_columns.items():
    tally(source_col, target_col)

#Adding points of criteria calculated so far
# Select the score columns by name instead of by position (previously
# iloc[:,11:17]) so the total cannot silently pick up the wrong columns if the
# frame's layout changes.
points['TOTAL'] = points[list(score_columns.values())].sum(axis=1)
points = points.sort_values(by="TOTAL",ascending=False)

points = points.reset_index(drop = True)



           

In [None]:
# sort_values returns a new frame, so the clean-up below does not modify `points`
points_eligible = points.sort_values(by = 'TOTAL', ascending = False)
# Strip '*' from player names as a literal character. regex=False is explicit
# because the default of `regex` differs between pandas versions and a lone '*'
# is not a valid regular expression.
points_eligible['Player'] = points_eligible['Player'].str.replace('*', '', regex=False)


In [None]:
#Scraping data of Championships won by players

url_1 = "https://www.landofbasketball.com/championships/players_with_most_titles.htm"
# The timeout stops the cell from hanging indefinitely on an unresponsive server;
# raise_for_status() fails loudly on an HTTP error instead of parsing an error page.
page = requests.get(url_1, timeout=30)
page.raise_for_status()

soup = bs(page.content,'html.parser')
champ = soup.find('main', class_ = "main-content")
# One <tr> per player row
player = champ.find_all('tr', class_ = ['a-top bt','a-top'])

# Write the scraped (name, title count) pairs to Championships.csv
with open('Championships.csv','w',encoding = 'utf8', newline = '') as f:
    thewriter = writer(f)
    header = ['Player','Championships']
    thewriter.writerow(header)
    for r1 in player:
        # Player name is the text of the row's first link; the count is the
        # text of the 70px-wide <div> in the same row.
        pl_name = r1.find('a').text
        pl_count = r1.find('div', style = "width: 70px").text
        info = [pl_name, pl_count]
        thewriter.writerow(info)

# Read the file back into a DataFrame
champions = pd.read_csv("Championships.csv",
                    delimiter = ',')


In [None]:
# Left join on the player name: every row of points_eligible is kept, and
# players missing from `champions` get NaN in the new column.
# Note: re-running this cell merges again and produces suffixed duplicate columns.
points_eligible = points_eligible.merge(champions, how='left', on='Player')

In [None]:
#Hand-maintained table of Finals MVP tallies per player
# NOTE(review): names and counts are hard-coded here - verify against an authoritative source.

fmvp_name = [
    'Bill Russell', 'Michael Jordan', 'LeBron James', 'Tim Duncan',
    "Shaquille O'Neal", 'Magic Johnson', 'Kareem Abdul-Jabbar', 'Larry Bird',
    'Kawhi Leonard', 'Kevin Durant', 'Kobe Bryant', 'Hakeem Olajuwon',
    'Willis Reed', 'Giannis Antetokounmpo', 'Rick Barry', 'Chauncey Billups',
    'Wilt Chamberlain', 'Stephen Curry', 'Joe Dumars', 'John Havlicek',
    'Andre Iguodala', 'Dennis Johnson', 'Moses Malone', 'Cedric Maxwell',
    'Dirk Nowitzki', 'Tony Parker', 'Paul Pierce', 'Isiah Thomas',
    'Wes Unseld', 'Dwyane Wade', 'Bill Walton', 'Jerry West',
    'Jo Jo White', 'James Worthy',
]

# Counts line up positionally with fmvp_name: the first 13 names carry the
# explicit values in count1, the remaining 21 names carry a count of 1.
count1 = [9, 6, 4] + [3] * 3 + [2] * 7
count2 = [1] * 21
fmvp_count = count1 + count2

fmvp_list = list(zip(fmvp_name, fmvp_count))
FMVPs = pd.DataFrame({'Player': fmvp_name, 'FMVPs': fmvp_count})








In [None]:
# Left join on the player name: players absent from FMVPs get NaN in 'FMVPs'.
# Note: re-running this cell merges again and produces suffixed duplicate columns.
points_eligible = points_eligible.merge(FMVPs, how='left', on='Player')

In [None]:
#Importing and adding data for regular season MVPs
# NOTE(review): the comment and variable name refer to regular-season MVPs, but
# this URL is the Finals MVP page - confirm which page is intended. Changing it
# may also change the table index used in the next cell.

RMVPs = pd.read_html("https://www.basketball-reference.com/awards/finals_mvp.html")

# Print every parsed table with its position so the right one can be selected
for idx in range(len(RMVPs)):
    table = RMVPs[idx]
    print("*************************")
    print(idx)
    print(table)

In [None]:
# Take the second parsed table, discard its 'Lg' column and expose 'Count' as 'RMVPs'.
# Note the two distinct names: RMVPs (list of tables) vs RMVPS (the tidy frame).
RMVPS = (
    RMVPs[1]
    .drop(columns = 'Lg')
    .rename(columns = {'Count':'RMVPs'})
)


In [None]:
# Left join on the player name: players absent from RMVPS get NaN in its columns.
# Note: re-running this cell merges again and produces suffixed duplicate columns.
points_eligible = points_eligible.merge(RMVPS, how='left', on='Player')

In [None]:
#Importing and adding data for all-NBA Team Selections

All_NBA = pd.read_html("https://www.basketball-reference.com/awards/all_league_by_player.html")

# Print every parsed table with its position so the right one can be selected
for idx in range(len(All_NBA)):
    table = All_NBA[idx]
    print("*************************")
    print(idx)
    print(table)

In [None]:
# Work on a copy: assigning to `.columns` below would otherwise flatten the header
# of All_NBA[0] itself, and re-running this cell would then fail in droplevel().
all_nba = All_NBA[0].copy()
# Drop the top level of the two-level column header
all_nba.columns = all_nba.columns.droplevel(0)
all_nba = all_nba.rename(columns = {'Tot':'All NBA Team Selections'})
all_nba = all_nba[['Player','All NBA Team Selections']]
# Discard the last two selected columns.
# NOTE(review): presumably the flattened header holds several 'Tot' columns and
# this keeps 'Player' plus the first of them - confirm against the printed table.
all_nba = all_nba.iloc[:, :-2]

In [None]:
# Left join on the player name: players absent from all_nba get NaN in its columns.
# Note: re-running this cell merges again and produces suffixed duplicate columns.
points_eligible = points_eligible.merge(all_nba, how='left', on='Player')

In [None]:
#Importing and adding data for all Defensive Team Selections


all_def = pd.read_html("https://www.basketball-reference.com/awards/all_defense_by_player.html")

# Print every parsed table with its position so the right one can be selected
for idx in range(len(all_def)):
    table = all_def[idx]
    print("*************************")
    print(idx)
    print(table)

In [None]:
# rename() returns a new frame, so the parsed table in all_def[0] is left untouched
all_defensive = all_def[0].rename(columns = {'Tot':'All Defensive Team Selections'})
# Drop the top level of the two-level column header
all_defensive.columns = all_defensive.columns.droplevel(0)
# Select the name and selection columns, then discard the last two of them.
# NOTE(review): presumably several columns share the renamed 'Tot' label and this
# keeps 'Player' plus the first of them - confirm against the printed table.
selected = all_defensive[['Player','All Defensive Team Selections']]
all_defensive = selected.iloc[:, :-2]


In [None]:
# Left join on the player name: players absent from all_defensive get NaN in its columns.
# Note: re-running this cell merges again and produces suffixed duplicate columns.
points_eligible = points_eligible.merge(all_defensive, how='left', on='Player')

In [None]:
def scores2(rank):
    """Convert a rank into points on the 27.5-point scale.

    Ranks below 20 earn ``27.5 - (rank - 1)`` points (rank 1 -> 27.5);
    every other value, including NaN, earns 0.
    """
    if rank < 20:
        return 27.5 - (rank - 1)
    return 0

def scores3(rank):
    """Convert a rank into points on the 22.5-point scale.

    Ranks below 20 earn ``22.5 - rank`` points (rank 1 -> 21.5);
    every other value, including NaN, earns 0.
    """
    if rank < 20:
        return 22.5 - rank
    return 0




def tally2(col_source,col_target):
    """Rank ``points_eligible[col_source]`` and store the ``scores2`` points in ``col_target``.

    Ranking is descending (largest value gets rank 1) and tied values all
    receive the lowest rank of their group (``method='min'``).
    Operates on the notebook-level ``points_eligible`` frame and returns nothing.
    """
    global points_eligible

    descending_rank = points_eligible[col_source].rank(ascending = False, method = 'min')
    points_eligible[col_target] = descending_rank.apply(scores2)

def tally3(col_source,col_target):
    """Rank ``points_eligible[col_source]`` and store the ``scores3`` points in ``col_target``.

    Ranking is descending (largest value gets rank 1) and tied values all
    receive the lowest rank of their group (``method='min'``).
    Operates on the notebook-level ``points_eligible`` frame and returns nothing.
    """
    global points_eligible

    descending_rank = points_eligible[col_source].rank(ascending = False, method = 'min')
    points_eligible[col_target] = descending_rank.apply(scores3)
    
    

In [None]:
# Each championship is worth 3 points (NaN stays NaN until the later fillna)
points_eligible['Champ_Scores'] = points_eligible['Championships'] * 3


In [None]:
# (source column, score column, ranking function) - applied in this order so the
# score columns are appended to points_eligible in the same positions as before
award_scoring = (
    ('FMVPs', 'FMVP_Scores', tally2),
    ('RMVPs', 'RMVP_Scores', tally2),
    ('All NBA Team Selections', 'AllNBA_Scores', tally3),
    ('All Defensive Team Selections', 'AllDefensive_Scores', tally3),
)
for source_col, target_col, ranker in award_scoring:
    ranker(source_col, target_col)


In [None]:
#Hand-picked list of players for the "impact on society" criterion

impact_name = [
    'Michael Jordan', 'Kobe Bryant', 'Stephen Curry', 'LeBron James',
    'Bill Russell', 'Allen Iverson', "Shaquille O'Neal", 'Magic Johnson',
    'Julius Erving', 'Wilt Chamberlain',
]

# Descending values 10, 9, ..., 1 - the first name in the list gets 10
impact_count = list(range(10, 0, -1))


impact_list = [(name, value) for name, value in zip(impact_name, impact_count)]
Impact = pd.DataFrame(impact_list, columns=['Player', 'Impact'])






In [None]:
# Left join on the player name: players absent from Impact get NaN in 'Impact'.
# Note: re-running this cell merges again and produces suffixed duplicate columns.
points_eligible = points_eligible.merge(Impact, how='left', on='Player')

In [None]:
# Replace every remaining NaN (for example from the left merges) with 0
points_eligible = points_eligible.fillna(value = 0)

In [None]:
#Summing all points given for each metric

# The award columns are selected by name. The previous positional slice
# (iloc[:,23:28]) depended on the exact column layout and appears to have stopped
# one column short, leaving the freshly merged 'Impact' points out of the total.
# NOTE(review): confirm that 'Impact' is meant to count towards TOTAL.
award_columns = ['Champ_Scores', 'FMVP_Scores', 'RMVP_Scores',
                 'AllNBA_Scores', 'AllDefensive_Scores', 'Impact']
x = points_eligible[award_columns].sum(axis=1)
# Caution: this adds onto the existing TOTAL, so running the cell twice double-counts.
points_eligible['TOTAL'] = points_eligible['TOTAL'] + x

In [None]:
# Highest combined score first
points_eligible = points_eligible.sort_values('TOTAL', ascending = False)


In [None]:
# Final leaderboard: name and total only, restricted to players with a positive total
FINAL = (
    points_eligible[['Player','TOTAL']]
    .loc[lambda frame: frame['TOTAL'] > 0]
    .reset_index(drop = True)
)
# Shift the index so it starts at 1 and reads as a ranking position
FINAL.index = FINAL.index + 1

#TOP 25
FINAL.head(25)
