# <b> <font color='red'>  Project in Data Science </font> </b>

## Analysing NBA historical data to discover patterns that should be adopted by franchises to succeed

#### <b>Andreas Neocleous
a.neocleous12@hotmail.com
</b>

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

### Scraping as a test from hoopshype site

In [2]:
url="https://hoopshype.com/salaries/players/"

r = requests.get(url,timeout=2.5)
r_html = r.text

soup = BeautifulSoup(r_html, 'html.parser')

salary_table = soup.find('table')

In [3]:
length=len(salary_table.find_all("td"))

player_names=[salary_table.find_all("td")[i].text.strip() for i in range(9,length,8)]

column1=[salary_table.find_all("td")[i].text.strip() for i in range(10,length,8)]
column2=[salary_table.find_all("td")[i].text.strip() for i in range(11,length,8)]
column3=[salary_table.find_all("td")[i].text.strip() for i in range(12,length,8)]
column4=[salary_table.find_all("td")[i].text.strip() for i in range(13,length,8)]
column5=[salary_table.find_all("td")[i].text.strip() for i in range(14,length,8)]
column6=[salary_table.find_all("td")[i].text.strip() for i in range(15,length,8)]

In [4]:
df_dict={'player_names':player_names,
        'salary ($)':column1}
        
salary_df=pd.DataFrame(df_dict)
salary_df[:5]

Unnamed: 0,player_names,salary ($)
0,Stephen Curry,"$45,780,966"
1,John Wall,"$44,310,840"
2,James Harden,"$44,310,840"
3,Russell Westbrook,"$44,211,146"
4,Kevin Durant,"$42,018,900"


### Transform salaries to numeric values

In [5]:
salary_df.replace({'\$':''}, regex = True,inplace=True)
salary_df.replace(',','', regex=True, inplace=True)

for item in salary_df.columns[1:]:
    
    salary_df[item]=pd.to_numeric(salary_df[item])
    
salary_df[:5]

Unnamed: 0,player_names,salary ($)
0,Stephen Curry,45780966
1,John Wall,44310840
2,James Harden,44310840
3,Russell Westbrook,44211146
4,Kevin Durant,42018900


### Creating arrays of years and team names in order to feed them as f string to for loops which is going to create for us all the DataFrames we need

In [6]:
years=[]
for i in range(2020,2000,-1):
    years.append(f"{i}-{i+1}")

team_names = ["portland_trail_blazers","oklahoma_city_thunder","los_angeles_clippers","cleveland_cavaliers","philadelphia_76ers",
              "miami_heat","golden_state_warriors","denver_nuggets","houston_rockets","orlando_magic",
              "los_angeles_lakers","milwaukee_bucks","toronto_raptors","washington_wizards","dallas_mavericks",
              "san_antonio_spurs","utah_jazz","brooklyn_nets","new_orleans_pelicans","boston_celtics",
              "minnesota_timberwolves","sacramento_kings","indiana_pacers","chicago_bulls","atlanta_hawks",
              "detroit_pistons","new_york_knicks","phoenix_suns","memphis_grizzlies","charlotte_hornets"]            
              
print(years)
print(team_names)
print(len(years))
print(len(team_names)) 


['2020-2021', '2019-2020', '2018-2019', '2017-2018', '2016-2017', '2015-2016', '2014-2015', '2013-2014', '2012-2013', '2011-2012', '2010-2011', '2009-2010', '2008-2009', '2007-2008', '2006-2007', '2005-2006', '2004-2005', '2003-2004', '2002-2003', '2001-2002']
['portland_trail_blazers', 'oklahoma_city_thunder', 'los_angeles_clippers', 'cleveland_cavaliers', 'philadelphia_76ers', 'miami_heat', 'golden_state_warriors', 'denver_nuggets', 'houston_rockets', 'orlando_magic', 'los_angeles_lakers', 'milwaukee_bucks', 'toronto_raptors', 'washington_wizards', 'dallas_mavericks', 'san_antonio_spurs', 'utah_jazz', 'brooklyn_nets', 'new_orleans_pelicans', 'boston_celtics', 'minnesota_timberwolves', 'sacramento_kings', 'indiana_pacers', 'chicago_bulls', 'atlanta_hawks', 'detroit_pistons', 'new_york_knicks', 'phoenix_suns', 'memphis_grizzlies', 'charlotte_hornets']
20
30


### Create a testing DataFrame by using f-strings

In [7]:
url=f"https://hoopshype.com/salaries/{team_names[0]}/{years[0]}/"
print(url)

r = requests.get(url,timeout=2.5)
r_html = r.text

soup = BeautifulSoup(r_html, 'html.parser')

salary_table = soup.find('table')



length=len(salary_table.find_all("td"))
player_names=[salary_table.find_all("td")[i].text.strip() for i in range(3,length,3)]
column1=[salary_table.find_all("td")[i].text.strip() for i in range(4,length,3)]

df_dict={'player_names':player_names,
        'salary ($)':column1}
        
salary_portland20_21=pd.DataFrame(df_dict)

https://hoopshype.com/salaries/portland_trail_blazers/2020-2021/


In [8]:
salary_portland20_21.replace({'\$':''}, regex = True,inplace=True)
salary_portland20_21.replace(',','', regex=True, inplace=True)

for item in salary_portland20_21.columns[1:]:
    
    salary_portland20_21[item]=pd.to_numeric(salary_df[item])
    
salary_portland20_21[:5]

Unnamed: 0,player_names,salary ($)
0,Damian Lillard,45780966
1,CJ McCollum,44310840
2,Jusuf Nurkic,44310840
3,Robert Covington,44211146
4,Norman Powell,42018900


### We are going to create a dictionary with keys being name of teams plus year using f-strings and their values will be their payroll as a DataFrame

In [9]:
payrolls_noPositions=dict()
counter=0
for i in range(0,len(team_names)):
    for j in range(0,len(years)):
        
        url = f"https://hoopshype.com/salaries/{team_names[i]}/{years[j]}/"
        
        salary_table = []
        try:
            r = requests.get(url,timeout=6)
            r_html = r.text
            soup = BeautifulSoup(r_html, 'html.parser')
            salary_table = soup.find('table')
        except:
            print("Something wrong")

        length=len(salary_table.find_all("td"))
        player_names=[salary_table.find_all("td")[i].text.strip() for i in range(3,length,3)]
        column1=[salary_table.find_all("td")[i].text.strip() for i in range(4,length,3)]
        
        
        salary_table = []
        try:
            r = requests.get(url,timeout=6)
            r_html = r.text
            soup = BeautifulSoup(r_html, 'html.parser')
            salary_table = soup.find('table')
        except:
            print("Something wrong")

        df_dict={'player_names':player_names,
                'salary ($)':column1}
        
        
        
        tempDataFrame=pd.DataFrame(df_dict)
        
        tempDataFrame.replace({'\$':''}, regex = True,inplace=True)
        tempDataFrame.replace(',','', regex=True, inplace=True)
        for item in tempDataFrame.columns[1:2]:
            tempDataFrame[item]=pd.to_numeric(tempDataFrame[item])
            
        counter=counter+1
        if counter%100==0 or counter==1:
            print(tempDataFrame)
            print(counter)
        payrolls_noPositions[f"{team_names[i]}{years[j]}"] = tempDataFrame

print(counter,len(payrolls_noPositions))

               player_names  salary ($)
0            Damian Lillard    31626953
1               CJ McCollum    29354152
2              Jusuf Nurkic    12888889
3          Robert Covington    12138345
4             Norman Powell    10865952
5             Derrick Jones     9258000
6              Zach Collins     5406255
7               Enes Kanter     5005350
8          Andrew Nicholson     2844429
9           Carmelo Anthony     2564753
10          Anfernee Simons     2252040
11            Nassir Little     2210640
12         Anderson Varejao     1913345
13              Harry Giles     1620564
14                CJ Elleby      898310
15  Rondae Hollis-Jefferson      502957
16           Keljin Blevins      449115
17                  TJ Leaf      104598
18                   Totals   131904647
1
Something wrong
         player_names  salary ($)
0     Dikembe Mutombo    14315790
1       Allen Iverson    11250000
2     Derrick Coleman     8710000
3         Matt Geiger     8142160
4         Aa

### Know we are going to do the same but WITH the implementation of another column that will contain the position each player plays in

#### While running our program the first time it has been noticed that some data were problematic. It has been decided to drop those problematic data by throwing exceptions and keep track of the data we are not importing 

In [10]:
payrolls=dict()
kickout=[]
dropped=[]
counter=0
for i in range(0,len(team_names)):
    for j in range(0,len(years)):
            
        counter=counter+1    
        problematic=False    
        url = f"https://hoopshype.com/salaries/{team_names[i]}/{years[j]}/"
        salary_table = []
        try:
            r = requests.get(url,timeout=20)
            r_html = r.text
            soup = BeautifulSoup(r_html, 'html.parser')
            salary_table = soup.find('table')
        except:
            print("Something wrong")
        length=len(salary_table.find_all("td"))
        player_names=[salary_table.find_all("td")[i].text.strip() for i in range(3,length,3)]
        column1=[salary_table.find_all("td")[i].text.strip() for i in range(4,length,3)]
        
        #Creating the new column for players position via entering each players personal URL in hoopshype
        column2 = []
        for k in range(0,len(player_names)):
            if k == len(player_names)-1:
                column2.append("NaN")
                break
            player_name_trans = player_names[k].replace(' ','-')
            url_player = f"https://hoopshype.com/player/{player_name_trans}/salary/"
            r = requests.get(url_player,timeout=10)
            r_html = r.text
            try:
                soup = BeautifulSoup(r_html, 'html.parser')
                htmlText = soup.find_all("span", class_="player-bio-text-line-value")
                positionString = str(htmlText[0])
                position = positionString.find(">")
                column2.append(positionString[position+1])
            except IndexError as e:
                    kickout.append(f"{team_names[i]}{years[j]}")
                    dropped.append(f"({i},{j})")
                    print(i,j,e)
                    problematic= True
                    break
            except AttributeError as e1:
                    kickout.append(f"{team_names[i]}{years[j]}")
                    dropped.append(f"({i},{j})")
                    print(i,j,e1)
                    problematic= True
                    break
        if problematic==False:
            salary_table = []
            try:
                r = requests.get(url,timeout=20)
                r_html = r.text
                soup = BeautifulSoup(r_html, 'html.parser')
                salary_table = soup.find('table')
            except:
                print("Something wrong")

            df_dict={'player_names':player_names,
                    'salary ($)':column1,
                        'position':column2}



            tempDataFrame=pd.DataFrame(df_dict)

            tempDataFrame.replace({'\$':''}, regex = True,inplace=True)
            tempDataFrame.replace(',','', regex=True, inplace=True)
            for item in tempDataFrame.columns[1:2]:
                tempDataFrame[item]=pd.to_numeric(tempDataFrame[item])
            if counter%75==0 or counter==1:
                print(counter)
                print(tempDataFrame)
            payrolls[f"{team_names[i]}{years[j]}"] = tempDataFrame
            
print(counter,len(payrolls),len(kickout))            


1
               player_names  salary ($) position
0            Damian Lillard    31626953        G
1               CJ McCollum    29354152        G
2              Jusuf Nurkic    12888889        C
3          Robert Covington    12138345        F
4             Norman Powell    10865952        G
5             Derrick Jones     9258000        F
6              Zach Collins     5406255        C
7               Enes Kanter     5005350        -
8          Andrew Nicholson     2844429        -
9           Carmelo Anthony     2564753        F
10          Anfernee Simons     2252040        G
11            Nassir Little     2210640        F
12         Anderson Varejao     1913345        -
13              Harry Giles     1620564        -
14                CJ Elleby      898310        F
15  Rondae Hollis-Jefferson      502957        -
16           Keljin Blevins      449115        G
17                  TJ Leaf      104598        -
18                   Totals   131904647      NaN
0 14
0 18
0 19
1 1

##### Below we will print the same instance with and without the players position column by calling it in different payroll dictionary

In [15]:
payrolls["portland_trail_blazers2019-2020"]

Unnamed: 0,player_names,salary ($),position
0,Damian Lillard,29802321,G
1,CJ McCollum,27556959,G
2,Hassan Whiteside,27093018,C
3,Trevor Ariza,12200000,F
4,Jusuf Nurkic,12000000,C
5,Rodney Hood,5718000,F
6,Zach Collins,4240200,C
7,Carmelo Anthony,2159029,F
8,Anfernee Simons,2149560,G
9,Nassir Little,2105520,F


In [16]:
payrolls_noPositions["portland_trail_blazers2019-2020"]

Unnamed: 0,player_names,salary ($)
0,Damian Lillard,29802321
1,CJ McCollum,27556959
2,Hassan Whiteside,27093018
3,Trevor Ariza,12200000
4,Jusuf Nurkic,12000000
5,Rodney Hood,5718000
6,Zach Collins,4240200
7,Carmelo Anthony,2159029
8,Anfernee Simons,2149560
9,Nassir Little,2105520


### Now we are going to create lists that will represent each team's success. 
#### Any team will only be in one and only one list
#### Teams are split according to their success into:
        1) Champions: One team per year. Winners of the championship
        2) Top4: Teams that made it at least the semifinals but are not champions
        3) Decent: Teams that made the playoffs but didn't qualify to semifinals
        4) Mid-Tear: Teams that were on the top half of the regular season table if playoff teams were excluded
        5) Low-Tear: Teams that were on the bottom half of the regular season table if playoff teams were excluded


In [62]:
champions = ["milwaukee_bucks2020-2021", "los_angeles_lakers2019-2020", "toronto_raptors2018-2019", "golden_state_warriors2017-2018","golden_state_warriors2016-2017",
            "cleveland_cavaliers2015-2016", "golden_state_warriors2014-2015","san_antonio_spurs2013-2014","miami_heat2012-2013","miami_heat2011-2012",
            "dallas_mavericks2010-2011","los_angeles_lakers2009-2010","los_angeles_lakers2008-2009","boston_celtics2007-2008","san_antonio_spurs2006-2007",
            "miami_heat2005-2006","san_antonio_spurs2004-2005","detroit_pistons2003-2004","san_antonio_spurs2002-2003", "los_angeles_lakers2001-2002"]

top4 = ["phoenix_suns2020-2021","los_angeles_clippers2020-2021","atlanta_hawks2020-2021",
        "denver_nuggets2019-2020","boston_celtics2019-2020","miami_heat2019-2020",
        "milwaukee_bucks2018-2019","golden_state_warriors2018-2019","portland_trail_blayzers2018-2019",
        "houston_rockets2017-2018","cleveland_cavaliers2017-2018","boston_celtics2017-2018",
       "cleveland_cavaliers2016-2017","boston_celtics2016-2017","san_antonio_spurs2016-2017",
       "golden_state_warriors2015-2016","oklahoma_city_thunders2015-2016","toronto_raptors2015-2016",
       "cleveland_cavaliers2014-2015","atlanta_hawks2014-2015","houston_rockets2014-2015",
       "oklahoma_city_thunders2013-2014","indiana_pacers2013-2014","miami_heat2013-2014",
       "indiana_pacers2012-2013","san_antonio_spurs2012-2013","memphis_grizzlies",
       "oklahoma_city_thunders2011-2012","san_antonio_spurs2011-2012","boston_celtics2011-2012",
       "oklahoma_city_thunders2010-2011","miami_heat2010-2011","chicago_bulls2010-2011",
       "boston_celtics2009-2010","orlando_magic2009-2010","phoenix_suns2009-2010",
       "denver_nuggets2008-2009","orlando_magic2008-2009","cleveland_cavaliers2008-2009",
       "detroit_pistons2007-2008","san_antonio_spurs2007-2008","los_angeles_lakers2007-2008",
       "utah_jazz2006-2007","cleveland_cavaliers2006-2007","detroit_pistons2006-2007",
       "detroit_pistons2005-2006","dallas_mavericks2005-2006","phoenix_suns2005-2006",
       "phoenix_suns2004-2005","detroit_pistons2004-2005","miami_heat2004-2005",
       "indiana_pacers2003-2004","los_angeles_lakers2003-2004","minnesota_timberwolves2003-2004",
       "dallas_mavericks2002-2003","brooklyn_nets2002-2003","detroit_pistons2002-2003",
       "brooklyn_nets2001-2002","boston_celtics2001-2002","sacramento_kings2001-2002"]

decent = ["miami_heat2020-2021","boston_celtics020-2021","new_york_knicks2020-2021","washington_wizards2020-2021","memphies_grizzlies2020-2021","dallas_mavericks2020-2021","portland_trail_blazers2020-2021","los_angeles_lakers2020-2021",
            "utah_jazz2020-2021","denver_nuggets2020-2021","philadelphia_76ers2020-2021","brooklyn_nets2020-2021",
            "orlando_magic2019-2020","indiana_pacers2019-2020","philadelphia_76ers2019-2020","brooklyn_nets2019-2020","portland_pacers2019-2020","oklahoma_city_thunders2019-2020","utah_jazz2019-2020","dallas_mavericks2019-2020",
           "milwaukee_bucks2019-2020","toronto_raptors2019-2020","houston_rockets2019-2020","los_angeles_clippers2019-2020",
           "detroit_pistons2018-2019","indiana_pacers2018-2019","brooklyn_nets2018-2019","orlando_magic2018-2019","los_angeles_clippers2018-2019","utah_jazz2018-2019","oklahoma_city_thunder2018-2019","san_antonio_spurs2018-2019",
           "boston_celtics2018-2019","philadelphia_76ers2018-2019","houston_rockets2018-2019","denver_nuggets2018-2019",
           "washington_wizards2017-2018","indiana_pacers2017-2018","miami_heat2017-2018","milwaukee_bucks2017-2018","minnesota_timberwolves2017-2018","oklahoma_city_thunder2017-2018","portland_trail_blazers2017-2018","san_antonio_spurs2017-2018",
           "toronto_raptors2017-2018","philadelphia_76ers2017-2018","utah_jazz2017-2018","new_orleans_pelicans2017-2018",
           "chicago_bulls2016-2017","atlanta_hawks2016-2017","milwaukee_bucks2016-2017","indiana_pacers2016-2017","portland_trail_blazers2016-2017","los_angeles_clippers2016-2017","oklahoma_city_thunder2016-2017","memphis_grizzlies2016-2017",
           "washington_wizards2016-2017","toronto_raptors2016-2017","utah_jazz2016-2017","houston_rockets2016-2017",
            "detroit_pistons2015-2016","boston_celtics2015-2016","charlotte_hornets2015-2016","indiana_pacers2015-2016","houston_rockets2015-2016","los_angeles_clippers2015-2016","dallas_mavericks2015-2016","memphis_grizzlies",
           "atlanta_hawks2015-2016","miami_heat2015-2016","portland_trail_blazers2015-2016","san_antonio_spurs2015-2016",
           "brooklyn_nets2014-2015","toronto_raptors2014-2015","milwaukee_bucks2014-2015","boston_celtics2014-2015","new_orleans_pelicans2014-2015","portland_trail_blazers2014-2015","san_antonio_spurs2014-2015","dallas_mavericks2014-2015",
           "washington_wizards2014-2015","chicago_bulls2014-2015","memphis_grizzlies2014-2015","los_angeles_clippers2014-2015",
           "atlanta_hawks2013-2014","chicago_bulls2013-2014","toronto_raptors2013-2014","charlotte_hornets2013-2014","dallas_mavericks2013-2014","houston_rockets2013-2014","golden_state_warriors2013-2014","memphis_grizzlies2013-2014",
            "washington_wizards2013-2014","brooklyn_nets2013-2014","portland_trail_blazers2013-2014","los_angeles_clippers2013-2014",
           "milwaukee_bucks2012-2013","brooklyn_nets2012-2013","atlanta_hawks2012-2013","boston_celtics2012-2013","houston_rockets2012-2013","los_angeles_clippers2012-2013","denver_nuggets2012-2013","los_angeles_lakers2012-2013",
           "chicago_bulls2012-2013","new_york_knicks2012-2013","oklahoma_city_thunder2012-2013","golden_state_warriors2012-2013",
            "chicago_bulls2011-2012","atlanta_hawks2011-2012","orlando_magic2011-2012","new_york_knicks2011-2012","utah_jazz2011-2012","memphis_grizzlies2011-2012","denver_nuggets2011-2012","dallas_mavericks2011-2012",
            "philadelphia_76ers2011-2012","indiana_pacers2011-2012","los_angeles_clippers2011-2012","los_angeles_lakers2011-2012",
           "indiana_pacers2010-2011","chicago_bulls2010-2011","orlando_magic2010-2011","new_york_knicks2010-2011","philadelphia_76ers2010-2011","san_antonio_spurs2010-2011","denver_nuggets2010-2011","portland_trail_blazers2010-2011","new_orleans_pelicans2010-2011",
            "atlanta_hawks2010-2011","boston_celtics2010-2011","memphis_grizzlies2010-2011","los_angeles_lakers2010-2011",
            "chicago_bulls2009-2010","miami_heat2009-2010","milwaukee_bucks2009-2010","charlotte_hornets2009-2010","oklahoma_city_thunder2009-2010","denver_nuggets2009-2010","portland_trail_blazers2009-2010","dallas_mavericks2009-2010",
           "cleveland_cavaliers2009-2010","atlanta_hawks2009-2010","utah_jazz2009-2010","san_antonio_spurs2009-2010",
           "detroit_pistons2008-2009","miami_heat2008-2009","philadelphia_76ers2008-2009","chicago_bulls2008-2009","utah_jazz2008-2009","portland_trail_blazers2008-2009","san_antonio_spurs2008-2009","new_orleans_pelicans",
           "atlanta_hawks2008-2009","boston_celtics2008-2009","houston_rockets2008-2009","dallas_mavericks2008-2009",
           "atlanta_hawks2007-2008","washington_wizards2007-2008","toronto_raptors2007-2008","philadelphia_76ers2007-2008","denver_nuggets2007-2008","houston_rockets2007-2008","phoenix_suns2007-2008","dallas_mavericks2007-2008",
           "cleveland_cavaliers2007-2008","orlando_magic2007-2008","utah_jazz2007-2008","new_orleans_pelicans2007-2008",
            "orlando_magic2006-2007","miami_heat2006-2007","toronto_raptors2006-2007","washington_wizards2006-2007","dallas_mavericks2006-2007","houston_rockets2006-2007","denver_nuggets2006-2007","los_angeles_lakers2006-2007",
           "chicago_bulls2006-2007","brooklyn_nets2006-2007","golden_state_warriors2006-2007","phoenix_suns2006-2007",
           "milwaukee_bucks2005-2006","washington_wizards2005-2006","indiana_pacers2005-2006","chicago_bulls2005-2006","sacramento_kings2005-2006","memphies_grizzlies2005-2006","denver_nuggets2005-2006","los_angeles_lakers2005-2006",
           "cleveland_cavaliers2005-2006","brooklyn_nets","san_antonio_spurs2005-2006","los_angeles_clippers2005-2006",
           "brooklyn_nets2004-2005","chicago_bulls2004-2005","boston_celtics2004-2005","philadelphia_76ers2004-2005","memphis_grizzlies2004-2005","houston_rockets2004-2005","sacramento_kings2004-2005","denver_nuggets2004-2005",
           "washington_wizards2004-2005","indiana_pacers2004-2005","dallas_mavericks2004-2005","oklahoma_city_thunder2004-2005",
           "boston_celtics2003-2004","new_orleans_pelicans2003-2004","milwaukee_bucks2003-2004","new_york_knicks2003-2004","denver_nuggets2003-2004","dallas_mavericks2003-2004","memphis_grizzlies2003-2004","houston_rockets2003-2004",
           "miami_heat2003-2004","brooklyn_nets2003-2004","sacramento_kings2003-2004","san_antonio_spurs2003-2004",
            "orlando_magic2002-2003","new_orleans_pelicans2002-2003","indiana_pacers2002-2003","indiana_pacers2002-2003","milwaukee_bucks2002-2003","phoenix_suns2002-2003","minnesota_timberwolves2002-2003","portland_trail_blazers2002-2003","utah_jazz2002-2003",
           "philadelphia_76ers2002-2003","boston_celtics2002-2003","los_angeles_lakers2002-2003","sacramento_kings2002-2003",
           "indiana_pacers2001-2002","orlando_magic2001-2002","philadelphia_76ers2001-2002","toronto_raptors2001-2002","utah_jazz2001-2002","minnesota_timberwolves2001-2002","portland_trail_blazers2001-2002","oklahoma_city_thunder2001-2002",
           "charlotte_hornets2001-2002","detroit_pistons2001-2002","dallas_mavericks2001-2002","san_antonio_spurs2001-2002"]


mid_tear = ["indiana_pacers2020-2021","charlotte_hornets2020-2021","chicago_bulls2020-2021","toronto_raptors2020-2021",
            "memphis_grizzlies2020-2021","san_antonio_spurs2020-2021","new_orleans_pelicans","sacramento_kings2020-2021",
           "charlotte_hornets2019-2020","washington_wizards2019-2020","chicago_bulls2019-2020","new_york_knicks2019-2020",
           "memphis_grizzlies2019-2020","phoenix_suns2019-2020","san_antonio_spurs2019-2020","sacramento_kings2019-2020",
           "charlotte_hornets2018-2019","miami_heat2018-2019","washington_wizards2018-2019","atlanta_hawks2018-2019",
           "sacramento_kings2018-2019","los_angeles_lakers2018-2019","minnesota_timberwolves2018-2019","memphies_grizzlies2018-2019",
           "detroit_pistons2017-2018","charlotte_hornets2017-2018","new_york_knicks2017-2018","brooklyn_nets2017-2018",
            "denver_nuggets2017-2018","los_angeles_clippers2017-2018","los_angeles_lakers2017-2018","sacramento_kings2017-2018",
           "miami_heat2016-2017","detroit_pistons2016-2017","charlotte_hornets2016-2017","new_york_knicks2016-2017",
           "denver_nuggets2016-2017","new_orleans_pelicans2016-2017","dallas_mavericks2016-2017","sacramento_kings2016-2017",
           "chicago_bulls2015-2016","washington_wizards2015-2016","orlando_magic2015-2016","milwaukee_bucks2015-2016",
           "utah_jazz2015-2016","sacramento_kings2015-2016","denver_nuggets2015-2016","new_orleans_pelicans2015-2016",
           "indiana_pacers2014-2015","miami_heat2014-2015","charlotte_hornets2014-2015","detroit_pistons2014-2015",
           "oklahoma_city_thunder2014-2015","phoenix_suns2014-2015","utah_jazz2014-2015","denver_nuggets2014-2015",
           "new_york_knicks2013-2014","cleveland_cavaliers2013-2014","detroit_pistons2013-2014","boston_celtics2013-2014",
           "phoenix_suns2013-2014","minnesota_timberwolves2013-2014","denver_nuggets2013-2014","new_orleans_pelicans2013-2014",
           "philadelphia_76ers2012-2013","toronto_raptors2012-2013","detroit_pistons2012-2013","washington_wizards2012-2013",
           "utah_jazz2012-2013","dallas_mavericks2012-2013","portland_trail_blazers2012-2013","minnesota_timberwolves2012-2013",
           "milwaukee_bucks2011-2012","detroit_pistons2011-2012","toronto_raptors2011-2012","brooklyn_nets2011-2012",
           "houston_rockets2011-2012","phoenix_suns2011-2012","portland_trail_blazers2011-2012","minnesota_timberwolves2011-2012",
           "milwukee_bucks2010-2011","charlotte_hornets2010-2011","detroit_pistons2010-2011","brooklyn_nets2010-2011",
           "houston_rockets2010-2011","phoenix_suns2010-2011","utah_jazz2010-2011","golden_state_warriors2010-2011",
           "toronto_raptors2009-2010","indiana_pacers2009-2010","new_york_knicks2009-2010","detroit_pistons2009-2010",
           "houston_rockets2009-2010","memphis_grizzlies2009-2010","new_orleans_pelicans2009-2010","los_angeles_clippers2009-2010",
           "indiana_pacers2008-2009","charlotte_hornets2008-2009","brooklyn_nets2008-2009","milwaukee_bucks2008-2009",
           "phoenix_suns2008-2009","golden_state_warriors2008-2009","minnesota_timberwolves2008-2009","memphis_grizzlies2008-2009",
           "indiana_pacers2007-2008","brooklyn_nets2007-2008","chicago_bulls2007-2008","charlotte_hornets2007-2008",
           "golden_state_warriors2007-2008","portland_trail_blazers2007-2008","sacramento_kings2007-2008","los_angeles_clippers2007-2008",
           "philadelphia_76ers2006-2007","indiana_pacers2006-2007","new_york_knicks2006-2007","charlotte_hornets2006-2007",
            "los_angeles_clippers2006-2007","new_orleans_pelicans2006-2007","sacramento_kings2006-2007","portland_trail_blazers",
           "philadelphia_76ers2005-2006","orlando_magic2005-2006","boston_celtics2005-2006","toronto_raptors2005-2006",
            "utah_jazz2005-2006","new_orleans_pelicans2005-2006","oklahoma_city_thunder2005-2006","golden_state_warriors2005-2006",
           "cleveland_cavaliers2004-2005","orlando_magic2004-2005","new_york_knicks2004-2005","toronto_raptors2004-2005",
           "minnesota_timberwolves2004-2005","los_angeles_clippers2004-2005","los_angeles_clippers2004-2005","los_angeles_lakers2004-2005","golden_state_warriors2004-2005",
           "cleveland_cavaliers2003-2004","toronto_raptors2003-2004","philadelphia_76ers2003-2004","atlanta_hawks2003-2004",
           "utah_jazz2003-2004","portland_trail_blazers2003-2004","oklahoma_city_thunder2003-2004","golden_state_warriors2003-2004",
           "new_york_knicks2002-2003","washington_wizards2002-2003","atlanta_hawks2002-2003","chicago_bulls2002-2003",
           "houston_rockets2002-2003","oklahoma_city_thunder2002-2003","golden_state_warriors2002-2003","memphis_grizzlies2002-2003",
           "milwaukee_bucks2001-2002","washington_wizards2001-2002","miami_heat2001-2002","atlanta_hawks2001-2002",
           "los_angeles_clippers2001-2002","phoenix_suns2001-2002","houston_rockets2001-2002","denver_nuggets2001-2002"]


low_tear = ["detroit_pistons2020-2021","orlando_magic2020-2021","cleveland_cavaliers2020-2021","houston_rockets2020-2021","oklahoma_city_thunders2020-2021","minnesota_timberwolves2020-2021",
            "cleveland_cavaliers2019-2020","atlanta_hawks2019-2020","detroit_pistons","golden_state_warriors2019-2020","minnesota_timberwolves2019-2020","new_orleans_pelicans2019-2020",
           "new_york_knicks2018-2019","cleveland_cavaliers2018-2019","chicago_bulls2018-2019","phoenix_suns2018-2019","dallas_mavericks2018-2019","new_orleans_pelicans2018-2019",
           "atlanta_hawks2017-2018","orlando_magic2017-2018","chicago_bulls2017-2018","phoenix_suns2017-2018","memphis_grizzlies2017-2018","dallas_mavericks2017-2018",
           "brooklyn_nets2016-2017","philadelphia_76ers2016-2017","orlando_magic2016-2017","phoenix_suns2016-2017","los_angeles_lakers2016-2017","minnesota_timberwolves2016-2017",
            "philadelphia_76ers2015-2016","brooklyn_nets2015-2016","new_york_knicks2015-2016","los_angeles_lakers2015-2016","phoenix_suns2015-2016","minnesota_timberwolves2015-2016",
           "new_york_knicks2014-2015","philadelphia_76ers2014-2015","orlando_magic2014-2015","minnesota_timberwolves2014-2015","los_angeles_lakers2014-2015","sacramento_kings2014-2015",
           "milwaukee_bucks2013-2014","philadelphia_76ers2013-2014","orlando_magic2013-2014","utah_jazz2013-2014","los_angeles_lakers2013-2014","sacramento_kings2013-2014",
           "orlando_magic2012-2013","charlotte_hornets2012-2013","cleveland_cavaliers2012-2013","phoenix_suns2012-2013","new_orleans_pelicans2012-2013","sacramento_kings2012-2013",
           "charlotte_hornets2011-2012","washington_wizards2011-2012","cleveland_cavaliers2011-2012","new_orleans_pelicans2011-2012","sacramento_kings2011-2012","golden_state_warriors2011-2012",
           "cleveland_cavaliers2010-2011","toronto_raptors2010-2011","washington_wizards2010-2011","minnesota_timberwolves2010-2011","sacramento_kings2010-2011","los_angeles_clippers2010-2011",
           "brooklyn_nets2009-2010","washington_wizards2009-2010","philadelphia_76ers2009-2010","minnesota_timberwolves2009-2010","sacramento_kings2009-2010","golden_state_warriors2009-2010",
           "washington_wizards2008-2009","new_york_knicks2008-2009","toronto_raptors2008-2009","sacramento_kings2008-2009","los_angeles_clippers2008-2009","oklahoma_city_thunder2008-2009",
           "miami_heat2007-2008","new_york_knicks2007-2008","milwaukee_bucks2007-2008","oklahoma_city_thunder2007-2008","memphis_grizzlies2007-2008","minnesota_timberwolves2007-2008",
            "boston_celtics2006-2007","milwaukee_bucks2006-2007","atlanta_hawks2006-2007","memphis_grizzlies2006-2007","oklahoma_city_thunder2006-2007","minnesota_timberwolves2006-2007",
            "new_york_knicks2005-2006","atlanta_hawks2005-2006","charlotte_hornets2005-2006","portland_trail_blazers2005-2006","minnesota_timberwolves2005-2006","houston_rockets2005-2006",
           "atlanta_hawks2004-2005","charlotte_hornets2004-2005","milwaukee_bucks2004-2005","new_orleans_pelicans2004-2005","utah_jazz2004-2005","portland_trail_blazers2004-2005",
           "washington_wizards2003-2004","chicago_bulls2003-2004","orlando_magic2003-2004","los_angeles_clippers2003-2004","phoenix_suns2003-2004",
           "cleveland_cavaliers2002-2003","toronto_raptors2002-2003","miami_heat2002-2003","los_angeles_clippers2002-2003","denver_nuggets2002-2003",
           "chicago_bulls2001-2002","cleveland_cavaliers2001-2002","new_york_knicks2001-2002","golden_state_warriors2001-2002","memphis_grizzlies2001-2002"]
            
print(len(champions)+len(top4)+len(decent)+len(low_tear)+len(mid_tear))

600


# Now that our lists are loaded, we are ready to analyze our data and discover patterns to them

### <font color='purple'>1.</font> Is investing more money to the roster of the team crutial for the success of the team?

In [272]:
#We are finding how much is the average money each "tear" of teams spents on players
champs_avg = 0
counter = 0
for i in range(0,len(champions)):
    if champions[i] in payrolls_noPositions.keys():
        temp = payrolls_noPositions[champions[i]]
        valueable = int(temp["salary ($)"][-1:])
        champs_avg = champs_avg+valueable
        counter=counter+1
champs_avg = champs_avg/counter


top4_avg = 0
counter = 0
for i in range(0,len(top4)):
    if top4[i] in payrolls_noPositions.keys():
        temp = payrolls_noPositions[top4[i]]
        valueable = int(temp["salary ($)"][-1:])
        top4_avg = top4_avg+valueable
        counter=counter+1
top4_avg = top4_avg/counter

decent_avg = 0
counter = 0
for i in range(0,len(decent)):
    if decent[i] in payrolls_noPositions.keys():
        temp = payrolls_noPositions[decent[i]]
        valueable = int(temp["salary ($)"][-1:])
        decent_avg = decent_avg+valueable
        counter=counter+1
decent_avg = decent_avg/counter

mid_tear_avg = 0
counter = 0
for i in range(0,len(mid_tear)):
    if mid_tear[i] in payrolls_noPositions.keys():
        temp = payrolls_noPositions[mid_tear[i]]
        valueable = int(temp["salary ($)"][-1:])
        mid_tear_avg = mid_tear_avg+valueable
        counter=counter+1
mid_tear_avg = mid_tear_avg/counter

low_tear_avg = 0
counter = 0
for i in range(0,len(low_tear)):
    if low_tear[i] in payrolls_noPositions.keys():
        temp = payrolls_noPositions[low_tear[i]]
        valueable = int(temp["salary ($)"][-1:])
        low_tear_avg = low_tear_avg+valueable
        counter=counter+1
low_tear_avg = low_tear_avg/counter

print(champs_avg-top4_avg)
print(top4_avg-decent_avg)
print(decent_avg-mid_tear_avg)
print(mid_tear_avg-low_tear_avg)

2415220.672222227
2833001.6880341917
4589460.1279601455
2447927.9096095264


#### <u>Conclusion on #1:</u> 
We can see, the average ammount teams spend on players compared to the  teams one tear benieth them is always bigger so we can assume that money spent on players does play significant role to success

### <font color='purple'>2.</font> Should the team aim for “super-stars” or having a more unified payroll with less outliers?

In order to so, we will merge the DataFrames for teams in same category of success. To do so we have to exclude last row of each dataframe which is the sum of players payment  

In [273]:
champs_payroll = pd.DataFrame()
for i in range(0,len(champions)):
    if champions[i] in payrolls_noPositions.keys():
        champs_payroll = champs_payroll.append(payrolls_noPositions[champions[i]][:-1])
champs_payroll[:23]

Unnamed: 0,player_names,salary ($)
0,Khris Middleton,33051724
1,Giannis Antetokounmpo,27528088
2,Jrue Holiday,27026011
3,Brook Lopez,12697675
4,PJ Tucker,7969537
5,Pat Connaughton,4938273
6,Bobby Portis,3623000
7,Jon Leuer,3169348
8,Donte DiVincenzo,3044160
9,Bryn Forbes,2337145


In [274]:
top4_payroll = pd.DataFrame()
for i in range(0,len(top4)):
    if top4[i] in payrolls_noPositions.keys():
        top4_payroll = top4_payroll.append(payrolls_noPositions[top4[i]][:-1])

In [275]:
decent_payroll = pd.DataFrame()
for i in range(0,len(decent)):
    if decent[i] in payrolls_noPositions.keys():
        decent_payroll = decent_payroll.append(payrolls_noPositions[decent[i]][:-1])

In [276]:
mid_tear_payroll = pd.DataFrame()
for i in range(0,len(mid_tear)):
    if mid_tear[i] in payrolls_noPositions.keys():
        mid_tear_payroll = mid_tear_payroll.append(payrolls_noPositions[mid_tear[i]][:-1])

In [277]:
low_tear_payroll = pd.DataFrame()
for i in range(0,len(low_tear)):
    if low_tear[i] in payrolls_noPositions.keys():
        low_tear_payroll = low_tear_payroll.append(payrolls_noPositions[low_tear[i]][:-1])

In [278]:
compressed_payrolls = [champs_payroll, top4_payroll,decent_payroll,mid_tear_payroll,low_tear_payroll]
easy_to_read=["champs", "top4    ","decent  ","mid_tear","low_tear"]
for i in range(0,len(compressed_payrolls)):
    if i<len(compressed_payrolls)-1:
        print("        ",easy_to_read[i])
        for j in range(i+1,len(compressed_payrolls)):
            c = compressed_payrolls[i]['salary ($)'].std()
            t = compressed_payrolls[j]['salary ($)'].std()
            print(easy_to_read[j], (c-t), (c-t)/t*100)
        print()

         champs
top4     540035.4630299704 8.538893468772624
decent   990554.8789321026 16.863665135998
mid_tear 1868371.5155008286 37.396721854832265
low_tear 1857171.62421023 37.08940385932693

         top4    
decent   450519.41590213217 7.6698512405790025
mid_tear 1328336.0524708582 26.587546144794842
low_tear 1317136.1611802597 26.304405248767793

         decent  
mid_tear 877816.636568726 17.570094772348003
low_tear 866616.7452781275 17.307123390141484

         mid_tear
low_tear -11199.891290598549 -0.22367199985311928



In [279]:
compressed_payrolls = [champs_payroll, top4_payroll,decent_payroll,mid_tear_payroll,low_tear_payroll]
easy_to_read=["champs", "top4    ","decent  ","mid_tear","low_tear"]
for i in range(0,len(compressed_payrolls)):
    if i<len(compressed_payrolls)-1:
        print("        ",easy_to_read[i])
        for j in range(i+1,len(compressed_payrolls)):
            c = compressed_payrolls[i]['salary ($)'].median()
            t = compressed_payrolls[j]['salary ($)'].median()
            print(easy_to_read[j], (c-t))
        print()

         champs
top4     -26055.5
decent   1305.5
mid_tear -167101.5
low_tear 221061.0

         top4    
decent   27361.0
mid_tear -141046.0
low_tear 247116.5

         decent  
mid_tear -168407.0
low_tear 219755.5

         mid_tear
low_tear 388162.5



#### <u>Conclusion on #2:</u> 
     - According to standard deviations, looks like in order to step up, you need to have a less unified payroll 
     - According to 50% percentile it seems that it is easier to be at top half of paid athletes than the other group of teams.
    If we take into consideration that really bad teams will be rewarded with better prospects at the draft, mid_tear teams is teams that are more likely to stay low for near future

In [149]:
df = payrolls["portland_trail_blazers2019-2020"]
df2 = payrolls[f"{team_names[25]}{years[4]}"]
temp = pd.DataFrame(df["salary ($)"][:-1]+df2["salary ($)"][:-1])
df.describe().describe()

Unnamed: 0,salary ($)
count,8.0
mean,24188730.0
std,44827100.0
min,18.0
25%,1158811.0
50%,7152147.0
75%,18739070.0
max,131980000.0


### 1) How position of "G" has evolved:
         i. Over the years in teams that succeeded
        ii. Over different success groups

In [52]:
#player_name_trans = player_names[k].replace(' ','-')
url_player = f"https://hoopshype.com/player/vince-carter/salary/"
r = requests.get(url_player,timeout=10)
r_html = r.text
soup = BeautifulSoup(r_html, 'html.parser')
htmlText = soup.find_all("span", class_="player-bio-text-line-value")
positionString = str(htmlText[2])
position = positionString.find(">")
column2.append(float(positionString[position+7:position+11]))
print(htmlText)
print(column2)

[<span class="player-bio-text-line-value">-</span>, <span class="player-bio-text-line-value">01/26/77</span>, <span class="player-bio-text-line-value">6-6 / 1.98</span>, <span class="player-bio-text-line-value">220 lbs. / 99.8 kg.</span>, <span class="player-bio-text-line-value">$186,916,012 ($243,570,308*)</span>]
[1.98, 1.98]


In [152]:
payrolls_filled=dict()
kickout_heightbased=[]
dropped_heightbased=[]
counter=0
for i in range(0,len(team_names)):
    for j in range(0,len(years)):
            
        counter=counter+1    
        problematic=False    
        url = f"https://hoopshype.com/salaries/{team_names[i]}/{years[j]}/"
        salary_table = []
        try:
            r = requests.get(url,timeout=20)
            r_html = r.text
            soup = BeautifulSoup(r_html, 'html.parser')
            salary_table = soup.find('table')
        except:
            print("Something wrong")
        length=len(salary_table.find_all("td"))
        player_names=[salary_table.find_all("td")[i].text.strip() for i in range(3,length,3)]
        column1=[salary_table.find_all("td")[i].text.strip() for i in range(4,length,3)]
        
        #Creating the new column for players position via entering each players personal URL in hoopshype
        column2 = []
        for k in range(0,len(player_names)):
            if k == len(player_names)-1:
                column2.append("NaN")
                break
            player_name_trans = player_names[k].replace(' ','-')
            url_player = f"https://hoopshype.com/player/{player_name_trans}/salary/"
            r = requests.get(url_player,timeout=10)
            r_html = r.text
            try:
                soup = BeautifulSoup(r_html, 'html.parser')
                htmlText = soup.find_all("span", class_="player-bio-text-line-value")
                positionString = str(htmlText[0])
                position = positionString.find(">")
                if positionString[position+1]=='-':
                    positionString = str(htmlText[2])
                    height = (float(positionString[position+7:position+11]))
                    if height>=2.06:
                        column2.append('C')
                    elif height>=1.96:
                        column2.append('F')
                    else:
                        column2.append('G')
                else:
                    column2.append(positionString[position+1])
                
            except IndexError as e:
                    kickout_heightbased.append(f"{team_names[i]}{years[j]}")
                    dropped_heightbased.append(f"({i},{j})")
                    print(i,j,e)
                    problematic= True
                    break
            except AttributeError as e1:
                    kickout_heightbased.append(f"{team_names[i]}{years[j]}")
                    dropped_heightbased.append(f"({i},{j})")
                    print(i,j,e1)
                    problematic= True
                    break
            except ValueError as e2:
                    column2.append('-')
            
        if problematic==False:
            salary_table = []
            try:
                r = requests.get(url,timeout=20)
                r_html = r.text
                soup = BeautifulSoup(r_html, 'html.parser')
                salary_table = soup.find('table')
            except:
                print("Something wrong")

            df_dict={'player_names':player_names,
                    'salary ($)':column1,
                        'position':column2}

            tempDataFrame=pd.DataFrame(df_dict)

            tempDataFrame.replace({'\$':''}, regex = True,inplace=True)
            tempDataFrame.replace(',','', regex=True, inplace=True)
            for item in tempDataFrame.columns[1:2]:
                tempDataFrame[item]=pd.to_numeric(tempDataFrame[item])
            if counter%75==0 or counter==1:
                print(counter)
                print(tempDataFrame)
            payrolls_filled[f"{team_names[i]}{years[j]}"] = tempDataFrame

1
               player_names  salary ($) position
0            Damian Lillard    31626953        G
1               CJ McCollum    29354152        G
2              Jusuf Nurkic    12888889        C
3          Robert Covington    12138345        F
4             Norman Powell    10865952        G
5             Derrick Jones     9258000        F
6              Zach Collins     5406255        C
7               Enes Kanter     5005350        F
8          Andrew Nicholson     2844429        C
9           Carmelo Anthony     2564753        F
10          Anfernee Simons     2252040        G
11            Nassir Little     2210640        F
12         Anderson Varejao     1913345        C
13              Harry Giles     1620564        C
14                CJ Elleby      898310        F
15  Rondae Hollis-Jefferson      502957        F
16           Keljin Blevins      449115        G
17                  TJ Leaf      104598        F
18                   Totals   131904647      NaN
0 14 list index ou

ReadTimeout: HTTPSConnectionPool(host='hoopshype.com', port=443): Read timed out. (read timeout=10)