In [45]:
# Season end-years to scrape. Bounds are inclusive here; the +1 compensates
# for range() excluding its stop value.
first_season, last_season = 2020, 2022
years = [season for season in range(first_season, last_season + 1)]

In [46]:
# Echo the season list to confirm which years will be scraped.
years

[2020, 2021, 2022]

In [47]:
# URL template for one season's totals page; the {} placeholder is filled with
# the season year via str.format.
URL_BASE: str = "https://www.basketball-reference.com/leagues/NBA_{}_totals.html"

In [48]:
import asyncio
import time
from io import StringIO

import pandas as pd
from playwright.async_api import async_playwright

In [49]:
async def get_page_html(url):
    """Load ``url`` in Chromium and return the inner HTML of ``#all_totals_stats``.

    The page title is printed as a side effect, which gives a visible progress
    line per fetched page.

    Parameters
    ----------
    url : str
        Address of the page to open.

    Returns
    -------
    str
        Inner HTML of the element matching the ``#all_totals_stats`` selector.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        try:
            page = await browser.new_page()
            await page.goto(url)
            print(await page.title())
            html = await page.inner_html("#all_totals_stats")
        finally:
            # Close the browser explicitly, including when navigation or the
            # selector lookup raises, rather than leaving cleanup implicit.
            await browser.close()
    return html

In [50]:
async def get_stats(years, delay=10):
    """Scrape the totals table for each season and stack the results.

    Parameters
    ----------
    years : iterable of int
        Values substituted into ``URL_BASE``; each one is also written to the
        ``Season`` column of the rows scraped for it.
    delay : float, optional
        Seconds to pause between consecutive page fetches. Defaults to 10.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the per-season tables. Each table keeps its own row
        labels, so index values repeat across seasons.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``years`` yields nothing.
    """
    dfs = []
    for position, year in enumerate(years):
        if position:
            # Pause only *between* requests, and yield to the event loop while
            # waiting: time.sleep() here would freeze every other coroutine.
            await asyncio.sleep(delay)
        html = await get_page_html(URL_BASE.format(year))
        # Wrap the markup in a file-like object; passing literal HTML strings
        # to read_html is deprecated in recent pandas releases.
        df = pd.read_html(StringIO(html))[0]
        df["Season"] = year
        dfs.append(df)
    return pd.concat(dfs)

In [51]:
# Run the scraper for every season in `years`. get_page_html prints each page
# title, so one line of output per season is expected below.
stats = await get_stats(years)

2019-20 NBA Player Stats: Totals | Basketball-Reference.com
2020-21 NBA Player Stats: Totals | Basketball-Reference.com
2021-22 NBA Player Stats: Totals | Basketball-Reference.com


In [52]:
# Inspect the combined frame; the trailing Season column is the one added in get_stats.
stats

Unnamed: 0,Rk,Player,Pos,Age,Tm,G,GS,MP,FG,FGA,...,ORB,DRB,TRB,AST,STL,BLK,TOV,PF,PTS,Season
0,1,Steven Adams,C,26,OKC,63,63,1680,283,478,...,207,376,583,146,51,67,94,122,684,2020
1,2,Bam Adebayo,PF,22,MIA,72,72,2417,440,790,...,176,559,735,368,82,93,204,182,1146,2020
2,3,LaMarcus Aldridge,C,34,SAS,53,53,1754,391,793,...,103,289,392,129,36,87,74,128,1001,2020
3,4,Kyle Alexander,C,23,MIA,2,0,13,1,2,...,2,1,3,0,0,0,1,1,2,2020
4,5,Nickeil Alexander-Walker,SG,21,NOP,47,1,591,98,266,...,9,75,84,89,17,8,54,57,267,2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
837,601,Thaddeus Young,PF,33,TOR,26,0,475,67,144,...,40,75,115,45,31,11,22,43,164,2022
838,602,Trae Young,PG,23,ATL,76,76,2652,711,1544,...,50,234,284,737,72,7,303,128,2155,2022
839,603,Omer Yurtseven,C,23,MIA,56,12,706,130,247,...,85,209,294,49,17,20,41,84,299,2022
840,604,Cody Zeller,C,29,POR,27,0,355,51,90,...,50,75,125,22,8,6,19,56,140,2022


In [53]:
# Per-player aggregates over Age, Tm and Season: row count, number of distinct
# values, and minimum. The result carries two-level column labels
# (source column, aggregate).
per_player = stats[["Player", "Age", "Tm", "Season"]].groupby("Player", group_keys=False)
summary = per_player.agg(["count", lambda values: len(values.unique()), "min"])

In [54]:
# Inspect the raw aggregation; columns are still a two-level index at this point.
summary

Unnamed: 0_level_0,Age,Age,Age,Tm,Tm,Tm,Season,Season,Season
Unnamed: 0_level_1,count,<lambda_0>,min,count,<lambda_0>,min,count,<lambda_0>,min
Player,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Aaron Gordon,5,3,24,5,3,DEN,5,3,2020
Aaron Henry,1,1,22,1,1,PHI,1,1,2022
Aaron Holiday,5,3,23,5,4,IND,5,3,2020
Aaron Nesmith,2,2,21,2,1,BOS,2,2,2021
Aaron Wiggins,1,1,23,1,1,OKC,1,1,2022
...,...,...,...,...,...,...,...,...,...
Zeke Nnaji,2,2,20,2,1,DEN,2,2,2021
Zhaire Smith,1,1,20,1,1,PHI,1,1,2020
Ziaire Williams,1,1,20,1,1,MEM,1,1,2022
Zion Williamson,2,2,19,2,1,NOP,2,2,2020


In [55]:
# Flatten the two-level labels positionally. Each source column contributes
# (count, distinct-count, min) in that order; names starting with "_" mark
# aggregates that are discarded afterwards.
flat_names = (
    ["count", "_1", "age"]                      # from Age
    + ["_2", "team_count", "team"]              # from Tm
    + ["_3", "season_count", "first_season"]    # from Season
)
summary.columns = flat_names

In [56]:
# Remove the placeholder aggregates. drop() returns a new frame instead of
# mutating in place, and errors="ignore" lets this cell be re-run after the
# columns are already gone rather than raising KeyError as `del` would.
summary = summary.drop(columns=["_1", "_2", "_3"], errors="ignore")

In [57]:
# Confirm the flattened column names and that the "_"-prefixed columns are gone.
summary

Unnamed: 0_level_0,count,age,team_count,team,season_count,first_season
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aaron Gordon,5,24,3,DEN,3,2020
Aaron Henry,1,22,1,PHI,1,2022
Aaron Holiday,5,23,4,IND,3,2020
Aaron Nesmith,2,21,1,BOS,2,2021
Aaron Wiggins,1,23,1,OKC,1,2022
...,...,...,...,...,...,...
Zeke Nnaji,2,20,1,DEN,2,2021
Zhaire Smith,1,20,1,PHI,1,2020
Ziaire Williams,1,20,1,MEM,1,2022
Zion Williamson,2,19,1,NOP,2,2020


In [58]:
# Keep players whose rows show exactly one distinct Tm value and who appear in
# more than one season.
summary = summary.loc[
    lambda frame: (frame["team_count"] == 1) & (frame["season_count"] > 1)
]

In [59]:
# Players remaining after the single-team / multi-season filter.
summary

Unnamed: 0_level_0,count,age,team_count,team,season_count,first_season
Player,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Aaron Nesmith,2,21,1,BOS,2,2021
Adam Mokoka,2,21,1,CHI,2,2020
Aleksej Pokusevski,2,19,1,OKC,2,2021
Alen Smailagić,2,19,1,GSW,2,2020
Amir Coffey,3,22,1,LAC,3,2020
...,...,...,...,...,...,...
Will Barton,3,29,1,DEN,3,2020
Xavier Tillman Sr.,2,22,1,MEM,2,2021
Zach LaVine,3,24,1,CHI,3,2020
Zeke Nnaji,2,20,1,DEN,2,2021


In [60]:
# Player names that passed the filter, as a Series built from the summary's index.
player_index = summary.index
valid_players = pd.Series(data=player_index)

In [62]:
from bing_image_downloader import downloader

In [65]:
# Image search for a single player, capped at 50 results. Per the logged
# output, files are written to a sub-folder of `images` named after the query.
downloader.download(
    "Bradley Beal",
    limit=50,
    output_dir="images",
    adult_filter_off=False,
    timeout=60,
    filter="photo",
)

[%] Downloading Images to /Users/vik/DataScience/project-walkthroughs/team_identifier/images/Bradley Beal


[!!]Indexing page: 1

[%] Indexed 8 Images on Page 1.


[%] Downloading Image #1 from https://wallpapercave.com/wp/wp2312261.jpg
[%] File Downloaded !

[%] Downloading Image #2 from https://img3.nickiswift.com/img/gallery/the-truth-about-bradley-beals-wife/l-intro-1614259789.jpg
[%] File Downloaded !

[%] Downloading Image #3 from http://fabwags.com/wp-content/uploads/2017/02/Bradley_Beal_girlfriend_Kamiah_Adams-pic.jpg
[%] File Downloaded !

[%] Downloading Image #4 from http://cdn.chatsports.com/thumbnails/3549-60627-original.jpeg
[%] File Downloaded !

[%] Downloading Image #5 from https://bodyartguru.com/wp-content/uploads/2020/11/Rauw-Wrist-Tattoo.jpg
[%] File Downloaded !

[%] Downloading Image #6 from https://blacksportsonline.com/wp-content/uploads/2014/10/Brooke-Hesson.jpg
[%] File Downloaded !

[%] Downloading Image #7 from http://www.playerwives.com/wp-content/uploads/

KeyboardInterrupt: 