# NBA 2K23 Game - ETL
* Scraping the raw data from 2kratings.com
* Extracting and cleaning it
* Libraries used: `pandas`, `re`, `BeautifulSoup`, `selenium`

## Extraction

In [2]:
import pandas as pd
import requests
import re
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from webdriver_manager.chrome import ChromeDriverManager

In [45]:
def scrape_site(url):
    """Open `url` in a fresh Chrome session and return the page parsed by BeautifulSoup.

    Parameters
    ----------
    url : str
        Address of the page to load.

    Returns
    -------
    bs4.BeautifulSoup
        `driver.page_source` parsed with the built-in 'html.parser'.

    Notes
    -----
    A new browser is started on every call and is always shut down before the
    function exits, including when navigation or parsing raises.
    """
    options = Options()
    # NOTE(review): with the 'none' strategy driver.get() is not expected to wait
    # for the page to finish loading, so page_source may be read early — confirm
    # this is intended for the target site.
    options.page_load_strategy='none'
    options.add_argument("start-maximized")
    options.add_argument("enable-automation")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-browser-side-navigation")
    options.add_argument("--disable-gpu")

    # NOTE(review): the driver path is passed positionally; newer Selenium
    # releases expect a Service object instead — confirm against the installed version.
    driver = webdriver.Chrome(ChromeDriverManager().install(), options=options)
    try:
        driver.get(url)
        return BeautifulSoup(driver.page_source, 'html.parser')
    finally:
        # Release the browser even on failure so no Chrome process is left behind.
        driver.quit()

In [46]:
teams_url = r'https://www.2kratings.com/current-teams/'

# Take every row of the first <tbody> on the teams page.
soup_teams = scrape_site(teams_url)
teams_raw = soup_teams.body.find_all('tbody')[0].find_all('tr')

# One [link target, stripped link text] pair per row, read from the row's first <a>.
teams = [[row.a['href'], str(row.a.text).strip()] for row in teams_raw]
teams

[['https://www.2kratings.com/teams/atlanta-hawks', 'Atlanta Hawks'],
 ['https://www.2kratings.com/teams/boston-celtics', 'Boston Celtics'],
 ['https://www.2kratings.com/teams/brooklyn-nets', 'Brooklyn Nets'],
 ['https://www.2kratings.com/teams/charlotte-hornets', 'Charlotte Hornets'],
 ['https://www.2kratings.com/teams/chicago-bulls', 'Chicago Bulls'],
 ['https://www.2kratings.com/teams/cleveland-cavaliers',
  'Cleveland Cavaliers'],
 ['https://www.2kratings.com/teams/dallas-mavericks', 'Dallas Mavericks'],
 ['https://www.2kratings.com/teams/denver-nuggets', 'Denver Nuggets'],
 ['https://www.2kratings.com/teams/detroit-pistons', 'Detroit Pistons'],
 ['https://www.2kratings.com/teams/golden-state-warriors',
  'Golden State Warriors'],
 ['https://www.2kratings.com/teams/houston-rockets', 'Houston Rockets'],
 ['https://www.2kratings.com/teams/indiana-pacers', 'Indiana Pacers'],
 ['https://www.2kratings.com/teams/los-angeles-clippers',
  'Los Angeles Clippers'],
 ['https://www.2kratings.co

In [26]:
# Maps player page URL -> {"Name", "Team", "Raw Data"}; "Raw Data" is the parsed player page.
data = {}
for team_url, team_name in teams:
    soup_roster = scrape_site(team_url)
    roster_raw = soup_roster.find_all('tbody')[0].find_all('span', class_='entry-font')

    # [player page URL, player name] for every roster entry of this team.
    roster = [[str(entry.a['href']), str(entry.a.text)] for entry in roster_raw]

    # One extra page load per player, so this loop dominates the run time.
    for player_url, player_name in roster:
        data[player_url] = {
            "Name": player_name,
            "Team": team_name,
            "Raw Data": scrape_site(player_url),
        }

print('Done!')

Done!


In [149]:
# One row per scraped player page, indexed by its URL.
raw_data_df = pd.DataFrame.from_dict(data, orient='index')
raw_data_df = raw_data_df.rename_axis('url')
# Store each parsed page as its string form before writing the pickle.
raw_data_df['Raw Data'] = raw_data_df['Raw Data'].apply(str)
raw_data_df.to_pickle('raw_data.pkl')

## Transformation

In [99]:
def str_to_bs4(x):
    """Parse the HTML string `x` with the built-in 'html.parser' and return the soup."""
    return BeautifulSoup(x, 'html.parser')

# Reload the pickled frame and turn the stored HTML strings back into soup objects.
raw_data_df = pd.read_pickle('raw_data.pkl')
raw_data_df["Raw Data"] = raw_data_df["Raw Data"].apply(str_to_bs4)

raw_data_df

Unnamed: 0_level_0,Name,Team,Raw Data
url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
https://www.2kratings.com/trae-young,Trae Young,Atlanta Hawks,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
https://www.2kratings.com/dejounte-murray,Dejounte Murray,Atlanta Hawks,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
https://www.2kratings.com/clint-capela,Clint Capela,Atlanta Hawks,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
https://www.2kratings.com/onyeka-okongwu,Onyeka Okongwu,Atlanta Hawks,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
https://www.2kratings.com/john-collins,John Collins,Atlanta Hawks,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
...,...,...,...
https://www.2kratings.com/anthony-gill,Anthony Gill,Washington Wizards,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
https://www.2kratings.com/xavier-cooks,Xavier Cooks,Washington Wizards,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
https://www.2kratings.com/isaiah-todd-2,Isaiah Todd,Washington Wizards,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."
https://www.2kratings.com/quenton-jackson,Quenton Jackson,Washington Wizards,"[[[<meta charset=""utf-8""/>, <meta content=""IE=..."


In [100]:
# Nested dict keyed by the frame's index (the player URL): {url: {column: value}}.
raw_data_dict = raw_data_df.to_dict(orient='index')

In [101]:
def regex_search(text, regex):
    """Return the first substring of `text` matching `regex`, or `pd.NA` when nothing matches."""
    regex_match = re.search(regex, text)
    return regex_match.group(0) if regex_match else pd.NA


# Patterns run against the text of the "About" card of a player page.
player_undrafted_regex = r"undrafted"
player_draft_year_regex = r"\b\d{4}\b"
player_draft_pick_num_regex = r"\b\d+(?=th|st|nd|rd)"
player_salary_regex = r"(\$[\d,]+)"
player_age_regex = r"\b\d+(?=-year)"

# (substring of the badge image URL, level name), tested in this order.
_BADGE_LEVEL_MARKERS = (
    ('_bronze.', 'Bronze'),
    ('_silver.', 'Silver'),
    ('_gold.', 'Gold'),
    ('hof.', 'Hall of Fame'),
)


def _parse_header(raw_data):
    """Read the labelled lines of the header subtitle into a dict of player facts."""
    header = {}
    player_info = raw_data.find_all('div', class_='header-subtitle')[0].find_all('p', class_='mb-0')
    for info in player_info:
        text = info.text
        value = text.split(':')[-1].strip()  # whatever follows the last colon
        if 'Nationality' in text:
            header['Nationality'] = value
        if 'Archetype' in text:
            header['Archetype'] = value
        if 'Position' in text:
            header['Position'] = value
        # Test both labels explicitly: `'Height' and 'Weight' in text` would only
        # check for 'Weight', because a non-empty string literal is always truthy.
        if 'Height' in text and 'Weight' in text:
            height_part, weight_part = text.split('|')[0], text.split('|')[-1]
            header['Height'] = height_part.strip().split(':')[-1].strip()
            header['Weight'] = weight_part.split(':')[-1].strip()
        if 'Jersey' in text:
            header['Jersey'] = text.split('#')[-1].strip()
        if 'in the NBA' in text:
            header['Years in NBA'] = value
        if 'Prior to' in text:
            header['Prior to NBA'] = value
    return header


def _parse_about(raw_data):
    """Extract salary, age and draft details from the free text of the "About" card."""
    player_about = raw_data.find_all('div', class_='card-body py-3 mb-1')[0].text

    if pd.notna(regex_search(player_about, player_undrafted_regex)):
        # The text mentions "undrafted": there is no year or pick number to look for.
        player_draft_year = "Undrafted"
        player_draft_pick_num = pd.NA
    else:
        player_draft_year = regex_search(player_about, player_draft_year_regex)
        player_draft_pick_num = regex_search(player_about, player_draft_pick_num_regex)

    return {
        'Salary': regex_search(player_about, player_salary_regex),
        'Age': regex_search(player_about, player_age_regex),
        'Draft Year': player_draft_year,
        'Draft Pick Number': player_draft_pick_num,
    }


def _parse_attributes(raw_data):
    """Map each attribute name to its integer rating."""
    attributes = {}
    for attribute in raw_data.find_all('div', class_='row mr-md-n4')[0].find_all('li'):
        text = attribute.text
        # First two characters are the rating, the name starts at the fourth character.
        attributes[text[3:]] = int(text[:2])
    return attributes


def _parse_badges(raw_data):
    """Return {badge name: {'Badge Category', 'Badge Description', 'Badge Level'}}."""
    badge_cards = raw_data.find_all('div', class_='badge-card')
    badge_cards = badge_cards[:len(badge_cards) // 2]  # remove duplicates

    badges = {}
    for badge_raw in badge_cards:
        name = badge_raw.find_all('h4')[0].text
        level_raw = badge_raw.find_all('img', class_='card-img')[0]['data-src']
        # Level stays '' when the image URL contains none of the known markers.
        level = next((label for marker, label in _BADGE_LEVEL_MARKERS if marker in level_raw), '')
        badges[name] = {
            'Badge Category': badge_raw.find_all('span', class_='badge-pill')[0].text,
            'Badge Description': badge_raw.find_all('p', class_='badge-description')[0].text,
            'Badge Level': level,
        }
    return badges


# Every row is stamped with the date this cell runs (computed once, so all rows agree).
scrape_date = pd.to_datetime('today').strftime('%Y-%m-%d')

final_dict = {}
for player_url, player_record in raw_data_dict.items():
    raw_data = player_record['Raw Data']

    # Insertion order below defines the column order of the resulting frame.
    per_player_dict = {
        'Name': player_record['Name'],
        'Team': player_record['Team'],
        'scrape_date': scrape_date,
        'Overall Rating': raw_data.find_all('span', class_='attribute-box-player')[0].text,
    }
    per_player_dict.update(_parse_header(raw_data))
    per_player_dict.update(_parse_about(raw_data))
    per_player_dict.update(_parse_attributes(raw_data))
    per_player_dict['Badges'] = _parse_badges(raw_data)

    final_dict[player_url] = per_player_dict

df = pd.DataFrame.from_dict(final_dict, orient='index').rename_axis('url')
df

Unnamed: 0_level_0,Name,Team,scrape_date,Overall Rating,Nationality,Archetype,Position,Height,Weight,Jersey,...,Perimeter Defense,Steal,Block,Lateral Quickness,Help Defense IQ,Pass Perception,Defensive Consistency,Offensive Rebound,Defensive Rebound,Badges
url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
https://www.2kratings.com/trae-young,Trae Young,Atlanta Hawks,2023-04-20,89,United States,Offensive Threat,PG,"6'1"" (185cm)",164lbs (74kg),11,...,55,43,32,70,58,70,30,37,43,{'Agent 3': {'Badge Category': 'Shooting Badge...
https://www.2kratings.com/dejounte-murray,Dejounte Murray,Atlanta Hawks,2023-04-20,84,United States,2-Way Playshot,PG,"6'5"" (196cm)",180lbs (81kg),5,...,86,59,40,87,85,89,85,36,62,{'Ankle Braces': {'Badge Category': 'DEF/REB B...
https://www.2kratings.com/clint-capela,Clint Capela,Atlanta Hawks,2023-04-20,83,Switzerland,Paint Beast,C,"6'10"" (208cm)",256lbs (115kg),15,...,53,40,78,60,86,74,85,93,90,{'Post Move Lockdown': {'Badge Category': 'DEF...
https://www.2kratings.com/onyeka-okongwu,Onyeka Okongwu,Atlanta Hawks,2023-04-20,81,United States,2-Way Inside-the-Arc Scorer,C / PF,"6'9"" (206cm)",240lbs (108kg),17,...,64,42,84,69,83,65,65,83,83,{'Aerial Wizard': {'Badge Category': 'Finishin...
https://www.2kratings.com/john-collins,John Collins,Atlanta Hawks,2023-04-20,80,United States,Inside the Arc Scorer,PF / C,"6'9"" (206cm)",226lbs (102kg),20,...,49,37,71,57,74,38,60,43,77,{'Aerial Wizard': {'Badge Category': 'Finishin...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
https://www.2kratings.com/anthony-gill,Anthony Gill,Washington Wizards,2023-04-20,72,United States,Interior Finisher,PF,"6'8"" (203cm)",230lbs (104kg),16,...,60,32,56,50,50,58,50,56,47,{}
https://www.2kratings.com/xavier-cooks,Xavier Cooks,Washington Wizards,2023-04-20,71,Australia,Hybrid Defender,SF / PF,"6'8"" (203cm)",183lbs (82kg),,...,70,52,54,71,72,67,65,64,70,{}
https://www.2kratings.com/isaiah-todd-2,Isaiah Todd,Washington Wizards,2023-04-20,70,United States,Lob Threat,PF,"6'9"" (206cm)",219lbs (99kg),14,...,52,35,50,65,58,54,40,55,59,{}
https://www.2kratings.com/quenton-jackson,Quenton Jackson,Washington Wizards,2023-04-20,68,,Perimeter Defender,SG / PG,"6'5"" (196cm)",175lbs (79kg),29,...,73,70,40,78,70,75,65,46,42,{}


#### TODO:
* ~~find out column by column where there are null values, and document/fix it~~
* ~~make position the primary position only~~
* ~~make nationality the primary nationality only~~
* ~~split height column to (1) ft and inches column ("height") (2) cm column ("height (cm)")~~
* ~~make weight kg only~~
* ~~transform all columns to their proper type~~
* ~~make a table for badges (with description and type)~~


In [150]:
# Hand-entered values, grouped by column and keyed by the player-page slug that
# follows PLAYER_URL_PREFIX in the index (presumably the cells the parser left
# empty — verify against the null report before re-using on a new scrape).
PLAYER_URL_PREFIX = 'https://www.2kratings.com/'

manual_corrections = {
    'Age': {
        'trae-young': 24,
        'john-collins': 25,
        'deandre-hunter': 25,
        'bruno-fernando': 24,
        'eugene-omoruyi': 26,
        'trevor-hudgins': 24,
        'dewayne-dedmon': 33,
        'cameron-payne': 28,
        'cameron-reddish': 23,
        'kevin-huerter': 24,
        'damian-jones': 27,
        'saddiq-bey': 24,
    },
    'Nationality': {
        'jordan-goodwin': 'United States',
        'quenton-jackson': 'United States',
    },
    'Jersey': {
        'raiquan-gray': 0,
        'tyty-washington': 0,
        'chance-comanche': 35,
        'xavier-cooks': 12,
    },
    'Years in NBA': {
        'anthony-lamb': 2,
        'omer-yurtseven': 1,
        'lindell-wigginton': 1,
        'kevon-harris': 1,
        'jeff-dowtin': 1,
    },
    'Salary': {
        'tyrese-martin': '$1,017,781',
        'donovan-williams': '$239,822',
        'raiquan-gray': '$5,849',
        'dru-smith': '$374,357',
        'dennis-smith-jr': '$2,133,278',
        'kobi-simmons': '$32,795',
        'carlik-jones': '$612,162',
        'sam-merrill': '$1,105,522',
        'mamadi-diakite': '$508,891',
        'eugene-omoruyi': '$1,013,119',
        'jared-rhoden': '$307,089',
        'andre-iguodala': '$2,905,851',
        'willie-cauley-stein': '$155,738',
        'dj-augustin': '$633,938',
        'james-johnson': '$3,841,067',
        'kendall-brown': '$508,891',
        'gabe-york': '$32,171',
        'moussa-diabate': '$508,891',
        'dennis-schroder': '$2,641,682',
        'shaquille-harrison': '$134,862',
        'scotty-pippen-jr': '$508,891',
        'jacob-gilyard': '$5,849',
        'cody-zeller': '$743,922',
        'orlando-robinson': '$386,055',
        'meyers-leonard': '$713,558',
        'ej-liddell': '$508,891',
        'dereon-seabron': '$511,816',
        'daquan-jeffries': '$799,403',
        'olivier-sarr': '$90,665',
        'michael-carter-williams': '$652,829',
        'jay-scrubb': '$49,719',
        'louis-king': '$307,089',
        'jeenathan-williams': '$52,644',
        'justin-minaya': '$35,096',
        'chance-comanche': '$26,000',
        'ibou-badji': '$18,226',
        'john-butler': '$503,041',
        'pj-dozier': '$784,654',
        'sandro-mamukelashvili': '$758,395',
        'christian-koloko': '$1,500,000',
        'kris-dunn': '$1,000,001',
        'luka-samanic': '$538,317',
        'micah-potter': '$508,891',
        'jordan-goodwin': '$1,280,206',
        'xavier-cooks': '$140,384',
        'quenton-jackson': '$219,630',
    },
}

# Write through df.loc: chained indexing (df[col][url] = value) assigns to an
# intermediate object and is not guaranteed to modify `df` itself.
unmatched_urls = []
for column, values_by_slug in manual_corrections.items():
    for slug, value in values_by_slug.items():
        url = PLAYER_URL_PREFIX + slug
        if url in df.index:
            df.loc[url, column] = value
        else:
            # Skip rather than let .loc append a new, mostly empty row.
            unmatched_urls.append(url)

if unmatched_urls:
    print('Corrections skipped (URL not in df):', unmatched_urls)

In [105]:
# Keep only the text before the first space, e.g. 'C / PF' -> 'C'.
df['Position'] = df['Position'].map(lambda position: position.split(' ')[0])

In [106]:
# Keep only the entry before the first '/', trimmed of surrounding whitespace.
df['Nationality'] = df['Nationality'].map(lambda nationality: nationality.split('/')[0].strip())

In [107]:
# Take the text after the last '(' and drop its final three characters,
# e.g. '6\'1" (185cm)' -> '185'. Must run while 'Height' still holds the full text.
# NOTE(review): the result stays a string; none of the visible cast cells converts this column — confirm.
df['Height (cm)'] = df['Height'].map(lambda height: height.split('(')[-1][:-3])

In [108]:
# Keep only the part before ' (', e.g. '6\'1" (185cm)' -> '6\'1"'.
df['Height'] = df['Height'].map(lambda height: height.split(' (')[0])

In [109]:
# Take the text after the last ' (' and drop its final three characters,
# e.g. '164lbs (74kg)' -> '74'. Not safe to run twice on the same column.
df['Weight'] = df['Weight'].map(lambda weight: weight.split(' (')[-1][:-3])

In [115]:
# Remove the thousands separators, drop the first character (the '$') and cast to int.
df['Salary'] = df['Salary'].map(lambda salary: int(salary.replace(',', '')[1:]))

In [116]:
# Display the frame to inspect the columns rewritten by the cells above.
df

Unnamed: 0_level_0,Name,Team,scrape_date,Overall Rating,Nationality,Archetype,Position,Height,Weight,Jersey,...,Steal,Block,Lateral Quickness,Help Defense IQ,Pass Perception,Defensive Consistency,Offensive Rebound,Defensive Rebound,Badges,Height (cm)
url,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
https://www.2kratings.com/trae-young,Trae Young,Atlanta Hawks,2023-04-20,89,United States,Offensive Threat,PG,"6'1""",74,11,...,43,32,70,58,70,30,37,43,{'Agent 3': {'Badge Category': 'Shooting Badge...,185
https://www.2kratings.com/dejounte-murray,Dejounte Murray,Atlanta Hawks,2023-04-20,84,United States,2-Way Playshot,PG,"6'5""",81,5,...,59,40,87,85,89,85,36,62,{'Ankle Braces': {'Badge Category': 'DEF/REB B...,196
https://www.2kratings.com/clint-capela,Clint Capela,Atlanta Hawks,2023-04-20,83,Switzerland,Paint Beast,C,"6'10""",115,15,...,40,78,60,86,74,85,93,90,{'Post Move Lockdown': {'Badge Category': 'DEF...,208
https://www.2kratings.com/onyeka-okongwu,Onyeka Okongwu,Atlanta Hawks,2023-04-20,81,United States,2-Way Inside-the-Arc Scorer,C,"6'9""",108,17,...,42,84,69,83,65,65,83,83,{'Aerial Wizard': {'Badge Category': 'Finishin...,206
https://www.2kratings.com/john-collins,John Collins,Atlanta Hawks,2023-04-20,80,United States,Inside the Arc Scorer,PF,"6'9""",102,20,...,37,71,57,74,38,60,43,77,{'Aerial Wizard': {'Badge Category': 'Finishin...,206
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
https://www.2kratings.com/anthony-gill,Anthony Gill,Washington Wizards,2023-04-20,72,United States,Interior Finisher,PF,"6'8""",104,16,...,32,56,50,50,58,50,56,47,{},203
https://www.2kratings.com/xavier-cooks,Xavier Cooks,Washington Wizards,2023-04-20,71,Australia,Hybrid Defender,SF,"6'8""",82,12,...,52,54,71,72,67,65,64,70,{},203
https://www.2kratings.com/isaiah-todd-2,Isaiah Todd,Washington Wizards,2023-04-20,70,United States,Lob Threat,PF,"6'9""",99,14,...,35,50,65,58,54,40,55,59,{},206
https://www.2kratings.com/quenton-jackson,Quenton Jackson,Washington Wizards,2023-04-20,68,United States,Perimeter Defender,SG,"6'5""",79,29,...,70,40,78,70,75,65,46,42,{},196


In [119]:
# Cast every overall rating to int.
df['Overall Rating'] = df['Overall Rating'].map(int)

In [121]:
# Cast every weight value to int.
df['Weight'] = df['Weight'].map(int)

In [125]:
# Cast every age to int; raises if any value is still missing.
df['Age'] = df['Age'].map(int)

In [123]:
# Cast every "Years in NBA" value to int; raises if any value is still missing.
df['Years in NBA'] = df['Years in NBA'].map(int)

In [151]:
# Cast the pick numbers that are present to int; rows without one are left untouched.
# Assign through df.loc: chained indexing (df[col][mask] = ...) writes to an
# intermediate object and is not guaranteed to modify `df` itself.
mask = df['Draft Pick Number'].notna()
df.loc[mask, 'Draft Pick Number'] = df.loc[mask, 'Draft Pick Number'].map(int)

In [133]:
# Show the dtype of every column to check the casts above.
df.dtypes

Name                     object
Team                     object
scrape_date              object
Overall Rating            int64
Nationality              object
Archetype                object
Position                 object
Height                   object
Weight                    int64
Jersey                   object
Years in NBA              int64
Prior to NBA             object
Salary                    int64
Age                       int64
Draft Year               object
Draft Pick Number        object
Close Shot                int64
Mid-Range Shot            int64
Three-Point Shot          int64
Free Throw                int64
Shot IQ                   int64
Offensive Consistency     int64
Speed                     int64
Acceleration              int64
Strength                  int64
Vertical                  int64
Stamina                   int64
Hustle                    int64
Overall Durability        int64
Layup                     int64
Standing Dunk             int64
Driving 

In [138]:
# Per player: {badge name: [description, category]}.
badges_parsed = [
    {
        badge_name: [badge_info['Badge Description'], badge_info['Badge Category']]
        for badge_name, badge_info in player_badges.items()
    }
    for player_badges in df['Badges']
]

In [139]:
# Merge all players' badges into one lookup; a badge seen again overwrites the earlier entry.
badges = {}
for player_badges in badges_parsed:
    for badge_name, (description, category) in player_badges.items():
        badges[badge_name] = {'Description': description, 'Category': category}

In [141]:
# One row per badge, indexed by badge name and ordered by category, then name.
badges_df = pd.DataFrame.from_dict(badges, orient='index')
badges_df = badges_df.rename_axis('Name')
badges_df = badges_df.sort_values(by=['Category', 'Name'])

In [142]:
# Display the badge lookup table.
badges_df

Unnamed: 0_level_0,Description,Category
Name,Unnamed: 1_level_1,Unnamed: 2_level_1
Anchor,"A crucial badge for last line of defense bigs,...",DEF/REB Badge
Ankle Braces,Reduces the chances of getting crossed over,DEF/REB Badge
Boxout Beast,Helps rebounders win boxout battles on both of...,DEF/REB Badge
Brick Wall,Increases effectiveness of screens and drains ...,DEF/REB Badge
Challenger,Improves the effectiveness of perimeter shot c...,DEF/REB Badge
...,...,...
Limitless Range,Boosts the range that one can effectively shoo...,Shooting Badge
Middy Magician,Improved ability to knock down mid-range jumpe...,Shooting Badge
Slippery Off Ball,Strengthens the player's ability to get the op...,Shooting Badge
Space Creator,"Ability to hit stepback jumpers and hop shots,...",Shooting Badge


In [152]:
# Write both result frames as pickles.
for frame, pickle_path in ((df, 'nba2k23_data.pkl'), (badges_df, 'nba2k23_badges.pkl')):
    frame.to_pickle(pickle_path)

In [153]:
# Write both result frames as CSV files.
for frame, csv_path in ((df, 'nba2k23_data.csv'), (badges_df, 'nba2k23_badges.csv')):
    frame.to_csv(csv_path)