# AUDL PROJECT - DATA SCRAPING 🕷

## I. Data Outline 🕸
- [Player Stats](https://theaudl.com/stats/player-stats) (G, PP, POS, etc.) for each player ✔
- [Team Stats](https://theaudl.com/stats/team) by year X team/opponent
- [Team Game Stats](https://theaudl.com/stats/team-game-stats) - All teams, all seasons
- [Individual Player Stats](https://theaudl.com/league/players)
    - Stats Per Game
    - Height / Weight (if available)
    - Defense / Offense (if available)
    - Handler / Cutter (if available)


In [2]:
# IMPORTS
import pandas as pd
import selenium
import os
import time

from selenium import webdriver

In [5]:
# SETUP FOR SCRAPING

def get_page_results(driver: webdriver.Chrome, webpage: str) -> None:
    """
    Utilize a chrome webdriver to scrape data from an AUDL Stats page
    Save results of page to a CSV.
    input:
        - driver: Chrome webdriver, used as input to keep same driver while looping
    output:
        - None
    """

    driver.get(webpage)
    time.sleep(2)
    # driver.switch_to.frame(0)

    save_file = '.\\DATA\\20220227_player_all_time.csv'

    if not os.path.isfile(save_file):
        df = pd.read_html(driver.page_source)[0]
        df.to_csv(save_file, index=False)
        return None

    df: pd.DataFrame = pd.read_csv(save_file).drop_duplicates()
    all_stats: pd.DataFrame = pd.concat([df, pd.read_html(driver.page_source)[0]])
    all_stats.to_csv(save_file, index=False)
    return None


### Player All-Time Stats

In [11]:
# SCRAPE PLAYER STATS PAGES
num_pages = 130
root_page = 'https://theaudl.com/stats/player-stats?page='

driver = webdriver.Chrome(".\\webdriver\\chromedriver_98.exe")

for i in range(num_pages):
    i_page = root_page + str(i+1) # +1 because range() starts @ 0

    get_page_results(driver=driver, webpage=i_page)

driver.close()
    


  driver = webdriver.Chrome(".\\webdriver\\chromedriver_98.exe")


In [12]:
save_file = '.\\DATA\\20220227_player_all_time.csv'
df = pd.read_csv(save_file)
print(f"SHAPE: {df.shape}")

df.sample(20)

SHAPE: (2558, 26)


Unnamed: 0,Player,G,PP,POS,SCR,AST,GLS,BLK,+/- ▼,Cmp,...,T,S,D,C,Hck,Hck%,Pul,OPP,DPP,MP
445,Nick Vogt,12,237,270,47,12,35,12,55,148,...,3,0,1,1,2,--,0,205,32,241
404,Ryan Purcell,47,1024,1224,156,124,32,20,60,1389,...,106,2,10,0,1,--,19,690,334,978
65,Ethan Beardsley,60,1395,1794,185,18,167,47,193,413,...,13,0,26,0,0,--,1,1071,324,1307
795,Joe Becker,21,285,167,47,18,29,7,28,87,...,19,0,7,0,0,--,2,227,58,114
2159,Jacob Shoyer,1,4,2,0,0,0,0,0,0,...,0,0,0,0,0,--,0,0,4,13
326,Kevin Underhill,18,501,589,100,60,40,14,73,778,...,40,4,1,0,0,--,29,379,122,452
725,Lance Blackstock,17,288,230,29,11,18,12,31,57,...,6,0,4,0,0,--,31,35,253,240
1542,Ted Schewe,13,145,148,18,5,13,1,7,65,...,9,0,3,0,3,--,0,77,68,184
1786,Steven Garlok,4,53,40,3,2,1,2,3,13,...,0,0,2,0,0,--,0,3,50,48
122,Chris Mazur,49,1065,1112,240,176,64,30,143,1389,...,117,2,10,0,5,--,162,762,303,892


### 