In [14]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
from io import StringIO

In [32]:
def scrape_maxpreps(state_code: str, side: str, category: str, timeout: float = 30.0) -> pd.DataFrame:
    """Scrape one MaxPreps football stat-leaders page into a DataFrame.

    Parameters
    ----------
    state_code : str
        State abbreviation used as the first URL path segment (e.g. ``"il"``).
        It is lower-cased before the request is made.
    side : str
        One of ``"offense"``, ``"defense"`` or ``"special-teams"``.
    category : str
        Stat category used as a URL path segment (e.g. ``"passing"``,
        ``"rushing"``, ``"receiving"``). It is not validated here.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` gives up.

    Returns
    -------
    pd.DataFrame
        The first HTML table found on the page, with the ``Name`` and
        ``School`` columns filled from the page's anchor tags and the ``#``
        column removed.

    Raises
    ------
    ValueError
        If ``side`` is not a supported value, if the page contains no table,
        or if the number of scraped names/schools does not match the number
        of table rows.
    requests.HTTPError
        If the server answers with an error status code.
    """
    # Argument processing. An explicit exception is used instead of `assert`
    # so the check still runs when Python is started with -O.
    state_code = state_code.lower()
    valid_sides = ("offense", "defense", "special-teams")
    if side not in valid_sides:
        raise ValueError(f"side must be one of {valid_sides}, got {side!r}")

    # HTTP request. The trailing "yds" path segment is hard-coded.
    url = f"https://www.maxpreps.com/{state_code}/football/stat-leaders/{side}/{category}/yds/"
    resp = requests.get(url, timeout=timeout)  # a timeout keeps the cell from hanging forever
    resp.raise_for_status()

    # Name the parser explicitly: without it bs4 picks whichever parser is
    # installed (and warns), so results could differ between environments.
    soup = BeautifulSoup(resp.text, "html.parser")

    # Names and schools are pulled from separate anchor tags.
    # NOTE(review): these class names look auto-generated and are likely to
    # change when the site is rebuilt -- re-check them if nothing is matched.
    name_class = "sc-63c3c392-2 ctOOgc"
    school_class = "sc-63c3c392-2 ctOOgc school"
    # The first two name matches are discarded -- presumably links that
    # precede the table rows; TODO confirm against the page markup.
    names = [tag.text for tag in soup.find_all("a", attrs={"class": name_class})][2:]
    schools = [tag.text for tag in soup.find_all("a", attrs={"class": school_class})]

    df = pd.read_html(StringIO(resp.text))[0]

    # Fail with a readable message instead of pandas' generic length error.
    if not (len(names) == len(schools) == len(df)):
        raise ValueError(
            f"scraped {len(names)} names and {len(schools)} schools "
            f"for a table with {len(df)} rows; the page layout may have changed"
        )

    df["Name"] = names
    df["School"] = schools
    return df.drop(columns=["#"])
    
# TODO: accommodate multiple pages (make a new function for this)

In [36]:
# Example: offensive passing leaders for state code "il".
scrape_maxpreps(state_code="il", side="offense", category="passing")

Unnamed: 0,Name,Yds,Yds/G,Comp,Att,Pct,TD,Int,Rate,GP,School
0,Callahan Roper,1688,281.3,108,181,0.597,23,8,112,6,Marion
1,Ryne Sheppard,1599,266.5,92,117,0.786,24,0,159,6,Maroa-Forsyth (Maroa)
2,Brody Scheffler,1543,257.2,88,138,0.638,20,2,135,6,Springfield
3,Jake Nawrot,1537,256.2,88,118,0.746,23,0,156,6,HHS (Arlington Heights)
4,Jaxon Bridges,1502,250.3,122,190,0.642,13,6,98,6,Effingham
5,Matthew Gipson,1474,245.7,60,107,0.561,16,3,129,6,GCHS (Fox Lake)
6,Brock Phillip,1426,237.7,75,144,0.521,16,3,115,6,St. Francis (Wheaton)
7,Keller Stocks,1382,230.3,85,134,0.634,19,2,131,6,Mt. Zion
8,Kenyonte Louis,1376,229.3,64,99,0.646,16,2,139,6,Kenwood (Chicago)
9,Hunter Hochbaum,1365,227.5,91,149,0.611,14,3,114,6,Mater Dei (Breese)
