In [2]:
# import needed libraries
import pandas as pd
from audl.stats.endpoints.gamestats import GameStats
from audl.stats.endpoints.seasonschedule import SeasonSchedule
from audl.stats.endpoints import playerstats, playerprofile
import numpy as np
from bs4 import BeautifulSoup
import requests
from tqdm import tqdm

In [11]:
def get_player_career_per_game_stats(filename: str):
    """
    Save the AUDL/UFA career per-game statistics of all players as a csv file.

    Args:
        filename (str): the filename of the csv file to be created
    """
    # Build the career / per-game / all-players query and write it straight
    # to disk; nothing is returned to the caller.
    playerstats.PlayerStats('career', 'game', 'all').download_stats_as_dataframe(filename)
    
def _parse_position(soup):
    """Return the text after the first '/' in the team/position element, or None."""
    position_info = soup.find_all('div', class_="audl-player-current-team-position")
    if position_info:
        text = position_info[0].get_text()
        if '/' in text:
            return text.split('/')[1].strip()
    return None


def _parse_height_inches(text):
    """Convert a feet'inches" string (e.g. 6'2") to total inches, or None if it does not parse."""
    # Drop the inch mark, turn the foot mark into a separator, then split on
    # any run of whitespace so that both 6'2" and 6' 2" are accepted.
    parts = text.replace('"', '').replace("'", " ").split()
    if len(parts) != 2:
        return None
    try:
        feet, inches = map(int, parts)
    except ValueError:
        return None
    return feet * 12 + inches


def _parse_weight(text):
    """Return the leading space-delimited token of ``text`` as an int, or None."""
    # Slicing instead of indexing keeps an empty string from raising IndexError.
    if not text[:1].isnumeric():
        return None
    try:
        return int(text.split(' ')[0])
    except ValueError:
        return None


def _parse_height_weight(soup):
    """Return (height_in_inches, weight) read from the personal-stats spans."""
    values = soup.find_all('span', class_='audl-personal-stats-value')
    if not values:
        return None, None

    height_text = values[0].get_text()
    if not height_text[:1].isnumeric():
        # The first value is not a height, so the second one is not trusted
        # to be a weight either.
        return None, None

    height = _parse_height_inches(height_text)
    weight = _parse_weight(values[1].get_text()) if len(values) > 1 else None
    return height, weight


def scrape_data(df: pd.DataFrame) -> pd.DataFrame:
    """
    Scrape height, weight and position for every player in ``df``.

    One GET request is sent to https://watchufa.com/league/players/<playerID>
    per row. Exactly one value is recorded per row for each new column, so the
    columns always line up with ``df``; a value is None when the page could not
    be fetched or the field could not be parsed.

    Args:
        df (pd.DataFrame): frame with a 'playerID' column of string ids.

    Returns:
        pd.DataFrame: the same ``df`` object (modified in place) with
        'height' (total inches), 'weight' and 'position' columns added.
    """
    position_list = []
    height_list = []
    weight_list = []

    for playerID in tqdm(df['playerID']):
        url = "https://watchufa.com/league/players/" + playerID

        # Defaults used when the request fails or a field is missing.
        position = height = weight = None

        try:
            # A timeout keeps one stalled connection from hanging the whole scrape.
            response = requests.get(url, timeout=30)
        except requests.RequestException:
            response = None

        if response is not None and response.status_code == 200:
            soup = BeautifulSoup(response.content, "lxml")
            position = _parse_position(soup)
            height, weight = _parse_height_weight(soup)
        else:
            print("connection failed. Please retry function")

        # Always append, even on failure, so the lists stay aligned with df.
        position_list.append(position)
        height_list.append(height)
        weight_list.append(weight)

    df['height'] = height_list
    df['weight'] = weight_list
    df['position'] = position_list
    return df

def get_yearly_player_stats(years=None):
    """
    Fetch per-game player stats for several seasons and stack them in one frame.

    Args:
        years (iterable of int, optional): seasons to fetch. Defaults to
            2012-2019 and 2021-2024 (2020 is not in the default list).

    Returns:
        pd.DataFrame: the tables of all requested seasons concatenated in
        order, with an added 'year' column. Each season keeps its own row
        index, so index values repeat across seasons. An empty DataFrame is
        returned when ``years`` is empty.
    """
    if years is None:
        years = [2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2021, 2022, 2023, 2024]

    # Collect the per-season tables and concatenate once, instead of
    # re-copying an ever-growing frame on every iteration.
    frames = []
    for year in years:
        stats = playerstats.PlayerStats(year, 'game', 'all')
        season_df = stats.fetch_table()
        season_df['year'] = year
        frames.append(season_df)

    if not frames:
        # pd.concat raises on an empty list; keep the "no seasons" result empty.
        return pd.DataFrame()

    return pd.concat(frames)


def _lookup_player(players, player_id):
    """Return (ext_player_id, first_name, last_name) of the first row of
    ``players`` whose 'id' equals ``player_id``, or None when no row matches."""
    match = players[players['id'] == player_id]
    if match.shape[0]:
        row = match.iloc[0]
        return (row['player.ext_player_id'],
                row['player.first_name'],
                row['player.last_name'])
    return None


def _lineup_ids(players, player_ids):
    """Return one 'player.ext_player_id' selection per id in ``player_ids``.

    NOTE(review): each entry is the filtered pandas Series, not a scalar id;
    this matches the existing output format and is kept as is.
    """
    return [players[players['id'] == pid]['player.ext_player_id'] for pid in player_ids]


def get_play_by_play_data(year: int) -> pd.DataFrame:
    """
    Build a play-by-play table for every game on the schedule of ``year``.

    Games whose id contains 'allstar' are skipped. For each remaining game the
    home and away event streams are walked point by point and one row is
    produced per event.

    Args:
        year (int): season passed to ``SeasonSchedule``.

    Returns:
        pd.DataFrame: one row per event with the columns gameID, team
        ('homeEvents' / 'awayEvents'), point, event_number (raw event code),
        event_type, thrower_*/reciever_* (id, first name, last name), x, y
        and lineup. Thrower/receiver fields that do not apply hold the
        placeholder 0.0; x and y are missing when the event has no 'x' key.
    """
    games = SeasonSchedule(year).get_schedule()['gameID']

    columns = ['gameID', 'team', 'point', 'event_number', 'event_type', 'thrower_id', 'thrower_fname',
               'thrower_lname', 'reciever_id', 'reciever_fname', 'reciever_lname', 'x', 'y', 'lineup']

    # Fields pre-filled with 0.0 on every row; they are overwritten only when
    # the matching player is found.
    player_fields = ['thrower_id', 'thrower_fname', 'thrower_lname',
                     'reciever_id', 'reciever_fname', 'reciever_lname']

    # Raw event code -> label. Codes not listed here are labelled 'unknown'.
    event_id_dict = {3: 'pull', 21: 'opponent score', 20: 'complete pass', 22: 'score', 9: 'throwaway caused',
                     5: 'block', 8: 'throwaway', 15: 'timeout start', 40: 'timeout new lineup', 53: 'timeout end',
                     23: 'end of first quarter', 24: 'end of second quarter', 25: 'end of third quarter',
                     14: 'other team timeout start', 41: 'other team timeout new lineup', 50: 'Game Start',
                     19: 'Dropped Pass', 43: 'Injury', 42: 'Injury', 4: 'out of bounds pull'}

    # Rows are collected in a list and turned into a DataFrame once at the
    # end; appending to a DataFrame per event copies the whole table each time.
    records = []

    for game_id in tqdm(games):
        if 'allstar' in game_id:
            continue

        game = GameStats(game_id)
        events = game.get_events()
        lineup_data = game.get_lineup_by_points()
        players = game.get_players_metadata()

        # The last word of the lower-cased team name is what gets compared
        # against the 'offense' / 'defense' values of the lineup data.
        metadata = game.get_game_metadata()
        home_team = str(metadata['team_season_home.team.name']).lower().split('\n')[0].split()[-1]
        away_team = str(metadata['team_season_away.team.name']).lower().split('\n')[0].split()[-1]

        gamedict = {'homeEvents': home_team, 'awayEvents': away_team}

        for location, team in gamedict.items():

            # "... chill" is matched under the name 'windchill' in the lineup data.
            if team == 'chill':
                team = 'windchill'

            # The first entry of the event stream is skipped before pairing
            # points with their lineups.
            for p, l in zip(events[location][1:], lineup_data):
                point = p['point']

                thrower = 0   # id of the player currently holding the disc (0 = unknown)
                lastE = 0     # code of the previous event in this point

                if l['offense'] == team:
                    lineup = _lineup_ids(players, l['lineup_offense'])
                elif l['defense'] == team:
                    lineup = _lineup_ids(players, l['lineup_defense'])
                else:
                    print(l, gamedict)
                    # Do not carry over the previous point's lineup (or hit an
                    # unbound name on the very first point).
                    lineup = []

                for event in p['events']:
                    event_code = event['t']

                    data_dict = dict.fromkeys(player_fields, 0.0)

                    # An event may carry its own lineup, which then replaces
                    # the current one for this and the following events.
                    if 'l' in event:
                        lineup = _lineup_ids(players, event['l'])

                    data_dict['gameID'] = game.game_id
                    data_dict['team'] = location
                    data_dict['point'] = point
                    data_dict['lineup'] = lineup
                    data_dict['event_number'] = event_code
                    data_dict['event_type'] = event_id_dict.get(event_code, 'unknown')

                    if 'x' in event:
                        data_dict['x'] = event['x']
                        data_dict['y'] = event['y']

                    # For pull / block / dropped pass the player in 'r' is
                    # recorded in the thrower fields.
                    if event_code in (3, 5, 19) and 'r' in event:
                        acting_player = _lookup_player(players, event['r'])
                        if acting_player is not None:
                            (data_dict['thrower_id'],
                             data_dict['thrower_fname'],
                             data_dict['thrower_lname']) = acting_player

                    if (lastE == 20 and event_code == 20) or event_code == 22:
                        # A pass that follows a pass, or any score: the thrower
                        # is the player remembered from the previous event.
                        thrower_info = _lookup_player(players, thrower)
                        if thrower_info is not None:
                            (data_dict['thrower_id'],
                             data_dict['thrower_fname'],
                             data_dict['thrower_lname']) = thrower_info

                        if 'r' in event:
                            reciever_info = _lookup_player(players, event['r'])
                            if reciever_info is not None:
                                (data_dict['reciever_id'],
                                 data_dict['reciever_fname'],
                                 data_dict['reciever_lname']) = reciever_info

                            # The receiver becomes the next thrower.
                            thrower = event['r']
                        else:
                            thrower = 0

                    elif event_code == 20:
                        # First pass event after a non-pass event: only
                        # remember who has the disc. A missing 'r' falls back
                        # to "unknown" instead of raising KeyError.
                        thrower = event.get('r', 0)
                        data_dict['event_type'] = 'gain possession'

                    lastE = event_code
                    records.append(data_dict)

    return pd.DataFrame(records, columns=columns)
                    


In [4]:
# Download the career per-game stats table to a CSV file (the helper returns nothing).
get_player_career_per_game_stats("player_career_per_game_stats_aug_24.csv")

Downloaded csv file at player_career_per_game_stats_aug_24.csv


In [5]:
# Load the CSV written by get_player_career_per_game_stats and show (rows, columns).
df = pd.read_csv("player_career_per_game_stats_aug_24.csv")
df.shape

(2101, 27)

In [19]:
# Add height, weight and position columns; sends one HTTP request per row of df.
df = scrape_data(df)

100
200
300
400
500
600
700
800
900
1000
1100
1200
1300
1400
1500
1600
1700
1800
1900
2000
2100


In [32]:
# Fetch the per-game stats of every season and preview the first rows.
df1 = get_yearly_player_stats()
df1.head()

Unnamed: 0,playerID,name,gamesPlayed,scores,assists,goals,plusMinus,completions,completionPercentage,hockeyAssists,...,oPointsPlayed,dPointsPlayed,minutesPlayed,possessions,oEfficiency,yardsTotal,yardsThrown,yardsReceived,hucksCompleted,huckPercentage
0,jkorber,John Korber,12,7.25,1.58,5.67,7.17,2.92,87.5,0.0,...,,,,,,,,,,
1,jhelton,Jonathan Helton,16,6.13,4.0,2.13,5.5,20.44,91.6,0.0,...,,,,,,,,,,
2,smurray,Sean Murray,16,4.38,1.69,2.69,4.88,5.81,86.92,0.0,...,,,,,,,,,,
3,cbrock,Cameron Brock,16,5.31,1.38,3.94,4.56,7.13,92.68,0.0,...,,,,,,,,,,
4,rdulabon,Rob Dulabon,15,5.2,1.6,3.6,4.47,13.8,94.52,0.0,...,,,,,,,,,,


In [35]:
# Save the yearly stats; the row index is written as well because index=False is not passed.
df1.to_csv('player_yearly_stats_aug_24.csv')

In [30]:
# Number of players with a non-missing scraped position (Series.count skips missing values).
df['position'].count()

585

In [29]:
# Save the career stats together with the scraped height / weight / position columns.
df.to_csv('player_career_per_game_stats_positions_aug_24.csv')

In [12]:
# Build the 2024 play-by-play table. Slow: the recorded run below took about 24 minutes.
play_by_play = get_play_by_play_data(2024)

https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=1


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=2


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=3


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=4


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=5


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=6


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=7


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=8


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=9


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=10


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=11


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=12


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=13


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=14


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=15


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=16


  dfs = dfs.append(df)


https://www.backend.audlstats.com/web-api/games?limit=10&years=2024&page=17


  dfs = dfs.append(df)
100%|██████████| 155/155 [24:29<00:00,  9.48s/it]


In [14]:
# Preview the first 50 events to sanity-check the event labels and player lookups.
play_by_play.head(50)

Unnamed: 0,gameID,team,point,event_number,event_type,thrower_id,thrower_fname,thrower_lname,reciever_id,reciever_fname,reciever_lname,x,y,lineup
0,2024-08-24-CAR-MIN,homeEvents,1,3,pull,clacy,Cameron,Lacy,0.0,0.0,0.0,13.38,100.45,"[[mdehlin], [pkrenik], [bmatis], [tvandemoo], ..."
1,2024-08-24-CAR-MIN,homeEvents,1,9,throwaway caused,0.0,0.0,0.0,0.0,0.0,0.0,,,"[[mdehlin], [pkrenik], [bmatis], [tvandemoo], ..."
2,2024-08-24-CAR-MIN,homeEvents,1,20,gain possession,0.0,0.0,0.0,0.0,0.0,0.0,10.47,93.35,"[[mdehlin], [pkrenik], [bmatis], [tvandemoo], ..."
3,2024-08-24-CAR-MIN,homeEvents,1,22,score,tvandemoo,Tristan,Van de Moortele,nhanson,Noah,Hanson,12.09,104.25,"[[mdehlin], [pkrenik], [bmatis], [tvandemoo], ..."
4,2024-08-24-CAR-MIN,homeEvents,2,3,pull,clacy,Cameron,Lacy,0.0,0.0,0.0,2.03,119.03,"[[mrehder], [ddeclerck], [tvandemoo], [bvohnou..."
5,2024-08-24-CAR-MIN,homeEvents,2,9,throwaway caused,0.0,0.0,0.0,0.0,0.0,0.0,,,"[[mrehder], [ddeclerck], [tvandemoo], [bvohnou..."
6,2024-08-24-CAR-MIN,homeEvents,2,20,gain possession,0.0,0.0,0.0,0.0,0.0,0.0,4.54,99.74,"[[mrehder], [ddeclerck], [tvandemoo], [bvohnou..."
7,2024-08-24-CAR-MIN,homeEvents,2,22,score,tvandemoo,Tristan,Van de Moortele,pkrenik,Paul,Krenik,8.73,103.61,"[[mrehder], [ddeclerck], [tvandemoo], [bvohnou..."
8,2024-08-24-CAR-MIN,homeEvents,3,3,pull,kjohnson1,Kristian,Johnson,0.0,0.0,0.0,-5.78,88.9,"[[bmatis], [nhanson], [tshope], [ajirele1], [t..."
9,2024-08-24-CAR-MIN,homeEvents,3,9,throwaway caused,0.0,0.0,0.0,0.0,0.0,0.0,,,"[[bmatis], [nhanson], [tshope], [ajirele1], [t..."


In [15]:
# Save the play-by-play table (row index included, since index=False is not passed).
play_by_play.to_csv('2024playdata.csv')