In [36]:
import logging

import pandas as pd
import soccerdata as sd
from tqdm import tqdm

# Do not truncate wide DataFrames when they are displayed.
pd.options.display.max_columns = None

In [2]:
import boto3
from dotenv import load_dotenv
import os
from io import StringIO

# Populate the environment via python-dotenv, then read the AWS settings from it.
load_dotenv()
aws_access_key = os.environ.get('AWS_ACCESS_KEY')
aws_secret_access = os.environ.get('AWS_SECRET_ACCESS')
aws_region = os.environ.get('AWS_REGION')

# Bucket that receives every object uploaded by this notebook.
bucket = 'footballbets'

# Client used for all S3 uploads below.
s3 = boto3.client(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_access,
    region_name=aws_region,
)

In [3]:
# Competition and season shared by every reader and used as the S3 key prefix.
league, season = "ENG-Premier League", 2324

In [44]:
# Every reader is scoped to the same league and season.
_reader_scope = {"leagues": league, "seasons": season}

ws = sd.WhoScored(**_reader_scope)
fbref = sd.FBref(**_reader_scope)
mh = sd.MatchHistory(**_reader_scope)



In [45]:
# Fixture list as reported by the WhoScored reader.
epl_schedule = ws.read_schedule()
# Fixture list as reported by the FBref reader.
fbref_schedule = fbref.read_schedule()

In [49]:
# Attach FBref's game_id to the WhoScored schedule by joining on the shared index.
# Both frames carry a `game_id` column, so pandas' default suffixes apply.
# NOTE(review): the two sources spell several clubs differently (e.g. 'Newcastle Utd'
# vs 'Newcastle'); if the index's game label embeds team names, this inner join
# silently drops those fixtures - confirm the row count before relying on the result.
master_schedule = pd.merge(
    epl_schedule,
    fbref_schedule[['game_id']],
    how='inner',
    left_index=True,
    right_index=True,
)

In [57]:
# Distinct home-team spellings used by FBref.
fbref_schedule["home_team"].unique()

<StringArray>
[        'Burnley',         'Arsenal',     'Bournemouth',        'Brighton',
         'Everton',   'Newcastle Utd',   'Sheffield Utd',       'Brentford',
         'Chelsea',  'Manchester Utd', "Nott'ham Forest",          'Fulham',
       'Liverpool', 'Manchester City',       'Tottenham',          'Wolves',
     'Aston Villa',        'West Ham',  'Crystal Palace',      'Luton Town']
Length: 20, dtype: string

In [56]:
# Distinct home-team spellings used by WhoScored.
epl_schedule["home_team"].unique()

array(['Burnley', 'Arsenal', 'Bournemouth', 'Brighton', 'Everton',
       'Newcastle', 'Sheffield United', 'Brentford', 'Chelsea',
       'Manchester United', 'Nottingham Forest', 'Fulham', 'Liverpool',
       'Manchester City', 'Tottenham', 'Wolves', 'Aston Villa',
       'West Ham', 'Crystal Palace', 'Luton'], dtype=object)

In [37]:
def _upload_csv(frame, key):
    """Serialise ``frame`` to CSV in memory and store it in ``bucket`` under ``key``.

    The frame's index is not written; callers that need index levels in the
    file must turn them into columns first.
    """
    csv_buffer = StringIO()
    frame.to_csv(csv_buffer, index=False)
    s3.put_object(Bucket=bucket, Key=key, Body=csv_buffer.getvalue())


# Only the first two fixtures are processed; widen the slice for a full run.
for game_key, match in epl_schedule.iloc[:2].iterrows():
    # The last index level is the game label; it names the uploaded objects.
    game_label = game_key[-1]
    print(game_label)
    match_file_name = game_label.replace(' ', '_')
    match_id = int(match.game_id)

    event_data = ws.read_events(match_id=match_id, output_fmt="spadl")
    logging.info("SPADL Data Loaded")

    _upload_csv(event_data, f'{league}/{season}/events/{match_file_name}_SPADL.csv')
    logging.info("SPADL Data Into S3")

    missing_players = ws.read_missing_players(match_id=match_id)
    logging.info("Missing Player Data Loaded")

    # Team and player names are index levels of this frame, so they are moved
    # into columns before writing; otherwise the CSV keeps only the ids.
    # The key prefix is spelled "missing_players" (previously "mising_players");
    # objects already stored under the old prefix are not moved.
    _upload_csv(
        missing_players.reset_index(),
        f'{league}/{season}/missing_players/{match_file_name}_Missing_Players.csv',
    )
    logging.info("Missing Player Data Into S3")

    


    

2023-08-11_Burnley-Manchester_City


  ).bfill()


2023-08-12_Arsenal-Nottingham_Forest


  ).bfill()


In [40]:
# Per-game records from the MatchHistory reader (presumably including betting odds - confirm).
odds = mh.read_games()

In [59]:
# Distinct home-team spellings used by MatchHistory.
odds["home_team"].unique()

array(['Burnley', 'Arsenal', 'Bournemouth', 'Brighton', 'Everton',
       'Newcastle', 'Sheffield United', 'Brentford', 'Chelsea',
       'Man United', "Nott'm Forest", 'Fulham', 'Liverpool', 'Man City',
       'Tottenham', 'Wolves', 'Aston Villa', 'West Ham', 'Crystal Palace',
       'Luton'], dtype=object)

In [6]:
# SPADL events for the first fixture in the schedule.
# The schedule is indexed by labels, so `.iloc[0]` is used for positional access;
# `game_id[0]` relies on the deprecated integer-as-position fallback.
first_game_id = int(epl_schedule.game_id.iloc[0])
test_match = ws.read_events(match_id=first_game_id, output_fmt="spadl")

  test_match = ws.read_events(match_id=int(epl_schedule.game_id[0]),


  ).bfill()


In [12]:
# Missing-player report for the first fixture in the schedule.
# `.iloc[0]` makes the positional lookup explicit on the label-indexed Series.
missing_players = ws.read_missing_players(match_id=int(epl_schedule.game_id.iloc[0]))

In [13]:
# Preview the first five rows of the missing-player report.
missing_players.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,game_id,player_id,reason,status
league,season,game,team,player,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
ENG-Premier League,2324,2023-08-11 Burnley-Manchester City,Burnley,Michael Obafemi,1729191,351355,injured,Out
ENG-Premier League,2324,2023-08-11 Burnley-Manchester City,Manchester City,Nathan Aké,1729191,122945,injured doubtful,Doubtful


In [14]:
# Preview the first five SPADL actions of the sample match.
test_match.head(n=5)

Unnamed: 0,game_id,original_event_id,period_id,time_seconds,team_id,player_id,start_x,end_x,start_y,end_y,type_id,result_id,bodypart_id,action_id,player,team
0,1729191,2575436797.0,1,0.0,184,371447.0,52.5,28.56,34.0,40.732,0,1,0,0,Lyle Foster,Burnley
1,1729191,2575436867.0,1,2.0,184,353412.0,30.135,68.355,40.324,20.672,0,0,0,1,Jordan Beyer,Burnley
2,1729191,2575436877.0,1,6.0,167,122945.0,79.905,62.265,15.164,3.196,0,0,1,2,Nathan Aké,Man City
3,1729191,2575436913.0,1,8.0,184,135865.0,57.75,43.05,8.5,12.036,0,1,0,3,Connor Roberts,Burnley
4,1729191,,1,10.5,184,362826.0,43.05,41.79,12.036,15.232,21,1,0,4,Dara O'Shea,Burnley


In [9]:
# Check that the first game id converts to a plain Python int.
# `.iloc[0]` avoids the deprecated integer-as-position lookup on a label-indexed Series.
type(int(epl_schedule.game_id.iloc[0]))

  type(int(epl_schedule.game_id[0]))


int

In [25]:
# Replace spaces with underscores in the game labels (index level 2).
# set_levels() takes the level's unique values in their stored order and keeps the
# existing codes, so the mapping is applied to `index.levels[2]`. Passing the per-row
# `get_level_values(2)` only lines up with the codes when every row is unique and the
# frame is already sorted by that level.
new_level_values = epl_schedule.index.levels[2].map(
    lambda game_label: game_label.replace(' ', '_')
)
epl_schedule.index = epl_schedule.index.set_levels(new_level_values, level=2)