In [None]:
import soccerdata as sd
import pandas as pd
from tqdm import tqdm
pd.set_option('display.max_columns', None)
import logging

In [None]:
import boto3
from dotenv import load_dotenv
import os
from io import StringIO

# Load variables from a .env file (python-dotenv) into the process environment,
# then read the AWS settings from it. Each value is None when its variable is unset.
load_dotenv()
aws_access_key = os.environ.get('AWS_ACCESS_KEY')
aws_secret_access = os.environ.get('AWS_SECRET_ACCESS')
aws_region = os.environ.get('AWS_REGION')

# S3 client used by the upload cells further down.
s3 = boto3.client(
    's3',
    region_name=aws_region,
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_access,
)

# Bucket that the upload cells write to.
bucket = 'footballbets'

In [None]:
# Scrape parameters shared by every reader below and used in the S3 key paths.
league: str = 'ENG-Premier League'
# Presumably the 2023/24 season in soccerdata's short season-code form; confirm.
season: int = 2324

In [None]:
# One soccerdata reader per source, all scoped to the same league and season.
ws = sd.WhoScored(leagues=league, seasons=season)
fbref = sd.FBref(leagues=league, seasons=season)
mh = sd.MatchHistory(leagues=league, seasons=season)



In [None]:
# Fetch the schedule from each source. `epl_schedule` comes from WhoScored and
# `fbref_schedule` from FBref; later cells merge them and compare team names.
epl_schedule = ws.read_schedule()
fbref_schedule = fbref.read_schedule()

In [None]:
# Index-on-index inner join: keeps only fixtures whose index entry exists in both
# sources. Both frames carry a `game_id` column, so pandas' default suffixes apply
# (`game_id_x` from WhoScored, `game_id_y` from FBref).
master_schedule = pd.merge(
    epl_schedule,
    fbref_schedule[['game_id']],
    how='inner',
    left_index=True,
    right_index=True,
)

In [None]:
# Distinct home-team names as FBref spells them.
fbref_schedule["home_team"].unique()

In [None]:
# Distinct home-team names as WhoScored spells them.
epl_schedule["home_team"].unique()

In [None]:
def upload_csv(frame, key):
    """Serialise ``frame`` to CSV (without the index) and store it in S3.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table to upload.
    key : str
        Object key inside ``bucket``.
    """
    s3.put_object(Bucket=bucket, Key=key, Body=frame.to_csv(index=False))


# Only the first two schedule rows are processed (presumably a trial run; widen the
# slice to upload the full season).
for match_index, match in epl_schedule.iloc[:2].iterrows():
    # The last index level is the game label; spaces become underscores so the
    # label can be embedded in the object key.
    game_label = match_index[-1]
    print(game_label)
    match_file_name = game_label.replace(' ', '_')
    match_id = int(match.game_id)

    event_data = ws.read_events(match_id=match_id, output_fmt="spadl")
    logging.info("SPADL Data Loaded")

    upload_csv(event_data, f'{league}/{season}/events/{match_file_name}_SPADL.csv')
    logging.info("SPADL Data Into S3")

    missing_players = ws.read_missing_players(match_id=match_id)
    logging.info("Missing Player Data Loaded")

    # NOTE: this prefix used to be misspelled 'mising_players'. Objects already
    # uploaded under the old prefix are not moved by this cell.
    upload_csv(
        missing_players,
        f'{league}/{season}/missing_players/{match_file_name}_Missing_Players.csv',
    )
    logging.info("Missing Player Data Into S3")

    


    

In [None]:
# Load the games table from the MatchHistory reader. The name suggests it is used
# for betting odds; confirm which columns are actually needed.
odds = mh.read_games()

In [None]:
# Distinct home-team names as they appear in the MatchHistory games table.
odds["home_team"].unique()

In [None]:
# Load SPADL events for the first scheduled match as a sample.
# `.iloc[0]` selects by position explicitly: the schedule is label-indexed, and a
# bare `[0]` on such a Series relies on a positional fallback deprecated in pandas 2.1.
test_match = ws.read_events(
    match_id=int(epl_schedule.game_id.iloc[0]),
    output_fmt="spadl",
)

In [None]:
# Load the missing-players table for the first scheduled match as a sample.
# `.iloc[0]` selects by position explicitly instead of relying on the deprecated
# integer fallback of `Series[0]` on a label-indexed Series.
missing_players = ws.read_missing_players(match_id=int(epl_schedule.game_id.iloc[0]))

In [None]:
# Preview the first five rows of the missing-players sample.
missing_players.head(n=5)

In [None]:
# Preview the first five rows of the SPADL event sample.
test_match.head(n=5)

In [None]:
# Sanity check: the first game_id converts to a built-in int.
# `.iloc[0]` is used because `Series[0]` on a label-indexed Series depends on a
# positional fallback that pandas has deprecated.
type(int(epl_schedule.game_id.iloc[0]))

In [None]:
def lamb_func(x):
    """Return ``x`` with every space replaced by an underscore."""
    return x.replace(' ', '_')


# `set_levels` expects the level's unique labels, not one value per row, so the
# mapping is applied to `index.levels[2]`. The existing codes then keep pointing at
# the right (renamed) label regardless of row order or unused labels.
new_level_values = epl_schedule.index.levels[2].map(lamb_func)
epl_schedule.index = epl_schedule.index.set_levels(new_level_values, level=2)