## Parameters and Imports

In [None]:
import soccerdata as sd
import pandas as pd
from tqdm import tqdm
pd.set_option('display.max_columns', None)
import logging

In [None]:
import boto3
from dotenv import load_dotenv
import os
from io import StringIO

# Load variables from a .env file (if present) before reading the AWS settings
# from the process environment.
load_dotenv()
aws_access_key = os.environ.get('AWS_ACCESS_KEY')
aws_secret_access = os.environ.get('AWS_SECRET_ACCESS')
aws_region = os.environ.get('AWS_REGION')

# S3 client shared by every upload in this notebook.
s3 = boto3.client(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_access,
    region_name=aws_region,
)

# Run parameters: destination bucket plus the league/season handed to the
# soccerdata scrapers and used as the S3 key prefix.
# NOTE(review): 2223 presumably denotes the 2022/23 season - confirm.
bucket = 'footballbets'
league = "ENG-Premier League"
season = 2223

## Join Schedules

In [None]:
# Every scraper is scoped to the same league/season pair.
scraper_scope = {'leagues': league, 'seasons': season}

ws = sd.WhoScored(**scraper_scope)
fbref = sd.FBref(**scraper_scope)
mh = sd.MatchHistory(**scraper_scope)
# Understat is the only source created with its cache disabled.
understat = sd.Understat(**scraper_scope, no_cache=True)

In [None]:
# Fetch the fixture list from each provider; these frames are joined on their
# index in the next cell to map one match to each provider's game id.
epl_schedule = ws.read_schedule()
fbref_schedule = fbref.read_schedule()
# NOTE(review): `understat` was constructed with no_cache=True, yet this call
# asks for force_cache=True - confirm which setting actually takes effect.
understat_schedule = understat.read_schedule(force_cache=True)

In [None]:
# Give each provider's match id its own column name *before* joining, so the
# result never depends on pandas' automatic "_x"/"_y" collision suffixes.
fbref_ids = fbref_schedule[['game_id']].rename(columns={'game_id': 'fbref_game_id'})
understat_ids = understat_schedule[['game_id']].rename(columns={'game_id': 'und_game_id'})

master_schedule = (
    epl_schedule.rename(columns={'game_id': 'ws_game_id'})
    # inner: keep only matches present in both the WhoScored and FBref schedules.
    .merge(fbref_ids, left_index=True, right_index=True, how='inner')
    # left: keep matches without an Understat entry; und_game_id is NaN there.
    .merge(understat_ids, left_index=True, right_index=True, how='left')
)

## Master Loop

In [None]:
def _upload_csv(frame, key):
    """Serialise ``frame`` to CSV text and store it under ``key`` in the S3 bucket.

    NOTE(review): ``index=False`` omits the DataFrame index from the file. If
    the soccerdata frames keep identifiers (e.g. game/team/player) in their
    index, those values are not written - confirm this is intended.
    """
    s3.put_object(Bucket=bucket, Key=key, Body=frame.to_csv(index=False))


# Every object is stored under "<league>/<season>/...".
s3_prefix = f'{league}/{season}'

# --- Season-level tables: uploaded once, not per match ----------------------
team_match_data = understat.read_team_match_stats(force_cache=True)
logging.info("Team Data Loaded")
_upload_csv(team_match_data, f'{s3_prefix}/team_match_stats/Team_Stats.csv')
logging.info("Team Match Stats Into S3")

odds_data = mh.read_games()
logging.info("Odds Data Loaded")
_upload_csv(odds_data, f'{s3_prefix}/odds_data/Odds.csv')
logging.info("Odds Data Into S3")

# --- Per-match tables -------------------------------------------------------
# NOTE(review): .iloc[:2] restricts the run to the first two fixtures -
# presumably a debugging limit; remove the slice to process the whole schedule.
for game_key, match in master_schedule.iloc[:2].iterrows():
    # The last element of the index key is used as the match label and,
    # with spaces replaced, as the file-name stem.
    game_name = game_key[-1]
    print(game_name)
    match_file_name = game_name.replace(' ', '_')
    ws_match_id = int(match.ws_game_id)

    event_data = ws.read_events(match_id=ws_match_id, output_fmt="spadl")
    logging.info("SPADL Data Loaded")
    _upload_csv(event_data, f'{s3_prefix}/events/{match_file_name}_SPADL.csv')
    logging.info("SPADL Data Into S3")

    missing_players = ws.read_missing_players(match_id=ws_match_id)
    logging.info("Missing Player Data Loaded")
    _upload_csv(
        missing_players,
        f'{s3_prefix}/missing_players/{match_file_name}_Missing_Players.csv',
    )
    logging.info("Missing Player Data Into S3")

    # The Understat id comes from a left merge, so it is NaN for matches that
    # Understat does not list; int(NaN) would abort the whole run.
    if pd.isna(match.und_game_id):
        logging.warning(
            "No Understat game id for %s; skipping player match stats", game_name
        )
    else:
        player_match_data = understat.read_player_match_stats(
            match_id=int(match.und_game_id)
        )
        logging.info("Player Data Loaded")
        _upload_csv(
            player_match_data,
            f'{s3_prefix}/player_match_stats/{match_file_name}_Player_Stats.csv',
        )
        logging.info("Player Data Into S3")

    lineups = fbref.read_lineup(match_id=match.fbref_game_id)
    logging.info("Lineup Data Loaded")
    _upload_csv(lineups, f'{s3_prefix}/lineups/{match_file_name}_Lineups.csv')
    logging.info("Lineup Data Into S3")
