In [1]:
import boto3
from dotenv import load_dotenv
import os
import warnings
from io import StringIO
import pandas as pd
from tqdm import tqdm
import numpy as np
import io
import requests
from datetime import datetime


import sys
# --- Notebook environment ---------------------------------------------------
# Let modules that live one directory above the notebook be imported.
sys.path.append('..')

# Switch on tqdm's pandas integration.
tqdm.pandas()

# Allow up to 500 rows and 500 columns when a DataFrame is displayed.
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 500)

# NOTE: this silences every warning category for the rest of the session.
warnings.filterwarnings('ignore')

# --- AWS access -------------------------------------------------------------
# load_dotenv() runs first, then the three settings are read from the
# environment; os.getenv yields None for any variable that is not set.
load_dotenv()
aws_access_key, aws_secret_access, aws_region = (
    os.getenv(variable_name)
    for variable_name in ('AWS_ACCESS_KEY', 'AWS_SECRET_ACCESS', 'AWS_REGION')
)

s3 = boto3.client(
    's3',
    aws_access_key_id=aws_access_key,
    aws_secret_access_key=aws_secret_access,
    region_name=aws_region,
)

# --- Data selection ---------------------------------------------------------
# S3 bucket, league folder and season code used by the cells below.
bucket, league, season = 'footballbets', "ENG-Premier League", 2223

In [2]:
# Make the directory two levels above the working directory importable so the
# `_config` module can be loaded from it.
current_directory = os.getcwd()
one_level_up = os.path.dirname(current_directory)
parent_directory = os.path.dirname(one_level_up)  # i.e. the grandparent of the cwd
sys.path.append(parent_directory)

# This import has to come after the sys.path change above.
import _config

# Replacement mapping applied to team-name columns further down.
TEAMNAME_REPLACEMENTS = _config.TEAMNAME_REPLACEMENTS

## Rest: merge European fixtures into the league schedule

In [3]:
# Fetch the stored schedule CSV from S3, decode it as UTF-8 and parse it.
# The key is spelled out literally; it matches the `league` / `season`
# values set in the configuration cell.
scheduler = s3.get_object(
    Bucket=bucket,
    Key='ENG-Premier League/2223/schedule.csv',
)
schedule_csv_text = scheduler['Body'].read().decode('utf-8')
schedule = pd.read_csv(StringIO(schedule_csv_text))

In [86]:
# Download the Europa League and Champions League fixture lists for the
# configured season from fixturedownload.com and stack them into `europe`.
#
# `season` is the season code from the configuration cell (e.g. 2223); its
# first two digits give the starting year used in the download URL ("2022").
season_id = f'20{str(season)[:2]}'

# URL slug -> label written to the `league` column of the downloaded rows.
EUROPEAN_COMPETITIONS = {'europa': 'Europa League', 'champions': 'Champions League'}

# Upper bound (seconds) for each HTTP request so a stalled connection cannot
# hang the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# NOTE(review): the CSVs are requested in Eastern Standard Time. The league
# schedule's `start_time` may be expressed in a different timezone -- confirm
# before comparing or sorting kick-off times across the two sources.
european_frames = []

# The loop variable is deliberately not called `league`: that name holds the
# configured league ("ENG-Premier League") and must survive this cell.
for competition, competition_label in EUROPEAN_COMPETITIONS.items():

    headers = {
        # NOTE(review): the standard HTTP request header is `Cookie`; this
        # `Cookies` key is kept exactly as it was and is probably not read as
        # cookies by the server. The value was copied from a browser session --
        # consider removing it if the download works without it.
        'Cookies' : '_ga_DTCKHDGKYF=GS1.1.1722868866.6.1.1722869089.0.0.0; _ga=GA1.2.1274569263.1721488882; ARRAffinity=3587c3b28f299ba120e848a3ba122803c40823fd58ac197de099244cf70e116d; ARRAffinitySameSite=3587c3b28f299ba120e848a3ba122803c40823fd58ac197de099244cf70e116d; _gid=GA1.2.1211098860.1722868867; Timezone=Eastern Standard Time',
        'Referer' : f'https://fixturedownload.com/download/csv/{competition}-league-{season_id}',
        'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:128.0) Gecko/20100101 Firefox/128.0'
    }

    response = requests.get(
        f"https://fixturedownload.com/download/{competition}-league-{season_id}-EasternStandardTime.csv",
        headers=headers,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    response.raise_for_status()

    fixtures = pd.read_csv(io.StringIO(response.text))
    fixtures['league'] = competition_label
    european_frames.append(fixtures)

# Concatenate once at the end rather than growing a DataFrame inside the loop.
# Each download keeps its own row index, as before.
europe = pd.concat(european_frames)

In [90]:
# Keep only the European fixtures that involve a club from the league
# schedule, and split the kick-off timestamp into `date` and `time` columns.
team_cols = ['Home Team', 'Away Team']

# Apply the project's team-name replacements so the names can be compared
# with the ones used in `schedule`.
europe[team_cols] = europe[team_cols].replace(TEAMNAME_REPLACEMENTS)

# Every club that appears as a home team in the league schedule.
pl_teams = list(schedule.home_team.unique())
involves_pl_team = europe['Home Team'].isin(pl_teams) | europe['Away Team'].isin(pl_teams)

# .copy() makes the filtered frame an independent object, so the column
# assignments below do not trigger pandas' chained-assignment warning (which
# the notebook's global warnings filter would otherwise hide).
europe = europe[involves_pl_team].copy()

# `Date` holds "dd/mm/YYYY HH:MM" strings. Parse the column once with an
# explicit format instead of four row-wise apply/strptime passes; the
# resulting columns still contain datetime.date / datetime.time objects.
kickoff = pd.to_datetime(europe['Date'], format='%d/%m/%Y %H:%M')
europe['date'] = kickoff.dt.date
europe['time'] = kickoff.dt.time

In [91]:
# Give the European fixtures the same identifying columns as `schedule`
# (league, season, game, start_time, home_team, away_team) and add every other
# schedule column as NaN so the two frames can be stacked.

# Use the configured season code rather than a second hard-coded copy of it.
europe['season'] = season

# Build the text columns column-wise. Unlike DataFrame.apply(axis=1), this
# also works when the filtered frame is empty. astype(str) renders each value
# the same way the previous f-strings did.
date_text = europe['date'].astype(str)
time_text = europe['time'].astype(str)
home_text = europe['Home Team'].astype(str)
away_text = europe['Away Team'].astype(str)

# e.g. "2022-09-06 <home>-<away>"
europe['game'] = date_text + ' ' + home_text + '-' + away_text
# e.g. "2022-09-06T12:45:00"
europe['start_time'] = date_text + 'T' + time_text

europe = europe.rename(columns={'Home Team':'home_team', 'Away Team':'away_team'})

cols_to_keep = ['league', 'season', 'game', 'start_time', 'home_team', 'away_team']
# Schedule columns that the European data cannot fill.
nul_cols = schedule.columns.difference(cols_to_keep)

# Drop everything that is not an identifying column, then add the remaining
# schedule columns as all-NaN placeholders.
europe = europe.drop(europe.columns.difference(cols_to_keep), axis=1)
europe[nul_cols] = np.nan

In [92]:
# Stack the league and European fixtures under a fresh 0..n-1 index, then
# order the combined rows by their `start_time` value.
combined_fixtures = pd.concat([schedule, europe], ignore_index=True)
final_sched = combined_fixtures.sort_values(by='start_time')

In [None]:
# Show only the rows whose league is not the Premier League.
final_sched.loc[final_sched['league'].ne('ENG-Premier League')]

## Distance: geodesic distance between the home and away stadiums (km)

In [4]:
# geodesic() is used further down to measure the distance between two stadiums.
from geopy.distance import geodesic
# Mapping of team name -> two-element coordinate list, taken from the `_config`
# module (the values appear to be [latitude, longitude] -- confirm in _config).
STADIUM_LOCATIONS = _config.STADIUM_LOCATIONS

In [5]:
# Echo the mapping to check which teams have coordinates available.
STADIUM_LOCATIONS

{'Tottenham': [51.60324, -0.06781],
 'Newcastle': [54.977821, -1.62936],
 'Sheffield United': [53.36983, -1.47673],
 'Manchester United': [53.461658, -2.28827],
 'Nottingham Forest': [52.93823, -1.13326],
 'Manchester City': [53.484206, -2.202812],
 'Luton': [51.884014, -0.43236],
 'Wolves': [52.590195, -2.131226],
 'Leicester': [52.620622, -1.143047],
 'Leeds': [53.777714, -1.573104],
 'Bournemouth': [50.735313, -1.839234],
 'Arsenal': [51.554291, -0.108475],
 'Aston Villa': [52.508486, -1.884946],
 'Brentford': [51.490339, -0.288307],
 'Brighton': [50.860916, -0.08428],
 'Chelsea': [51.481124, -0.190297],
 'Everton': [53.439197, -2.967017],
 'Fulham': [51.47452, -0.22061],
 'Ipswich': [52.054732, 1.144614],
 'Liverpool': [53.431267, -2.961584],
 'West Ham': [51.538636, -0.016403],
 'Burnley': [53.78847, -2.230325],
 'Huddersfield': [53.654643, -1.76894],
 'Southampton': [50.90653, -1.390823],
 'Swansea': [51.642156, -3.935119],
 'West Brom': [52.509003, -1.963299],
 'Stoke City': [52

In [7]:
def _stadium_distance_km(fixture):
    """Return the geodesic distance, in kilometres, between the stadiums of
    the home and away teams of one schedule row.

    Raises KeyError when either team has no entry in STADIUM_LOCATIONS.
    """
    home_coords = tuple(STADIUM_LOCATIONS[fixture.home_team])
    away_coords = tuple(STADIUM_LOCATIONS[fixture.away_team])
    return geodesic(home_coords, away_coords).kilometers


# One distance value per fixture, stored as a new column on `schedule`.
schedule['distance'] = schedule.apply(_stadium_distance_km, axis=1)


In [10]:
# Display the fixtures ordered from the shortest to the longest distance
# (ascending order is the sort_values default).
schedule.sort_values(by='distance')

Unnamed: 0.1,Unnamed: 0,index,league,season,game,stage_id,ws_game_id,status,start_time,home_team_id,home_team,home_yellow_cards,home_red_cards,away_team_id,away_team,away_yellow_cards,away_red_cards,has_incidents_summary,has_preview,score_changed_at,elapsed,last_scorer,is_top_match,home_team_country_code,away_team_country_code,comment_count,is_lineup_confirmed,is_stream_available,match_is_opta,home_team_country_name,away_team_country_name,date,home_score,away_score,incidents,bets,aggregate_winner_field,winner_field,period,extra_result_field,home_extratime_score,away_extratime_score,home_penalty_score,away_penalty_score,started_at_utc,first_half_ended_at_utc,second_half_started_at_utc,stage,fbref_game_id,und_game_id,distance
53,53,53,ENG-Premier League,2223,2022-09-03 Everton-Liverpool,20934,1640728,6,2022-09-03T12:30:00,31,Everton,2,0,26,Liverpool,2,0,False,True,2022-09-03 13:59:08Z,FT,0.0,True,gb-eng,gb-eng,67,True,False,False,England,England,2022-09-03 11:30:00+00:00,0,0,[],,,,7,,,,,,2022-09-03T11:30:09Z,2022-09-03T12:17:12Z,2022-09-03T12:33:10Z,,073227b6,18252,0.953571
219,219,219,ENG-Premier League,2223,2023-02-13 Liverpool-Everton,20934,1640928,6,2023-02-13T20:00:00,26,Liverpool,1,0,31,Everton,3,0,True,True,2023-02-13 21:06:48Z,FT,0.0,True,gb-eng,gb-eng,12,True,False,False,England,England,2023-02-13 20:00:00+00:00,2,0,"[{'minute': '36', 'type': 1, 'subType': 1, 'pl...",,,0.0,7,,,,,,2023-02-13T20:00:25Z,2023-02-13T20:47:21Z,2023-02-13T21:02:48Z,,a895ec23,18428,0.953571
176,176,176,ENG-Premier League,2223,2023-01-12 Fulham-Chelsea,20934,1640737,6,2023-01-12T20:00:00,170,Fulham,4,0,15,Chelsea,3,1,True,True,2023-01-12 21:32:19Z,FT,0.0,True,gb-eng,gb-eng,34,True,False,False,England,England,2023-01-12 20:00:00+00:00,2,1,"[{'minute': '25', 'type': 1, 'subType': 1, 'pl...",,,0.0,7,,,,,,2023-01-12T20:00:17Z,2023-01-12T20:48:13Z,2023-01-12T21:04:42Z,,2902a42d,18262,2.230463
199,199,199,ENG-Premier League,2223,2023-02-03 Chelsea-Fulham,20934,1640902,6,2023-02-03T20:00:00,15,Chelsea,1,0,170,Fulham,4,0,False,True,,FT,,True,gb-eng,gb-eng,42,True,False,False,England,England,2023-02-03 20:00:00+00:00,0,0,[],,,,3,,,,,,2023-02-03T20:00:33Z,2023-02-03T20:46:39Z,2023-02-03T21:02:20Z,,c3cf244b,18415,2.230463
23,23,23,ENG-Premier League,2223,2022-08-20 Fulham-Brentford,20934,1640697,6,2022-08-20T15:00:00,170,Fulham,3,0,189,Brentford,2,0,True,True,2022-08-20 16:48:33Z,FT,0.0,True,gb-eng,gb-eng,10,True,False,False,England,England,2022-08-20 14:00:00+00:00,3,2,"[{'minute': '1', 'type': 1, 'subType': 1, 'pla...",,,0.0,3,,,,,,2022-08-20T14:00:41Z,2022-08-20T14:47:43Z,2022-08-20T15:03:29Z,,a107c037,18225,5.021268
250,250,250,ENG-Premier League,2223,2023-03-06 Brentford-Fulham,20934,1640879,6,2023-03-06T20:00:00,189,Brentford,0,0,170,Fulham,3,0,True,True,2023-03-06 22:03:13Z,FT,1.0,True,gb-eng,gb-eng,5,True,False,False,England,England,2023-03-06 20:00:00+00:00,3,2,"[{'minute': '6', 'type': 1, 'subType': 1, 'pla...",,,0.0,7,,,,,,2023-03-06T20:01:13Z,2023-03-06T20:51:23Z,2023-03-06T21:07:38Z,,3dce3a24,18454,5.021268
67,67,67,ENG-Premier League,2223,2022-10-01 Arsenal-Tottenham,20934,1640754,6,2022-10-01T12:30:00,13,Arsenal,2,0,30,Tottenham,1,1,True,True,2022-10-01 13:54:06Z,FT,0.0,True,gb-eng,gb-eng,28,True,False,False,England,England,2022-10-01 11:30:00+00:00,3,1,"[{'minute': '20', 'type': 1, 'subType': 1, 'pl...",,,0.0,7,,,,,,2022-10-01T11:30:11Z,2022-10-01T12:17:22Z,2022-10-01T12:32:23Z,,aefe3b90,18282,6.132343
186,186,186,ENG-Premier League,2223,2023-01-15 Tottenham-Arsenal,20934,1640872,6,2023-01-15T16:30:00,30,Tottenham,4,0,13,Arsenal,2,0,True,True,2023-01-15 17:06:10Z,FT,1.0,True,gb-eng,gb-eng,44,True,False,False,England,England,2023-01-15 16:30:00+00:00,0,2,"[{'minute': '14', 'type': 1, 'subType': 3, 'pl...",,,1.0,7,,,,,,2023-01-15T16:30:20Z,2023-01-15T17:18:20Z,2023-01-15T17:34:27Z,,4fc8b7b2,18400,6.132343
75,75,75,ENG-Premier League,2223,2022-10-02 Manchester City-Manchester United,20934,1640761,6,2022-10-02T14:00:00,167,Manchester City,0,0,32,Manchester United,3,0,True,True,2022-10-02 15:48:38Z,FT,1.0,True,gb-eng,gb-eng,146,True,False,False,England,England,2022-10-02 13:00:00+00:00,6,3,"[{'minute': '8', 'type': 1, 'subType': 1, 'pla...",,,0.0,7,,,,,,2022-10-02T13:00:50Z,2022-10-02T13:47:52Z,2022-10-02T14:03:20Z,,886e6108,18289,6.204653
181,181,181,ENG-Premier League,2223,2023-01-14 Manchester United-Manchester City,20934,1640869,6,2023-01-14T12:30:00,32,Manchester United,3,0,167,Manchester City,0,0,True,True,2023-01-14 14:11:08Z,FT,0.0,True,gb-eng,gb-eng,83,True,False,False,England,England,2023-01-14 12:30:00+00:00,2,1,"[{'minute': '60', 'type': 1, 'subType': 1, 'pl...",,,0.0,7,,,,,,2023-01-14T12:31:01Z,2023-01-14T13:16:50Z,2023-01-14T13:33:35Z,,f0fd7541,18397,6.204653
