# Sofascore Premier League

In [1]:
import requests
import json
import random
import typing
# import datetime
from datetime import datetime

In [2]:
def make_request(url: str):
    user_agent_list = [ 
	'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', 
	'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36', 
	'Mozilla/5.0 (Macintosh; Intel Mac OS X 13_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15', 
    ]
    user_agent = None
    
    for i in range(1, 4):
        user_agent = random.choice(user_agent_list)
    
    headers = {
        'User-Agent': user_agent
    }

    return requests.get(url, headers=headers)

## Links and Its Descriptions

- 'tournament': needs tournament id, or season id.
- 'event': Needs the event id.
- 'tournament_statistics':
      - limit: response length.
      - offset: next page (e.g. 1st page 100 then second page offset is 100.)
      - order: Order by the statistic.
      - accumulation: different type of statistic.
      - group: statistic by phases (defence, attack, all).

https://www.sofascore.com/api/v1/unique-tournament/17/season/52186/statistics?limit=100&offset=600&order=-rating&accumulation=per90&group=defence

In [3]:
links = {
    'tournament': 'https://www.sofascore.com/api/v1/unique-tournament/{}',
    'tournament_seasons': 'https://www.sofascore.com/api/v1/unique-tournament/{}/seasons',
    'tournament_season_statistics': 'https://www.sofascore.com/api/v1/unique-tournament/{}/season/{}/statistics?limit={}&offset={}&order=-rating&accumulation={}&group={}',
    'tournament_events': 'https://www.sofascore.com/api/v1/unique-tournament/{}/season/{}/team-events/total',
    'team_performance_data': 'https://www.sofascore.com/api/v1/unique-tournament/17/season/52186/team/42/team-performance-graph-data',
    'event_statistic': 'https://www.sofascore.com/api/v1/event/{}/statistics',
    'event_shotmaps': 'https://www.sofascore.com/api/v1/event/{}/shotmap',
    'event_lineups': 'https://www.sofascore.com/api/v1/event/{}/lineups',
}

In [4]:
premier_league_id = 17

In [5]:
print(links['tournament'].format(premier_league_id))
print(links['tournament_seasons'].format(premier_league_id))
print(links['tournament_events'].format(premier_league_id, 52186))

https://www.sofascore.com/api/v1/unique-tournament/17
https://www.sofascore.com/api/v1/unique-tournament/17/seasons
https://www.sofascore.com/api/v1/unique-tournament/17/season/52186/team-events/total


## Extracting Tournament Details.

In [6]:
req = make_request(links['tournament'].format(premier_league_id))

In [7]:
data = req.json()['uniqueTournament']

In [8]:
'https://www.sofascore.com/api/v1/unique-tournament/17/season/52186/statistics?limit=100&offset=600&order=-rating&accumulation=per90&group=defence'

'https://www.sofascore.com/api/v1/unique-tournament/17/season/52186/statistics?limit=100&offset=600&order=-rating&accumulation=per90&group=defence'

In [9]:
tournament_data = {
    'name': data['name'],
    'slug': data['slug'],
    'sofascore_id': data['id'],
    'category': data['category']['name'],
    'category_slug': data['category']['slug'],
    'category_sofascore_id': data['category']['id'],
    # 'hasStandingGroups': data['hasStandingsGroups'],
    'hasGroups': data['hasGroups'],
    'hasPlayoffSeries': data['hasPlayoffSeries'],
    'tournament_start_timestamp': datetime.fromtimestamp(data['startDateTimestamp']),
    'tournament_end_timestamp': datetime.fromtimestamp(data['endDateTimestamp']),
}

In [10]:
tournament_data

{'name': 'Premier League',
 'slug': 'premier-league',
 'sofascore_id': 17,
 'category': 'England',
 'category_slug': 'england',
 'category_sofascore_id': 1,
 'hasGroups': False,
 'hasPlayoffSeries': False,
 'tournament_start_timestamp': datetime.datetime(2024, 8, 16, 5, 30),
 'tournament_end_timestamp': datetime.datetime(2025, 5, 25, 5, 30)}

## Getting the Tournament Seasons.

In [11]:
req = make_request(links['tournament_seasons'].format(premier_league_id))

In [12]:
seasons_data = req.json()['seasons']

In [13]:
tournament_seasons = list()
for season in seasons_data:
    tmp_data = dict()
    tmp_data['name'] = season['name']
    tmp_data['year'] = season['year']
    tmp_data['sofascore_id'] = season['id']
    tournament_seasons.append(tmp_data)

In [14]:
tournament_seasons

[{'name': 'Premier League 24/25', 'year': '24/25', 'sofascore_id': 61627},
 {'name': 'Premier League 23/24', 'year': '23/24', 'sofascore_id': 52186},
 {'name': 'Premier League 22/23', 'year': '22/23', 'sofascore_id': 41886},
 {'name': 'Premier League 21/22', 'year': '21/22', 'sofascore_id': 37036},
 {'name': 'Premier League 20/21', 'year': '20/21', 'sofascore_id': 29415},
 {'name': 'Premier League 19/20', 'year': '19/20', 'sofascore_id': 23776},
 {'name': 'Premier League 18/19', 'year': '18/19', 'sofascore_id': 17359},
 {'name': 'Premier League 17/18', 'year': '17/18', 'sofascore_id': 13380},
 {'name': 'Premier League 16/17', 'year': '16/17', 'sofascore_id': 11733},
 {'name': 'Premier League 15/16', 'year': '15/16', 'sofascore_id': 10356},
 {'name': 'Premier League 14/15', 'year': '14/15', 'sofascore_id': 8186},
 {'name': 'Premier League 13/14', 'year': '13/14', 'sofascore_id': 6311},
 {'name': 'Premier League 12/13', 'year': '12/13', 'sofascore_id': 4710},
 {'name': 'Premier League 11

## Getting the Tournament Seasons.

In [15]:
req = make_request(links['tournament_events'].format(premier_league_id, 61627))

In [19]:
req.json()['tournamentTeamEvents']['1'].values()

dict_values([[{'tournament': {'name': 'Premier League', 'slug': 'premier-league', 'category': {'name': 'England', 'slug': 'england', 'sport': {'name': 'Football', 'slug': 'football', 'id': 1}, 'id': 1, 'flag': 'england', 'alpha2': 'EN'}, 'uniqueTournament': {'name': 'Premier League', 'slug': 'premier-league', 'primaryColorHex': '#3c1c5a', 'secondaryColorHex': '#f80158', 'category': {'name': 'England', 'slug': 'england', 'sport': {'name': 'Football', 'slug': 'football', 'id': 1}, 'id': 1, 'flag': 'england', 'alpha2': 'EN'}, 'userCount': 1847376, 'id': 17, 'displayInverseHomeAwayTeams': False}, 'priority': 608, 'isLive': False, 'id': 1}, 'customId': 'Tsxb', 'status': {'code': 100, 'description': 'Ended', 'type': 'finished'}, 'winnerCode': 2, 'homeTeam': {'name': 'Luton Town', 'slug': 'luton-town', 'shortName': 'Luton', 'gender': 'M', 'sport': {'name': 'Football', 'slug': 'football', 'id': 1}, 'userCount': 173567, 'nameCode': 'LUT', 'disabled': False, 'national': False, 'type': 0, 'id': 7

In [17]:
data = list(req.json()['tournamentTeamEvents']["1"].values())

In [19]:
match_data = [match for matchweek in data for match in matchweek]

In [21]:
match_list = []
for match in match_data:
    tmp_data = {}
    tmp_data['tournament'] = {
        'name': match['tournament']['uniqueTournament']['name'],
        'slug': match['tournament']['uniqueTournament']['slug'],
        'id': match['tournament']['uniqueTournament']['id'],
    }
    tmp_data['league_season'] = { 
        'year': '23/24',
        'sofascore_id': 52186
    }
    tmp_data['match_sofascore_id'] = match['id']
    tmp_data['homeTeam'] = { 
        'name': match['homeTeam']['name'],
        'slug': match['homeTeam']['slug'],
        'nameCode': match['homeTeam']['nameCode'],
        'sofascore_id': match['homeTeam']['id'],
    }
    tmp_data['status'] = {
        'description': match['status']['description'],
        'type': match['status']['type']
    }
    tmp_data['awayTeam'] = {
        'name': match['awayTeam']['name'],
        'slug': match['awayTeam']['slug'],
        'nameCode': match['awayTeam']['nameCode'],
        'sofascore_id': match['awayTeam']['id'],
    }
    tmp_data['homeScore'] = {
        'current': match['homeScore']['current'],
        'period1': match['homeScore']['period1'],
        'period2': match['homeScore']['period2'],
        'normaltime': match['homeScore']['normaltime'],
    }
    tmp_data['awayScore'] = {
        'current': match['awayScore']['current'],
        'period1': match['awayScore']['period1'],
        'period2': match['awayScore']['period2'],
        'normaltime': match['awayScore']['normaltime'],
    }
    tmp_data['match_slug'] = match['slug']
    tmp_data['startTimestamp'] = datetime.fromtimestamp(match['startTimestamp'])
    if 'endTimestamp' in match.keys():
        tmp_data['endTimeStamp'] = datetime.fromtimestamp(match['endTimestamp'])
    match_list.append(tmp_data)

In [22]:
match_list

[{'tournament': {'name': 'Premier League', 'slug': 'premier-league', 'id': 17},
  'league_season': {'year': '23/24', 'sofascore_id': 52186},
  'match_sofascore_id': 11352568,
  'homeTeam': {'name': 'Luton Town',
   'slug': 'luton-town',
   'nameCode': 'LUT',
   'sofascore_id': 72},
  'status': {'description': 'Ended', 'type': 'finished'},
  'awayTeam': {'name': 'Fulham',
   'slug': 'fulham',
   'nameCode': 'FUL',
   'sofascore_id': 43},
  'homeScore': {'current': 2, 'period1': 1, 'period2': 1, 'normaltime': 2},
  'awayScore': {'current': 4, 'period1': 2, 'period2': 2, 'normaltime': 4},
  'match_slug': 'luton-town-fulham',
  'startTimestamp': datetime.datetime(2024, 5, 19, 20, 30)},
 {'tournament': {'name': 'Premier League', 'slug': 'premier-league', 'id': 17},
  'league_season': {'year': '23/24', 'sofascore_id': 52186},
  'match_sofascore_id': 11352513,
  'homeTeam': {'name': 'West Ham United',
   'slug': 'west-ham-united',
   'nameCode': 'WHU',
   'sofascore_id': 37},
  'status': {'de

In [23]:
def datetime_serializer(obj):
    if isinstance(obj, datetime):
        return obj.isoformat()
    raise TypeError("Type not serializable")

In [7]:
team = {'tournament_id': 17, 'season_id': 61627, 'sofascore_id': 12436971, 'homeTeam': {'name': 'Manchester City', 'slug': 'manchester-city', 'nameCode': 'MCI', 'sofascore_id': 17}, 'awayTeam': {'name': 'Arsenal', 'slug': 'arsenal', 'nameCode': 'ARS', 'sofascore_id': 42}, 'home_score_current': (2,), 'home_score_period1': (1,), 'home_score_period2': (1,), 'home_score_normaltime': (2,), 'away_score_current': (2,), 'away_score_period1': (2,), 'away_score_period2': (0,), 'away_score_normaltime': (2,), 'match_slug': 'arsenal-manchester-city', 'status_code': (100,), 'status_description': ('Ended',), 'status_type': ('finished',), 'has_xg': True, 'startTimestamp': datetime.datetime(2024, 9, 22, 21, 0)}

In [11]:
if type(team['home_score_period1']) == tuple:
    print("It's tuple")
    print(team['home_score_period1'][0])
else:
    print("It's int")
    print(team['home_score_period1'])

It's tuple
1


In [13]:
from typing import Tuple

In [14]:
aa: Tuple[int, ] = (1, )

In [15]:
type(aa)

tuple