In [1]:
import bs4
import pandas as pd
import re
import requests

from typing import Any

In [2]:
res = requests.get("https://www.flashscore.mobi/match/f3eqDO5s")

In [3]:
soup = bs4.BeautifulSoup(res.text)

In [4]:
teams = [t.strip().lower() for t in soup.find("h3").text.split('-')]
assert len(teams) == 2

In [5]:
teams

['germany', 'greece']

### Datetime

In [6]:
pd.to_datetime(soup.find_all(class_="detail")[2].text, dayfirst=True)

Timestamp('2024-06-07 20:45:00')

### Summary

In [7]:
team_pattern = re.compile(r"\[(\w+)\]")
assistant_pattern = re.compile(r"\(([\w\.\s]+)\)")

In [8]:
def parse_summary(teams: tuple[str, str], summary: bs4.BeautifulSoup) -> tuple[dict[str, Any]]:
    first_team = {
        'goals': [],
        'substitutions': [],
        'yellow_cards': [],
        'red_cards': [],
    }
    second_team = {
        'goals': [],
        'substitutions': [],
        'yellow_cards': [],
        'red_cards': [],
    }

    for incident in summary.find_all(class_="incident soccer"):
        time = int(incident.find(class_="time").text.removesuffix("'"))
        text = incident.find_all(string=True, recursive=False)[-1]
        team = team_pattern.findall(text)
        assert team
        team = team[0].strip().lower()
        if teams[0].startswith(team):
            current_team = first_team
        elif teams[1].startswith(team):
            current_team = second_team
        else:
            raise RuntimeError(f"Team {team!r} not foung among {first_team!r} and {second_team!r}")

        if incident.find(class_="i-field icon ball"):
            scorer = assistant_pattern.sub('', team_pattern.sub('', text)).strip().lower()
            if assistant := assistant_pattern.findall(text):
                assistant = assistant[0].strip().lower()
            else:
                assistant = None
            current_team['goals'].append((time, scorer, assistant))
        elif incident.find(class_="i-field icon substitution"):
            sub_in = incident.find_all(string=True, recursive=False)[0].strip().lower()
            sub_out = incident.find(class_="substitution-out").text.removeprefix('(').removesuffix(')').strip().lower()
            current_team['substitutions'].append((time, sub_in, sub_out))
        elif incident.find(class_="i-field icon y-card"):
            player = team_pattern.sub('', text).strip().lower()
            current_team['yellow_cards'].append((time, player))
        elif incident.find(class_="i-field icon r-card"):
            player = team_pattern.sub('', text).strip().lower()
            current_team['red_cards'].append((time, player))

    return first_team, second_team

In [9]:
summary = soup.find(id="detail-tab-content")
incidents = parse_summary(teams, summary)

In [10]:
incidents

({'goals': [(56, 'havertz k.', 'sane l.'), (89, 'gross p. ()', None)],
  'substitutions': [(46, 'raum d.', 'mittelstadt m.'),
   (46, 'sane l.', 'wirtz f.'),
   (68, 'fullkrug n.', 'gundogan i.'),
   (68, 'henrichs b.', 'kimmich j.'),
   (68, 'gross p.', 'andrich r.'),
   (69, 'schlotterbeck n.', 'rudiger a.')],
  'yellow_cards': [],
  'red_cards': []},
 {'goals': [(33, 'masouras g.', None)],
  'substitutions': [(10, 'pavlidis v.', 'ioannidis f.'),
   (68, 'douvikas a.', 'masouras g.'),
   (68, 'bouchalakis a.', 'bakasetas a.'),
   (78, 'athanasiadis g.', 'vlachodimos o.'),
   (78, 'kourbelis d.', 'mantalos p.')],
  'yellow_cards': [(80, 'giannoulis d.')],
  'red_cards': []})