# GET Matchinfo

## Resources

https://github.com/Pitsillides91/python_2025/blob/main/8.Python_XGBoost_PlayersMarket/Player_Statistics_Analysis.ipynb

## Packages

In [1]:
# Packages
import os
import pandas as pd
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np
import plotly.express as px
from datetime import datetime

import requests
import time
from rapidfuzz import fuzz


## Data parser

In [1360]:

# =====================
# TIDSHANTERING
# =====================
def parse_minute(txt):
    if not txt:
        return None

    txt = txt.replace("'", "").strip()

    if "+" in txt:
        base, extra = txt.split("+")
        return int(base) + int(extra)

    return int(txt)


class MatchTeamParser:

    # =====================
    # MAIN
    # =====================
    def parse_team_info(self, html_content):

        soup = BeautifulSoup(html_content, 'html.parser')

        match_data = {
            'teams': {},
            'lineups': {},
            'substitutes': {},
            'goals': [],
            'match_length': None,
            'match_start_datetime': None   # ✅ NY
        }

        # ✅ Matchstart
        match_data["match_start_datetime"] = self.parse_match_start_datetime(soup)

        # =====================
        # Matchlängd
        # =====================
        match_data["match_length"] = self.calculate_match_length(soup)

        # =====================
        # Assists & mål
        # =====================
        assists = self.parse_assists_from_events(soup)
        goals = self.parse_goals_from_events(soup)
        match_data["goals"] = goals

        # Lookup mål
        goal_map = {}
        for g in goals:
            fid = g["scorer_fplguid"]
            if fid:
                goal_map.setdefault(fid, []).append(g["minute_int"])

        # =====================
        # Lineups
        # =====================
        team_sections = soup.find_all('section', class_='formation-list')

        for idx, section in enumerate(team_sections):
            team_key = 'home' if idx == 0 else 'away'

            team_name = section.find('h3', class_='formation-list__team')
            if team_name:
                match_data['teams'][team_key] = team_name.text.strip()

            starters, subs = [], []

            lists = section.find_all('ul', class_='formation-list__items')

            for ul in lists:
                prev = ul.find_previous_sibling('h4', class_='formation-list__section-headline')
                is_sub_list = prev and 'Ersättare' in prev.text

                for li in ul.find_all('li', class_='formation-list__item'):
                    div = li.find('div', class_='formation-list-player')
                    if not div:
                        continue

                    p = {}

                    # Nummer
                    num = div.find('span', class_='formation-list-player__number')
                    p['number'] = num.text.strip() if num else None

                    # Namn + fplguid
                    link = div.find('a', class_='formation-list-player__link')
                    if link:
                        p['name'] = link.text.strip()
                        href = link.get('href','')
                        p['fplguid'] = href.split('fplguid=')[-1] if 'fplguid=' in href else None

                    # Init
                    p.update({
                        'goals': 0,
                        'goal_minutes': [],
                        'assists': 0,
                        'assists_minutes': [],
                        'yellow_card': False,
                        'red_card': False,
                        'was_substituted': False,
                        'sub_direction': None,
                        'sub_minute': None,
                        'sub_minute_int': None
                    })

                    # EVENTS
                    events = div.find('span', class_='formation-list-player__events')
                    if events:

                        if events.find('use', {'xlink:href': lambda x: x and 'icon-card-yellow' in x}):
                            p['yellow_card'] = True

                        if events.find('use', {'xlink:href': lambda x: x and 'icon-card-red' in x}):
                            p['red_card'] = True

                        # Byten
                        sub_wrap = events.find('span', class_='formation-list-player__substitution-wrapper')
                        if sub_wrap:
                            icon = sub_wrap.find('use')
                            href = icon.get('xlink:href','') if icon else ''

                            txt = sub_wrap.find('span', class_='formation-list-player__substitution-text')
                            minute = txt.text.strip() if txt else None

                            if 'substitution-out' in href:
                                p['was_substituted'] = True
                                p['sub_direction'] = 'out'
                                p['sub_minute'] = minute
                                p['sub_minute_int'] = parse_minute(minute)

                            elif 'substitution-in' in href:
                                p['was_substituted'] = True
                                p['sub_direction'] = 'in'
                                p['sub_minute'] = minute
                                p['sub_minute_int'] = parse_minute(minute)

                    # Koppla assists
                    if p.get('fplguid') in assists:
                        p['assists'] = len(assists[p['fplguid']])
                        p['assists_minutes'] = assists[p['fplguid']]

                    # Koppla mål
                    if p.get('fplguid') in goal_map:
                        p['goals'] = len(goal_map[p['fplguid']])
                        p['goal_minutes'] = goal_map[p['fplguid']]

                    subs.append(p) if is_sub_list else starters.append(p)

            match_data['lineups'][team_key] = starters
            match_data['substitutes'][team_key] = subs

        return match_data

    # =====================
    # DataFrame
    # =====================
    def create_lineup_dataframe(self, match_data):

        rows = []

        for side in ['home','away']:
            team = match_data['teams'].get(side)

            for p in match_data['lineups'][side]:
                rows.append(self._row(team,p,"Starting"))

            for p in match_data['substitutes'][side]:
                rows.append(self._row(team,p,"Sub"))

        df = pd.DataFrame(rows)

        # ✅ Lägg till matchinfo
        df["match_length"] = match_data.get("match_length")
        df["match_start_datetime"] = match_data.get("match_start_datetime")

        return df

    def _row(self, team, p, role):
        return {
            'team': team,
            'name': p.get('name'),
            'fplguid': p.get('fplguid'),
            'player_type': role,
            'was_substituted': p.get('was_substituted'),
            'sub_direction': p.get('sub_direction'),
            'sub_minute': p.get('sub_minute'),
            'sub_minute_int': p.get('sub_minute_int'),
            'goals': p.get('goals'),
            'goal_minutes': p.get('goal_minutes'),
            'assists': p.get('assists'),
            'assists_minutes': p.get('assists_minutes'),
            'yellow_card': p.get('yellow_card'),
            'red_card': p.get('red_card')
        }

    # =====================
    # Assists
    # =====================
    def parse_assists_from_events(self, soup):

        assists = {}
        events = soup.find_all('li', class_='match-events__item')

        for e in events:
            sec = e.find('a', class_='match-event__info-secondary-link')
            if sec and 'Assist:' in sec.text:
                href = sec.get('href','')
                if 'fplguid=' in href:
                    fid = href.split('fplguid=')[-1]
                    t = e.find('span', class_='match-event__time')
                    minute = t.text.strip() if t else None
                    assists.setdefault(fid,[]).append(parse_minute(minute))

        return assists

    # =====================
    # Mål
    # =====================
    def parse_goals_from_events(self, soup):

        goals = []
        events = soup.find_all('li', class_='match-events__item')

        for e in events:
            icon = e.find('use')
            if not icon:
                continue

            if 'icon-football' not in icon.get('xlink:href',''):
                continue

            # minut
            t = e.find('span', class_='match-event__time')
            minute_txt = t.text.strip() if t else None
            minute_int = parse_minute(minute_txt)

            info = e.find('a', class_='match-event__info-main-link')
            if not info:
                continue

            text = info.text.strip()
            score = text.split()[0]
            name = text.split(".",1)[-1].strip()

            href = info.get('href','')
            fid = href.split('fplguid=')[-1] if 'fplguid=' in href else None

            # assist
            assist_elem = e.find('a', class_='match-event__info-secondary-link')
            assist = None
            assist_fid = None

            if assist_elem and "Assist:" in assist_elem.text:
                assist = assist_elem.text.replace("Assist:","").strip()
                ah = assist_elem.get("href","")
                assist_fid = ah.split('fplguid=')[-1] if 'fplguid=' in ah else None

            # straff
            penalty = False
            pen = e.find('div', class_='match-event__info-secondary')
            if pen and "Straff" in pen.text:
                penalty = True

            goals.append({
                "minute": minute_txt,
                "minute_int": minute_int,
                "score": score,
                "scorer": name,
                "scorer_fplguid": fid,
                "assist": assist,
                "assist_fplguid": assist_fid,
                "penalty": penalty
            })

        return goals

    # =====================
    # Matchlängd
    # =====================
    def calculate_match_length(self, soup):

        minutes = []

        for t in soup.find_all('span', class_='match-event__time'):
            m = parse_minute(t.text)
            if m:
                minutes.append(m)

        if not minutes:
            return 90

        max_min = max(minutes)

        # om inga sena events -> normal match
        if max_min < 90:
            return 90

        return max_min

    # =====================
    # ✅ Matchstart datetime
    # =====================
    def parse_match_start_datetime(self, soup):

        rows = soup.find_all("div", class_="icon-text-row__content")

        for r in rows:
            title = r.find("div", class_="icon-text-row__title")
            value = r.find("div", class_="icon-text-row__value")

            if not title or not value:
                continue

            if title.text.strip() == "Datum och tid":
                dt_txt = value.text.strip()
                try:
                    return datetime.strptime(dt_txt, "%Y-%m-%d %H:%M")
                except:
                    return None
        return None


def build_match_id(filename):
    return filename.replace(".html","")


## Apply the parser

In [1366]:
# BEARBETA MATCHER

parser = MatchTeamParser()

FOLDER = "superettan-matcher/2025"

all_player_rows = []
all_goal_rows = []

for file in os.listdir(FOLDER):

    if not file.endswith(".html"):
        continue

    path = os.path.join(FOLDER, file)
    print(f"Parsar: {file}")

    match_id = build_match_id(file)

    with open(path, encoding="utf-8") as f:
        html = f.read()

    match_data = parser.parse_team_info(html)

    # ---- spelare ----
    df = parser.create_lineup_dataframe(match_data)
    df["match_id"] = match_id
    df["match_length"] = match_data["match_length"]

    all_player_rows.append(df)

    # ---- mål ----
    for g in match_data["goals"]:
        g["match_id"] = match_id
        all_goal_rows.append(g)



players_df = pd.concat(all_player_rows, ignore_index=True)
goals_df = pd.DataFrame(all_goal_rows)


Parsar: Trelleborgs FF - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: IK Brage - Östersund - Matchfakta - Svensk fotboll.html
Parsar: Kalmar FF - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: IK Brage - GIF Sundsvall - Matchfakta - Svensk fotboll.html
Parsar: Örgryte IS Fotboll - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: IK Brage - Örgryte IS Fotboll - Matchfakta - Svensk fotboll.html
Parsar: Sandvikens IF - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: IK Brage - Örebro - Matchfakta - Svensk fotboll.html
Parsar: IK Brage - IK Oddevold - Matchfakta - Svensk fotboll.html
Parsar: IK Brage - Västerås SK FK - Matchfakta - Svensk fotboll.html
Parsar: Umeå FC - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: GIF Sundsvall - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: IK Brage - Sandvikens IF - Matchfakta - Svensk fotboll.html
Parsar: Utsikten - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: Landskrona BoIS - IK Brage - Matchfakta - Svensk fotboll

In [87]:
from bs4 import BeautifulSoup
from datetime import datetime
import pandas as pd
import numpy as np

# =====================
# TIDSHANTERING
# =====================
def parse_minute(txt):
    """
    Parsar minuter från olika format:
    - "45" -> 45
    - "45+3" -> 48
    - "62:15" -> 62 (kastar bort sekunder)
    - "45+3:27" -> 48 (hanterar tilläggstid med sekunder)
    """
    if not txt:
        return None

    txt = txt.replace("'", "").strip()
    
    if not txt:
        return None

    # Hantera format "62:15" eller "45+3:27"
    # Ta bort sekunderna först
    if ":" in txt:
        txt = txt.split(":")[0]
    
    # Hantera tilläggstid "45+3"
    if "+" in txt:
        try:
            base, extra = txt.split("+")
            return int(base) + int(extra)
        except ValueError:
            return None

    try:
        return int(txt)
    except ValueError:
        return None


class MatchTeamParser:

    # =====================
    # MAIN
    # =====================
    def parse_team_info(self, html_content):

        soup = BeautifulSoup(html_content, 'html.parser')

        match_data = {
            'teams': {},
            'lineups': {},
            'substitutes': {},
            'goals': [],
            'match_length': None,
            'match_start_datetime': None
        }

        # ✅ Matchstart
        match_data["match_start_datetime"] = self.parse_match_start_datetime(soup)

        # =====================
        # Matchlängd
        # =====================
        match_data["match_length"] = self.calculate_match_length(soup)

        # =====================
        # Assists, mål & kort
        # =====================
        assists = self.parse_assists_from_events(soup)
        goals = self.parse_goals_from_events(soup)
        cards = self.parse_cards_from_events(soup)
        match_data["goals"] = goals

        # Lookup mål
        goal_map = {}
        for g in goals:
            fid = g["scorer_fplguid"]
            if fid:
                goal_map.setdefault(fid, []).append(g["minute_int"])

        # =====================
        # Lineups
        # =====================
        team_sections = soup.find_all('section', class_='formation-list')

        for idx, section in enumerate(team_sections):
            team_key = 'home' if idx == 0 else 'away'

            team_name = section.find('h3', class_='formation-list__team')
            if team_name:
                match_data['teams'][team_key] = team_name.text.strip()

            starters, subs = [], []

            lists = section.find_all('ul', class_='formation-list__items')

            for ul in lists:
                prev = ul.find_previous_sibling('h4', class_='formation-list__section-headline')
                is_sub_list = prev and 'Ersättare' in prev.text

                for li in ul.find_all('li', class_='formation-list__item'):
                    div = li.find('div', class_='formation-list-player')
                    if not div:
                        continue

                    p = {}

                    # Nummer
                    num = div.find('span', class_='formation-list-player__number')
                    p['number'] = num.text.strip() if num else None

                    # Namn + fplguid
                    link = div.find('a', class_='formation-list-player__link')
                    if link:
                        p['name'] = link.text.strip()
                        href = link.get('href','')
                        p['fplguid'] = href.split('fplguid=')[-1] if 'fplguid=' in href else None

                    # Init
                    p.update({
                        'goals': 0,
                        'goal_minutes': [],
                        'assists': 0,
                        'assists_minutes': [],
                        'yellow_card': False,
                        'red_card': False,
                        'red_card_minute': None,
                        'yellow_card_minutes': [],
                        'was_substituted': False,
                        'sub_direction': None,
                        'sub_minute': None,
                        'sub_minute_int': None,
                        'sub_in_minute': None,
                        'sub_in_minute_int': None,
                        'sub_out_minute': None,
                        'sub_out_minute_int': None
                    })

                    # EVENTS
                    events = div.find('span', class_='formation-list-player__events')
                    if events:

                        if events.find('use', {'xlink:href': lambda x: x and 'icon-card-yellow' in x}):
                            p['yellow_card'] = True

                        if events.find('use', {'xlink:href': lambda x: x and 'icon-card-red' in x}):
                            p['red_card'] = True

                        # ✅ Byten - hitta ALLA wrappers (spelare kan bytas både in OCH ut)
                        sub_wraps = events.find_all('span', class_='formation-list-player__substitution-wrapper')
                        
                        for sub_wrap in sub_wraps:
                            icon = sub_wrap.find('use')
                            href = icon.get('xlink:href','') if icon else ''

                            txt = sub_wrap.find('span', class_='formation-list-player__substitution-text')
                            minute = txt.text.strip() if txt else None

                            if minute:
                                minute_int = parse_minute(minute)
                                
                                if 'substitution-out' in href:
                                    p['was_substituted'] = True
                                    p['sub_out_minute'] = minute
                                    p['sub_out_minute_int'] = minute_int

                                elif 'substitution-in' in href:
                                    p['was_substituted'] = True
                                    p['sub_in_minute'] = minute
                                    p['sub_in_minute_int'] = minute_int

                        # Sätt sub_direction baserat på vad vi har
                        if p['sub_in_minute_int'] is not None and p['sub_out_minute_int'] is not None:
                            p['sub_direction'] = 'both'  # Byttes både in och ut
                            p['sub_minute'] = p['sub_out_minute']  # Använd ut-tiden
                            p['sub_minute_int'] = p['sub_out_minute_int']
                        elif p['sub_out_minute_int'] is not None:
                            p['sub_direction'] = 'out'
                            p['sub_minute'] = p['sub_out_minute']
                            p['sub_minute_int'] = p['sub_out_minute_int']
                        elif p['sub_in_minute_int'] is not None:
                            p['sub_direction'] = 'in'
                            p['sub_minute'] = p['sub_in_minute']
                            p['sub_minute_int'] = p['sub_in_minute_int']

                    # Koppla assists
                    if p.get('fplguid') in assists:
                        p['assists'] = len(assists[p['fplguid']])
                        p['assists_minutes'] = assists[p['fplguid']]

                    # Koppla mål
                    if p.get('fplguid') in goal_map:
                        p['goals'] = len(goal_map[p['fplguid']])
                        p['goal_minutes'] = goal_map[p['fplguid']]
                    
                    # ✅ Koppla kort-minuter från events
                    if p.get('fplguid') in cards:
                        card_data = cards[p['fplguid']]
                        p['red_card_minute'] = card_data.get('red_card_minute')
                        p['yellow_card_minutes'] = card_data.get('yellow_cards', [])

                    subs.append(p) if is_sub_list else starters.append(p)

            match_data['lineups'][team_key] = starters
            match_data['substitutes'][team_key] = subs

        return match_data

    # =====================
    # DataFrame
    # =====================
    def create_lineup_dataframe(self, match_data):

        rows = []

        for side in ['home','away']:
            team = match_data['teams'].get(side)

            for p in match_data['lineups'][side]:
                rows.append(self._row(team, p, "Starting"))

            for p in match_data['substitutes'][side]:
                rows.append(self._row(team, p, "Sub"))

        df = pd.DataFrame(rows)

        # ✅ Lägg till matchinfo
        df["match_length"] = match_data.get("match_length")
        df["match_start_datetime"] = match_data.get("match_start_datetime")

        return df

    def _row(self, team, p, role):
        return {
            'team': team,
            'name': p.get('name'),
            'fplguid': p.get('fplguid'),
            'player_type': role,
            'was_substituted': p.get('was_substituted'),
            'sub_direction': p.get('sub_direction'),
            'sub_minute': p.get('sub_minute'),
            'sub_minute_int': p.get('sub_minute_int'),
            'sub_in_minute': p.get('sub_in_minute'),
            'sub_in_minute_int': p.get('sub_in_minute_int'),
            'sub_out_minute': p.get('sub_out_minute'),
            'sub_out_minute_int': p.get('sub_out_minute_int'),
            'goals': p.get('goals'),
            'goal_minutes': p.get('goal_minutes'),
            'assists': p.get('assists'),
            'assists_minutes': p.get('assists_minutes'),
            'yellow_card': p.get('yellow_card'),
            'red_card': p.get('red_card'),
            'red_card_minute': p.get('red_card_minute'),
            'yellow_card_minutes': p.get('yellow_card_minutes', [])
        }

    # =====================
    # Röda/Gula kort från events
    # =====================
    def parse_cards_from_events(self, soup):
        """
        Parsar röda och gula kort från match events med minuter
        Returns: dict med fplguid -> {'red_card_minute': int, 'yellow_cards': [int]}
        """
        cards = {}
        events = soup.find_all('li', class_='match-events__item')

        for e in events:
            icon = e.find('use')
            if not icon:
                continue

            href = icon.get('xlink:href', '')
            
            # Hitta spelaren
            player_link = e.find('a', class_='match-event__info-main-link')
            if not player_link:
                continue
                
            player_href = player_link.get('href', '')
            if 'fplguid=' not in player_href:
                continue
                
            fid = player_href.split('fplguid=')[-1]
            
            # Hitta minut
            t = e.find('span', class_='match-event__time')
            minute_txt = t.text.strip() if t else None
            minute_int = parse_minute(minute_txt)
            
            if minute_int is None:
                continue

            # Initiera om inte finns
            if fid not in cards:
                cards[fid] = {'red_card_minute': None, 'yellow_cards': []}
            
            # Rött kort
            if 'icon-card-red' in href:
                cards[fid]['red_card_minute'] = minute_int
            
            # Gult kort
            elif 'icon-card-yellow' in href:
                cards[fid]['yellow_cards'].append(minute_int)

        return cards

    # =====================
    # Assists
    # =====================
    def parse_assists_from_events(self, soup):

        assists = {}
        events = soup.find_all('li', class_='match-events__item')

        for e in events:
            sec = e.find('a', class_='match-event__info-secondary-link')
            if sec and 'Assist:' in sec.text:
                href = sec.get('href','')
                if 'fplguid=' in href:
                    fid = href.split('fplguid=')[-1]
                    t = e.find('span', class_='match-event__time')
                    minute = t.text.strip() if t else None
                    minute_int = parse_minute(minute)
                    if minute_int is not None:
                        assists.setdefault(fid,[]).append(minute_int)

        return assists

    # =====================
    # Mål
    # =====================
    def parse_goals_from_events(self, soup):

        goals = []
        events = soup.find_all('li', class_='match-events__item')

        for e in events:
            icon = e.find('use')
            if not icon:
                continue

            if 'icon-football' not in icon.get('xlink:href',''):
                continue

            # minut
            t = e.find('span', class_='match-event__time')
            minute_txt = t.text.strip() if t else None
            minute_int = parse_minute(minute_txt)

            info = e.find('a', class_='match-event__info-main-link')
            if not info:
                continue

            text = info.text.strip()
            score = text.split()[0]
            name = text.split(".",1)[-1].strip()

            href = info.get('href','')
            fid = href.split('fplguid=')[-1] if 'fplguid=' in href else None

            # assist
            assist_elem = e.find('a', class_='match-event__info-secondary-link')
            assist = None
            assist_fid = None

            if assist_elem and "Assist:" in assist_elem.text:
                assist = assist_elem.text.replace("Assist:","").strip()
                ah = assist_elem.get("href","")
                assist_fid = ah.split('fplguid=')[-1] if 'fplguid=' in ah else None

            # straff
            penalty = False
            pen = e.find('div', class_='match-event__info-secondary')
            if pen and "Straff" in pen.text:
                penalty = True

            goals.append({
                "minute": minute_txt,
                "minute_int": minute_int,
                "score": score,
                "scorer": name,
                "scorer_fplguid": fid,
                "assist": assist,
                "assist_fplguid": assist_fid,
                "penalty": penalty
            })

        return goals

    # =====================
    # Matchlängd
    # =====================
    def calculate_match_length(self, soup):

        minutes = []

        for t in soup.find_all('span', class_='match-event__time'):
            m = parse_minute(t.text)
            if m is not None:
                minutes.append(m)

        if not minutes:
            return 90

        max_min = max(minutes)

        # om inga sena events -> normal match
        if max_min < 90:
            return 90

        return max_min

    # =====================
    # ✅ Matchstart datetime
    # =====================
    def parse_match_start_datetime(self, soup):

        rows = soup.find_all("div", class_="icon-text-row__content")

        for r in rows:
            title = r.find("div", class_="icon-text-row__title")
            value = r.find("div", class_="icon-text-row__value")

            if not title or not value:
                continue

            if title.text.strip() == "Datum och tid":
                dt_txt = value.text.strip()
                try:
                    return datetime.strptime(dt_txt, "%Y-%m-%d %H:%M")
                except:
                    return None
        return None


def build_match_id(filename):
    return filename.replace(".html","")

In [130]:
# BEARBETA MATCHER

parser = MatchTeamParser()

FOLDER = "superettan-matcher"
SUBFOLDERS = ["2025"]

all_player_rows = []
all_goal_rows = []

for subfolder in SUBFOLDERS:
    folder_path = os.path.join(FOLDER, subfolder)
    
    # Extrahera året från sökvägen
    year = subfolder.split("/")[-1]  # "2024" eller "2025"
    
    for file in os.listdir(folder_path):
        if not file.endswith(".html"):
            continue

        path = os.path.join(folder_path, file)
        print(f"Parsar: {year}/{file}")

        # ✅ Inkludera året i match_id
        match_id = f"{year}_{build_match_id(file)}"

        with open(path, encoding="utf-8") as f:
            html = f.read()

        match_data = parser.parse_team_info(html)

        # ---- spelare ----
        df = parser.create_lineup_dataframe(match_data)

        # ✅ MODELL B HÄR
        # df = apply_model_b_minutes(df, match_data)

        df["match_id"] = match_id
        df["match_length"] = match_data["match_length"]
        df["year"] = year  # ✅ Extra kolumn för år

        all_player_rows.append(df)

        # ---- mål ----
        for g in match_data["goals"]:
            g["match_id"] = match_id
            g["year"] = year  # ✅ Extra för år
            all_goal_rows.append(g)

players_df = pd.concat(all_player_rows, ignore_index=True)
goals_df = pd.DataFrame(all_goal_rows)

Parsar: 2025/Helsingborgs IF - Utsikten - Matchfakta - Svensk fotboll.html
Parsar: 2025/Helsingborgs IF - Umeå FC - Matchfakta - Svensk fotboll.html
Parsar: 2025/Falkenbergs FF - IK Oddevold - Matchfakta - Svensk fotboll.html
Parsar: 2025/Västerås SK FK - Varbergs BoIS FC - Matchfakta - Svensk fotboll.html
Parsar: 2025/Örgryte IS Fotboll - Landskrona BoIS - Matchfakta - Svensk fotboll.html
Parsar: 2025/Trelleborgs FF - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: 2025/GIF Sundsvall - Örebro - Matchfakta - Svensk fotboll.html
Parsar: 2025/Örgryte IS Fotboll - GIF Sundsvall - Matchfakta - Svensk fotboll.html
Parsar: 2025/IK Oddevold - Umeå FC - Matchfakta - Svensk fotboll.html
Parsar: 2025/IK Brage - Östersund - Matchfakta - Svensk fotboll.html
Parsar: 2025/Utsikten - GIF Sundsvall - Matchfakta - Svensk fotboll.html
Parsar: 2025/IK Oddevold - Utsikten - Matchfakta - Svensk fotboll.html
Parsar: 2025/Kalmar FF - IK Brage - Matchfakta - Svensk fotboll.html
Parsar: 2025/Helsing

  players_df = pd.concat(all_player_rows, ignore_index=True)


In [131]:
def normalize_goal_minutes(goals_df, players_df):
    """
    Enforces Model B:
    - Goal minutes clipped to [0, match_length-1]
    """
    ml = (
        players_df[["match_id", "match_length"]]
        .drop_duplicates()
        .set_index("match_id")["match_length"]
    )

    goals_df = goals_df.copy()

    goals_df["minute_int"] = goals_df.apply(
        lambda r: min(int(r["minute_int"]), int(ml.loc[r["match_id"]]) - 1),
        axis=1
    )

    return goals_df


In [132]:
goals_df = normalize_goal_minutes(goals_df, players_df)
goals_df

Unnamed: 0,minute,minute_int,score,scorer,scorer_fplguid,assist,assist_fplguid,penalty,match_id,year
0,11',11,1-0,Wilhelm Loeper,88357468-1e4c-4ea3-ac68-f90a06aaac7c,3. Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,False,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025
1,71',71,2-0,Max Svensson,a7550484-fc21-48e4-9411-3d9555cc2d8d,8. Ervin Gigovic,b4d000d9-353b-41c1-8596-fe39f7aa92ea,False,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025
2,86',86,3-0,Baker Amer,c947e491-ee94-409e-854a-12f2483d8982,10. Max Svensson,a7550484-fc21-48e4-9411-3d9555cc2d8d,False,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025
3,38',38,1-0,Oscar Aga,ef9e5b99-5d32-4986-a986-69b019912955,9. Adam Akimey,f50dc5ad-86c3-4832-81ed-8f7a667e9342,False,2025_Helsingborgs IF - Umeå FC - Matchfakta -...,2025
4,46',46,2-0,Adam Akimey,f50dc5ad-86c3-4832-81ed-8f7a667e9342,10. Max Svensson,a7550484-fc21-48e4-9411-3d9555cc2d8d,False,2025_Helsingborgs IF - Umeå FC - Matchfakta -...,2025
...,...,...,...,...,...,...,...,...,...,...
657,82',82,1-2,Adam Engelbrektsson,dde88d7e-1e7c-413c-a392-75bedc7f13cd,16. Emir Derviškadić,8880a778-cb09-4452-ace0-ff87d94934ce,False,2025_IK Oddevold - Landskrona BoIS - Matchfakt...,2025
658,45',45,0-1,Alexander Johansson,9e0bde07-3acc-4be5-b817-02341c1d178f,8. Alexander Faltsetas,24435387-d82b-4070-948e-2235b5527fde,False,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025
659,88',88,1-1,Mikael Harbosen Haga,04bc4774-6ba5-4f29-a1d2-3333d21bf213,,,True,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025
660,90+2',92,1-2,Sebastian Lagerlund,d1ed4a8d-0c22-49f7-abd6-e1cf63c1013d,13. Malkolm Moenza,4b6e514b-2e07-4b32-9bce-2a143c6a39d2,False,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025


In [92]:
# def normalize_minute(minute_str):
#     if "+" in minute_str:
#         return int(minute_str.split("+")[0].strip())
#     return int(minute_str.split(":")[0])

# goals_df["minute_norm"] = goals_df["minute"].apply(normalize_minute)
# goals_df

In [1488]:
def qa_model_b_intervals(players_df):
    bad = players_df[
        players_df["minutes_played"].notna() &
        ((players_df["end_minute"] - players_df["start_minute"])
         != players_df["minutes_played"])
    ]

    if len(bad) > 0:
        raise ValueError("❌ Interval mismatch – Model B violated")

    print("✅ Player intervals follow Model B")

qa_model_b_intervals(players_df)


✅ Player intervals follow Model B


In [133]:
def is_on_pitch(start, end, minute):
    return start <= minute < end

In [134]:
players_df["name"] = players_df["name"].str.replace(" (K)", "")

players_df["year"] = players_df["match_start_datetime"].dt.year
players_df["month"] = players_df["match_start_datetime"].dt.month

players_df

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,assists_minutes,yellow_card,red_card,red_card_minute,yellow_card_minutes,match_length,match_start_datetime,match_id,year,month
0,Helsingborgs IF,Johan Brattberg (MV),cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,[],False,False,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,[],False,False,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,[],True,False,,[38],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,[11],False,False,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,[],False,False,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,[],False,False,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,[],False,False,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,[],False,False,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,[],False,False,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5


## Calculate stuff

In [135]:
def calculate_minutes(df):

    df = df.copy()

    start = []
    end = []

    for _, r in df.iterrows():

        length = r["match_length"]

        # start
        if r["player_type"] == "Starting":
            s = 0
        else:
            s = r["sub_minute_int"] if r["sub_direction"]=="in" else None

        # end
        if r["sub_direction"]=="out":
            e = r["sub_minute_int"]
        else:
            e = length

        start.append(s)
        end.append(e)

    df["start_minute"] = start
    df["end_minute"] = end
    df["minutes_played"] = df["end_minute"] - df["start_minute"]

    return df


players_df = calculate_minutes(players_df)
players_df

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,red_card_minute,yellow_card_minutes,match_length,match_start_datetime,match_id,year,month,start_minute,end_minute,minutes_played
0,Helsingborgs IF,Johan Brattberg (MV),cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90.0,90.0
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90.0,90.0
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,,[38],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90.0,90.0
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90.0,90.0
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90.0,90.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,80.0,96.0,16.0
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96.0,
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96.0,
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,46.0,96.0,50.0


In [9]:
def clean_minutes(df):

    df = df.copy()

    # =====================
    # START_MINUTE
    # =====================

    # Startspelare börjar på 0
    df.loc[df["player_type"]=="Starting", "start_minute"] = 0

    # Inbytta (alla med sub_in_minute_int)
    mask = df["sub_in_minute_int"].notna()
    df.loc[mask, "start_minute"] = df.loc[mask, "sub_in_minute_int"]

    # Subs som aldrig kom in
    mask = (df["player_type"]=="Sub") & (df["sub_in_minute_int"].isna())
    df.loc[mask, "start_minute"] = np.nan


    # =====================
    # END_MINUTE
    # =====================

    # Default = matchlängd
    df["end_minute"] = df["match_length"]

    # Utbytta (alla med sub_out_minute_int, både Starting och Sub)
    mask = df["sub_out_minute_int"].notna()
    df.loc[mask, "end_minute"] = df.loc[mask, "sub_out_minute_int"]

    # ✅ Röda kort (om inte redan utbytt tidigare)
    # Spelare med rött kort slutar vid kortets minut
    mask = df["red_card_minute"].notna()
    for idx in df[mask].index:
        current_end = df.loc[idx, "end_minute"]
        red_card_min = df.loc[idx, "red_card_minute"]
        # Ta det MINSTA värdet (antingen byte-minut eller rött kort-minut)
        df.loc[idx, "end_minute"] = min(current_end, red_card_min)

    # Subs som aldrig kom in → NaN
    mask = (df["player_type"]=="Sub") & (df["sub_in_minute_int"].isna())
    df.loc[mask, "end_minute"] = np.nan


    # =====================
    # MINUTES PLAYED
    # =====================

    df["minutes_played"] = df["end_minute"] - df["start_minute"]

    # Subs som inte spelade → NaN
    mask = (df["player_type"]=="Sub") & (df["start_minute"].isna())
    df.loc[mask, "minutes_played"] = np.nan

    # Skydd mot negativa värden
    df.loc[df["minutes_played"] < 0, "minutes_played"] = np.nan

    return df

In [136]:
def clean_minutes(df):

    df = df.copy()

    # =====================
    # START
    # =====================

    df["start_minute"] = np.nan
    df["end_minute"] = np.nan

    # Startspelare
    df.loc[df["player_type"] == "Starting", "start_minute"] = 0

    # Inbytta (inkl BOTH)
    mask = df["sub_direction"].isin(["in", "both"])
    df.loc[mask, "start_minute"] = df.loc[mask, "sub_in_minute_int"]

    # =====================
    # END
    # =====================

    # Default = matchens slut (EXKLUSIV)
    df["end_minute"] = df["match_length"]

    # Utbytta (inkl BOTH)
    mask = df["sub_direction"].isin(["out", "both"])
    df.loc[mask, "end_minute"] = df.loc[mask, "sub_minute_int"]

    # =====================
    # RÖTT KORT
    # =====================

    # Om rött kort finns → spelaren slutar exakt där
    mask = df["red_card"] & df["red_card_minute"].notna()
    df.loc[mask, "end_minute"] = df.loc[mask, "red_card_minute"]

    # =====================
    # MINUTES PLAYED
    # =====================

    df["minutes_played"] = df["end_minute"] - df["start_minute"]

    # Subs som aldrig kom in
    mask = df["start_minute"].isna()
    df.loc[mask, "minutes_played"] = np.nan

    # Skydd
    df.loc[df["minutes_played"] <= 0, "minutes_played"] = np.nan

    return df


In [137]:
players_df = clean_minutes(players_df)
players_df

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,red_card_minute,yellow_card_minutes,match_length,match_start_datetime,match_id,year,month,start_minute,end_minute,minutes_played
0,Helsingborgs IF,Johan Brattberg (MV),cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,,[38],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,,[],90,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,80.0,96,16.0
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96,
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96,
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,,[],96,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,46.0,96,50.0


## Quality control

In [85]:
def run_match_qa(df):

    print("\n================ QA RAPPORT ================\n")

    problems = False

    # =====================
    # 1. Negativa minuter
    # =====================
    neg = df[df["minutes_played"] < 0]

    if len(neg) > 0:
        problems = True
        print("❌ NEGATIVA MINUTER:")
        display(neg[["match_id","team","name","start_minute","end_minute","minutes_played"]])
    else:
        print("✅ Inga negativa minuter")


    # =====================
    # 2. Speltid > matchlängd
    # =====================
    too_long = df[df["minutes_played"] > df["match_length"]]

    if len(too_long) > 0:
        problems = True
        print("\n❌ MINUTER > MATCHLÄNGD:")
        display(too_long[["match_id","team","name","minutes_played","match_length"]])
    else:
        print("✅ Ingen speltid > matchlängd")


    # =====================
    # 3. Subs som aldrig kom in
    # =====================
    bad_subs = df[
        (df["player_type"]=="Sub") &
        (df["sub_direction"].isna()) &
        (df["minutes_played"].notna())
    ]

    if len(bad_subs) > 0:
        problems = True
        print("\n❌ Bänkspelare med minuter trots ej inbytta:")
        display(bad_subs[["match_id","team","name","minutes_played"]])
    else:
        print("✅ Inga ghost-subs")


    # =====================
    # 4. Lag > 11 samtidigt
    # =====================
    print("\n🔍 Kontrollerar >11 på plan...")

    for match_id in df["match_id"].unique():

        m = df[df["match_id"]==match_id]

        for team in m["team"].unique():

            t = m[m["team"]==team]

            for minute in range(int(t["match_length"].iloc[0])+1):

                on_pitch = t[
                    (t["start_minute"]<=minute) &
                    (t["end_minute"]>minute)
                ]

                if len(on_pitch) > 11:
                    problems = True
                    print(f"❌ {match_id} | {team} | minut {minute} → {len(on_pitch)} spelare")

    print("✅ >11-kontroll klar")


    # =====================
    # 5. Plus/minus summerar 0
    # =====================
    print("\n🔍 Kontrollerar plus/minus per mål (numerärt korrekt)...")

    for match_id in players_df["match_id"].unique():

        m_players = players_df[players_df["match_id"] == match_id]
        m_goals = goals_df[goals_df["match_id"] == match_id]

        for _, g in m_goals.iterrows():

            minute = g["minute_int"]

            on_pitch = m_players[
                (m_players["start_minute"] <= minute) &
                (m_players["end_minute"] > minute)
            ]

            scorer_team = m_players[
                m_players["fplguid"] == g["scorer_fplguid"]
            ]["team"].iloc[0]

            n_scoring = len(on_pitch[on_pitch["team"] == scorer_team])
            n_other = len(on_pitch) - n_scoring

            expected_sum = n_scoring - n_other

            actual_sum = (
                n_scoring * 1 +
                n_other * (-1)
            )

            if actual_sum != expected_sum:
                problems = True
                print(
                    f"❌ {match_id} | minut {minute} | "
                    f"expected {expected_sum}, got {actual_sum}"
                )

    print("✅ Numerärt korrekt plus/minus per mål")


    # =====================
    # SLUT
    # =====================
    if not problems:
        print("\n🎉 QA PASSERAD – datan är ren!")
    else:
        print("\n⚠ QA hittade problem – se ovan")


In [140]:
run_match_qa(players_df)




✅ Inga negativa minuter
✅ Ingen speltid > matchlängd
✅ Inga ghost-subs

🔍 Kontrollerar >11 på plan...
✅ >11-kontroll klar

🔍 Kontrollerar plus/minus per mål (numerärt korrekt)...
✅ Numerärt korrekt plus/minus per mål

🎉 QA PASSERAD – datan är ren!


In [None]:
# def calculate_full_plus_minus(players_df, goals_df):

#     df = players_df.copy()

#     plus_raw = []
#     plus90 = []
#     plus_impact = []

#     for _, p in df.iterrows():

#         minutes = p["minutes_played"]

#         # ---- Spelade inte → NaN ----
#         if pd.isna(minutes) or minutes <= 0:
#             plus_raw.append(np.nan)
#             plus90.append(np.nan)
#             plus_impact.append(np.nan)
#             continue

#         match = p["match_id"]
#         team = p["team"]
#         start = p["start_minute"]
#         end = p["end_minute"]

#         if pd.isna(start) or pd.isna(end):
#             plus_raw.append(np.nan)
#             plus90.append(np.nan)
#             plus_impact.append(np.nan)
#             continue

#         gmatch = goals_df[goals_df["match_id"] == match]

#         plus = 0
#         minus = 0
#         impact = 0.0

#         for _, g in gmatch.iterrows():

#             m = g["minute_int"]
#             # m = g["minute_norm"]
            
#             # ✅ Endpoints INCLUDED
#             if not (start <= m < end):
#                 continue

#             scorer_team = df[
#                 (df["match_id"] == match) &
#                 (df["name"] == g["scorer"])
#             ]["team"].values

#             if len(scorer_team) == 0:
#                 continue

#             scorer_team = scorer_team[0]

#             # ---- PLUS / MINUS ----
#             if scorer_team == team:
#                 plus += 1
#             else:
#                 minus += 1

#             # ---- IMPACT ----
#             pts = 0.8 if g.get("penalty", False) else 1.0

#             if g.get("scorer_fplguid") == p.get("fplguid"):
#                 impact += pts

#             if g.get("assist_fplguid") == p.get("fplguid"):
#                 impact += 0.7

#         # ---- DISCIPLINE (en gång per match) ----
#         if p.get("yellow_card", False):
#             impact -= 0.5
#         if p.get("red_card", False):
#             impact -= 1.0

#         pm = plus - minus
#         pm90 = pm / minutes * 90

#         plus_raw.append(pm)
#         plus90.append(pm90)
#         plus_impact.append(impact)

#     df["plus_minus_raw"] = plus_raw
#     df["plus_minus_per90"] = plus90
#     df["plus_minus_impact"] = plus_impact

#     return df


In [138]:
def calculate_full_plus_minus(players_df, goals_df):

    df = players_df.copy()

    plus_raw = []
    plus90 = []
    plus_impact = []

    for _, p in df.iterrows():

        minutes = p["minutes_played"]

        # ---- Spelade inte → NaN ----
        if pd.isna(minutes) or minutes <= 0:
            plus_raw.append(np.nan)
            plus90.append(np.nan)
            plus_impact.append(np.nan)
            continue

        match = p["match_id"]
        team = p["team"]
        start = p["start_minute"]
        end = p["end_minute"]

        if pd.isna(start) or pd.isna(end):
            plus_raw.append(np.nan)
            plus90.append(np.nan)
            plus_impact.append(np.nan)
            continue

        gmatch = goals_df[goals_df["match_id"] == match]

        plus = 0
        minus = 0
        impact = 0.0

        for _, g in gmatch.iterrows():

            m = g["minute_int"]
            
            # ✅ Kolla om DENNA spelare var på plan
            if not (start <= m < end):
                continue

            # ✅ Hitta målskytten via FPLGUID (inte namn)
            scorer_fplguid = g.get("scorer_fplguid")
            
            if pd.isna(scorer_fplguid):
                # Om vi inte har FPLGUID, försök med namn (fallback)
                scorer_team_rows = df[
                    (df["match_id"] == match) &
                    (df["name"] == g["scorer"])
                ]["team"].values
                
                if len(scorer_team_rows) == 0:
                    continue
                    
                scorer_team = scorer_team_rows[0]
            else:
                # Hitta lagtillhörighet via FPLGUID
                scorer_team_rows = df[
                    (df["match_id"] == match) &
                    (df["fplguid"] == scorer_fplguid)
                ]["team"].values
                
                if len(scorer_team_rows) == 0:
                    continue
                    
                scorer_team = scorer_team_rows[0]

            # ---- PLUS / MINUS ----
            if scorer_team == team:
                plus += 1
            else:
                minus += 1

            # ---- IMPACT ----
            pts = 0.8 if g.get("penalty", False) else 1.0

            if g.get("scorer_fplguid") == p.get("fplguid"):
                impact += pts

            if g.get("assist_fplguid") == p.get("fplguid"):
                impact += 0.7

        # ---- DISCIPLINE (en gång per match) ----
        if p.get("yellow_card", False):
            impact -= 0.5
        if p.get("red_card", False):
            impact -= 1.0

        pm = plus - minus
        pm90 = pm / minutes * 90 if minutes > 0 else 0

        plus_raw.append(pm)
        plus90.append(pm90)
        plus_impact.append(impact)

    df["plus_minus_raw"] = plus_raw
    df["plus_minus_per90"] = plus90
    df["plus_minus_impact"] = plus_impact

    return df

In [139]:
players_df = calculate_full_plus_minus(players_df, goals_df)
players_df

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,match_start_datetime,match_id,year,month,start_minute,end_minute,minutes_played,plus_minus_raw,plus_minus_per90,plus_minus_impact
0,Helsingborgs IF,Johan Brattberg (MV),cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.0
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.0
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,-0.5
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.7
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,2025-08-10 15:00:00,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,80.0,96,16.0,1.0,5.625,1.0
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96,,,,
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96,,,,
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,2025-05-17 15:00:00,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,46.0,96,50.0,1.0,1.800,0.0


In [99]:
# Testa funktionen direkt på bara denna match
test_match_id = "2023_IK Brage - Örgryte IS Fotboll - Matchfakta - Svensk fotboll"

test_players = players_df[players_df["match_id"] == test_match_id].copy()
test_goals = goals_df[goals_df["match_id"] == test_match_id].copy()

print("FÖRE calculate_full_plus_minus:")
print(f"Antal spelare: {len(test_players)}")
print(f"Har plus_minus_raw: {'plus_minus_raw' in test_players.columns}")

# Kör funktionen
test_result = calculate_full_plus_minus(test_players, test_goals)

print("\nEFTER calculate_full_plus_minus:")
print(f"Har plus_minus_raw: {'plus_minus_raw' in test_result.columns}")

# Kontrollera summan
for team in ["IK Brage", "Örgryte IS Fotboll"]:
    team_sum = test_result[test_result["team"] == team]["plus_minus_raw"].sum()
    print(f"{team}: {team_sum}")

# Kontrollera ett specifikt mål
print("\nMÅL VID MINUT 32 (Ieltsin Semedo):")
goal_32 = test_goals[test_goals["minute_int"] == 32].iloc[0]
print(f"Målskytt: {goal_32['scorer']}")
print(f"Scorer FPLGUID: {goal_32.get('scorer_fplguid', 'SAKNAS')}")

# Vilka var på plan vid minut 32?
on_pitch_32 = test_players[
    (test_players["start_minute"] <= 32) &
    (test_players["end_minute"] > 32)
]
print(f"\nSpelare på plan vid minut 32: {len(on_pitch_32)}")
for team in ["IK Brage", "Örgryte IS Fotboll"]:
    count = len(on_pitch_32[on_pitch_32["team"] == team])
    print(f"  {team}: {count} spelare")

# Visa Seth Hellberg specifikt
seth = test_result[test_result["name"].str.contains("Seth", na=False)]
print("\nSETH HELLBERG:")
print(f"Start: {seth['start_minute'].values[0]}, End: {seth['end_minute'].values[0]}")
print(f"Var på plan vid minut 32? {seth['start_minute'].values[0] <= 32 < seth['end_minute'].values[0]}")
print(f"Plus/minus: {seth['plus_minus_raw'].values[0]}")

FÖRE calculate_full_plus_minus:
Antal spelare: 36
Har plus_minus_raw: True

EFTER calculate_full_plus_minus:
Har plus_minus_raw: True
IK Brage: 30.0
Örgryte IS Fotboll: -33.0

MÅL VID MINUT 32 (Ieltsin Semedo):
Målskytt: Ieltsin Jeronimo Semedo Camoes
Scorer FPLGUID: cc1e66df-a0de-41eb-9439-061ff7d3285c

Spelare på plan vid minut 32: 21
  IK Brage: 10 spelare
  Örgryte IS Fotboll: 11 spelare

SETH HELLBERG:
Start: 0.0, End: 24
Var på plan vid minut 32? False
Plus/minus: 0.0


In [38]:
# print(players_df[players_df["match_id"] == "2023_IK Brage - Örgryte IS Fotboll - Matchfakta - Svensk fotboll"])
# print(goals_df[goals_df["match_id"] == "2023_IK Brage - Örgryte IS Fotboll - Matchfakta - Svensk fotboll"])

players_df[players_df["match_id"] == "2023_IK Brage - Örgryte IS Fotboll - Matchfakta - Svensk fotboll"]

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,match_start_datetime,match_id,year,month,start_minute,end_minute,minutes_played,plus_minus_raw,plus_minus_per90,plus_minus_impact
250,IK Brage,André Bernardini (MV),82b6cc83-5b13-4770-aebb-7a61af0e7edf,Starting,False,,,,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,91,91.0,3.0,2.967033,0.0
251,IK Brage,Jonathan Syberg Tamimi,86dbe6e3-bbec-4fbb-8023-4d19abd71905,Starting,False,,,,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,91,91.0,3.0,2.967033,1.4
252,IK Brage,Alexander Zetterström,df1d3fea-0ec3-4d44-8a5c-0a2edd806a1f,Starting,False,,,,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,91,91.0,3.0,2.967033,0.0
253,IK Brage,Pontus Rödin,37739c10-f31c-4673-9878-b0c3683bd50b,Starting,False,,,,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,91,91.0,3.0,2.967033,0.5
254,IK Brage,Malte Persson,5126b059-0ce7-45c3-beac-06026b1bcd5b,Starting,True,out,74:35,74.0,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,74,74.0,1.0,1.216216,0.0
255,IK Brage,Pontus Jonsson,c2d33e21-d2f9-447a-98c8-6bc7815073ab,Starting,True,out,42:44,42.0,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,42,42.0,1.0,2.142857,0.7
256,IK Brage,Henry Sletsjøe,5a1fafea-0a01-4e28-a48c-82fe2f287b1e,Starting,False,,,,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,91,91.0,3.0,2.967033,1.0
257,IK Brage,Gustav Berggren,33630ccf-413a-4dc1-bfc6-2706e69eb96f,Starting,False,,,,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,91,91.0,3.0,2.967033,0.0
258,IK Brage,Ieltsin Jeronimo Semedo Camoes,cc1e66df-a0de-41eb-9439-061ff7d3285c,Starting,True,out,58:38,58.0,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,58,58.0,2.0,3.103448,1.0
259,IK Brage,Seth Hellberg,2c471408-06a8-4f71-a397-75f94dad2121,Starting,False,,,,,,...,2023-07-09 15:00:00,2023_IK Brage - Örgryte IS Fotboll - Matchfak...,2023,7,0.0,24,24.0,0.0,0.0,-1.0


In [141]:
players_df["matches_played"] = players_df["minutes_played"] > 0
players_df

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,match_id,year,month,start_minute,end_minute,minutes_played,plus_minus_raw,plus_minus_per90,plus_minus_impact,matches_played
0,Helsingborgs IF,Johan Brattberg (MV),cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.0,True
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.0,True
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,-0.5,True
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.7,True
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,2025_Helsingborgs IF - Utsikten - Matchfakta -...,2025,8,0.0,90,90.0,3.0,3.000,0.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,80.0,96,16.0,1.0,5.625,1.0,True
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96,,,,,False
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,,96,,,,,False
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,2025_Umeå FC - Utsikten - Matchfakta - Svensk...,2025,5,46.0,96,50.0,1.0,1.800,0.0,True


In [142]:
def calculate_on_off(players_df, goals_df):

    df = players_df.copy()

    on = []
    off = []

    for _, p in df.iterrows():

        minutes = p["minutes_played"]

        if pd.isna(minutes) or minutes <= 0:
            on.append(np.nan)
            off.append(np.nan)
            continue

        match = p["match_id"]
        team = p["team"]
        start = p["start_minute"]
        end = p["end_minute"]

        if pd.isna(start) or pd.isna(end):
            on.append(np.nan)
            off.append(np.nan)
            continue

        gmatch = goals_df[goals_df["match_id"] == match]

        on_gd = 0
        off_gd = 0

        for _, g in gmatch.iterrows():

            m = g["minute_int"]

            scorer_team = df[
                (df["match_id"] == match) &
                (df["name"] == g["scorer"])
            ]["team"].values

            if len(scorer_team) == 0:
                continue

            val = 1 if scorer_team[0] == team else -1

            # ✅ INKLUSIV ENDPOINT
            if start <= m < end:
                on_gd += val
            else:
                off_gd += val

        on.append(on_gd)
        off.append(off_gd)

    df["on_goal_diff"] = on
    df["off_goal_diff"] = off
    df["on_off_diff"] = df["on_goal_diff"] - df["off_goal_diff"]

    return df


In [143]:
def calculate_gf_ga(players_df, goals_df):

    df = players_df.copy()

    gf = []
    ga = []

    for _, p in df.iterrows():

        minutes = p["minutes_played"]

        if pd.isna(minutes) or minutes <= 0:
            gf.append(np.nan)
            ga.append(np.nan)
            continue

        match = p["match_id"]
        team = p["team"]
        start = p["start_minute"]
        end = p["end_minute"]

        if pd.isna(start) or pd.isna(end):
            gf.append(np.nan)
            ga.append(np.nan)
            continue

        gmatch = goals_df[goals_df["match_id"] == match]

        gf_i = 0
        ga_i = 0

        for _, g in gmatch.iterrows():

            m = g["minute_int"]

            # ✅ INKLUSIV ENDPOINT
            if not (start <= m < end):
                continue

            scorer_team = df[
                (df["match_id"] == match) &
                (df["name"] == g["scorer"])
            ]["team"].values

            if len(scorer_team) == 0:
                continue

            if scorer_team[0] == team:
                gf_i += 1
            else:
                ga_i += 1

        gf.append(gf_i)
        ga.append(ga_i)

    df["gf_on"] = gf
    df["ga_on"] = ga
    df["gf90"] = df["gf_on"] / df["minutes_played"] * 90
    df["ga90"] = df["ga_on"] / df["minutes_played"] * 90

    return df


In [144]:
def calculate_clutch(players_df, goals_df, clutch_from_minute=75):

    df = players_df.copy()
    clutch = []

    for _, p in df.iterrows():

        minutes = p.get("minutes_played", np.nan)

        # ✅ Om spelaren inte spelade → NaN
        if pd.isna(minutes) or minutes <= 0:
            clutch.append(np.nan)
            continue

        match = p["match_id"]
        start = p["start_minute"]
        end = p["end_minute"]

        # extra safety
        if pd.isna(start) or pd.isna(end):
            clutch.append(np.nan)
            continue

        gmatch = goals_df[
            (goals_df["match_id"] == match) &
            (goals_df["minute_int"] >= clutch_from_minute)
        ]

        cnt = 0

        for _, g in gmatch.iterrows():

            m = g["minute_int"]

            if not (start <= m < end):
                continue

            if g["scorer_fplguid"] == p.get("fplguid"):
                cnt += 1

        clutch.append(cnt)

    df["clutch_goals"] = clutch
    return df



In [145]:
def calculate_clean_sheets(players_df, goals_df):

    df = players_df.copy()
    cs = []

    for _, p in df.iterrows():

        # Om spelaren inte spelade → NaN
        if pd.isna(p["minutes_played"]) or p["minutes_played"] <= 0:
            cs.append(np.nan)
            continue

        match = p["match_id"]
        team = p["team"]
        start = p["start_minute"]
        end = p["end_minute"]

        if pd.isna(start) or pd.isna(end):
            cs.append(np.nan)
            continue

        gmatch = goals_df[goals_df["match_id"] == match]

        conceded = 0

        for _, g in gmatch.iterrows():

            m = g["minute_int"]

            # ✅ endast mål medan spelaren är kvar på planen
            if not (start <= m < end):
                continue

            scorer_team = df[
                (df["match_id"] == match) &
                (df["name"] == g["scorer"])
            ]["team"].values

            if len(scorer_team) == 0:
                continue

            if scorer_team[0] != team:
                conceded += 1

        cs.append(1 if conceded == 0 else 0)

    df["clean_sheet"] = cs
    return df


In [146]:
players_df = calculate_on_off(players_df, goals_df)
players_df = calculate_gf_ga(players_df, goals_df)
players_df = calculate_clutch(players_df, goals_df)
players_df = calculate_clean_sheets(players_df, goals_df)
players_df


Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,matches_played,on_goal_diff,off_goal_diff,on_off_diff,gf_on,ga_on,gf90,ga90,clutch_goals,clean_sheet
0,Helsingborgs IF,Johan Brattberg (MV),cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,True,3.0,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,True,3.0,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,True,3.0,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,True,3.0,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,True,3.0,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,True,1.0,1.0,0.0,2.0,1.0,11.25,5.625,1.0,0.0
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,False,,,,,,,,,
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,False,,,,,,,,,
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,True,1.0,1.0,0.0,2.0,1.0,3.60,1.800,0.0,0.0


In [147]:
players_df["match_id_short"] = players_df["match_id"].str.split(" - Matchfakta", expand = True)[0]
players_df["impact90"] = players_df["plus_minus_impact"] / (players_df["minutes_played"] * 90)
players_df

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,off_goal_diff,on_off_diff,gf_on,ga_on,gf90,ga90,clutch_goals,clean_sheet,match_id_short,impact90
0,Helsingborgs IF,Johan Brattberg (MV),cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0,2025_Helsingborgs IF - Utsikten,0.000000
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0,2025_Helsingborgs IF - Utsikten,0.000000
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0,2025_Helsingborgs IF - Utsikten,-0.000062
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0,2025_Helsingborgs IF - Utsikten,0.000086
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,0.0,3.0,3.0,0.0,3.00,0.000,0.0,1.0,2025_Helsingborgs IF - Utsikten,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,1.0,0.0,2.0,1.0,11.25,5.625,1.0,0.0,2025_Umeå FC - Utsikten,0.000694
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,,,,,,,,,2025_Umeå FC - Utsikten,
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,,,,,,,,,2025_Umeå FC - Utsikten,
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,1.0,0.0,2.0,1.0,3.60,1.800,0.0,0.0,2025_Umeå FC - Utsikten,0.000000


## Merge value with player data

In [151]:
players_to_scrape = (
    # players_df[players_df["team"] == "IK Brage"]["name"]
    players_df["name"]
        .str.replace("(MV)", "", regex=False)
        .str.strip()
        .str.lower()
        .str.replace(" ", "+")
        .unique()
        .tolist()
)

base_url = "https://www.transfermarkt.com/schnellsuche/ergebnis/schnellsuche?query="

HEADERS = {"User-Agent": "Mozilla/5.0"}

results = []

def parse_player_row(row):
    data = {}

    name_tag = row.select_one("td.hauptlink a")
    data["name"] = name_tag.text.strip() if name_tag else None
    data["profile_url"] = (
        "https://www.transfermarkt.com" + name_tag["href"]
        if name_tag and name_tag.has_attr("href") else None
    )

    club_tag = row.select_one("table.inline-table tr:nth-of-type(2) a")
    data["club"] = club_tag.text.strip() if club_tag else None

    zentriert = row.find_all("td", class_="zentriert")
    data["position"] = zentriert[0].text.strip() if len(zentriert) > 0 else None
    data["age"] = zentriert[2].text.strip() if len(zentriert) > 2 else None

    mv_tag = row.select_one("td.rechts.hauptlink")
    data["market_value"] = mv_tag.text.strip() if mv_tag else None

    agent_tag = row.select_one("td.rechts a")
    data["agent"] = agent_tag.text.strip() if agent_tag else None

    return data


for i, player in enumerate(players_to_scrape, start=1):
    print(f"Processing player {i}/{len(players_to_scrape)}: {player}")
    search_url = base_url + player

    try:
        response = requests.get(search_url, headers=HEADERS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        table = soup.find("table", class_="items")
        if not table:
            print("No results table found")
            continue

        rows = table.find_all("tr", class_=["odd", "even"])
        if not rows:
            print("No player rows found")
            continue

        for row in rows:
            player_data = parse_player_row(row)
            player_data["player_query"] = player
            results.append(player_data)

    except Exception as e:
        results.append({
            "player_query": player,
            "error": str(e)
        })

    time.sleep(2)

df_results = pd.DataFrame(results)


Processing player 1/465: johan+brattberg
Processing player 2/465: benjamin+örn
Processing player 3/465: jon+birkfeldt
Processing player 4/465: wilhelm+nilsson
Processing player 5/465: simon+bengtsson
Processing player 6/465: wilhelm+loeper
Processing player 7/465: ervin+gigovic
Processing player 8/465: lukas+kjellnäs
Processing player 9/465: adam+akimey
Processing player 10/465: alexander+johansson
Processing player 11/465: casper+ljung+hofvendahl
Processing player 12/465: emil+rådahl
Processing player 13/465: marcus+gudmann
Processing player 14/465: max+svensson
Processing player 15/465: baker+amer
Processing player 16/465: william+westerlund
Processing player 17/465: oscar+aga
Processing player 18/465: alvin+nordin
Processing player 19/465: tom+amos
Processing player 20/465: david+tokpah
Processing player 21/465: sebastian+lagerlund
Processing player 22/465: liiban+abdirahman+abadid
Processing player 23/465: malkolm+moenza
Processing player 24/465: alvin+karlsson
Processing player 25

In [152]:
df_results

Unnamed: 0,name,profile_url,club,position,age,market_value,agent,player_query
0,Johan Brattberg,https://www.transfermarkt.com/johan-brattberg/...,Helsingborgs IF,GK,29,€350k,Global Soccer Management,johan+brattberg
1,Benjamin Örn,https://www.transfermarkt.com/benjamin-orn/pro...,Randers FC,RB,21,€300k,Global Soccer Management,benjamin+örn
2,Jon Birkfeldt,https://www.transfermarkt.com/jon-birkfeldt/pr...,,CB,29,-,,jon+birkfeldt
3,Wilhelm Nilsson,https://www.transfermarkt.com/wilhelm-nilsson/...,Boldklubben af 1893,CB,28,€350k,MD Management,wilhelm+nilsson
4,Simon Bengtsson,https://www.transfermarkt.com/simon-bengtsson/...,Helsingborgs IF,LB,21,€200k,Global Soccer Management,simon+bengtsson
...,...,...,...,...,...,...,...,...
609,William Blix,https://www.transfermarkt.com/william-blix/pro...,Utsiktens BK U19,CB,17,-,,william+blix
610,Andreas Murbeck,https://www.transfermarkt.com/andreas-murbeck/...,Landskrona BoIS,CB,27,€125k,FM11,andreas+murbeck
611,Hassan Abdi Hassan,https://www.transfermarkt.com/hassan-abdi-hass...,Umeå FC,RB,21,-,HSA,hassan+abdi+hassan
612,Mohammed Sadat Abubakari,https://www.transfermarkt.com/mohammed-sadat-a...,Sandvikens IF,LB,26,€150k,HFC-Hense Football Consult,mohammed+sadat+abubakari


In [154]:
# Conversion function
import pandas as pd
import numpy as np

def convert_market_value(value):

    # ---- Saknade värden ----
    if value is None or pd.isna(value) or value == "-":
        return np.nan

    # ---- Redan numerisk ----
    if isinstance(value, (int, float)):
        return float(value)

    # ---- String cleanup ----
    value = (
        str(value)
        .replace("€", "")
        .replace(",", "")
        .strip()
        .lower()
    )

    # ---- Enheter ----
    if value.endswith("m"):
        return float(value[:-1]) * 1_000_000
    elif value.endswith("k"):
        return float(value[:-1]) * 1_000
    else:
        return float(value)

df_results["market_value"] = df_results["market_value"].apply(convert_market_value)
df_results

Unnamed: 0,name,profile_url,club,position,age,market_value,agent,player_query
0,Johan Brattberg,https://www.transfermarkt.com/johan-brattberg/...,Helsingborgs IF,GK,29,350000.0,Global Soccer Management,johan+brattberg
1,Benjamin Örn,https://www.transfermarkt.com/benjamin-orn/pro...,Randers FC,RB,21,300000.0,Global Soccer Management,benjamin+örn
2,Jon Birkfeldt,https://www.transfermarkt.com/jon-birkfeldt/pr...,,CB,29,,,jon+birkfeldt
3,Wilhelm Nilsson,https://www.transfermarkt.com/wilhelm-nilsson/...,Boldklubben af 1893,CB,28,350000.0,MD Management,wilhelm+nilsson
4,Simon Bengtsson,https://www.transfermarkt.com/simon-bengtsson/...,Helsingborgs IF,LB,21,200000.0,Global Soccer Management,simon+bengtsson
...,...,...,...,...,...,...,...,...
609,William Blix,https://www.transfermarkt.com/william-blix/pro...,Utsiktens BK U19,CB,17,,,william+blix
610,Andreas Murbeck,https://www.transfermarkt.com/andreas-murbeck/...,Landskrona BoIS,CB,27,125000.0,FM11,andreas+murbeck
611,Hassan Abdi Hassan,https://www.transfermarkt.com/hassan-abdi-hass...,Umeå FC,RB,21,,HSA,hassan+abdi+hassan
612,Mohammed Sadat Abubakari,https://www.transfermarkt.com/mohammed-sadat-a...,Sandvikens IF,LB,26,150000.0,HFC-Hense Football Consult,mohammed+sadat+abubakari


In [155]:
def scrape_height_from_profile(profile_url):
    try:
        response = requests.get(profile_url, headers=HEADERS)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        span = soup.select_one('span[itemprop="height"]')
        return span.get_text(strip=True) if span else None

    except Exception:
        return None



df_results["height"] = None

for idx, row in df_results.iterrows():
    profile_url = row.get("profile_url")

    if pd.isna(profile_url):
        continue

    print(f"Scraping height for {row['name']}")

    height = scrape_height_from_profile(profile_url)
    df_results.at[idx, "height"] = height

    time.sleep(2)


Scraping height for Johan Brattberg
Scraping height for Benjamin Örn
Scraping height for Jon Birkfeldt
Scraping height for Wilhelm Nilsson
Scraping height for Simon Bengtsson
Scraping height for Wilhelm Loeper
Scraping height for Ervin Gigović
Scraping height for Lukas Kjellnäs
Scraping height for Adam Akimey
Scraping height for Alexander Johansson
Scraping height for Alexander Johansson
Scraping height for Alexander Johansson
Scraping height for Alexander Johansson
Scraping height for Alexander Johansson Lemon
Scraping height for Casper Ljung Hofvendahl
Scraping height for Emil Rådahl
Scraping height for Marcus Gudmann
Scraping height for Max Svensson
Scraping height for Max Svensson
Scraping height for Baker Amer
Scraping height for William Westerlund
Scraping height for Oscar Aga
Scraping height for Oscar Agaba
Scraping height for Alvin Nordin
Scraping height for Tom Amos
Scraping height for David Tokpah
Scraping height for Sebastian Lagerlund
Scraping height for Liiban Abdirahman A

In [156]:
players_df["name_clean"] = players_df["name"].str.replace(r"\s*\(MV\)", "", regex=True)


In [157]:

def match_tm_data(
    players_df,
    df_tm,
    name_threshold=85,
    club_threshold=80
):
    rows = []

    for _, p in players_df.iterrows():
        name = p["name_clean"]
        team = p["team"]

        # ---------- 1) Exakt namn + exakt klubb ----------
        exact = df_tm[
            (df_tm["name"] == name) &
            (df_tm["club"] == team)
        ]

        if len(exact) == 1:
            tm = exact.iloc[0]
            rows.append({
                **p.to_dict(),
                **tm.to_dict(),
                "tm_match_type": "exact_name_exact_club",
                "tm_match_score": 100
            })
            continue

        # ---------- 2) Exakt namn + fuzzy klubb ----------
        exact_name = df_tm[df_tm["name"] == name]

        best_club_score = 0
        best_tm = None

        for _, tm in exact_name.iterrows():
            club_score = fuzz.token_sort_ratio(team, tm["club"])
            if club_score > best_club_score:
                best_club_score = club_score
                best_tm = tm

        if best_club_score >= club_threshold:
            rows.append({
                **p.to_dict(),
                **best_tm.to_dict(),
                "tm_match_type": "exact_name_fuzzy_club",
                "tm_match_score": best_club_score
            })
            continue

        # ---------- 3) Fuzzy namn + exakt klubb ----------
        same_club = df_tm[df_tm["club"] == team]

        best_name_score = 0
        best_tm = None

        for _, tm in same_club.iterrows():
            name_score = fuzz.token_sort_ratio(name, tm["name"])
            if name_score > best_name_score:
                best_name_score = name_score
                best_tm = tm

        if best_name_score >= name_threshold:
            rows.append({
                **p.to_dict(),
                **best_tm.to_dict(),
                "tm_match_type": "fuzzy_name_exact_club",
                "tm_match_score": best_name_score
            })
            continue

        # ---------- 4) Fuzzy namn + fuzzy klubb ----------
        best_combo_score = 0
        best_tm = None

        for _, tm in df_tm.iterrows():
            name_score = fuzz.token_sort_ratio(name, tm["name"])
            club_score = fuzz.token_sort_ratio(team, tm["club"])

            combo_score = 0.7 * name_score + 0.3 * club_score

            if combo_score > best_combo_score:
                best_combo_score = combo_score
                best_tm = tm

        if (
            best_tm is not None and
            name_score >= name_threshold and
            club_score >= club_threshold
        ):
            rows.append({
                **p.to_dict(),
                **best_tm.to_dict(),
                "tm_match_type": "fuzzy_name_fuzzy_club",
                "tm_match_score": round(best_combo_score, 1)
            })
            continue


        # ---------- 5) Exakt namn (ignorera klubb) ----------
        exact_name_only = df_tm[df_tm["name"] == name]

        if len(exact_name_only) == 1:
            tm = exact_name_only.iloc[0]
            rows.append({
                **p.to_dict(),
                **tm.to_dict(),
                "tm_match_type": "exact_name_only",
                "tm_match_score": 90
            })
            continue

        # ---------- 6) No match ----------
        rows.append({
            **p.to_dict(),
            "tm_match_type": "no_match",
            "tm_match_score": None
        })


    return pd.DataFrame(rows)


In [158]:
players_enriched = match_tm_data(
    players_df=players_df,
    df_tm=df_results,
    name_threshold=85,
    club_threshold=40
)

players_enriched

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,profile_url,club,position,age,market_value,agent,player_query,height,tm_match_type,tm_match_score
0,Helsingborgs IF,Johan Brattberg,cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,https://www.transfermarkt.com/johan-brattberg/...,Helsingborgs IF,GK,29,350000.0,Global Soccer Management,johan+brattberg,"2,00 m",exact_name_exact_club,100.0
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,https://www.transfermarkt.com/benjamin-orn/pro...,Randers FC,RB,21,300000.0,Global Soccer Management,benjamin+örn,"1,88 m",exact_name_only,90.0
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,https://www.transfermarkt.com/jon-birkfeldt/pr...,,CB,29,,,jon+birkfeldt,"1,83 m",exact_name_only,90.0
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,https://www.transfermarkt.com/wilhelm-nilsson/...,Boldklubben af 1893,CB,28,350000.0,MD Management,wilhelm+nilsson,"1,83 m",exact_name_only,90.0
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,https://www.transfermarkt.com/simon-bengtsson/...,Helsingborgs IF,LB,21,200000.0,Global Soccer Management,simon+bengtsson,"1,77 m",exact_name_exact_club,100.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,,,,,,,,,no_match,
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,https://www.transfermarkt.com/amadou-david-san...,Utsiktens BK,AM,21,25000.0,,amadou-david+sanyang,,exact_name_fuzzy_club,80.0
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,https://www.transfermarkt.com/enzo-andren/prof...,Utsiktens BK,AM,20,25000.0,ICON MGMT,enzo+andrén,"1,75 m",exact_name_fuzzy_club,80.0
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,https://www.transfermarkt.com/johannes-selven/...,Vestri Ísafjördur,RW,22,150000.0,,johannes+selvén,"1,75 m",exact_name_only,90.0


In [159]:
players_enriched["height"] = (players_enriched["height"].str.replace(" m", "").str.replace(",", ".").astype(float)) * 100
players_enriched["year"] = players_enriched["year"].astype(str)

In [160]:
players_enriched['impact_combo90'] = (
    (players_enriched['goals'] * 1.0 + players_enriched['assists'] * 0.7 +
     players_enriched['clean_sheet'] * 0.5 + players_enriched['plus_minus_raw'] * 0.5) /
     (players_enriched['minutes_played'] / 90)
)

players_enriched

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,club,position,age,market_value,agent,player_query,height,tm_match_type,tm_match_score,impact_combo90
0,Helsingborgs IF,Johan Brattberg,cc256365-a139-46a0-88e1-4e5c2fe15dfe,Starting,False,,,,,,...,Helsingborgs IF,GK,29,350000.0,Global Soccer Management,johan+brattberg,200.0,exact_name_exact_club,100.0,2.0000
1,Helsingborgs IF,Benjamin Örn,aa942ed4-3f35-46d8-af83-3bc117c9bc8d,Starting,False,,,,,,...,Randers FC,RB,21,300000.0,Global Soccer Management,benjamin+örn,188.0,exact_name_only,90.0,2.0000
2,Helsingborgs IF,Jon Birkfeldt,fbd3c879-1be4-4d66-b566-f1623db692d7,Starting,False,,,,,,...,,CB,29,,,jon+birkfeldt,183.0,exact_name_only,90.0,2.0000
3,Helsingborgs IF,Wilhelm Nilsson,e37747df-5f51-4db7-8624-951e2f80edc9,Starting,False,,,,,,...,Boldklubben af 1893,CB,28,350000.0,MD Management,wilhelm+nilsson,183.0,exact_name_only,90.0,2.7000
4,Helsingborgs IF,Simon Bengtsson,5dc6220e-c444-412f-a3cd-e6f404b9113e,Starting,False,,,,,,...,Helsingborgs IF,LB,21,200000.0,Global Soccer Management,simon+bengtsson,177.0,exact_name_exact_club,100.0,2.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8614,Utsikten,Mass Modou Sise,80ab783a-484d-4c1e-b70e-58af1082454c,Sub,True,in,80,80.0,80,80.0,...,,,,,,,,no_match,,8.4375
8615,Utsikten,Amadou-David Sanyang,86a2ca09-236f-4566-a817-6919f9032834,Sub,False,,,,,,...,Utsiktens BK,AM,21,25000.0,,amadou-david+sanyang,,exact_name_fuzzy_club,80.0,
8616,Utsikten,Enzo Andrén,830a7b4a-b0de-4393-b212-35b067efe827,Sub,False,,,,,,...,Utsiktens BK,AM,20,25000.0,ICON MGMT,enzo+andrén,175.0,exact_name_fuzzy_club,80.0,
8617,Utsikten,Johannes Selvén,e64bb19b-fefb-481a-8644-6e446e718cfb,Sub,True,in,46,46.0,46,46.0,...,Vestri Ísafjördur,RW,22,150000.0,,johannes+selvén,175.0,exact_name_only,90.0,0.9000


In [166]:
players_enriched["age"] = players_enriched["age"].astype("Int64")

In [168]:
players_enriched.dtypes

team                            object
name                            object
fplguid                         object
player_type                     object
was_substituted                   bool
sub_direction                   object
sub_minute                      object
sub_minute_int                 float64
sub_in_minute                   object
sub_in_minute_int              float64
sub_out_minute                  object
sub_out_minute_int             float64
goals                            int64
goal_minutes                    object
assists                          int64
assists_minutes                 object
yellow_card                       bool
red_card                          bool
red_card_minute                float64
yellow_card_minutes             object
match_length                     int64
match_start_datetime    datetime64[ns]
match_id                        object
year                            object
month                            int64
start_minute             

In [169]:
summary_stats = (
    players_enriched
    # [players_enriched["team"] == "IK Brage"]
    .groupby(["name"])
    .agg(
        # -------- IDENTITET --------
        team = ("team", "max"),
        position=("position", "first"),
        ålder = ("age", "max"),
        längd=("height", "max"),
        market_value=("market_value", "max"),

        # -------- AVAILABILITY --------
        matcher=("matches_played", "sum"),
        minuter=("minutes_played", "sum"),
        starter=("player_type", lambda x: (x == "Starting").sum()),

        # -------- PRODUCTION (OFF) --------
        mål=("goals", "sum"),
        assists=("assists", "sum"),
        gf_on=("gf_on", "sum"),

        # -------- PRODUCTION (DEF) --------
        ga_on=("ga_on", "sum"),
        clean_sheets=("clean_sheet", "sum"),

        # -------- IMPACT --------
        plus_minus_total=("plus_minus_raw", "sum"),
        on_off_total=("on_off_diff", "sum"),
        plus_minus_impact_total=("plus_minus_impact", "sum"),

        # -------- DISCIPLINE --------
        yellow_cards=("yellow_card", "sum"),
        red_cards=("red_card", "sum"),
    )
    .reset_index()
    .round(3)
)

summary_stats["usage_rate"] = summary_stats["minuter"] / (summary_stats["matcher"] * 93)
summary_stats["starter_rate"] = summary_stats["starter"] / summary_stats["matcher"]

summary_stats["poäng_p90"] = ((summary_stats["mål"] + summary_stats["assists"]) / 90).round(2)

summary_stats["gf90"] = summary_stats["gf_on"] / summary_stats["minuter"] * 90
summary_stats["ga90"] = summary_stats["ga_on"] / summary_stats["minuter"] * 90
summary_stats["pm90"] = summary_stats["plus_minus_total"] / summary_stats["minuter"] * 90
summary_stats["impact90"] = summary_stats["plus_minus_impact_total"] / summary_stats["minuter"] * 90
summary_stats["clean_sheet_rate"] = summary_stats["clean_sheets"] / summary_stats["matcher"]

summary_stats["availability_index"] = (
    0.5 * summary_stats["usage_rate"] +
    0.5 * summary_stats["starter_rate"]
)

summary_stats["def_reliability"] = (
    (1 - summary_stats["ga90"].rank(pct=True)) * 0.6 +
    summary_stats["clean_sheet_rate"].rank(pct=True) * 0.4
)

summary_stats["risk_measure"] = (summary_stats["yellow_cards"] + (2 * summary_stats["red_cards"])) / (summary_stats["minuter"]*90)

summary_stats['impact_combo90'] = (
    (summary_stats['mål'] * 1.0 + summary_stats['assists'] * 0.7 +
     summary_stats['clean_sheets'] * 0.5 + summary_stats['plus_minus_total'] * 0.5) /
     (summary_stats['minuter'] / 90)
)

summary_stats = summary_stats.round(5)
summary_stats

Unnamed: 0,name,team,position,ålder,längd,market_value,matcher,minuter,starter,mål,...,poäng_p90,gf90,ga90,pm90,impact90,clean_sheet_rate,availability_index,def_reliability,risk_measure,impact_combo90
0,Aaron Le Forestier Banind Bibout,Västerås SK FK,,,,,12,1030.0,11,6,...,0.07,1.48544,1.57282,-0.08738,0.43689,0.25000,0.91980,0.30184,0.00002,0.61165
1,Abbe Rehn,Trelleborgs FF,CB,21,178.0,,3,39.0,0,0,...,0.00,0.00000,2.30769,-2.30769,0.00000,0.66667,0.06989,0.37811,0.00000,1.15385
2,Abdi Sabriye,Kalmar FF,LW,23,,100000.0,17,817.0,7,1,...,0.03,1.21175,0.66095,0.55080,0.20930,0.70588,0.46426,0.86912,0.00001,1.20073
3,Abdul Rashid Adam,Utsikten,Attack,19,,,0,0.0,0,0,...,0.00,,,,,,,,,
4,Abdulahi Shino,GIF Sundsvall,Attack,20,,,3,59.0,0,0,...,0.00,1.52542,0.00000,1.52542,0.00000,1.00000,0.10573,0.96521,0.00000,3.05085
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
462,York Rafael,IK Oddevold,LM,26,177.0,150000.0,12,674.0,9,0,...,0.01,0.53412,1.06825,-0.53412,0.02671,0.66667,0.67697,0.76175,0.00002,0.36053
463,Zakaria Loukili,Varbergs BoIS FC,AM,20,180.0,600000.0,10,552.0,6,1,...,0.02,1.46739,1.30435,0.16304,0.19565,0.40000,0.59677,0.50046,0.00002,0.68478
464,Zakarias Råvik,Kalmar FF,CB,19,,50000.0,2,150.0,2,0,...,0.00,0.60000,0.00000,0.60000,-0.30000,1.00000,0.90323,0.96521,0.00007,0.90000
465,Zean Peetz Dalügge,Trelleborgs FF,,,,,13,1067.0,13,4,...,0.04,0.84349,1.60262,-0.75914,0.12652,0.30769,0.94127,0.31060,0.00005,0.12652


In [191]:
(players_enriched
    [players_enriched["team"] == "Örgryte IS Fotboll"]
    .groupby(["match_id"])
    .agg(
        datum = ("match_start_datetime", "first"),
        mål = ("goals", "sum")
    )
    .reset_index()

)

Unnamed: 0,match_id,datum,mål
0,2025_Falkenbergs FF - Örgryte IS Fotboll - Ma...,2025-10-18 17:00:00,0
1,2025_GIF Sundsvall - Örgryte IS Fotboll - Mat...,2025-11-08 15:00:00,1
2,2025_Helsingborgs IF - Örgryte IS Fotboll - M...,2025-09-20 13:00:00,1
3,2025_IK Brage - Örgryte IS Fotboll - Matchfakt...,2025-06-15 15:00:00,1
4,2025_IK Oddevold - Örgryte IS Fotboll - Match...,2025-07-19 15:00:00,1
5,2025_Kalmar FF - Örgryte IS Fotboll - Matchfa...,2025-06-22 17:00:00,3
6,2025_Landskrona BoIS - Örgryte IS Fotboll - M...,2025-05-12 19:00:00,1
7,2025_Örebro - Örgryte IS Fotboll - Matchfakt...,2025-08-17 15:00:00,3
8,2025_Örgryte IS Fotboll - Falkenbergs FF - Ma...,2025-05-04 15:00:00,4
9,2025_Örgryte IS Fotboll - GIF Sundsvall - Mat...,2025-04-12 15:00:00,0


#### Write summary stats to csv

In [192]:
# summary_stats.to_csv("data/summary_stats.csv", index = False)
summary_stats.to_parquet("data/summary_stats.parquet", index = False)

#### Write raw data to csv

In [193]:
# players_enriched.to_csv("data/players_stats.csv", index = False)
players_enriched.to_parquet("data/players_stats.parquet", index = False)

In [129]:
players_enriched

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,sub_in_minute,sub_in_minute_int,...,tm_match_score,profile_url,club,position,age,market_value,agent,player_query,height,impact_combo90
0,Jönköpings Södra IF,David Celic (MV),4f5d9221-ad25-46f6-be57-6ccadd219d5a,Starting,False,,,,,,...,,,,,,,,,,1.500000
1,Jönköpings Södra IF,Jesper Manns,254600e2-975c-4e13-abff-1ee1b13ff2de,Starting,False,,,,,,...,,,,,,,,,,1.500000
2,Jönköpings Södra IF,Samuel Adrian,b5cfd03e-a0ac-4006-b6a0-29ee4e82bcaf,Starting,True,out,82:43,82.0,,,...,,,,,,,,,,2.414634
3,Jönköpings Södra IF,Alexander Berntsson,39ff4c7a-7577-405d-8cee-0876f1642812,Starting,False,,,,,,...,,,,,,,,,,1.500000
4,Jönköpings Södra IF,Malkolm Moenza,4b6e514b-2e07-4b32-9bce-2a143c6a39d2,Starting,True,out,83:06,83.0,,,...,,,,,,,,,,2.385542
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3227,IK Brage,Lorik Konjuhi,f502e048-1178-474a-aab2-e0ab8f8713bb,Sub,True,in,88,88.0,88,88.0,...,100.0,https://www.transfermarkt.com/lorik-konjuhi/pr...,IK Brage,CB,23,75000.0,,lorik+konjuhi,,-9.000000
3228,IK Brage,Marinus Larsen,16bee41b-51df-48a2-aa2c-ded2d43c6579,Sub,True,in,88,88.0,88,88.0,...,100.0,https://www.transfermarkt.com/marinus-larsen/p...,IK Brage,CM,22,400000.0,People In Sport,marinus+larsen,,-9.000000
3229,IK Brage,Pontus Jonsson,c2d33e21-d2f9-447a-98c8-6bc7815073ab,Sub,True,in,60,60.0,60,60.0,...,100.0,https://www.transfermarkt.com/pontus-jonsson/p...,IK Brage,CF,24,150000.0,Global Football Consulting,pontus+jonsson,,-1.363636
3230,IK Brage,Noah Östberg,bd70422a-62a3-46b2-8b40-d8e415e6025a,Sub,True,in,74,74.0,74,74.0,...,100.0,https://www.transfermarkt.com/noah-ostberg/pro...,IK Brage,RB,20,125000.0,,noah+östberg,182.0,-2.368421


## Dashboard skiss

### Block 1: Player list

In [None]:
summary_stats

#### Overview table

In [1155]:
main_table = summary_stats[['team', 'name', 'ålder', 'position', 'minuter', 'usage_rate', 'starter',
               'mål', 'assists', 'gf90', 'ga90', 'pm90', 'impact90',
               'yellow_cards', 'red_cards']]

main_table

Unnamed: 0,team,name,ålder,position,minuter,usage_rate,starter,mål,assists,gf90,ga90,pm90,impact90,yellow_cards,red_cards
0,IK Brage,Adil Titi,26.0,DM,958.0,0.3962,9,3,0,1.0334,1.12735,-0.09395,0.18789,2,0
1,IK Brage,Adrian Engdahl,31.0,GK,505.0,0.77573,5,0,0,1.42574,1.78218,-0.35644,0.0,0,0
2,IK Brage,Albin Pihlström,18.0,Attack,0.0,,0,0,0,,,,,0,0
3,IK Brage,Alexander Zetterström,30.0,CB,5066.0,0.97273,55,0,0,1.20805,1.13699,0.07106,-0.09771,9,1
4,IK Brage,Amar Muhsin,28.0,CF,3501.0,0.83656,38,25,5,1.38817,1.31105,0.07712,0.64781,3,0
5,IK Brage,André Bernardini,29.0,GK,0.0,,0,0,0,,,,,0,0
6,IK Brage,Anton Lundin,30.0,RM,1640.0,0.6298,17,4,2,1.31707,1.42683,-0.10976,0.03841,8,0
7,IK Brage,Cesar Weilid,28.0,RB,4222.0,0.90796,48,1,6,1.23638,1.02321,0.21317,0.02771,5,0
8,IK Brage,Christopher Redenstrand,27.0,LB,2563.0,0.91864,29,1,1,0.98322,0.91299,0.07023,0.04214,1,0
9,IK Brage,Eirik Asante Gayi,25.0,CB,72.0,0.15484,0,0,0,0.0,1.25,-1.25,0.0,0,0


#### Graph 1: availability

In [1156]:
fig = px.scatter(summary_stats, x = "minuter", y = "usage_rate", size = "matcher",
                 color = "position", hover_name="name")

fig.show()

#### Graph 2: Impact vs minutes

In [1157]:
fig = px.scatter(summary_stats, x = "minuter", y = "impact90", size = "matcher",
                 hover_name="name")

fig.show()

#### Graph 3: Off vs def balance

In [1158]:
fig = px.scatter(
    summary_stats,
    x="gf90",
    y="ga90",
    color="position",
    size="minuter",
    hover_name="name",
    title="Offensiv vs defensiv balans"
)

fig.update_yaxes(autorange="reversed")
fig.show()


#### Graph 4: Risk plot

In [1159]:

fig = px.scatter(summary_stats, x = "minuter", y = "risk_measure", hover_name="name")

fig.show()

### Block 2: Player profile

#### Player header

In [1160]:
col_selection = ['name', 'team', 'position', 'ålder', 'längd', 'market_value']
summary_stats[summary_stats["name"] == "Gustav Nordh"][col_selection]

Unnamed: 0,name,team,position,ålder,längd,market_value
17,Gustav Nordh,IK Brage,LW,25,175.0,350000.0


#### Match log

#### Minutes per match

In [1161]:
(players_enriched[players_enriched["name"] == "Gustav Nordh"]
    [['match_start_datetime', 'match_id_short', 'player_type', 'minutes_played', 'goals',
    'assists', 'gf_on', 'ga_on', 'plus_minus_raw', 'on_goal_diff', 'off_goal_diff',
    'yellow_card', 'red_card']]
    .sort_values("match_start_datetime")
)

Unnamed: 0,match_start_datetime,match_id_short,player_type,minutes_played,goals,assists,gf_on,ga_on,plus_minus_raw,on_goal_diff,off_goal_diff,yellow_card,red_card
97,2024-06-23 15:00:00,2024_IK Brage - GIF Sundsvall,Starting,90.0,0,0,0.0,5.0,-5.0,-5.0,0.0,False,False
402,2024-09-14 15:00:00,2024_GIF Sundsvall - IK Brage,Starting,67.0,0,0,0.0,0.0,0.0,0.0,0.0,False,False
1604,2025-03-30 13:00:00,2025_Landskrona BoIS - IK Brage,Starting,38.0,0,0,0.0,1.0,-1.0,-1.0,1.0,False,False
1954,2025-04-07 19:00:00,2025_IK Brage - Falkenbergs FF,Sub,45.0,0,0,0.0,1.0,-1.0,-1.0,-1.0,False,False
1704,2025-04-13 15:00:00,2025_IK Brage - Varbergs BoIS FC,Sub,18.0,0,0,0.0,0.0,0.0,0.0,0.0,False,False
1100,2025-04-20 15:00:00,2025_Trelleborgs FF - IK Brage,Starting,65.0,0,0,0.0,0.0,0.0,0.0,1.0,False,False
1469,2025-04-27 13:00:00,2025_Umeå FC - IK Brage,Sub,29.0,1,0,1.0,0.0,1.0,1.0,0.0,False,False
1127,2025-05-05 19:00:00,2025_IK Brage - Östersund,Sub,35.0,0,0,0.0,1.0,-1.0,-1.0,-1.0,False,False
1822,2025-05-13 19:00:00,2025_Helsingborgs IF - IK Brage,Starting,70.0,0,0,0.0,1.0,-1.0,-1.0,0.0,False,False
1375,2025-05-18 15:00:00,2025_IK Brage - IK Oddevold,Starting,46.0,0,0,1.0,1.0,0.0,0.0,-1.0,False,False


In [1162]:
# Från ovan kan klicka på spelare -> tar en till grafik nedan

player_name = "Gustav Nordh"

m = players_enriched[
    # (players_enriched["team"]=="IK Brage") &
    (players_enriched["name"] == player_name)
    # (players_enriched["minutes_played"] > 0)
].copy()

m = m.sort_values("match_start_datetime")
# SÄKERSTÄLL NUMERISKT
m["start_minute"] = m["start_minute"].astype(float)
m["end_minute"] = m["end_minute"].astype(float)

fig = px.bar(m, 
    base = "start_minute",
    # x = "minutes_played",
    # y = "match_id_short",
    x = "match_id_short",
    y = "minutes_played",
    # orientation = 'h',
    color = "player_type",
    hover_name="name"
)


fig.update_yaxes(
    type="linear",
    title="Matchminut",
    range=[0, m["match_length"].max()],
    dtick=5
)

fig.update_layout(
    title=f"Speltid för {player_name}",
    xaxis_tickangle=-45,
    height=500
)

fig.add_hline(y=45, line_dash="dash")
fig.add_hline(y=90, line_dash="dash")

fig.show()

#### Impact per match

In [1163]:
player_data = (players_enriched[players_enriched["name"] == "Gustav Nordh"]
    [['match_start_datetime', 'match_id_short', 'name', 'player_type', 'minutes_played', 'goals',
    'assists', 'gf_on', 'ga_on', 'plus_minus_raw', 'on_goal_diff', 'off_goal_diff',
    'yellow_card', 'red_card']]
    .sort_values("match_start_datetime")
)

fig = px.line(player_data, x = "match_id_short", y = "plus_minus_raw",
              markers = True)

fig.update_layout(
    xaxis_title="",
    yaxis_title="Plus-minus",
    xaxis_tickangle=-45,
    legend_title = ""
)

fig.show()


#### Defensive impact

In [1164]:
player_data = (players_enriched[players_enriched["name"] == "Gustav Nordh"]
    [['match_start_datetime', 'match_id_short', 'name', 'player_type', 'minutes_played', 'goals',
    'assists', 'gf_on', 'ga_on', 'plus_minus_raw', 'on_goal_diff', 'off_goal_diff',
    'yellow_card', 'red_card']]
    .sort_values("match_start_datetime")
)

fig = px.line(player_data, x = "match_id_short", y = "ga_on",
              markers = True)

fig.update_layout(
    xaxis_title="",
    yaxis_title="GA-on",
    xaxis_tickangle=-45,
    legend_title = ""
)

fig.show()


### Block 3: Player Comparison

#### Plots

In [1165]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

players = [
    "Amar Muhsin",
    "Gustav Nordh",
    "Haris Brkic",
    "Jacob Stensson",
    "Alexander Zetterström"
]

cmp = summary_stats[summary_stats["name"].isin(players)].copy()

fig_avail = px.bar(
    cmp,
    y="name",
    x=["usage_rate", "starter_rate"],
    orientation="h",
    barmode="group",
    title="Availability – usage & starter rate",
    labels={"value": "Rate", "variable": "Metric"}
)

fig_avail.update_layout(xaxis_range=[0,1])
fig_avail.show()


In [1166]:
fig_off = px.bar(
    cmp,
    x="name",
    y=["mål", "assists", "gf90"],
    barmode="group",
    title="Offensiv produktion"
)

fig_off.show()


In [1167]:
fig_def = px.scatter(
    cmp,
    x="clean_sheet_rate",
    y="ga90",
    size="minuter",
    color="name",
    title="Defensiv reliabilitet",
    labels={
        "clean_sheet_rate": "Clean sheet rate",
        "ga90": "Goals conceded / 90"
    }
)

fig_def.update_yaxes(autorange="reversed")
fig_def.show()


In [1168]:
fig_impact = px.scatter(
    cmp,
    x="pm90",
    y="on_off_total",
    size="minuter",
    color="name",
    title="Impact – on pitch effect",
    labels={
        "pm90": "Plus/minus per 90",
        "on_off_total": "On–off goal diff"
    }
)

fig_impact.add_hline(y=0, line_dash="dash")
fig_impact.add_vline(x=0, line_dash="dash")
fig_impact.show()


In [1169]:
metrics = {
    "usage_rate": "Usage",
    "gf90": "Offense",
    "ga90": "Defense (inv)",
    "pm90": "Impact",
    "clean_sheet_rate": "Clean sheets"
}

radar_df = summary_stats.copy()

# invertera defensivt mått
radar_df["ga90_inv"] = radar_df["ga90"].max() - radar_df["ga90"]

norm_cols = []
for col in ["usage_rate","gf90","ga90_inv","pm90","clean_sheet_rate"]:
    radar_df[col+"_pct"] = radar_df[col].rank(pct=True) * 100
    norm_cols.append(col+"_pct")



fig = go.Figure()

for _, p in radar_df[radar_df["name"].isin(players)].iterrows():
    fig.add_trace(go.Scatterpolar(
        r=[
            p["usage_rate_pct"],
            p["gf90_pct"],
            p["ga90_inv_pct"],
            p["pm90_pct"],
            p["clean_sheet_rate_pct"]
        ],
        theta=[
            "Availability",
            "Offense",
            "Defense",
            "Impact",
            "Clean sheets"
        ],
        fill="toself",
        name=p["name"]
    ))

fig.update_layout(
    polar=dict(radialaxis=dict(visible=True, range=[0,100])),
    title="Spelarprofil (percentiler inom laget)"
)

fig.show()


#### Mean vs Std impact

In [1170]:
players_enriched["impact90"] = (players_enriched["plus_minus_impact"] / (players_enriched["minutes_played"] * 90))

players_enriched

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,...,tm_match_score,profile_url,club,position,age,market_value,agent,player_query,height,impact90
0,Trelleborgs FF,Andreas Beck Larsen (MV),a7853b1b-e62b-4109-9425-275cce5af086,Starting,False,,,,0,[],...,,,,,,,,,,-0.000060
1,Trelleborgs FF,Felix Hörberg,5aeec47a-aa72-4a80-8f78-068dd844818a,Starting,False,,,,0,[],...,,,,,,,,,,0.000084
2,Trelleborgs FF,Abel Ogwuche,84f39d49-e12d-4f3a-bd18-3d798bfe2d0f,Starting,False,,,,0,[],...,,,,,,,,,,0.000000
3,Trelleborgs FF,Tobias Karlsson,9d5fd108-3160-4c01-8b96-cdda76bd4142,Starting,False,,,,0,[],...,,,,,,,,,,0.000000
4,Trelleborgs FF,Eren Alievski,c015f3a6-5c6c-4812-a11c-5a0614d1682e,Starting,False,,,,0,[],...,,,,,,,,,,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2149,IK Brage,Lorik Konjuhi,f502e048-1178-474a-aab2-e0ab8f8713bb,Sub,True,in,88,88.0,0,[],...,100.0,https://www.transfermarkt.com/lorik-konjuhi/pr...,IK Brage,CB,23,75000.0,,lorik+konjuhi,,0.000000
2150,IK Brage,Marinus Larsen,16bee41b-51df-48a2-aa2c-ded2d43c6579,Sub,True,in,88,88.0,0,[],...,100.0,https://www.transfermarkt.com/marinus-larsen/p...,IK Brage,CM,22,400000.0,People In Sport,marinus+larsen,,0.000000
2151,IK Brage,Pontus Jonsson,c2d33e21-d2f9-447a-98c8-6bc7815073ab,Sub,True,in,60,60.0,0,[],...,100.0,https://www.transfermarkt.com/pontus-jonsson/p...,IK Brage,CF,24,150000.0,Global Football Consulting,pontus+jonsson,,0.000000
2152,IK Brage,Noah Östberg,bd70422a-62a3-46b2-8b40-d8e415e6025a,Sub,True,in,74,74.0,0,[],...,100.0,https://www.transfermarkt.com/noah-ostberg/pro...,IK Brage,RB,20,125000.0,,noah+östberg,182.0,0.000000


In [1171]:
impact_summary = (
    players_enriched[
        (players_enriched["team"] == "IK Brage") &
        (players_enriched["minutes_played"] >= 20) &
        (~players_enriched["impact90"].isna())
    ]
    .groupby("name")
    .agg(
        impact_mean=("impact90", "mean"),
        impact_std=("impact90", "std"),
        matches_used=("impact90", "count"),
        minutes_total=("minutes_played", "sum")
    )
    .reset_index()
)


In [1172]:
fig = px.scatter(
    impact_summary,
    x="impact_std",
    y="impact_mean",
    size="minutes_total",
    color = "name",
    hover_name="name",
    title="Impact vs Consistency",
    labels={
        "impact_std": "Impact variability",
        "impact_mean": "Average impact per 90"
    }
)

# fig.update_xaxes(autorange="reversed")

fig.show()


In [None]:
# players_df = players_df[players_df["minutes_played"] > 0]
# players_df

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,...,clutch_goals,on_goal_diff,off_goal_diff,on_off_diff,gf_on,ga_on,gf90,ga90,clean_sheet,match_id_short
0,Trelleborgs FF,Mathias Nilsson (MV),21531f26-5933-4af9-a3f2-87fd7bfe30cf,Starting,False,,,,0,[],...,0,-1,0,-1,0,1,0.0,0.967742,0,Trelleborgs FF - IK Brage
1,Trelleborgs FF,Emmanuel Godwin,448bd50d-c89e-4790-b4bf-3dd20f1ccd6d,Starting,False,,,,0,[],...,0,-1,0,-1,0,1,0.0,0.967742,0,Trelleborgs FF - IK Brage
2,Trelleborgs FF,Tobias Karlsson,9d5fd108-3160-4c01-8b96-cdda76bd4142,Starting,False,,,,0,[],...,0,-1,0,-1,0,1,0.0,0.967742,0,Trelleborgs FF - IK Brage
3,Trelleborgs FF,Charlie Weberg,3b59e9e5-b136-4310-ab98-a602f082dd75,Starting,False,,,,0,[],...,0,-1,0,-1,0,1,0.0,0.967742,0,Trelleborgs FF - IK Brage
4,Trelleborgs FF,Eren Alievski,c015f3a6-5c6c-4812-a11c-5a0614d1682e,Starting,False,,,,0,[],...,0,-1,0,-1,0,1,0.0,0.967742,0,Trelleborgs FF - IK Brage
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1071,IK Brage,Malte Persson,5126b059-0ce7-45c3-beac-06026b1bcd5b,Sub,True,in,74,74.0,0,[],...,0,-1,2,-3,0,1,0.0,4.736842,0,Varbergs BoIS FC - IK Brage
1072,IK Brage,Lorik Konjuhi,f502e048-1178-474a-aab2-e0ab8f8713bb,Sub,True,in,88,88.0,0,[],...,0,-1,2,-3,0,1,0.0,18.000000,0,Varbergs BoIS FC - IK Brage
1073,IK Brage,Marinus Larsen,16bee41b-51df-48a2-aa2c-ded2d43c6579,Sub,True,in,88,88.0,0,[],...,0,-1,2,-3,0,1,0.0,18.000000,0,Varbergs BoIS FC - IK Brage
1074,IK Brage,Pontus Jonsson,c2d33e21-d2f9-447a-98c8-6bc7815073ab,Sub,True,in,60,60.0,0,[],...,0,-1,2,-3,0,1,0.0,2.727273,0,Varbergs BoIS FC - IK Brage


In [1173]:
import plotly.express as px

pname = "Alexander Zetterström"

p = players_df[players_df["name"]==pname].sort_values("match_id_short")

fig = px.line(
    p,
    x="match_id_short",
    y=["minutes_played","on_off_diff"],
    markers=True,
    title=f"{pname} – säsongsutveckling"
)

fig.update_layout(xaxis_tickangle=-45)
fig.show()


In [468]:
# AGGREGERAR DATA ÖVER SÄSONG FÖR VISUALISERING

season = (
    players_df[players_df["team"] == "IK Brage"]
    .groupby(["name","team"])
    .agg(
        matches=("matches_played","sum"),
        minutes=("minutes_played","sum"),
        goals=("goals","sum"),
        assists = ("assists", "sum"),
        plus_minus = ("plus_minus_raw", "sum"),
        on_off=("on_off_diff","sum"),
        clutch=("clutch_goals","sum"),
        cs=("clean_sheet","sum")
    )
    .reset_index()
    .sort_values("plus_minus",ascending=False)
)

season


Unnamed: 0,name,team,matches,minutes,goals,assists,plus_minus,on_off,clutch,cs
17,Noah Åstrand John,IK Brage,22,1028.0,1,2,13.0,20.0,0.0,12.0
8,Gustav Berggren,IK Brage,28,2337.0,0,2,11.0,15.0,0.0,6.0
4,Cesar Weilid,IK Brage,21,1715.0,1,4,10.0,13.0,0.0,6.0
20,Pontus Jonsson,IK Brage,22,533.0,3,1,7.0,12.0,3.0,16.0
23,Viktor Frodig (MV),IK Brage,25,2293.0,0,0,6.0,8.0,0.0,3.0
2,Amar Muhsin,IK Brage,29,2175.0,21,5,5.0,9.0,5.0,6.0
9,Gustav Nordh,IK Brage,30,2101.0,5,10,5.0,8.0,0.0,7.0
13,Lorik Konjuhi,IK Brage,17,1201.0,0,0,4.0,0.0,0.0,6.0
7,Filip Trpcevski,IK Brage,28,2481.0,7,7,1.0,1.0,4.0,5.0
1,Alexander Zetterström,IK Brage,28,2591.0,0,0,1.0,1.0,0.0,4.0


In [470]:
import plotly.graph_objects as go

metrics = ["goals","on_off","clutch","minutes","plus_minus"]

radar_df = season.copy()

for m in metrics:
    radar_df[m+"_pct"] = radar_df[m].rank(pct=True)

p = radar_df[radar_df["name"]=="Alexander Zetterström"].iloc[0]

fig = go.Figure()
fig.add_trace(go.Scatterpolar(
    r=[p[m+"_pct"] for m in metrics],
    theta=metrics,
    fill='toself',
    name=p["name"]
))

fig.update_layout(
    title="Spelarprofil (percentil)",
    polar=dict(radialaxis=dict(range=[0,1]))
)
fig.show()

radar_df


Unnamed: 0,name,team,matches,minutes,goals,assists,plus_minus,on_off,clutch,cs,goals_pct,on_off_pct,clutch_pct,minutes_pct,plus_minus_pct
17,Noah Åstrand John,IK Brage,22,1028.0,1,2,13.0,20.0,0.0,12.0,0.645833,1.0,0.395833,0.416667,1.0
8,Gustav Berggren,IK Brage,28,2337.0,0,2,11.0,15.0,0.0,6.0,0.3125,0.958333,0.395833,0.916667,0.958333
4,Cesar Weilid,IK Brage,21,1715.0,1,4,10.0,13.0,0.0,6.0,0.645833,0.916667,0.395833,0.625,0.916667
20,Pontus Jonsson,IK Brage,22,533.0,3,1,7.0,12.0,3.0,16.0,0.791667,0.875,0.916667,0.291667,0.875
23,Viktor Frodig (MV),IK Brage,25,2293.0,0,0,6.0,8.0,0.0,3.0,0.3125,0.770833,0.395833,0.875,0.833333
2,Amar Muhsin,IK Brage,29,2175.0,21,5,5.0,9.0,5.0,6.0,1.0,0.833333,1.0,0.833333,0.770833
9,Gustav Nordh,IK Brage,30,2101.0,5,10,5.0,8.0,0.0,7.0,0.916667,0.770833,0.395833,0.75,0.770833
13,Lorik Konjuhi,IK Brage,17,1201.0,0,0,4.0,0.0,0.0,6.0,0.3125,0.5,0.395833,0.5,0.708333
7,Filip Trpcevski,IK Brage,28,2481.0,7,7,1.0,1.0,4.0,5.0,0.958333,0.666667,0.958333,0.958333,0.645833
1,Alexander Zetterström,IK Brage,28,2591.0,0,0,1.0,1.0,0.0,4.0,0.3125,0.666667,0.395833,1.0,0.645833


In [471]:
px.scatter(
    season,
    x="minutes",
    y="plus_minus",
    size="goals",
    color="team",
    hover_name="name",
    title="Impact vs speltid"
)


In [472]:
import plotly.express as px

m = players_df[
    (players_df["match_id"].str.contains("Landskrona BoIS - IK Brage", na=False)) &
    (players_df["team"]=="IK Brage") &
    (players_df["minutes_played"] > 0)
].copy()

# SÄKERSTÄLL NUMERISKT
m["start_minute"] = m["start_minute"].astype(float)
m["end_minute"] = m["end_minute"].astype(float)

fig = px.bar(m, 
    base = "start_minute",
    x = "minutes_played",
    y = "name",
    orientation = 'h',
    color = "player_type"
)

fig.update_yaxes(autorange="reversed")

fig.update_xaxes(
    type="linear",
    title="Matchminut",
    range=[0, m["match_length"].max()],
    dtick=5
)

fig.update_layout(
    title="IK Brage – match usage vs Landskrona",
    height=400
)

fig.add_vline(x=45, line_dash="dash")
fig.add_vline(x=90, line_dash="dash")

fig.show()


In [473]:
keepers

Unnamed: 0,name,team,matches,minutes,goals,on_off,clutch,cs
362,Viktor Frodig (MV),IK Brage,25,2293.0,0,8,0,3
271,Morten Sætra (MV),IK Oddevold,2,180.0,0,5,0,1
43,Amr Kaddoura (MV),Landskrona BoIS,0,0.0,0,3,0,1
172,Jakob Kindberg (MV),Kalmar FF,1,90.0,0,3,0,1
108,Elias Markusson Kurula (MV),IK Brage,0,0.0,0,2,0,5
301,Otto Lindell (MV),Sandvikens IF,0,0.0,0,2,0,2
46,André Bernardini (MV),Västerås SK FK,0,0.0,0,2,0,2
250,Max Croon (MV),Östersund,1,93.0,0,1,0,1
353,Victor Astor (MV),Trelleborgs FF,0,0.0,0,1,0,1
260,Melker Uppenberg (MV),Umeå FC,0,0.0,0,1,0,2


In [474]:
keepers = season[season["name"].str.contains("(MV)",regex=False)]

px.bar(
    keepers.sort_values("cs",ascending=False),
    x="name",
    y="cs",
    title="Clean sheets"
)


In [475]:
px.bar(
    season.sort_values("clutch",ascending=False).head(15),
    x="name",
    y="clutch",
    title="Clutch goals"
)


In [476]:
team = "IK Brage"

t = season[season["team"]==team]

px.bar(
    t.sort_values("plus_minus"),
    x="name",
    y="plus_minus",
    title=f"{team} – påverkan"
)


In [302]:
def aggregate_season_stats(players_df):

    players_df["matches_played"] = players_df["minutes_played"] > 0

    df = players_df.copy()
    
    # Grupp per spelare + lag
    agg = df.groupby(["name","team"]).agg(
        matches_played = ("matches_played","sum"),
        minutes_played = ("minutes_played","sum"),
        total_minutes = ("match_length", "sum"),
        goals = ("goals","sum"),
        assists = ("assists","sum"),
        yellow_cards = ("yellow_card", "sum"),
        red_cards = ("red_card", "sum"),
        plus_minus_raw = ("plus_minus_raw","sum"),
        plus_minus_impact = ("plus_minus_impact","sum")
    ).reset_index()

    # Plus/minus per 90
    agg["plus_minus_per90"] = agg["plus_minus_raw"] / agg["minutes_played"] * 90
    agg["impact_per90"] = agg["plus_minus_impact"] / agg["minutes_played"] * 90

    # Skydda mot div 0
    agg.loc[agg["minutes_played"]==0, ["plus_minus_per90","impact_per90"]] = 0

    # Sortera på impact
    agg = agg.sort_values("plus_minus_per90", ascending=False)

    return agg

# Använd:
season_stats_df = aggregate_season_stats(players_df)
season_stats_df

Unnamed: 0,name,team,matches_played,minutes_played,total_minutes,goals,assists,yellow_cards,red_cards,plus_minus_raw,plus_minus_impact,plus_minus_per90,impact_per90
279,Nils Salomonsson Önnebo,Varbergs BoIS FC,1,2.0,93,0,0,0,0,1,0.0,45.000000,0.000000
328,Seif Ali Hindi,Falkenbergs FF,1,5.0,187,0,0,0,0,2,0.0,36.000000,0.000000
120,Erick Brendon Pinheiro Da Silva,Östersund,1,16.0,185,0,0,0,0,2,0.0,11.250000,0.000000
19,Albert Ejupi,IK Oddevold,1,10.0,90,0,0,0,0,1,0.0,9.000000,0.000000
381,Zakaria Loukili,Varbergs BoIS FC,1,13.0,93,1,0,0,0,1,1.0,6.923077,6.923077
...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,Hasan Dana,Örebro,1,13.0,95,1,0,0,0,-1,1.0,-6.923077,6.923077
289,Oliwer Stark,IK Brage,3,22.0,1210,0,1,0,0,-2,0.7,-8.181818,2.863636
44,Andreas Murbeck,Landskrona BoIS,1,25.0,92,0,0,1,0,-3,-0.5,-10.800000,-1.800000
42,Ammar Asani,Trelleborgs FF,1,7.0,185,0,0,0,0,-2,0.0,-25.714286,0.000000


## Plot season stats by player for team

In [224]:
import plotly.express as px

# IKB samt minst 0 min
ikb_df = season_stats_df[(season_stats_df["team"] == "IK Brage") & (season_stats_df["minutes_played"] > 45)]

fig = px.bar(ikb_df, x = "name", y = "plus_minus_per90", color = "name")

fig.update_layout(
    xaxis_title="",
    # yaxis_title=plot_var,
    xaxis_tickangle=-45,
    legend_title = ""
)

fig.show()

In [221]:
agg = (players_df
    [players_df["team"] == "IK Brage"]
    .groupby(["name", "match_id"]).agg(
        matches_played = ("matches_played","sum"),
        minutes_played = ("minutes_played","sum"),
        total_minutes = ("match_length", "sum"),
        goals = ("goals","sum"),
        assists = ("assists","sum"),
        yellow_cards = ("yellow_card", "sum"),
        red_cards = ("red_card", "sum"),
        plus_minus_raw = ("plus_minus_raw","sum"),
        plus_minus_impact = ("plus_minus_impact","sum")
    ).reset_index()
)

agg = agg[agg["minutes_played"] > 0]

agg["plus_minus_per90"] = agg["plus_minus_raw"] / agg["minutes_played"] * 90

agg.loc[agg["minutes_played"]==0, ["plus_minus_per90","impact_per90"]] = 0

agg["match_id_short"] = agg["match_id"].str.split(" - Matchfakta", expand = True)[0]

match_order = (
    players_df
        .drop_duplicates("match_id")
        ["match_id"]
        .tolist()
)

# kort version
match_order_short = [m.split(" - Matchfakta")[0] for m in match_order]

agg["match_id_short"] = pd.Categorical(
    agg["match_id_short"],
    categories=match_order_short,
    ordered=True
)

agg = agg.sort_values("match_id_short")

all_matches = match_order_short

fixed = []

for name in agg["name"].unique():
    
    sub = agg[agg["name"] == name].set_index("match_id_short")
    
    sub = sub.reindex(all_matches)
    sub["name"] = name
    sub["match_id_short"] = sub.index
    
    fixed.append(sub.reset_index(drop=True))

plot_df = pd.concat(fixed, ignore_index=True)

plot_var = "plus_minus_raw"

fig = px.line(
    plot_df,
    x="match_id_short",
    y=plot_var,
    color="name",
    markers=True,
    title=f"{plot_var} per match",
    category_orders={
        "match_id_short": match_order_short
    }
)

fig.update_layout(
    xaxis_title="",
    yaxis_title=plot_var,
    xaxis_tickangle=-45,
    legend_title = ""
)

# fig.update_yaxes(range=[0, 100])

fig.show()


In [162]:
print(season_stats_df[season_stats_df["team"] == "IK Brage"].head(10))

                    name      team  matches_played  minutes_played  \
13   Adrian Engdahl (MV)  IK Brage               7           505.0   
282    Noah Åstrand John  IK Brage              22          1028.0   
78          Cesar Weilid  IK Brage              21          1715.0   
311       Pontus Jonsson  IK Brage              22           533.0   
145      Gustav Berggren  IK Brage              28          2337.0   
39           Amar Muhsin  IK Brage              29          2175.0   
362   Viktor Frodig (MV)  IK Brage              25          2293.0   
149         Gustav Nordh  IK Brage              30          2101.0   
245       Marinus Larsen  IK Brage              20           674.0   
133      Filip Trpcevski  IK Brage              28          2481.0   

     total_minutes  goals  assists  yellow_cards  red_cards  plus_minus_raw  \
13            2783      0        0             0          0               5   
282           2513      1        2             4          0            

In [81]:
print(players_df)

             team                  name player_type  was_substituted  \
0   GIF Sundsvall  Jonas Olsson (MV)(K)    Starting            False   
1   GIF Sundsvall         Monir Jelassi    Starting            False   
2   GIF Sundsvall        Amaro Bahtijar    Starting            False   
3   GIF Sundsvall          Malte Hallin    Starting             True   
4   GIF Sundsvall        Lucas Forsberg    Starting            False   
..            ...                   ...         ...              ...   
67      Kalmar FF         Gibril Sosseh         Sub            False   
68      Kalmar FF        Jakob Kindberg         Sub            False   
69      Kalmar FF    Charlie Rosenqvist         Sub            False   
70      Kalmar FF             Ali Zaher         Sub             True   
71      Kalmar FF            Lars Sætra         Sub             True   

   sub_direction sub_minute  sub_minute_int  goals goal_minutes  assists  \
0           None       None             NaN      0         

In [124]:
players_df

Unnamed: 0,team,name,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,assists,...,red_card,match_id,match_length,start_minute,end_minute,minutes_played,plus_minus,plus_minus_raw,plus_minus_per90,plus_minus_impact
0,Trelleborgs FF,Mathias Nilsson (MV),Starting,False,,,,0,[],0,...,False,Trelleborgs FF - IK Brage - Matchfakta - Svens...,93,0.0,93,93.0,0,-1,-0.000119,0.0
1,Trelleborgs FF,Emmanuel Godwin,Starting,False,,,,0,[],0,...,False,Trelleborgs FF - IK Brage - Matchfakta - Svens...,93,0.0,93,93.0,0,-1,-0.000119,0.0
2,Trelleborgs FF,Tobias Karlsson,Starting,False,,,,0,[],0,...,False,Trelleborgs FF - IK Brage - Matchfakta - Svens...,93,0.0,93,93.0,0,-1,-0.000119,-0.5
3,Trelleborgs FF,Charlie Weberg,Starting,False,,,,0,[],0,...,False,Trelleborgs FF - IK Brage - Matchfakta - Svens...,93,0.0,93,93.0,0,-1,-0.000119,-0.5
4,Trelleborgs FF,Eren Alievski,Starting,False,,,,0,[],0,...,False,Trelleborgs FF - IK Brage - Matchfakta - Svens...,93,0.0,93,93.0,0,-1,-0.000119,-0.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1072,IK Brage,Lorik Konjuhi,Sub,True,in,88,88.0,0,[],0,...,False,Varbergs BoIS FC - IK Brage - Matchfakta - Sve...,93,88.0,93,5.0,0,-1,-0.002222,0.0
1073,IK Brage,Marinus Larsen,Sub,True,in,88,88.0,0,[],0,...,False,Varbergs BoIS FC - IK Brage - Matchfakta - Sve...,93,88.0,93,5.0,0,-1,-0.002222,0.0
1074,IK Brage,Pontus Jonsson,Sub,True,in,60,60.0,0,[],0,...,False,Varbergs BoIS FC - IK Brage - Matchfakta - Sve...,93,60.0,93,33.0,0,-1,-0.000337,0.0
1075,IK Brage,Noah Östberg,Sub,True,in,74,74.0,0,[],0,...,False,Varbergs BoIS FC - IK Brage - Matchfakta - Sve...,93,74.0,93,19.0,0,-1,-0.000585,0.0


In [77]:
players_df[players_df["match_id"] == "GIF Sundsvall - Helsingborgs IF - Matchfakta - Svensk fotboll"]["plus_minus"].sum()

np.int64(-2)

In [60]:
parser = MatchTeamParser()

with open("superettan-matcher/GIF Sundsvall - Helsingborgs IF - Matchfakta - Svensk fotboll.html", encoding="utf-8") as f:
    html = f.read()

match_data = parser.parse_team_info(html)
df = parser.create_lineup_dataframe(match_data)

df


Unnamed: 0,team,name,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,assists,assists_minutes,yellow_card,red_card
0,GIF Sundsvall,Jonas Olsson (MV)(K),Starting,False,,,,0,[],0,[],False,False
1,GIF Sundsvall,Monir Jelassi,Starting,False,,,,0,[],0,[],False,False
2,GIF Sundsvall,Amaro Bahtijar,Starting,False,,,,0,[],0,[],False,False
3,GIF Sundsvall,Malte Hallin,Starting,True,out,75,75.0,0,[],0,[],False,False
4,GIF Sundsvall,Lucas Forsberg,Starting,False,,,,0,[],0,[],False,False
5,GIF Sundsvall,Alexandros Pantelidis,Starting,False,,,,0,[],0,[],True,False
6,GIF Sundsvall,Marcelo Palomino,Starting,True,out,75,75.0,1,[53],0,[],False,False
7,GIF Sundsvall,Ture Sandberg,Starting,False,,,,0,[],0,[],True,False
8,GIF Sundsvall,Marc Manchon I Armans,Starting,True,out,90+6,96.0,0,[],0,[],False,False
9,GIF Sundsvall,Taiki Kagayama,Starting,False,,,,0,[],0,[],False,False


In [43]:
# =====================
# MINUT-LOGIK
# =====================

def calculate_minutes_played(df, match_length):

    df = df.copy()

    start_minutes = []
    end_minutes = []
    minutes_played = []

    for _, row in df.iterrows():

        # ----- START -----
        if row["player_type"] == "Starting":
            start = 0
        else:
            start = row["sub_minute_int"]

        # ----- SLUT -----
        if row["sub_direction"] == "out":
            end = row["sub_minute_int"]
        else:
            end = match_length

        # fallback
        if start is None:
            start = 0

        if end is None:
            end = match_length

        played = max(0, end - start)

        start_minutes.append(start)
        end_minutes.append(end)
        minutes_played.append(played)

    df["start_minute"] = start_minutes
    df["end_minute"] = end_minutes
    df["minutes_played"] = minutes_played

    return df


match_length = match_data["match_length"]

df = calculate_minutes_played(df, match_length)

# df[["name","start_minute","end_minute","minutes_played"]]
df

Unnamed: 0,team,name,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,assists,assists_minutes,yellow_card,red_card,start_minute,end_minute,minutes_played
0,GIF Sundsvall,Jonas Olsson (MV)(K),Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0
1,GIF Sundsvall,Monir Jelassi,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0
2,GIF Sundsvall,Amaro Bahtijar,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0
3,GIF Sundsvall,Malte Hallin,Starting,True,out,75,75.0,0,[],0,[],False,False,0.0,75.0,75.0
4,GIF Sundsvall,Lucas Forsberg,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0
5,GIF Sundsvall,Alexandros Pantelidis,Starting,False,,,,0,[],0,[],True,False,0.0,96.0,96.0
6,GIF Sundsvall,Marcelo Palomino,Starting,True,out,75,75.0,1,[53],0,[],False,False,0.0,75.0,75.0
7,GIF Sundsvall,Ture Sandberg,Starting,False,,,,0,[],0,[],True,False,0.0,96.0,96.0
8,GIF Sundsvall,Marc Manchon I Armans,Starting,True,out,90+6,96.0,0,[],0,[],False,False,0.0,96.0,96.0
9,GIF Sundsvall,Taiki Kagayama,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0


In [44]:
def parse_minute(minute_str):
    if not minute_str:
        return None

    txt = minute_str.replace("'","").strip()

    if "+" in txt:
        base, extra = txt.split("+")
        return int(base) + int(extra)

    return int(txt)

def build_goal_table(match_data):
    rows = []

    for g in match_data["goals"]:
        rows.append({
            "minute": parse_minute(g["minute"]),
            "scorer": g["scorer"],
            "scorer_fplguid": g["scorer_fplguid"]
        })

    return pd.DataFrame(rows)

def calculate_plus_minus(df, goals_df):

    df = df.copy()

    pm_values = []

    for _, row in df.iterrows():

        start = row["start_minute"]
        end = row["end_minute"]
        team = row["team"]

        plus = 0
        minus = 0

        for _, g in goals_df.iterrows():

            m = g["minute"]

            # var spelaren på plan?
            if not (start <= m <= end):
                continue

            # vem gjorde mål?
            scorer_team = df.loc[
                df["name"] == g["scorer"],
                "team"
            ].values

            if len(scorer_team) == 0:
                continue

            scorer_team = scorer_team[0]

            if scorer_team == team:
                plus += 1
            else:
                minus += 1

        pm_values.append(plus - minus)

    df["plus_minus"] = pm_values

    return df


In [46]:
# match_length redan beräknad
df = calculate_minutes_played(df, match_length)

goals_df = build_goal_table(match_data)

df = calculate_plus_minus(df, goals_df)

# df[["name","team","minutes_played","plus_minus"]]
df

Unnamed: 0,team,name,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,assists,assists_minutes,yellow_card,red_card,start_minute,end_minute,minutes_played,plus_minus
0,GIF Sundsvall,Jonas Olsson (MV)(K),Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0,2
1,GIF Sundsvall,Monir Jelassi,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0,2
2,GIF Sundsvall,Amaro Bahtijar,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0,2
3,GIF Sundsvall,Malte Hallin,Starting,True,out,75,75.0,0,[],0,[],False,False,0.0,75.0,75.0,1
4,GIF Sundsvall,Lucas Forsberg,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0,2
5,GIF Sundsvall,Alexandros Pantelidis,Starting,False,,,,0,[],0,[],True,False,0.0,96.0,96.0,2
6,GIF Sundsvall,Marcelo Palomino,Starting,True,out,75,75.0,1,[53],0,[],False,False,0.0,75.0,75.0,1
7,GIF Sundsvall,Ture Sandberg,Starting,False,,,,0,[],0,[],True,False,0.0,96.0,96.0,2
8,GIF Sundsvall,Marc Manchon I Armans,Starting,True,out,90+6,96.0,0,[],0,[],False,False,0.0,96.0,96.0,2
9,GIF Sundsvall,Taiki Kagayama,Starting,False,,,,0,[],0,[],False,False,0.0,96.0,96.0,2


In [47]:
def build_match_id(filename):
    return filename.replace(".html","").replace(" - Matchfakta - Svensk fotboll","")

In [48]:
import os

parser = MatchTeamParser()

FOLDER = "superettan-matcher"

all_player_rows = []
all_goal_rows = []

for file in os.listdir(FOLDER):

    if not file.endswith(".html"):
        continue

    path = os.path.join(FOLDER, file)
    print(f"Parsar: {file}")

    match_id = build_match_id(file)

    with open(path, encoding="utf-8") as f:
        html = f.read()

    match_data = parser.parse_team_info(html)

    # ----- spelare -----
    df = parser.create_lineup_dataframe(match_data)
    df["match_id"] = match_id

    all_player_rows.append(df)

    # ----- mål -----
    for g in match_data["goals"]:
        g["match_id"] = match_id
        all_goal_rows.append(g)

# Slå ihop
players_df = pd.concat(all_player_rows, ignore_index=True)
goals_df = pd.DataFrame(all_goal_rows)


Parsar: GIF Sundsvall - Helsingborgs IF - Matchfakta - Svensk fotboll.html
Parsar: Sandvikens IF - Kalmar FF - Matchfakta - Svensk fotboll.html


In [54]:
players_df

Unnamed: 0,team,name,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,assists,assists_minutes,yellow_card,red_card,match_id
0,GIF Sundsvall,Jonas Olsson (MV)(K),Starting,False,,,,0,[],0,[],False,False,GIF Sundsvall - Helsingborgs IF
1,GIF Sundsvall,Monir Jelassi,Starting,False,,,,0,[],0,[],False,False,GIF Sundsvall - Helsingborgs IF
2,GIF Sundsvall,Amaro Bahtijar,Starting,False,,,,0,[],0,[],False,False,GIF Sundsvall - Helsingborgs IF
3,GIF Sundsvall,Malte Hallin,Starting,True,out,75,75.0,0,[],0,[],False,False,GIF Sundsvall - Helsingborgs IF
4,GIF Sundsvall,Lucas Forsberg,Starting,False,,,,0,[],0,[],False,False,GIF Sundsvall - Helsingborgs IF
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67,Kalmar FF,Gibril Sosseh,Sub,False,,,,0,[],0,[],False,False,Sandvikens IF - Kalmar FF
68,Kalmar FF,Jakob Kindberg,Sub,False,,,,0,[],0,[],False,False,Sandvikens IF - Kalmar FF
69,Kalmar FF,Charlie Rosenqvist,Sub,False,,,,0,[],0,[],False,False,Sandvikens IF - Kalmar FF
70,Kalmar FF,Ali Zaher,Sub,True,in,85,85.0,0,[],0,[],False,False,Sandvikens IF - Kalmar FF


In [55]:
def calculate_plus_minus_all(players_df, goals_df):

    players_df = players_df.copy()
    pm_values = []

    for _, row in players_df.iterrows():

        match_id = row["match_id"]
        team = row["team"]
        start = row["start_minute"]
        end = row["end_minute"]

        match_goals = goals_df[goals_df["match_id"] == match_id]

        plus = 0
        minus = 0

        for _, g in match_goals.iterrows():

            m = parse_minute(g["minute"])

            if not (start <= m <= end):
                continue

            scorer_team = players_df.loc[
                (players_df["match_id"] == match_id) &
                (players_df["name"] == g["scorer"]),
                "team"
            ]

            if len(scorer_team) == 0:
                continue

            scorer_team = scorer_team.values[0]

            if scorer_team == team:
                plus += 1
            else:
                minus += 1

        pm_values.append(plus - minus)

    players_df["plus_minus"] = pm_values
    return players_df


In [57]:
players_df = calculate_minutes_played(players_df, match_length_map)
players_df = calculate_plus_minus_all(players_df, goals_df)

players_df.head()


TypeError: unsupported operand type(s) for -: 'dict' and 'int'

In [58]:
print(match_length_map)


{'GIF Sundsvall - Helsingborgs IF': 91}


In [56]:
import re

def parse_minute(minute_str):
    """
    Konverterar:
    "53'"    -> 53
    "90+6'"  -> 96
    "45+2"   -> 47
    "105+1'" -> 106
    """

    if not minute_str:
        return None

    # ta bort '
    txt = minute_str.replace("'", "").strip()

    # matcha 90+6
    m = re.match(r"(\d+)\+(\d+)", txt)
    if m:
        base = int(m.group(1))
        extra = int(m.group(2))
        return base + extra

    # vanlig minut
    if txt.isdigit():
        return int(txt)

    return None

match_length_map = {}

for match_id in goals_df["match_id"].unique():

    match_goals = goals_df[goals_df["match_id"] == match_id]

    max_minute = 90

    for m in match_goals["minute"]:
        parsed = parse_minute(m)
        if parsed and parsed > max_minute:
            max_minute = parsed

    match_length_map[match_id] = max_minute


In [21]:
final_df["sub_minute"] = final_df["sub_minute"].apply(parse_minute)
final_df

Unnamed: 0,team,name,player_type,was_substituted,sub_direction,sub_minute,goals,assists,assists_minutes,yellow_card,red_card,source_file
0,GIF Sundsvall,Jonas Olsson (MV)(K),Starting,False,,,False,0,[],False,False,GIF Sundsvall - Helsingborgs IF - Matchfakta -...
1,GIF Sundsvall,Monir Jelassi,Starting,False,,,False,0,[],False,False,GIF Sundsvall - Helsingborgs IF - Matchfakta -...
2,GIF Sundsvall,Amaro Bahtijar,Starting,False,,,False,0,[],False,False,GIF Sundsvall - Helsingborgs IF - Matchfakta -...
3,GIF Sundsvall,Malte Hallin,Starting,True,out,75.0,False,0,[],False,False,GIF Sundsvall - Helsingborgs IF - Matchfakta -...
4,GIF Sundsvall,Lucas Forsberg,Starting,False,,,False,0,[],False,False,GIF Sundsvall - Helsingborgs IF - Matchfakta -...
...,...,...,...,...,...,...,...,...,...,...,...,...
67,Kalmar FF,Gibril Sosseh,Sub,False,,,False,0,[],False,False,Sandvikens IF - Kalmar FF - Matchfakta - Svens...
68,Kalmar FF,Jakob Kindberg,Sub,False,,,False,0,[],False,False,Sandvikens IF - Kalmar FF - Matchfakta - Svens...
69,Kalmar FF,Charlie Rosenqvist,Sub,False,,,False,0,[],False,False,Sandvikens IF - Kalmar FF - Matchfakta - Svens...
70,Kalmar FF,Ali Zaher,Sub,True,in,85.0,False,0,[],False,False,Sandvikens IF - Kalmar FF - Matchfakta - Svens...


In [None]:
final_df["sub_direction"] = final_df["sub_direction"].fillna("no_sub")
final_df["sub_minute"] = final_df["sub_minute"].fillna()

0     no_sub
1     no_sub
2     no_sub
3        out
4     no_sub
       ...  
67    no_sub
68    no_sub
69    no_sub
70        in
71        in
Name: sub_direction, Length: 72, dtype: object

In [4]:
match_data

{'teams': {'home': 'GIF Sundsvall', 'away': 'Helsingborgs IF'},
 'lineups': {'home': [{'number': '1',
    'name': 'Jonas Olsson (MV)(K)',
    'fplguid': '4b65da9c-b224-4c25-ab0a-d5f15b415ffa',
    'goals': False,
    'assists': 0,
    'assists_minutes': [],
    'yellow_card': False,
    'red_card': False,
    'was_substituted': False,
    'sub_direction': None,
    'sub_minute': None},
   {'number': '3',
    'name': 'Monir Jelassi',
    'fplguid': '11442e33-d8ae-4617-8280-d85338d3f420',
    'goals': False,
    'assists': 0,
    'assists_minutes': [],
    'yellow_card': False,
    'red_card': False,
    'was_substituted': False,
    'sub_direction': None,
    'sub_minute': None},
   {'number': '27',
    'name': 'Amaro Bahtijar',
    'fplguid': 'd9f34aed-13b7-470a-8b9c-bae2b4139201',
    'goals': False,
    'assists': 0,
    'assists_minutes': [],
    'yellow_card': False,
    'red_card': False,
    'was_substituted': False,
    'sub_direction': None,
    'sub_minute': None},
   {'number

## GET All matches

In [None]:
from bs4 import BeautifulSoup
import pandas as pd
import json
import re

# --------------------------
# Läs HTML-filen med alla matcher
# --------------------------
with open("spelscheman/Spelprogram - Svensk fotboll.html", "r", encoding="utf-8") as f:
    html_content = f.read()

soup = BeautifulSoup(html_content, "html.parser")

# --------------------------
# Hitta alla matcher
# --------------------------
matches = []
for a in soup.select("a.match-list__link"):
    url = a.get("href")

    home_team = a.select_one(".match-list__home .match-list__team-name").text.strip()
    away_team = a.select_one(".match-list__away .match-list__team-name").text.strip()
    
    # Scores
    home_score_text = a.select_one(".match-list__home .match-list__score").text.strip()
    away_score_text = a.select_one(".match-list__away .match-list__score").text.strip()
    
    home_score = int(home_score_text) if home_score_text.isdigit() else None
    away_score = int(away_score_text) if away_score_text.isdigit() else None

    # Datum och tid
    datetime_attr = a.select_one(".match-list__date")["datetime"]
    date_display = a.select_one(".match-list__date").text.strip()

    # Arena och publik
    venue = a.select_one(".match-list__location").text.strip()
    
    audience_text = a.select_one(".match-list__audience").text.strip()
    # Plocka bara siffror från publik-texten
    attendance_match = re.search(r'\d+', audience_text.replace(" ", ""))
    attendance = int(attendance_match.group()) if attendance_match else None

    matches.append({
        "home_team": home_team,
        "home_score": home_score,
        "away_team": away_team,
        "away_score": away_score,
        "datetime": datetime_attr,
        "date_display": date_display,
        "venue": venue,
        "attendance": attendance,
        "match_url": url
    })

# --------------------------
# Visa resultat
# --------------------------
print(f"Totalt {len(matches)} matcher hittade")
for m in matches[:5]:  # visar bara de första 5 för översikt
    print(m)

# --------------------------
# Spara som JSON
# --------------------------
with open("output-superettan/matcher.json", "w", encoding="utf-8") as f:
    json.dump(matches, f, ensure_ascii=False, indent=2)

# --------------------------
# Valfritt: Spara som DataFrame
# --------------------------
matches_df = pd.DataFrame(matches)


Totalt 240 matcher hittade
{'home_team': 'GIF Sundsvall', 'home_score': 2, 'away_team': 'Helsingborgs IF', 'away_score': 0, 'datetime': '2025-03-29T15:00+01:00', 'date_display': '15:00, 29 mars 2025', 'venue': 'NP3 Arena', 'attendance': 2382, 'match_url': 'https://www.svenskfotboll.se/go-to/?fmid=6144519'}
{'home_team': 'Sandvikens IF', 'home_score': 0, 'away_team': 'Kalmar FF', 'away_score': 0, 'datetime': '2025-03-29T15:00+01:00', 'date_display': '15:00, 29 mars 2025', 'venue': 'Jernvallen A-planen', 'attendance': 1656, 'match_url': 'https://www.svenskfotboll.se/go-to/?fmid=6144522'}
{'home_team': 'Örgryte IS Fotboll', 'home_score': 2, 'away_team': 'Utsikten', 'away_score': 1, 'datetime': '2025-03-29T15:00+01:00', 'date_display': '15:00, 29 mars 2025', 'venue': 'Gamla Ullevi', 'attendance': 4208, 'match_url': 'https://www.svenskfotboll.se/go-to/?fmid=6144525'}
{'home_team': 'Falkenbergs FF', 'home_score': 2, 'away_team': 'Varbergs BoIS FC', 'away_score': 2, 'datetime': '2025-03-29T17

In [56]:
matches_df

Unnamed: 0,home_team,home_score,away_team,away_score,datetime,date_display,venue,attendance,match_url
0,GIF Sundsvall,2,Helsingborgs IF,0,2025-03-29T15:00+01:00,"15:00, 29 mars 2025",NP3 Arena,2382,https://www.svenskfotboll.se/go-to/?fmid=6144519
1,Sandvikens IF,0,Kalmar FF,0,2025-03-29T15:00+01:00,"15:00, 29 mars 2025",Jernvallen A-planen,1656,https://www.svenskfotboll.se/go-to/?fmid=6144522
2,Örgryte IS Fotboll,2,Utsikten,1,2025-03-29T15:00+01:00,"15:00, 29 mars 2025",Gamla Ullevi,4208,https://www.svenskfotboll.se/go-to/?fmid=6144525
3,Falkenbergs FF,2,Varbergs BoIS FC,2,2025-03-29T17:00+01:00,"17:00, 29 mars 2025",Falcon Alkoholfri Arena,4231,https://www.svenskfotboll.se/go-to/?fmid=6144518
4,Landskrona BoIS,2,IK Brage,2,2025-03-30T13:00+02:00,"13:00, 30 mars 2025",Landskrona IP A-plan,2550,https://www.svenskfotboll.se/go-to/?fmid=6144521
...,...,...,...,...,...,...,...,...,...
235,Kalmar FF,1,Falkenbergs FF,2,2025-11-08T15:00+01:00,"15:00, 8 nov. 2025","Guldfågeln Arena, Kalmar",6789,https://www.svenskfotboll.se/go-to/?fmid=6144753
236,Sandvikens IF,5,Östersund,0,2025-11-08T15:00+01:00,"15:00, 8 nov. 2025",Jernvallen A-planen,1143,https://www.svenskfotboll.se/go-to/?fmid=6144754
237,Trelleborgs FF,1,Örebro,1,2025-11-08T15:00+01:00,"15:00, 8 nov. 2025","Vångavallen, Trelleborg A-plan",2149,https://www.svenskfotboll.se/go-to/?fmid=6144755
238,Utsikten,1,Umeå FC,4,2025-11-08T15:00+01:00,"15:00, 8 nov. 2025",Nordic Wellness Arena,111,https://www.svenskfotboll.se/go-to/?fmid=6144756


In [54]:
matches

[{'home_team': 'GIF Sundsvall',
  'away_team': 'Helsingborgs IF',
  'date': '15:00, 29 mars 2025',
  'location': 'NP3 Arena',
  'url': 'https://www.svenskfotboll.se/go-to/?fmid=6144519'},
 {'home_team': 'Sandvikens IF',
  'away_team': 'Kalmar FF',
  'date': '15:00, 29 mars 2025',
  'location': 'Jernvallen A-planen',
  'url': 'https://www.svenskfotboll.se/go-to/?fmid=6144522'},
 {'home_team': 'Örgryte IS Fotboll',
  'away_team': 'Utsikten',
  'date': '15:00, 29 mars 2025',
  'location': 'Gamla Ullevi',
  'url': 'https://www.svenskfotboll.se/go-to/?fmid=6144525'},
 {'home_team': 'Falkenbergs FF',
  'away_team': 'Varbergs BoIS FC',
  'date': '17:00, 29 mars 2025',
  'location': 'Falcon Alkoholfri Arena',
  'url': 'https://www.svenskfotboll.se/go-to/?fmid=6144518'},
 {'home_team': 'Landskrona BoIS',
  'away_team': 'IK Brage',
  'date': '13:00, 30 mars 2025',
  'location': 'Landskrona IP A-plan',
  'url': 'https://www.svenskfotboll.se/go-to/?fmid=6144521'},
 {'home_team': 'Trelleborgs FF',


## SLASK

## Player overview - old

### Block 5: Discipline/risk

In [None]:
players_enriched

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,...,tm_match_type,tm_match_score,profile_url,club,position,age,market_value,agent,player_query,height
0,Trelleborgs FF,Mathias Nilsson (MV),21531f26-5933-4af9-a3f2-87fd7bfe30cf,Starting,False,,,,0,[],...,no_match,,,,,,,,,
1,Trelleborgs FF,Emmanuel Godwin,448bd50d-c89e-4790-b4bf-3dd20f1ccd6d,Starting,False,,,,0,[],...,no_match,,,,,,,,,
2,Trelleborgs FF,Tobias Karlsson,9d5fd108-3160-4c01-8b96-cdda76bd4142,Starting,False,,,,0,[],...,no_match,,,,,,,,,
3,Trelleborgs FF,Charlie Weberg,3b59e9e5-b136-4310-ab98-a602f082dd75,Starting,False,,,,0,[],...,no_match,,,,,,,,,
4,Trelleborgs FF,Eren Alievski,c015f3a6-5c6c-4812-a11c-5a0614d1682e,Starting,False,,,,0,[],...,no_match,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1072,IK Brage,Lorik Konjuhi,f502e048-1178-474a-aab2-e0ab8f8713bb,Sub,True,in,88,88.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/lorik-konjuhi/pr...,IK Brage,CB,23,75000.0,,lorik+konjuhi,
1073,IK Brage,Marinus Larsen,16bee41b-51df-48a2-aa2c-ded2d43c6579,Sub,True,in,88,88.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/marinus-larsen/p...,IK Brage,CM,22,400000.0,People In Sport,marinus+larsen,
1074,IK Brage,Pontus Jonsson,c2d33e21-d2f9-447a-98c8-6bc7815073ab,Sub,True,in,60,60.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/pontus-jonsson/p...,IK Brage,CF,24,150000.0,Global Football Consulting,pontus+jonsson,
1075,IK Brage,Noah Östberg,bd70422a-62a3-46b2-8b40-d8e415e6025a,Sub,True,in,74,74.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/noah-ostberg/pro...,IK Brage,RB,20,125000.0,,noah+östberg,"1,82 m"


In [None]:
discipline_risk = (players_enriched
    [players_enriched["team"] == "IK Brage"]
    .groupby("name")
    .agg(
        yellow_cards = ("yellow_card", "sum"),
        red_card = ("red_card", "sum"),
        market_value = ("market_value", "max")
    )
    .reset_index()
    .sort_values("yellow_cards", ascending = False)
)

discipline_risk

Unnamed: 0,name,yellow_cards,red_card,market_value
11,Jacob Stensson,8,0,200000.0
3,Anton Lundin,8,0,100000.0
1,Alexander Zetterström,6,1,400000.0
7,Filip Trpcevski,5,0,300000.0
10,Haris Brkic,5,0,450000.0
17,Noah Åstrand John,4,0,150000.0
22,Teodor Wålemark,4,0,125000.0
8,Gustav Berggren,4,0,400000.0
14,Malte Persson,3,0,125000.0
4,Cesar Weilid,3,0,


In [None]:
fig = px.bar(discipline_risk, x = "name", y = "yellow_cards", hover_name="name", color = "name",
                 title = "Discipline/risk")

fig.update_layout(
    xaxis_title="",
    yaxis_title="Gula kort",
    legend_title = "Spelare"
    )

fig.show()

In [None]:
fig = px.scatter(discipline_risk, x = "market_value", y = "yellow_cards", hover_name="name", color = "name",
                 title = "Discipline/risk")

fig.update_layout(
    xaxis_title="Marknadsvärde (Transfermarkt)",
    yaxis_title="Gula kort",
    legend_title = "Spelare"
    )

fig.show()

### Block 4: Overall impact

In [None]:
players_enriched

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,...,tm_match_type,tm_match_score,profile_url,club,position,age,market_value,agent,player_query,height
0,Trelleborgs FF,Mathias Nilsson (MV),21531f26-5933-4af9-a3f2-87fd7bfe30cf,Starting,False,,,,0,[],...,no_match,,,,,,,,,
1,Trelleborgs FF,Emmanuel Godwin,448bd50d-c89e-4790-b4bf-3dd20f1ccd6d,Starting,False,,,,0,[],...,no_match,,,,,,,,,
2,Trelleborgs FF,Tobias Karlsson,9d5fd108-3160-4c01-8b96-cdda76bd4142,Starting,False,,,,0,[],...,no_match,,,,,,,,,
3,Trelleborgs FF,Charlie Weberg,3b59e9e5-b136-4310-ab98-a602f082dd75,Starting,False,,,,0,[],...,no_match,,,,,,,,,
4,Trelleborgs FF,Eren Alievski,c015f3a6-5c6c-4812-a11c-5a0614d1682e,Starting,False,,,,0,[],...,no_match,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1072,IK Brage,Lorik Konjuhi,f502e048-1178-474a-aab2-e0ab8f8713bb,Sub,True,in,88,88.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/lorik-konjuhi/pr...,IK Brage,CB,23,75000.0,,lorik+konjuhi,
1073,IK Brage,Marinus Larsen,16bee41b-51df-48a2-aa2c-ded2d43c6579,Sub,True,in,88,88.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/marinus-larsen/p...,IK Brage,CM,22,400000.0,People In Sport,marinus+larsen,
1074,IK Brage,Pontus Jonsson,c2d33e21-d2f9-447a-98c8-6bc7815073ab,Sub,True,in,60,60.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/pontus-jonsson/p...,IK Brage,CF,24,150000.0,Global Football Consulting,pontus+jonsson,
1075,IK Brage,Noah Östberg,bd70422a-62a3-46b2-8b40-d8e415e6025a,Sub,True,in,74,74.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/noah-ostberg/pro...,IK Brage,RB,20,125000.0,,noah+östberg,"1,82 m"


In [None]:
overall_impact = (players_enriched
    [players_enriched["team"] == "IK Brage"]
    .groupby("name")
    .agg(
        plus_minus_raw = ("plus_minus_raw", "sum"),
        plus_minus_per90 = ("plus_minus_per90", "mean"),
        on_off_diff = ("on_off_diff", "sum"),
        plus_minus_impact_mean = ("plus_minus_impact", "mean"),
        market_value = ("market_value", "max")
    )
    .reset_index()
    .sort_values("plus_minus_raw", ascending = False)
)

overall_impact

Unnamed: 0,name,plus_minus_raw,plus_minus_per90,on_off_diff,plus_minus_impact_mean,market_value
17,Noah Åstrand John,13.0,2.330997,20.0,0.018182,150000.0
8,Gustav Berggren,11.0,0.406546,15.0,-0.021429,400000.0
4,Cesar Weilid,10.0,0.494852,13.0,0.109524,
20,Pontus Jonsson,7.0,2.270251,12.0,0.168182,150000.0
23,Viktor Frodig,6.0,0.189207,8.0,-0.08,325000.0
2,Amar Muhsin,5.0,0.221937,9.0,0.782759,450000.0
9,Gustav Nordh,5.0,0.234802,8.0,0.383333,350000.0
13,Lorik Konjuhi,4.0,-0.811917,0.0,-0.088235,75000.0
7,Filip Trpcevski,1.0,0.055015,1.0,0.335714,300000.0
1,Alexander Zetterström,1.0,0.029131,1.0,-0.142857,400000.0


In [None]:
fig = px.scatter(overall_impact, x = "on_off_diff", y = "plus_minus_impact_mean", hover_name="name", color = "name",
                 title = "Overall impact")

fig.update_layout(
    # xaxis_title="Hållda nollor när på plan",
    # yaxis_title="Insläppta mål när på plan",
    legend_title = "Spelare"
    )

fig.show()

In [None]:
fig = px.scatter(overall_impact, x = "market_value", y = "plus_minus_impact_mean", hover_name="name", color = "name",
                 title = "Overall impact")

fig.update_layout(
    # xaxis_title="Hållda nollor när på plan",
    # yaxis_title="Insläppta mål när på plan",
    legend_title = "Spelare"
    )

fig.show()

### Block 3: Defensive impact

In [None]:
players_enriched

Unnamed: 0,team,name,fplguid,player_type,was_substituted,sub_direction,sub_minute,sub_minute_int,goals,goal_minutes,...,tm_match_type,tm_match_score,profile_url,club,position,age,market_value,agent,player_query,height
0,Trelleborgs FF,Mathias Nilsson (MV),21531f26-5933-4af9-a3f2-87fd7bfe30cf,Starting,False,,,,0,[],...,no_match,,,,,,,,,
1,Trelleborgs FF,Emmanuel Godwin,448bd50d-c89e-4790-b4bf-3dd20f1ccd6d,Starting,False,,,,0,[],...,no_match,,,,,,,,,
2,Trelleborgs FF,Tobias Karlsson,9d5fd108-3160-4c01-8b96-cdda76bd4142,Starting,False,,,,0,[],...,no_match,,,,,,,,,
3,Trelleborgs FF,Charlie Weberg,3b59e9e5-b136-4310-ab98-a602f082dd75,Starting,False,,,,0,[],...,no_match,,,,,,,,,
4,Trelleborgs FF,Eren Alievski,c015f3a6-5c6c-4812-a11c-5a0614d1682e,Starting,False,,,,0,[],...,no_match,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1072,IK Brage,Lorik Konjuhi,f502e048-1178-474a-aab2-e0ab8f8713bb,Sub,True,in,88,88.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/lorik-konjuhi/pr...,IK Brage,CB,23,75000.0,,lorik+konjuhi,
1073,IK Brage,Marinus Larsen,16bee41b-51df-48a2-aa2c-ded2d43c6579,Sub,True,in,88,88.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/marinus-larsen/p...,IK Brage,CM,22,400000.0,People In Sport,marinus+larsen,
1074,IK Brage,Pontus Jonsson,c2d33e21-d2f9-447a-98c8-6bc7815073ab,Sub,True,in,60,60.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/pontus-jonsson/p...,IK Brage,CF,24,150000.0,Global Football Consulting,pontus+jonsson,
1075,IK Brage,Noah Östberg,bd70422a-62a3-46b2-8b40-d8e415e6025a,Sub,True,in,74,74.0,0,[],...,exact_name_exact_club,100.0,https://www.transfermarkt.com/noah-ostberg/pro...,IK Brage,RB,20,125000.0,,noah+östberg,182.0


In [None]:
def_impact = (players_enriched
    [players_enriched["team"] == "IK Brage"]
    .groupby("name")
    .agg(
        ga_on = ("ga_on", "sum"),
        ga90 = ("ga90", "sum"),
        clean_sheets = ("clean_sheet", "sum"),
        market_value = ("market_value", "max")
    )
    .reset_index()
    .sort_values("clean_sheets", ascending = False)
)

def_impact

Unnamed: 0,name,ga_on,ga90,clean_sheets,market_value
20,Pontus Jonsson,8.0,26.433587,18.0,150000.0
17,Noah Åstrand John,16.0,23.725295,12.0,150000.0
15,Marinus Larsen,12.0,49.509191,11.0,400000.0
3,Anton Lundin,27.0,40.933115,10.0,100000.0
14,Malte Persson,30.0,46.834457,8.0,125000.0
10,Haris Brkic,38.0,44.477365,8.0,450000.0
9,Gustav Nordh,36.0,45.177305,7.0,350000.0
13,Lorik Konjuhi,21.0,38.074012,7.0,75000.0
4,Cesar Weilid,23.0,25.296701,7.0,
6,Emil Tot Wikström,10.0,17.669014,7.0,250000.0


In [None]:
fig = px.scatter(def_impact, x = "clean_sheets", y = "ga_on", hover_name="name", color = "name",
                 title = "Defensiv impact")

fig.update_layout(
    xaxis_title="Hållda nollor när på plan",
    yaxis_title="Insläppta mål när på plan",
    legend_title = "Spelare"
    )

fig.show()

In [None]:
fig = px.scatter(def_impact, x = "market_value", y = "ga_on", hover_name="name", color = "name",
                 title = "Defensiv impact")

fig.update_layout(
    xaxis_title="Marknadsvärde (Transfermarkt)",
    yaxis_title="Insläppta mål för när på plan",
    legend_title = "Spelare"
    )

fig.show()

### Block 2: Offensive impact

In [None]:
off_impact = (players_enriched
    [players_enriched["team"] == "IK Brage"]
    .groupby("name")
    .agg(
        mål = ("goals", "sum"),
        assists = ("assists", "sum"),
        sena_mål = ("clutch_goals", "sum"),
        on_goal_diff = ("on_goal_diff", "sum"),
        gf_on = ("gf_on", "sum"),
        market_value = ("market_value", "max")
    )
    .reset_index()
    .assign(
        poäng_p90 = lambda df: ((df["mål"] + df["assists"]) / 90).round(2)
    )
    .sort_values("mål", ascending = False)
)

In [None]:
off_impact

Unnamed: 0,name,mål,assists,sena_mål,on_goal_diff,gf_on,market_value,poäng_p90
2,Amar Muhsin,21,5,5.0,5.0,45.0,450000.0,0.29
7,Filip Trpcevski,7,7,4.0,1.0,43.0,300000.0,0.16
9,Gustav Nordh,5,10,0.0,5.0,41.0,350000.0,0.17
15,Marinus Larsen,4,0,1.0,0.0,12.0,400000.0,0.04
3,Anton Lundin,4,2,2.0,0.0,27.0,100000.0,0.07
20,Pontus Jonsson,3,1,3.0,7.0,15.0,150000.0,0.04
22,Teodor Wålemark,2,0,1.0,-3.0,32.0,125000.0,0.02
10,Haris Brkic,2,4,0.0,-4.0,34.0,450000.0,0.07
4,Cesar Weilid,1,4,0.0,10.0,33.0,,0.06
17,Noah Åstrand John,1,2,0.0,13.0,29.0,150000.0,0.03


In [None]:
fig = px.scatter(off_impact, x = "poäng_p90", y = "gf_on", hover_name="name", color = "name",
                 title = "Offensiv impact")

fig.update_layout(
    xaxis_title="Antal poäng per 90",
    yaxis_title="Mål för när på plan",
    legend_title = "Spelare"
    )

fig.show()

In [None]:
fig = px.scatter(off_impact, x = "market_value", y = "gf_on", hover_name="name", color = "name",
                 title = "Offensiv impact")

fig.update_layout(
    xaxis_title="Marknadsvärde (Transfermarkt)",
    yaxis_title="Mål för när på plan",
    legend_title = "Spelare"
    )

fig.show()

### Block 1: Availability - old

In [None]:
availability = (players_enriched
    [players_enriched["team"] == "IK Brage"]
    .groupby("name")
    .agg(
        position = ("position", "max"),
        längd = ("height", "max"),
        matcher = ("matches_played", "sum"),
        minuter = ("minutes_played", "sum"),
        starter = ("player_type", lambda x: (x == "Starting").sum())
    )
    .reset_index()
    .assign(
        usage_rate = lambda df: (df["minuter"] / (df["matcher"] * 95)).round(2),
        starter_rate = lambda df: (df["starter"] / df["matcher"]).round(2)
    )
    .sort_values("usage_rate", ascending = False)
)

print(availability.head(5))

# Visar hur ofta och hur mycket spelare används
fig = px.scatter(availability, x = "matcher", y = "usage_rate", 
                 size = "minuter", color = "starter_rate",
                 title = "Är spelaren tillgänglig ofta – och används han när han är det?",
                 hover_name="name")

fig.update_layout(
    xaxis_title="Matcher",
    yaxis_title="Användningsgrad",
    xaxis_tickangle=-45,
    legend_title = "Andel starter"
    )

# Från ovan kan klicka på spelare -> tar en till grafik nedan

player_name = "Pontus Jonsson"
m = players_enriched[
    (players_enriched["team"]=="IK Brage") &
    (players_enriched["name"] == player_name) &
    (players_enriched["minutes_played"] > 0)
].copy()

# SÄKERSTÄLL NUMERISKT
m["start_minute"] = m["start_minute"].astype(float)
m["end_minute"] = m["end_minute"].astype(float)

fig = px.bar(m, 
    base = "start_minute",
    x = "minutes_played",
    y = "match_id_short",
    orientation = 'h',
    color = "player_type",
    hover_name="name"
)

fig.update_yaxes(autorange="reversed")

fig.update_xaxes(
    type="linear",
    title="Matchminut",
    range=[0, m["match_length"].max()],
    dtick=5
)

fig.update_layout(
    title=f"Speltid för {player_name}",
    height=400
)

fig.add_vline(x=45, line_dash="dash")
fig.add_vline(x=90, line_dash="dash")

fig.show()

import plotly.express as px

fig = px.bar(availability, x = "name", y = "usage_rate", title = "Användningsgrad per spelare")


fig.update_layout(
    xaxis_title="",
    yaxis_title="Användningsgrad",
    xaxis_tickangle=-45,
    legend_title = ""
    )

fig.show()

availability_plot = (players_df
    [players_df["team"] == "IK Brage"]
    .groupby(["name", "match_id_short"])
    .agg(
        minuter = ("minutes_played", "sum"),
    )
    .reset_index()
    .sort_values("minuter", ascending = False)
)

match_order_short = (
    players_df
        .drop_duplicates("match_id_short")
        ["match_id_short"]
        .tolist()
)

availability_plot["match_id_short"] = pd.Categorical(
    availability_plot["match_id_short"],
    categories=match_order_short,
    ordered=True
)

availability_plot = availability_plot.sort_values("match_id_short")

all_matches = match_order_short

fixed = []

for name in availability_plot["name"].unique():
    
    sub = availability_plot[availability_plot["name"] == name].set_index("match_id_short")
    
    sub = sub.reindex(all_matches)
    sub["name"] = name
    sub["match_id_short"] = sub.index
    
    fixed.append(sub.reset_index(drop=True))

plot_df = pd.concat(fixed, ignore_index=True)

plot_var = "minuter"

fig = px.line(
    plot_df,
    x="match_id_short",
    y=plot_var,
    color="name",
    markers=True,
    title=f"{plot_var} per match",
    category_orders={
        "match_id_short": match_order_short
    }
)

fig.update_layout(
    xaxis_title="",
    yaxis_title=plot_var,
    xaxis_tickangle=-45,
    legend_title = ""
)

fig.update_yaxes(range=[0, 100])

fig.show()



In [None]:
def calculate_plus_minus(players_df, goals_df):

    df = players_df.copy()
    df["plus_minus"] = 0

    # För snabb team-lookup
    scorer_team_lookup = {}

    for _, r in df.iterrows():
        scorer_team_lookup[(r["match_id"], r["name"])] = r["team"]

    for i, p in df.iterrows():

        match = p["match_id"]
        team = p["team"]

        goals = goals_df[goals_df["match_id"] == match]

        pm = 0

        for _, g in goals.iterrows():

            m = g["minute_int"]

            # 🔑 KORREKT intervall
            if not (p["start_minute"] <= m <= p["end_minute"]):
                continue

            scorer = g["scorer"]

            key = (match, scorer)
            if key not in scorer_team_lookup:
                continue

            goal_team = scorer_team_lookup[key]

            if goal_team == team:
                pm += 1
            else:
                pm -= 1

        df.at[i,"plus_minus"] = pm

    return df


players_df = calculate_plus_minus(players_df, goals_df)
players_df

In [None]:

# # =====================
# # TIDSHANTERING
# # =====================
# def parse_minute(txt):
#     if not txt:
#         return None

#     txt = txt.replace("'", "").strip()

#     if "+" in txt:
#         base, extra = txt.split("+")
#         return int(base) + int(extra)

#     return int(txt)


# class MatchTeamParser:

#     # =====================
#     # MAIN
#     # =====================
#     def parse_team_info(self, html_content):

#         soup = BeautifulSoup(html_content, 'html.parser')

#         match_data = {
#             'teams': {},
#             'lineups': {},
#             'substitutes': {},
#             'goals': [],
#             'match_length': None
#         }

#         # =====================
#         # Matchlängd
#         # =====================
#         match_data["match_length"] = self.calculate_match_length(soup)

#         # =====================
#         # Assists & mål
#         # =====================
#         assists = self.parse_assists_from_events(soup)
#         goals = self.parse_goals_from_events(soup)
#         match_data["goals"] = goals

#         # Lookup mål
#         goal_map = {}
#         for g in goals:
#             fid = g["scorer_fplguid"]
#             if fid:
#                 goal_map.setdefault(fid, []).append(g["minute_int"])

#         # =====================
#         # Lineups
#         # =====================
#         team_sections = soup.find_all('section', class_='formation-list')

#         for idx, section in enumerate(team_sections):
#             team_key = 'home' if idx == 0 else 'away'

#             team_name = section.find('h3', class_='formation-list__team')
#             if team_name:
#                 match_data['teams'][team_key] = team_name.text.strip()

#             starters, subs = [], []

#             lists = section.find_all('ul', class_='formation-list__items')

#             for ul in lists:
#                 prev = ul.find_previous_sibling('h4', class_='formation-list__section-headline')
#                 is_sub_list = prev and 'Ersättare' in prev.text

#                 for li in ul.find_all('li', class_='formation-list__item'):
#                     div = li.find('div', class_='formation-list-player')
#                     if not div:
#                         continue

#                     p = {}

#                     # Nummer
#                     num = div.find('span', class_='formation-list-player__number')
#                     p['number'] = num.text.strip() if num else None

#                     # Namn + fplguid
#                     link = div.find('a', class_='formation-list-player__link')
#                     if link:
#                         p['name'] = link.text.strip()
#                         href = link.get('href','')
#                         p['fplguid'] = href.split('fplguid=')[-1] if 'fplguid=' in href else None

#                     # Init
#                     p.update({
#                         'goals': 0,
#                         'goal_minutes': [],
#                         'assists': 0,
#                         'assists_minutes': [],
#                         'yellow_card': False,
#                         'red_card': False,
#                         'was_substituted': False,
#                         'sub_direction': None,
#                         'sub_minute': None,
#                         'sub_minute_int': None
#                     })

#                     # EVENTS
#                     events = div.find('span', class_='formation-list-player__events')
#                     if events:

#                         if events.find('use', {'xlink:href': lambda x: x and 'icon-card-yellow' in x}):
#                             p['yellow_card'] = True

#                         if events.find('use', {'xlink:href': lambda x: x and 'icon-card-red' in x}):
#                             p['red_card'] = True

#                         # Byten
#                         sub_wrap = events.find('span', class_='formation-list-player__substitution-wrapper')
#                         if sub_wrap:
#                             icon = sub_wrap.find('use')
#                             href = icon.get('xlink:href','') if icon else ''

#                             txt = sub_wrap.find('span', class_='formation-list-player__substitution-text')
#                             minute = txt.text.strip() if txt else None

#                             if 'substitution-out' in href:
#                                 p['was_substituted'] = True
#                                 p['sub_direction'] = 'out'
#                                 p['sub_minute'] = minute
#                                 p['sub_minute_int'] = parse_minute(minute)

#                             elif 'substitution-in' in href:
#                                 p['was_substituted'] = True
#                                 p['sub_direction'] = 'in'
#                                 p['sub_minute'] = minute
#                                 p['sub_minute_int'] = parse_minute(minute)

#                     # Koppla assists
#                     if p.get('fplguid') in assists:
#                         p['assists'] = len(assists[p['fplguid']])
#                         p['assists_minutes'] = assists[p['fplguid']]

#                     # Koppla mål
#                     if p.get('fplguid') in goal_map:
#                         p['goals'] = len(goal_map[p['fplguid']])
#                         p['goal_minutes'] = goal_map[p['fplguid']]

#                     subs.append(p) if is_sub_list else starters.append(p)

#             match_data['lineups'][team_key] = starters
#             match_data['substitutes'][team_key] = subs

#         return match_data

#     # =====================
#     # DataFrame
#     # =====================
#     def create_lineup_dataframe(self, match_data):

#         rows = []

#         for side in ['home','away']:
#             team = match_data['teams'].get(side)

#             for p in match_data['lineups'][side]:
#                 rows.append(self._row(team,p,"Starting"))

#             for p in match_data['substitutes'][side]:
#                 rows.append(self._row(team,p,"Sub"))

#         return pd.DataFrame(rows)

#     def _row(self, team, p, role):
#         return {
#             'team': team,
#             'name': p.get('name'),
#             'fplguid': p.get('fplguid'),
#             'player_type': role,
#             'was_substituted': p.get('was_substituted'),
#             'sub_direction': p.get('sub_direction'),
#             'sub_minute': p.get('sub_minute'),
#             'sub_minute_int': p.get('sub_minute_int'),
#             'goals': p.get('goals'),
#             'goal_minutes': p.get('goal_minutes'),
#             'assists': p.get('assists'),
#             'assists_minutes': p.get('assists_minutes'),
#             'yellow_card': p.get('yellow_card'),
#             'red_card': p.get('red_card')
#         }

#     # =====================
#     # Assists
#     # =====================
#     def parse_assists_from_events(self, soup):

#         assists = {}
#         events = soup.find_all('li', class_='match-events__item')

#         for e in events:
#             sec = e.find('a', class_='match-event__info-secondary-link')
#             if sec and 'Assist:' in sec.text:
#                 href = sec.get('href','')
#                 if 'fplguid=' in href:
#                     fid = href.split('fplguid=')[-1]
#                     t = e.find('span', class_='match-event__time')
#                     minute = t.text.strip() if t else None
#                     assists.setdefault(fid,[]).append(parse_minute(minute))

#         return assists

#     # =====================
#     # Mål
#     # =====================
#     def parse_goals_from_events(self, soup):

#         goals = []
#         events = soup.find_all('li', class_='match-events__item')

#         for e in events:
#             icon = e.find('use')
#             if not icon:
#                 continue

#             if 'icon-football' not in icon.get('xlink:href',''):
#                 continue

#             # minut
#             t = e.find('span', class_='match-event__time')
#             minute_txt = t.text.strip() if t else None
#             minute_int = parse_minute(minute_txt)

#             info = e.find('a', class_='match-event__info-main-link')
#             if not info:
#                 continue

#             text = info.text.strip()
#             score = text.split()[0]
#             name = text.split(".",1)[-1].strip()

#             href = info.get('href','')
#             fid = href.split('fplguid=')[-1] if 'fplguid=' in href else None

#             # assist
#             assist_elem = e.find('a', class_='match-event__info-secondary-link')
#             assist = None
#             assist_fid = None

#             if assist_elem and "Assist:" in assist_elem.text:
#                 assist = assist_elem.text.replace("Assist:","").strip()
#                 ah = assist_elem.get("href","")
#                 assist_fid = ah.split('fplguid=')[-1] if 'fplguid=' in ah else None

#             # straff
#             penalty = False
#             pen = e.find('div', class_='match-event__info-secondary')
#             if pen and "Straff" in pen.text:
#                 penalty = True

#             goals.append({
#                 "minute": minute_txt,
#                 "minute_int": minute_int,
#                 "score": score,
#                 "scorer": name,
#                 "scorer_fplguid": fid,
#                 "assist": assist,
#                 "assist_fplguid": assist_fid,
#                 "penalty": penalty
#             })

#         return goals

#     # =====================
#     # Matchlängd
#     # =====================
#     def calculate_match_length(self, soup):

#         minutes = []

#         for t in soup.find_all('span', class_='match-event__time'):
#             m = parse_minute(t.text)
#             if m:
#                 minutes.append(m)

#         if not minutes:
#             return 90

#         max_min = max(minutes)

#         # om inga sena events -> normal match
#         if max_min < 90:
#             return 90

#         return max_min


# def build_match_id(filename):
#     return filename.replace(".html","")
