In [294]:
passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth"]


In [7]:
import pandas as pd

path_webreports = "data/web-reports/WebReports-Export-2025-8-29.csv"

In [142]:
import pandas as pd
import re

with open(path_webreports, "r", encoding="utf-8") as f:
    lines = f.readlines()

games_data = []
current_game = None
columns = []

for line in lines:
    line = line.strip()
    if line.startswith("GAME"):
        # Extract set number from GAME1, GAME2, etc.
        game_id, game_info = line.split(":", 1)
        set_number = int(re.search(r"GAME(\d+)", game_id).group(1))
        
        current_game = {"set_number": set_number}
        
        # Parse metadata (venue, opponent, date, score)
        info_parts = game_info.strip().split("-")
        venue = info_parts[0]
        opponent = info_parts[1]
        rest = "-".join(info_parts[2:])
        date_part, score = rest.rsplit("(", 1)
        date = date_part.strip()
        score = score.replace(")", "").strip()
        
        current_game["venue"] = venue
        current_game["opponent"] = opponent
        current_game["date"] = date
        current_game["set_score"] = score
        current_game["rows"] = []
        columns = []
    elif line.startswith("PLAYER_NUM") and current_game is not None:
        columns = line.split(",")
    elif current_game is not None and line:
        current_game["rows"].append(line.split(","))
    elif line == "" and current_game is not None and current_game["rows"]:
        df_game = pd.DataFrame(current_game["rows"], columns=columns)
        # Attach metadata
        for key, value in current_game.items():
            if key not in ["rows"]:
                df_game[key] = value
        games_data.append(df_game)
        current_game = None
        columns = []

# Last block handling
if current_game is not None and current_game["rows"]:
    df_game = pd.DataFrame(current_game["rows"], columns=columns)
    for key, value in current_game.items():
        if key not in ["rows"]:
            df_game[key] = value
    games_data.append(df_game)

# Combine all sets into one dataframe
df_all = pd.concat(games_data, ignore_index=True)

# Make sure date is proper datetime and season extracted
df_all["date"] = pd.to_datetime(df_all["date"], errors="coerce")

# --- 2. Add season ---
def get_season(date):
    if pd.isna(date):
        return None
    year = date.year
    month = date.month
    if month >= 9:
        return f"{year}-{year+1}"
    else:
        return f"{year-1}-{year}"

df_all["season"] = df_all["date"].apply(get_season)

df_all["kampnr"] = df_all["venue"].astype(int)
df_all = df_all.drop(columns=["venue"])

df_all.drop(columns=["date", "PLAYER_NUM"], inplace=True)

df_all = df_all[df_all["PLAYER_NAME"] != "(team)"]


In [None]:
df_all.to_csv("match-statistics.csv", index=False)

df_all.groupby("kampnr")[""]

Unnamed: 0,PLAYER_NAME,GAMES_PLAYED,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,PASS_ATTEMPTS,...,NET_BLOCKS,DIG_ERRORS,DIGS,DIG_ERROR_PERCENT,FAULTS,set_number,opponent,set_score,season,kampnr
0,Lasse Nielsen,1,1,25.0%,1,0,0.0%,1,,0,...,0,0,4,0.0%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
1,Kristian Krag,1,1,33.3%,0,0,0.0%,0,,0,...,0,0,1,0.0%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
2,Boerme,1,2,50.0%,0,1,25.0%,-1,2.67,3,...,-1,1,2,33.3%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
3,Bo,1,1,33.3%,0,0,0.0%,0,,0,...,1,0,0,,1,1,Gentofte Volley.2,25 - 19,2024-2025,141063
4,Vestbjerg,1,5,71.4%,0,0,0.0%,0,1.40,10,...,0,1,2,33.3%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,Nicola,1,1,33.3%,0,0,0.0%,0,2.20,5,...,0,0,1,0.0%,0,3,Amager,23 - 25,2025-2026,144591
481,Gustav,1,3,60.0%,1,1,20.0%,0,,0,...,0,0,0,,0,3,Amager,23 - 25,2025-2026,144591
482,Martin,1,0,0.0%,0,1,50.0%,-1,,0,...,0,0,3,0.0%,0,3,Amager,23 - 25,2025-2026,144591
483,Alex,1,0,0.0%,0,0,0.0%,0,,0,...,0,0,2,0.0%,0,3,Amager,23 - 25,2025-2026,144591


In [253]:
import pandas as pd

with open(path_webreports, "r", encoding="utf-8") as f:
    lines = f.readlines()

agg_data = []
current_block = None
columns = []

for line in lines:
    line = line.strip()
    
    if line.startswith("AGGREGATE"):
        # Start a new aggregate block
        current_block = {"rows": []}
        columns = []
        
    elif line.startswith("PLAYER_NUM") and current_block is not None:
        # Header line
        columns = line.split(",")
        
    elif current_block is not None and line:
        # Data rows
        current_block["rows"].append(line.split(","))
        
    elif line == "" and current_block is not None and current_block["rows"]:
        # End of block → convert to DataFrame
        df_agg = pd.DataFrame(current_block["rows"], columns=columns)
        agg_data.append(df_agg)
        current_block = None
        columns = []

# Handle last block if file doesn't end with blank line
if current_block is not None and current_block["rows"]:
    df_agg = pd.DataFrame(current_block["rows"], columns=columns)
    agg_data.append(df_agg)



# Combine all aggregate blocks
df_agg = pd.concat(agg_data, ignore_index=True)
df_agg = df_agg[df_agg["PLAYER_NAME"] != "(team)"]
df_agg = df_agg.drop(columns=["PLAYER_NUM"])
df_agg

passers = ["Nico Lang", "Bosse", "Boerme", "Mikkel", ""]

df_agg["PASS_ATTEMPTS"] = pd.to_numeric(df_agg["PASS_ATTEMPTS"], errors="coerce")
df_passing = df_agg[df_agg["PASS_ATTEMPTS"] > 10]
df_passing = df_passing[["PLAYER_NAME", "GAMES_PLAYED", "PASS_RATING", "PASS_ATTEMPTS", "SERVE_RCV_ERRORS"]]
df_passing

Unnamed: 0,PLAYER_NAME,GAMES_PLAYED,PASS_RATING,PASS_ATTEMPTS,SERVE_RCV_ERRORS
4,Bosse,28,1.73,43,3
5,Boerme,22,1.86,84,7
7,Mikkel,13,1.71,65,5
10,Nicola,41,1.94,219,8
11,Vestbjerg,37,1.69,227,21
15,Alex,47,1.92,235,16
16,Hjorth,3,2.43,21,0


In [173]:
from pathlib import Path

parent_dir = Path("/Users/alexandercappelen/Documents/GitHub/frederiksberg-elite-stats/data/web-reports")

# list everything inside
for f in parent_dir.iterdir():
    print(f.name, "(dir)" if f.is_dir() else "(file)")


.DS_Store (file)
Frederiksberg-player_ranking-2025-10-2.csv (file)
WebReports-Export-2025-8-29.csv (file)


In [None]:
from pathlib import Path
import pandas as pd
import re
from datetime import datetime

# folder containing the files
folder = Path("data/web-reports")

# pattern: starts with "Frederiksberg-player_ranking" and ends with "-YYYY-MM-DD.csv"
pattern = re.compile(r"Frederiksberg-player_ranking-(\d{4}-\d{1,2}-\d{1,2})\.csv$")

files_with_dates = []
for f in folder.iterdir():
    if f.is_file():
        match = pattern.match(f.name)
        if match:
            date_str = match.group(1)
            # parse date
            file_date = datetime.strptime(date_str, "%Y-%m-%d")
            files_with_dates.append((file_date, f))

if not files_with_dates:
    raise FileNotFoundError("No matching CSV files found in the folder")

# pick the file with the latest date
latest_file = max(files_with_dates, key=lambda x: x[0])[1]
latest_file


PosixPath('data/web-reports/Frederiksberg-player_ranking-2025-10-2.csv')

In [None]:
# Finding passing stats for player for combined seasons (no seasonal data)

import pandas as pd
from io import StringIO

# Assume latest_file is a string with the path to your CSV
with open(latest_file, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Find the start and end of the passing section
start_idx = None
end_idx = None
for i, line in enumerate(lines):
    if line.startswith("Player Number, Player Name, Games Played, Points Played, Got Aced/Game"):
        start_idx = i
    elif start_idx is not None and line.startswith("Player Number, Player Name, Games Played, Points Played, Kills/Game"):
        end_idx = i
        break

# Extract only the passing section
pass_lines = lines[start_idx:end_idx]

# Convert to DataFrame
pass_df = pd.read_csv(StringIO("".join(pass_lines)))

pass_df.columns = pass_df.columns.str.strip().str.replace(' ', '_')


pass_df = pass_df.rename(columns={
    "Player_Number" : "PLAYER_NUMBER",
    "Player_Name" : "player",
    "Games_Played" : "SETS_PLAYED",
    "Points_Played" : "POINTS_PLAYED",
    "Got_Aced/Game" : "GOT_ACED_PER_SET",
    "Passes/Game" : "PASSES_PER_SET",
    "Perfect_Passes/Game" : "PERFECT_PASSES_PER_SET",
    "Pass_Rating" : "average_pass_rating",
    "Total_Pass_Error_%" : "error_percentage",
    "3-pass_Percent": "perfect_percentage",
    "Pass_Attempts": "pass-attempt",
    "1-pass": "ONE_PASS",
    "2-pass": "two_pass",
    "3-pass": "three_pass",
    "Over_the_Net_Pass": "OVER_THE_NET_PASS",
    "Got_Aced": "GOT_ACED",
    "Total_Pass_Errors": "TOTAL_PASS_ERRORS",
    "First_Ball_Side_Out_%": "FIRST_BALL_SIDE_OUT_PERCENT"
})


pass_df = pass_df.drop(columns=["PLAYER_NUMBER"])

passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth", "Anton"]

pass_df["player"] = (
    pass_df["player"]
    .astype(str)               # ensure strings
    .str.strip()               # remove leading/trailing spaces
    .str.replace('\xa0', ' ')  # remove non-breaking spaces
)

pass_df = pass_df[pass_df["player"].isin(passers)]

pass_df["positive_percentage"] = pd.to_numeric(pass_df["two_pass"], errors="coerce") + pd.to_numeric(pass_df["three_pass"], errors="coerce")
pass_df["positive_percentage"] = pass_df["two_pass"] / pd.to_numeric(pass_df["pass-attempt"], errors="coerce") * 100


pass_df = pass_df[["player", "pass-attempt", "error_percentage", "positive_percentage", "perfect_percentage", "average_pass_rating"]]

pass_df

pass_df.to_json("passing_data_total.json", orient="records", indent=2)


FileNotFoundError: [Errno 2] No such file or directory: 'data/web-reports/Frederiksberg-player_ranking-2025-10-2.csv'

In [600]:
# Seasonal passing stats

# Based on match summaries:
# The match-statistics.csv file created above has seasonal data, but not the more granular passing stats (1-pass, 2-pass, 3-pass, etc.) - for this, i would have the export the individual match reports 
# --> Jeg eksporterer excel-fil for hver kamp, hvis jeg kan


import pandas as pd

path = "data/web-reports/Frederiksberg-player_ranking-gev-frb-141063.csv"


# Read the entire CSV file as raw text
with open(path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Identify where each section starts
section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]

# Example: extract the 'passes' section (3rd in your file)
# You can also loop through these sections and search for the right one by header keywords
section_index = 2  # (0 = first table, 1 = second, 2 = third = passes)
start = section_starts[section_index]
end = section_starts[section_index + 1] if section_index + 1 < len(section_starts) else len(lines)

# Extract just the lines for that section
section_lines = lines[start:end]

# Save to a temporary string and read with pandas
from io import StringIO
passes_df = pd.read_csv(StringIO("".join(section_lines)))

# Done!
passes_df


Unnamed: 0,Player Number,Player Name,Games Played,Points Played,Got Aced/Game,Passes/Game,Perfect Passes/Game,Pass Rating,Total Pass Error %,3-pass Percent,Pass Attempts,1-pass,2-pass,3-pass,Over the Net Pass,Got Aced,Total Pass Errors,First Ball Side Out %,Unnamed: 19
0,1,Lasse Nielsen,5.0,269.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,-,
1,3,Kristian Krag,5.0,73.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,0.0%,
2,5,Bosse,2.0,3.0,0.0,0.5,0.0,2.00,0.0%,0.0%,1,0,1,0,0,0,0,0.0%,
3,6,Boerme,5.0,95.0,0.2,3.4,1.6,1.94,5.6%,44.4%,18,4,3,8,2,1,1,22.2%,
4,7,Bo,5.0,47.0,0.2,0.0,0.0,0.00,100.0%,0.0%,1,0,0,0,0,1,1,-,
5,9,Soeren,3.0,13.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,-,
6,11,Nicola,2.0,34.0,0.0,7.5,1.5,1.80,0.0%,20.0%,15,6,6,3,0,0,0,46.7%,
7,12,Vestbjerg,4.0,79.0,0.8,6.0,1.5,1.65,11.1%,22.2%,27,5,10,6,3,3,3,24.0%,
8,15,Martin,5.0,87.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,0.0%,
9,16,Alex,5.0,47.0,0.0,6.2,2.2,2.05,0.0%,35.5%,31,5,12,11,3,0,0,29.4%,


# Run from here...

1. Download "Ranking"-file from SoloStats Webreports and save in webreports-folder with naming pattern

2. Run collect_match_schedule_from_dvbf.ipynb (remember to update to "komplet kampprogram"-link if new tournaments are added)

3. Run code in this script to extract statistical data and combine with match results

In [5]:
# Player passing data - cumulative all time stats and seasonal stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'Passes' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Passes/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)

# Combine everything into one big DataFrame
passes_all = pd.concat(all_dfs, ignore_index=True)
passes_all.columns = passes_all.columns.str.strip().str.replace(' ', '_')

passes_all = passes_all[passes_all["Player_Name"] != " (team)"]
passes_all = passes_all[passes_all["Player_Number"] != "Total"]
#passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth"]
passes_all["Player_Name"] = passes_all["Player_Name"].str.strip()

#passes_all = passes_all[passes_all["Player_Name"].isin(passers)]

#passes_all["match_number"] = passes_all["match_number"].astype(int)

passes_all = passes_all.rename(columns={
    "match_number": "kampnr"
})

passing_column_dictionary = {
    "Player_Number" : "PLAYER_NUMBER",
    "Player_Name" : "player",
    "Games_Played" : "SETS_PLAYED",
    "Points_Played" : "POINTS_PLAYED",
    "Got_Aced/Game" : "GOT_ACED_PER_SET",
    "Passes/Game" : "PASSES_PER_SET",
    "Perfect_Passes/Game" : "PERFECT_PASSES_PER_SET",
    "Pass_Rating" : "average_pass_rating",
    "Total_Pass_Error_%" : "error_percentage",
    "3-pass_Percent": "perfect_percentage",
    "Pass_Attempts": "pass-attempt",
    "1-pass": "one_pass",
    "2-pass": "two_pass",
    "3-pass": "three_pass",
    "Over_the_Net_Pass": "over_the_net_pass",
    "Got_Aced": "got_aced",
    "Total_Pass_Errors": "total_pass_errors",
    "First_Ball_Side_Out_%": "FIRST_BALL_SIDE_OUT_PERCENT"
}


passes_all = passes_all.rename(columns=passing_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

passes_all = passes_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth", "Anton"]

passes_all["player"] = (
    passes_all["player"]
    .astype(str)               # ensure strings
    .str.strip()               # remove leading/trailing spaces
    .str.replace('\xa0', ' ')  # remove non-breaking spaces
)

passes_all = passes_all[passes_all["player"].isin(passers)]


passes_all["positive_percentage"] = pd.to_numeric(passes_all["two_pass"], errors="coerce") + pd.to_numeric(passes_all["three_pass"], errors="coerce")

passes_all["positive_percentage"] = passes_all["two_pass"] / pd.to_numeric(passes_all["pass-attempt"], errors="coerce") * 100
#passes_all = passes_all[["player", "kampnr", "pass-attempt", "one_pass", "two_pass", "three_pass", "error_percentage", "positive_percentage","perfect_percentage", "average_pass_rating"]]
passes_all = passes_all[passes_all["pass-attempt"] > 0]
passes_all["positive_passes"] = passes_all["two_pass"] + passes_all["three_pass"]
passes_all["perfect_passes"] = passes_all["three_pass"]
passes_all["pct_error"] = passes_all["total_pass_errors"] / passes_all["pass-attempt"] * 100
passes_all["pct_perfect"] = passes_all["three_pass"] / passes_all["pass-attempt"] * 100
passes_all["pct_positive"] = (passes_all["two_pass"] + passes_all["three_pass"]) / passes_all["pass-attempt"] * 100
passes_all["average_pass_rating"] = pd.to_numeric(passes_all["average_pass_rating"], errors="coerce")

In [6]:
import pandas as pd

# Columns you want to keep
columns_to_keep = [
    "player", "kampnr", "season", "pass-attempt", "got_aced", "over_the_net_pass",
    "total_pass_errors", "three_pass", "two_pass", "one_pass",
    "positive_passes", "perfect_passes", "average_pass_rating"
]

def summarize_passes(df):
    """Summarize pass statistics per player."""
    summary = df.groupby("player").agg({
        "pass-attempt": "sum",
        "total_pass_errors": "sum",
        "perfect_passes": "sum",
        "positive_passes": "sum",
        "average_pass_rating": "mean"
    }).reset_index()
    
    summary["error_pct"] = summary["total_pass_errors"] / summary["pass-attempt"] * 100
    summary["positive_pct"] = summary["positive_passes"] / summary["pass-attempt"] * 100
    summary["perfect_pct"] = summary["perfect_passes"] / summary["pass-attempt"] * 100
    
    cols = ["player", "average_pass_rating", "pass-attempt", "error_pct", "positive_pct", "perfect_pct"]
    return summary[cols]

# Get all unique seasons
seasons = passes_all["season"].unique()

for season in seasons:
    # Filter dataset for this season
    season_df = passes_all[passes_all["season"] == season][columns_to_keep].copy()
    
    # Summarize
    summary = summarize_passes(season_df)
    
    # Export JSON
    output_file = f"{season.replace('/', '_')}_top_passer.json"
    summary.to_json(output_file, orient="records", indent=2)

# Optional: full dataset summary
all_df = passes_all[[col for col in columns_to_keep if col != "season"]].copy()
summary_all = summarize_passes(all_df)
summary_all.to_json("all_time_top_passer.json", orient="records", indent=2)


In [7]:
temp_df = pd.concat(all_dfs, ignore_index=True)
#temp_df.to_csv("temp_df.csv", index=False)
temp_df.columns = temp_df.columns.str.strip().str.replace(' ', '_')
temp_df = temp_df.rename(columns={
    "match_number": "kampnr"
})

print_df = temp_df


# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

temp_df["kampnr"].unique()

#temp_df[temp_df["kampnr"] == 141144]
temp_df["Player_Name"] = temp_df["Player_Name"].str.strip()
temp_df = temp_df[temp_df["Player_Name"] != '']
temp_df = temp_df[temp_df["Player_Name"] != '(team)']
#temp_df["Player_Name"].unique()

temp_df["positive_percentage"] = pd.to_numeric(temp_df["2-pass"], errors="coerce") + pd.to_numeric(temp_df["3-pass"], errors="coerce")
temp_df["Pass_Rating"] = pd.to_numeric(temp_df["Pass_Rating"], errors="coerce")

temp_df = (
    temp_df
    .groupby(['kampnr'], as_index=False)
    .agg({
        'positive_percentage': 'mean',
        '1-pass': 'sum',
        '2-pass': 'sum',
        '3-pass': 'sum',
        'Pass_Rating': 'mean',
    })
)

temp_df = temp_df.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

temp_df["id_opponent_date"] = (
    temp_df["kampnr"].astype(str) + " - " +
    temp_df["opponent"].astype(str) + " - " +
    temp_df["date"].dt.strftime("%Y.%m.%d")
)

temp_df.to_json("team_passing_per_match.json", orient="records", indent=2)
#print_df.columns

In [8]:
# Player points data - cumulative all time stats and seasonal stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Total Earned" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)



# Combine everything into one big DataFrame
points_all = pd.concat(all_dfs, ignore_index=True)
points_all.columns = points_all.columns.str.strip().str.replace(' ', '_')

points_all = points_all[points_all["Player_Name"] != " (team)"]
points_all = points_all[points_all["Player_Number"] != "Total"]
points_all["Player_Name"] = points_all["Player_Name"].str.strip()


points_all = points_all.rename(columns={
    "match_number": "kampnr"
})

points_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Total_Earned" : "total_earned",
    "Total_Errors" : "total_errors",
}


points_all = points_all.rename(columns=points_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

points_all = points_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)



points_all = points_all[["player", "kampnr", "season", "sets_played", "points_played", "total_earned", "total_errors"]]

points_all.loc[points_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'
points_all.loc[points_all['player'] == 'Lasse Nielsen', 'player'] = 'Lasse'
points_all.loc[points_all['player'] == 'Kristian Krag', 'player'] = 'Kristian'
points_all.loc[points_all['player'] == 'Mads Q', 'player'] = 'Mads'

points_2025_2026 = points_all[points_all["season"] == "2025/2026"].copy()
points_2024_2025 = points_all[points_all["season"] == "2024/2025"].copy()
points_all_time = points_all.copy()


matches_per_player = points_2025_2026.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
points_2025_2026 = points_2025_2026.merge(matches_per_player, on="player", how="left")

matches_per_player = points_2024_2025.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
points_2024_2025 = points_2024_2025.merge(matches_per_player, on="player", how="left")

matches_per_player = points_all_time.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
points_all_time = points_all_time.merge(matches_per_player, on="player", how="left")


In [9]:
# Seasonal points stats per player
points_2024_2025["points_played"] = pd.to_numeric(points_2024_2025["points_played"], errors="coerce")
points_2024_2025["sets_played"] = pd.to_numeric(points_2024_2025["sets_played"], errors="coerce")
points_2024_2025["total_earned"] = pd.to_numeric(points_2024_2025["total_earned"], errors="coerce")
points_2024_2025["total_errors"] = pd.to_numeric(points_2024_2025["total_errors"], errors="coerce")

points_agg_per_match_24_25 = points_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_per_match_24_25["points_per_set"] = points_agg_per_match_24_25["total_earned"] / points_agg_per_match_24_25["sets_played"]
points_agg_per_match_24_25["errors_per_set"] = points_agg_per_match_24_25["total_errors"] / points_agg_per_match_24_25["sets_played"]

points_agg_per_match_24_25["points_per_match"] = points_agg_per_match_24_25["total_earned"] / points_agg_per_match_24_25["matches_played"]
points_agg_per_match_24_25["errors_per_match"] = points_agg_per_match_24_25["total_errors"] / points_agg_per_match_24_25["matches_played"]


points_agg_per_match_24_25[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("2024_2025_top_scorer.json", orient="records", indent=2)

In [10]:
# Seasonal points stats per player
points_2025_2026["points_played"] = pd.to_numeric(points_2025_2026["points_played"], errors="coerce")
points_2025_2026["sets_played"] = pd.to_numeric(points_2025_2026["sets_played"], errors="coerce")
points_2025_2026["total_earned"] = pd.to_numeric(points_2025_2026["total_earned"], errors="coerce")
points_2025_2026["total_errors"] = pd.to_numeric(points_2025_2026["total_errors"], errors="coerce")

points_agg_per_match_25_26 = points_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_per_match_25_26["points_per_set"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["sets_played"]
points_agg_per_match_25_26["errors_per_set"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["sets_played"]

points_agg_per_match_25_26["points_per_match"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["matches_played"]
points_agg_per_match_25_26["errors_per_match"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["matches_played"]


points_agg_per_match_25_26[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("2025_2026_top_scorer.json", orient="records", indent=2)
points_agg_per_match_25_26

Unnamed: 0,player,points_played,sets_played,matches_played,total_earned,total_errors,points_per_set,errors_per_set,points_per_match,errors_per_match
0,Alex,524,25,7,5,22,0.2,0.88,0.714286,3.142857
1,Ando,98,10,4,20,4,2.0,0.4,5.0,1.0
2,Anton,436,22,6,80,49,3.636364,2.227273,13.333333,8.166667
3,Bo,56,8,4,8,6,1.0,0.75,2.0,1.5
4,Boerme,151,9,3,17,24,1.888889,2.666667,5.666667,8.0
5,Bosse,91,14,4,1,5,0.071429,0.357143,0.25,1.25
6,Gustav,134,12,4,40,9,3.333333,0.75,10.0,2.25
7,Hjorth,254,15,5,51,27,3.4,1.8,10.2,5.4
8,Jonas,42,13,4,1,3,0.076923,0.230769,0.25,0.75
9,Kristian,133,18,6,42,10,2.333333,0.555556,7.0,1.666667


In [11]:
# Seasonal points stats per player
points_2025_2026["points_played"] = pd.to_numeric(points_2025_2026["points_played"], errors="coerce")
points_2025_2026["sets_played"] = pd.to_numeric(points_2025_2026["sets_played"], errors="coerce")
points_2025_2026["total_earned"] = pd.to_numeric(points_2025_2026["total_earned"], errors="coerce")
points_2025_2026["total_errors"] = pd.to_numeric(points_2025_2026["total_errors"], errors="coerce")

points_agg_per_match_25_26 = points_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_per_match_25_26["points_per_set"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["sets_played"]
points_agg_per_match_25_26["errors_per_set"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["sets_played"]

points_agg_per_match_25_26["points_per_match"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["matches_played"]
points_agg_per_match_25_26["errors_per_match"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["matches_played"]


points_agg_per_match_25_26[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("2025_2026_top_scorer.json", orient="records", indent=2)

In [12]:
points_all_time["points_played"] = pd.to_numeric(points_all_time["points_played"], errors="coerce")
points_all_time["sets_played"] = pd.to_numeric(points_all_time["sets_played"], errors="coerce")
points_all_time["total_earned"] = pd.to_numeric(points_all_time["total_earned"], errors="coerce")
points_all_time["total_errors"] = pd.to_numeric(points_all_time["total_errors"], errors="coerce")


points_agg_all_time = points_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_all_time["points_per_set"] = points_agg_all_time["total_earned"] / points_agg_all_time["sets_played"]
points_agg_all_time["errors_per_set"] = points_agg_all_time["total_errors"] / points_agg_all_time["sets_played"]

points_agg_all_time["points_per_match"] = points_agg_all_time["total_earned"] / points_agg_all_time["matches_played"]
points_agg_all_time["errors_per_match"] = points_agg_all_time["total_errors"] / points_agg_all_time["matches_played"]


points_agg_all_time[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("all_time_top_scorer.json", orient="records", indent=2)

In [13]:
# Player blocking data - cumulative all time stats and seasonal stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Blocks Still" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)



# Combine everything into one big DataFrame
blocks_all = pd.concat(all_dfs, ignore_index=True)
blocks_all.columns = blocks_all.columns.str.strip().str.replace(' ', '_')

blocks_all = blocks_all[blocks_all["Player_Name"] != " (team)"]
blocks_all = blocks_all[blocks_all["Player_Number"] != "Total"]
blocks_all["Player_Name"] = blocks_all["Player_Name"].str.strip()




blocks_all = blocks_all.rename(columns={
    "match_number": "kampnr"
})

block_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Blocks" : "blocks",
    "Blocks_Still_in_Play" : "blocks_in_play",
}


blocks_all = blocks_all.rename(columns=block_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

blocks_all = blocks_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)


matches_per_player = blocks_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
blocks_all = blocks_all.merge(matches_per_player, on="player", how="left")

blocks_all = blocks_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "blocks", "blocks_in_play"]]

blocks_all.loc[blocks_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'
blocks_all.loc[blocks_all['player'] == 'Lasse Nielsen', 'player'] = 'Lasse'
blocks_all.loc[blocks_all['player'] == 'Kristian Krag', 'player'] = 'Kristian'
blocks_all.loc[blocks_all['player'] == 'Mads Q', 'player'] = 'Mads'

blocks_2025_2026 = blocks_all[blocks_all["season"] == "2025/2026"].copy()
blocks_2024_2025 = blocks_all[blocks_all["season"] == "2024/2025"].copy()
blocks_all_time = blocks_all.copy()

In [14]:
blocks_2025_2026["points_played"] = pd.to_numeric(blocks_2025_2026["points_played"], errors="coerce")
blocks_2025_2026["sets_played"] = pd.to_numeric(blocks_2025_2026["sets_played"], errors="coerce")
blocks_2025_2026["blocks"] = pd.to_numeric(blocks_2025_2026["blocks"], errors="coerce")
blocks_2025_2026["blocks_in_play"] = pd.to_numeric(blocks_2025_2026["blocks_in_play"], errors="coerce")

blocks_agg_2025_2026 = blocks_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "blocks": "sum",
    "blocks_in_play": "sum",
}).reset_index()

blocks_agg_2025_2026["blocks_per_set"] = blocks_agg_2025_2026["blocks"] / blocks_agg_2025_2026["sets_played"]
blocks_agg_2025_2026["blocks_in_play_per_set"] = blocks_agg_2025_2026["blocks_in_play"] / blocks_agg_2025_2026["sets_played"]

blocks_agg_2025_2026[["player", "blocks", "blocks_in_play", "blocks_per_set", "blocks_in_play_per_set"]].to_json("2025_2026_top_blocker.json", orient="records", indent=2)

In [15]:
blocks_2024_2025["points_played"] = pd.to_numeric(blocks_2024_2025["points_played"], errors="coerce")
blocks_2024_2025["sets_played"] = pd.to_numeric(blocks_2024_2025["sets_played"], errors="coerce")
blocks_2024_2025["blocks"] = pd.to_numeric(blocks_2024_2025["blocks"], errors="coerce")
blocks_2024_2025["blocks_in_play"] = pd.to_numeric(blocks_2024_2025["blocks_in_play"], errors="coerce")

blocks_agg_2024_2025 = blocks_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "blocks": "sum",
    "blocks_in_play": "sum",
}).reset_index()

blocks_agg_2024_2025["blocks_per_set"] = blocks_agg_2024_2025["blocks"] / blocks_agg_2024_2025["sets_played"]
blocks_agg_2024_2025["blocks_in_play_per_set"] = blocks_agg_2024_2025["blocks_in_play"] / blocks_agg_2024_2025["sets_played"]

blocks_agg_2024_2025[["player", "blocks", "blocks_in_play", "blocks_per_set", "blocks_in_play_per_set"]].to_json("2024_2025_top_blocker.json", orient="records", indent=2)

In [16]:
blocks_all_time["points_played"] = pd.to_numeric(blocks_all_time["points_played"], errors="coerce")
blocks_all_time["sets_played"] = pd.to_numeric(blocks_all_time["sets_played"], errors="coerce")
blocks_all_time["blocks"] = pd.to_numeric(blocks_all_time["blocks"], errors="coerce")
blocks_all_time["blocks_in_play"] = pd.to_numeric(blocks_all_time["blocks_in_play"], errors="coerce")

blocks_agg_all_time = blocks_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "blocks": "sum",
    "blocks_in_play": "sum",
}).reset_index()

blocks_agg_all_time["blocks_per_set"] = blocks_agg_all_time["blocks"] / blocks_agg_all_time["sets_played"]
blocks_agg_all_time["blocks_in_play_per_set"] = blocks_agg_all_time["blocks_in_play"] / blocks_agg_all_time["sets_played"]

blocks_agg_all_time[["player", "blocks", "blocks_in_play", "blocks_per_set", "blocks_in_play_per_set"]].to_json("all_time_top_blocker.json", orient="records", indent=2)

In [17]:
# Player digging stats


import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Dig Attempts/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)

# Combine everything into one big DataFrame
digs_all = pd.concat(all_dfs, ignore_index=True)
digs_all.columns = digs_all.columns.str.strip().str.replace(' ', '_')

digs_all = digs_all[digs_all["Player_Name"] != " (team)"]
digs_all = digs_all[digs_all["Player_Number"] != "Total"]
digs_all["Player_Name"] = digs_all["Player_Name"].str.strip()

digs_all = digs_all.rename(columns={
    "match_number": "kampnr"
})

dig_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Digs" : "digs",
}


digs_all = digs_all.rename(columns=dig_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

digs_all = digs_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

matches_per_player = digs_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
digs_all = digs_all.merge(matches_per_player, on="player", how="left")

digs_all = digs_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "digs", "opponent"]]

digs_all.loc[digs_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'
digs_all.loc[digs_all['player'] == 'Lasse Nielsen', 'player'] = 'Lasse'
digs_all.loc[digs_all['player'] == 'Kristian Krag', 'player'] = 'Kristian'
digs_all.loc[digs_all['player'] == 'Mads Q', 'player'] = 'Mads'

digs_2025_2026 = digs_all[digs_all["season"] == "2025/2026"].copy()
digs_2024_2025 = digs_all[digs_all["season"] == "2024/2025"].copy()
digs_all_time = digs_all.copy()

In [18]:
digs_all["opponent"].unique()

array(['VLI', 'Hvidovre VK.2', 'Gentofte Volley.2', 'Team Køge', 'KV 61',
       'Amager Volley.2', 'DTU Volley', 'Grøndal EV', 'Farum-Holte',
       'Odense Volleyball'], dtype=object)

In [19]:
digs_2025_2026["points_played"] = pd.to_numeric(digs_2025_2026["points_played"], errors="coerce")
digs_2025_2026["sets_played"] = pd.to_numeric(digs_2025_2026["sets_played"], errors="coerce")
digs_2025_2026["digs"] = pd.to_numeric(digs_2025_2026["digs"], errors="coerce")

digs_agg_2025_2026 = digs_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "digs": "sum",
}).reset_index()

digs_agg_2025_2026["digs_per_set"] = digs_agg_2025_2026["digs"] / digs_agg_2025_2026["sets_played"]
digs_agg_2025_2026["digs_per_match"] = digs_agg_2025_2026["digs"] / digs_agg_2025_2026["matches_played"]

digs_agg_2025_2026[["player", "digs", "digs_per_set", "digs_per_match"]].to_json("2025_2026_top_digger.json", orient="records", indent=2)

In [20]:
digs_2024_2025["points_played"] = pd.to_numeric(digs_2024_2025["points_played"], errors="coerce")
digs_2024_2025["sets_played"] = pd.to_numeric(digs_2024_2025["sets_played"], errors="coerce")
digs_2024_2025["digs"] = pd.to_numeric(digs_2024_2025["digs"], errors="coerce")

digs_agg_2024_2025 = digs_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "digs": "sum",
}).reset_index()

digs_agg_2024_2025["digs_per_set"] = digs_agg_2024_2025["digs"] / digs_agg_2024_2025["sets_played"]
digs_agg_2024_2025["digs_per_match"] = digs_agg_2024_2025["digs"] / digs_agg_2024_2025["matches_played"]

digs_agg_2024_2025[["player", "digs", "digs_per_set", "digs_per_match"]].to_json("2024_2025_top_digger.json", orient="records", indent=2)

In [21]:
digs_all_time["points_played"] = pd.to_numeric(digs_all_time["points_played"], errors="coerce")
digs_all_time["sets_played"] = pd.to_numeric(digs_all_time["sets_played"], errors="coerce")
digs_all_time["digs"] = pd.to_numeric(digs_all_time["digs"], errors="coerce")

digs_agg_all_time = digs_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "digs": "sum",
}).reset_index()

digs_agg_all_time["digs_per_set"] = digs_agg_all_time["digs"] / digs_agg_all_time["sets_played"]
digs_agg_all_time["digs_per_match"] = digs_agg_all_time["digs"] / digs_agg_all_time["matches_played"]

digs_agg_all_time[["player", "digs", "digs_per_set", "digs_per_match"]].to_json("all_time_top_digger.json", orient="records", indent=2)

In [22]:
# Player server stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Serve Attempts/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["kampnr"] = int(match_number)

    all_dfs.append(df)

# Combine everything into one big DataFrame
serve_all = pd.concat(all_dfs, ignore_index=True)
serve_all.columns = serve_all.columns.str.strip().str.replace(' ', '_')

serve_all = serve_all[serve_all["Player_Name"] != " (team)"]
serve_all = serve_all[serve_all["Player_Number"] != "Total"]
serve_all["Player_Name"] = serve_all["Player_Name"].str.strip()

serve_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Serve_Attempts" : "serve_attempts",
    "Serve_Rating" : "average_serve_rating",
    "1-Serve" : "one_serve",
    "2-Serve" : "two_serve",
    "3-Serve" : "three_serve",
    "Aces" : "aces",
    "Serve_Errors" : "serve_errors",
    "Serve_Error_%": "serve_error_pct",
    "Points_Won_on_Serve" : "points_won_on_serve",
    "Point_Scoring_%" : "point_scoring_pct"
}

serve_all = serve_all.rename(columns=serve_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

serve_all = serve_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

serve_all['point_scoring_pct'] = (
    serve_all['point_scoring_pct']
    .str.strip()                           # remove spaces
    .str.replace('%', '', regex=False)     # remove %
    .replace('-', None)                    # replace dashes with None (not pd.NA)
)

# Now safely convert to numeric (invalid parsing -> NaN)
serve_all['point_scoring_pct'] = pd.to_numeric(serve_all['point_scoring_pct'], errors='coerce')


serve_all['serve_error_pct'] = (
    serve_all['serve_error_pct']
    .str.strip()                           # remove spaces
    .str.replace('%', '', regex=False)     # remove %
    .replace('-', None)                    # replace dashes with None (not pd.NA)
)

serve_all['serve_error_pct'] = pd.to_numeric(serve_all['serve_error_pct'], errors='coerce')

matches_per_player = serve_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
serve_all = serve_all.merge(matches_per_player, on="player", how="left")
temp_serve_df = serve_all

serve_all = serve_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "serve_attempts", "average_serve_rating", "one_serve", "two_serve", "three_serve", "aces", "serve_errors", "serve_error_pct", "points_won_on_serve", "point_scoring_pct"]]

serve_all.loc[serve_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'
serve_all.loc[serve_all['player'] == 'Lasse Nielsen', 'player'] = 'Lasse'
serve_all.loc[serve_all['player'] == 'Kristian Krag', 'player'] = 'Kristian'
serve_all.loc[serve_all['player'] == 'Mads Q', 'player'] = 'Mads'

serve_2025_2026 = serve_all[serve_all["season"] == "2025/2026"].copy()
serve_2024_2025 = serve_all[serve_all["season"] == "2024/2025"].copy()
serve_all_time = serve_all.copy()
#temp_serve_df.columns
serve_all_time


Unnamed: 0,player,kampnr,season,matches_played,sets_played,points_played,serve_attempts,average_serve_rating,one_serve,two_serve,three_serve,aces,serve_errors,serve_error_pct,points_won_on_serve,point_scoring_pct
0,Lasse,147057,2025/2026,15,3,172,10,1.90,2,5,1,1,1,10.0,2,20.0
1,Kristian,147057,2025/2026,17,2,19,5,2.60,1,1,2,1,0,0.0,1,20.0
2,Ando,147057,2025/2026,7,3,33,14,1.69,4,3,4,0,2,14.3,7,50.0
3,Boerme,147057,2025/2026,10,2,33,5,1.40,1,0,2,0,2,40.0,2,40.0
4,Gustav,147057,2025/2026,11,2,9,0,-,0,0,0,0,0,,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180,Nicola,144595,2025/2026,14,5,79,15,1.93,4,6,3,1,1,6.7,7,46.7
181,Martin,144595,2025/2026,15,5,60,11,1.27,6,1,2,0,2,18.2,3,27.3
182,Alex,144595,2025/2026,18,5,41,0,-,0,0,0,0,0,,0,
183,Anton,144595,2025/2026,6,5,119,21,2.24,1,7,8,2,3,14.3,10,47.6


In [23]:

non_numeric_cols = ["player", "season"]

serve_2025_2026[serve_2025_2026.columns.difference(non_numeric_cols)] = serve_2025_2026[serve_2025_2026.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")


serve_agg_2025_2026 = serve_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "serve_attempts": "sum",
    "average_serve_rating": "mean",
    "one_serve": "sum",
    "two_serve": "sum",
    "three_serve": "sum",
    "aces": "sum",
    "serve_errors": "sum",
    "points_won_on_serve": "sum",
    "point_scoring_pct": "mean"
}).reset_index()

serve_agg_2025_2026["serve_efficiency"] = (serve_agg_2025_2026["aces"] - serve_agg_2025_2026["serve_errors"]) / serve_agg_2025_2026["serve_attempts"]

serve_agg_2025_2026["ace_percentage"] = serve_agg_2025_2026["aces"] / serve_agg_2025_2026["serve_attempts"] * 100
serve_agg_2025_2026["error_percentage"] = serve_agg_2025_2026["serve_errors"] / serve_agg_2025_2026["serve_attempts"] * 100

serve_agg_2025_2026["points_won_percentage"] = serve_agg_2025_2026["points_won_on_serve"] / serve_agg_2025_2026["serve_attempts"] * 100

serve_agg_2025_2026[["player", "average_serve_rating", "aces", "points_won_on_serve", "serve_efficiency", "ace_percentage", "error_percentage", "points_won_percentage", "point_scoring_pct"]].to_json("2025_2026_top_server.json", orient="records", indent=2)

In [24]:

non_numeric_cols = ["player", "season"]

serve_2024_2025[serve_2024_2025.columns.difference(non_numeric_cols)] = serve_2024_2025[serve_2024_2025.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")


serve_agg_2024_2025 = serve_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "serve_attempts": "sum",
    "average_serve_rating": "mean",
    "one_serve": "sum",
    "two_serve": "sum",
    "three_serve": "sum",
    "aces": "sum",
    "serve_errors": "sum",
    "points_won_on_serve": "sum",
    "point_scoring_pct": "mean"

}).reset_index()

serve_agg_2024_2025["serve_efficiency"] = (serve_agg_2024_2025["aces"] - serve_agg_2024_2025["serve_errors"]) / serve_agg_2024_2025["serve_attempts"]

serve_agg_2024_2025["ace_percentage"] = serve_agg_2024_2025["aces"] / serve_agg_2024_2025["serve_attempts"] * 100
serve_agg_2024_2025["error_percentage"] = serve_agg_2024_2025["serve_errors"] / serve_agg_2024_2025["serve_attempts"] * 100

serve_agg_2024_2025["points_won_percentage"] = serve_agg_2024_2025["points_won_on_serve"] / serve_agg_2024_2025["serve_attempts"] * 100

serve_agg_2024_2025[["player", "average_serve_rating", "aces", "points_won_on_serve", "serve_efficiency", "ace_percentage", "error_percentage", "points_won_percentage", "point_scoring_pct"]].to_json("2024_2025_top_server.json", orient="records", indent=2)

In [25]:

non_numeric_cols = ["player", "season"]

serve_all_time[serve_all_time.columns.difference(non_numeric_cols)] = serve_all_time[serve_all_time.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")


serve_agg_all_time = serve_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "serve_attempts": "sum",
    "average_serve_rating": "mean",
    "one_serve": "sum",
    "two_serve": "sum",
    "three_serve": "sum",
    "aces": "sum",
    "serve_errors": "sum",
    "points_won_on_serve": "sum",
    "point_scoring_pct": "mean"
}).reset_index()

serve_agg_all_time["serve_efficiency"] = (serve_agg_all_time["aces"] - serve_agg_all_time["serve_errors"]) / serve_agg_all_time["serve_attempts"]

serve_agg_all_time["ace_percentage"] = serve_agg_all_time["aces"] / serve_agg_all_time["serve_attempts"] * 100
serve_agg_all_time["error_percentage"] = serve_agg_all_time["serve_errors"] / serve_agg_all_time["serve_attempts"] * 100

serve_agg_all_time["points_won_percentage"] = serve_agg_all_time["points_won_on_serve"] / serve_agg_all_time["serve_attempts"] * 100

serve_agg_all_time[["player", "average_serve_rating", "aces", "points_won_on_serve", "serve_efficiency", "ace_percentage", "error_percentage", "points_won_percentage", "point_scoring_pct"]].to_json("all_time_top_server.json", orient="records", indent=2)
#serve_agg_all_time

In [26]:
# Player offense stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Kills/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["kampnr"] = int(match_number)

    all_dfs.append(df)

# Combine everything into one big DataFrame
offense_all = pd.concat(all_dfs, ignore_index=True)
offense_all.columns = offense_all.columns.str.strip().str.replace(' ', '_')

offense_all = offense_all[offense_all["Player_Name"] != " (team)"]
offense_all = offense_all[offense_all["Player_Number"] != "Total"]
offense_all["Player_Name"] = offense_all["Player_Name"].str.strip()

offense_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Attack_Attempts" : "attack_attempts",
    "Attack_Errors" : "attack_errors",
    "Kills" : "total_kills",
}


offense_all = offense_all.rename(columns=offense_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

offense_all = offense_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

matches_per_player = offense_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
offense_all = offense_all.merge(matches_per_player, on="player", how="left")

offense_all = offense_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "attack_attempts", "attack_errors", "total_kills"]]

offense_all.loc[offense_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'
offense_all.loc[offense_all['player'] == 'Lasse Nielsen', 'player'] = 'Lasse'
offense_all.loc[offense_all['player'] == 'Kristian Krag', 'player'] = 'Kristian'
offense_all.loc[offense_all['player'] == 'Mads Q', 'player'] = 'Mads'

offense_2025_2026 = offense_all[offense_all["season"] == "2025/2026"].copy()
offense_2024_2025 = offense_all[offense_all["season"] == "2024/2025"].copy()
offense_all_time = offense_all.copy()
#offense_all_time

In [27]:

non_numeric_cols = ["player", "season"]

offense_2025_2026[offense_2025_2026.columns.difference(non_numeric_cols)] = offense_2025_2026[offense_2025_2026.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")

offense_agg_2025_2026 = offense_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "attack_attempts" : "sum",
    "attack_errors": "sum",
    "total_kills": "sum",
}).reset_index()


offense_agg_2025_2026["error_percentage"] = offense_agg_2025_2026["attack_errors"] / offense_agg_2025_2026["attack_attempts"] * 100

offense_agg_2025_2026["kill_percentage"] = offense_agg_2025_2026["total_kills"] / offense_agg_2025_2026["attack_attempts"] * 100

offense_agg_2025_2026["hitting_efficiency"] = (offense_agg_2025_2026["total_kills"] - offense_agg_2025_2026["attack_errors"]) / offense_agg_2025_2026["attack_attempts"]

offense_agg_2025_2026[["player", "attack_attempts", "total_kills", "attack_errors", "kill_percentage", "error_percentage", "hitting_efficiency"]].to_json("2025_2026_top_offense.json", orient="records", indent=2)

In [28]:

non_numeric_cols = ["player", "season"]

offense_2024_2025[offense_2024_2025.columns.difference(non_numeric_cols)] = offense_2024_2025[offense_2024_2025.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")

offense_agg_2024_2025 = offense_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "attack_attempts" : "sum",
    "attack_errors": "sum",
    "total_kills": "sum",
}).reset_index()

offense_agg_2024_2025["error_percentage"] = offense_agg_2024_2025["attack_errors"] / offense_agg_2024_2025["attack_attempts"] * 100

offense_agg_2024_2025["kill_percentage"] = offense_agg_2024_2025["total_kills"] / offense_agg_2024_2025["attack_attempts"] * 100

offense_agg_2024_2025["hitting_efficiency"] = (offense_agg_2024_2025["total_kills"] - offense_agg_2024_2025["attack_errors"]) / offense_agg_2024_2025["attack_attempts"]

offense_agg_2024_2025[["player", "attack_attempts", "total_kills", "attack_errors", "kill_percentage", "error_percentage", "hitting_efficiency"]].to_json("2024_2025_top_offense.json", orient="records", indent=2)

In [29]:

non_numeric_cols = ["player", "season"]

offense_all_time[offense_all_time.columns.difference(non_numeric_cols)] = offense_all_time[offense_all_time.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")

offense_agg_all_time = offense_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "attack_attempts" : "sum",
    "attack_errors": "sum",
    "total_kills": "sum",
}).reset_index()

offense_agg_all_time["error_pct"] = offense_agg_all_time["attack_errors"] / offense_agg_all_time["attack_attempts"] * 100

offense_agg_all_time["kill_percentage"] = offense_agg_all_time["total_kills"] / offense_agg_all_time["attack_attempts"] * 100

offense_agg_all_time["hitting_efficiency"] = (offense_agg_all_time["total_kills"] - offense_agg_all_time["attack_errors"]) / offense_agg_all_time["attack_attempts"]

offense_agg_all_time[["player", "attack_attempts", "total_kills", "attack_errors", "kill_percentage", "error_pct", "hitting_efficiency"]].to_json("all_time_top_offense.json", orient="records", indent=2)

## Team stats per game

In [30]:
import glob
import re
import pandas as pd
from io import StringIO

# Define the themes you want to extract and the unique string for each
themes = {
    "passing": "Passes/Game",
    "serving": "Serve Attempts/Game",
    "scoring": "Total Earned",
    "blocking": "Blocks Still",
    "defense": "Dig Attempts/Game",
    "hitting": "Kills/Game"
}

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find all section starts
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]

    # Extract each theme section
    for theme, keyword in themes.items():
        start = None
        for i in section_starts:
            if keyword in lines[i]:
                start = i
                break
        if start is None:
            continue  # Skip if the theme is not in this file

        # Find the end of this section (next "Player Number" or EOF)
        end = next((j for j in section_starts if j > start), len(lines))
        section_lines = lines[start:end]

        # Read the CSV section
        df = pd.read_csv(StringIO("".join(section_lines)))

        # Add team/match info
        match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
        if match:
            team1, team2, match_number = match.groups()
            df["team1"] = team1
            df["team2"] = team2
            df["match_number"] = int(match_number)

        # Add theme info
        df["theme"] = theme

        all_dfs.append(df)
    

# Combine everything into one big DataFrame
all_data = pd.concat(all_dfs, ignore_index=True)
all_data.columns = all_data.columns.str.strip().str.replace(' ', '_')
all_data["Player_Name"] = all_data["Player_Name"].str.strip()



all_data = all_data.rename(columns={
    "match_number": "kampnr"
})


# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

all_data = all_data.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)



#all_data['Player_Name'] = all_data['Player_Name'].replace(0, 'total')
all_data.loc[all_data['Player_Number'] == 'Total', 'Player_Name'] = 'team_combined'

#all_data[all_data["kampnr"] == 147434]
temp_df = all_data#[all_data["Player_Name"] == "Lasse Nielsen"]
all_data_ungrouped = all_data

temp_df.columns
temp_df#[["Player_Name", "kampnr", "Pass_Rating", "Serve_Rating", "Total_Earned", "Blocks/Game", "Digs/Game", "Hitting_Efficiency"]]
temp_df = temp_df[temp_df["Player_Name"] == "team_combined"]
#temp_df = temp_df[["Player_Name", "Pass_Rating", "Serve_Rating", "Total_Earned", "Blocks/Game", "Digs/Game", "Hitting_Efficiency"]]
#temp_df


collapsed = (
    temp_df.groupby(["kampnr"], as_index=False)
      .apply(lambda x: x.ffill().bfill().iloc[0])
      .reset_index(drop=True)
)

collapsed["Points_Won_on_Serve_%"] = collapsed["Points_Won_on_Serve"] / collapsed["Serve_Attempts"] * 100


collapsed = collapsed.drop(columns=["hjemme", "ude", "spillested", "Player_Number", "Games_Played", "Points_Played", "Pass_Attempts", "1-pass", "2-pass", "3-pass", "3-pass_Percent", "Over_the_Net_Pass", "Got_Aced", "Total_Pass_Errors", "Errors:_Blocked", "Attack_Errors", "Attack_Attempts", "Attack_Error_%", "team1", "team2", "Got_Aced/Game", "Passes/Game", "Perfect_Passes/Game", "Total_Pass_Error_%", "theme", 
                                    #"Serve_Attempts",
                                     "Serve_%", "1-Serve", "2-Serve", "3-Serve", "Serve_Errors", "Points_Lost_on_Serve", #"Points_Won_on_Serve", 
                                     "Serve_Net_Points", "Point_Scoring_%", "Aces/Game", "Player_Name", "Serve_Errors/Game", "Serve_Net_Points/Game", "Serve_Attempts/Game", "Errors:_Out", "Errors:_Into_Net", "Earned/Game", "Faults/Game", "Errors/Game", "Total/Game", "Plus/Minus_Total", "Total_Earned", "Total_Errors", "Plus/Minus_Net_Total", "Total_Faults", "Total_Earned_(them)", "Total_Errors_(them)", "Total_Faults_(them)", "Block_Errors/Game", "Blocks_Still_in_Play/Game", "Block_Assists", "Block_Errors", "Block_Solos", "Blocks_Still_in_Play", "Blocks", "Net_Blocks", "Block_%", "Block_Error_%", "Dig_Errors/Game", "Dig_Attempts/Game", "Digs", "Dig_Attempts", "Kill_to_Dig_%", "Attack_Attempt_to_Dig_%", "Attack_Net_Points/Game", "Dig_Errors", "Dig_Error_%", "Dig_%", "Kills", "played", "resultat", "result_for_frb", "match_name"])

collapsed.rename(columns={'Blocks/Game': 'Blocks/Set', 'Digs/Game': 'Digs/Set', 'Kills/Game': 'Kills/Set', 'Attack_Errors/Game': 'Attack_Errors/Set'}, inplace=True)


collapsed.to_json("per_game_team_stat.json", orient="records", indent=2)

#offense_agg_all_time.to_json("all_time_top_offense.json", orient="records", indent=2)

#collapsed


  .apply(lambda x: x.ffill().bfill().iloc[0])


## Player stats per game

In [31]:
# Stats per player per game
# Presenting data for each player for each game aka get a glimpse of the performance for each game

# Best performer (top scorer, relative top scorer, best passer, blocker, server)

# Most important stats for each player for that match

import numpy as np

all_data_ungrouped = all_data

all_data_ungrouped = all_data_ungrouped[all_data_ungrouped["Player_Name"] != "team_combined"]
all_data_ungrouped = all_data_ungrouped[all_data_ungrouped["Player_Name"] != "(team)"]

all_data_ungrouped["Points_Won_on_Serve_%"] = all_data_ungrouped["Points_Won_on_Serve"] / all_data_ungrouped["Serve_Attempts"] * 100


all_data_ungrouped = (
    all_data_ungrouped.groupby(["kampnr", "Player_Name"], as_index=False)
      .apply(lambda x: x.ffill().bfill().iloc[0])
      .reset_index(drop=True)
)

all_data_ungrouped["date"] = pd.to_datetime(all_data_ungrouped["date"], unit="ms")
all_data_ungrouped["date"] = all_data_ungrouped["date"].dt.strftime("%Y-%m-%d")

all_data_ungrouped["Pass_Rating"]= pd.to_numeric(all_data_ungrouped["Pass_Rating"].replace(" -", np.nan))

all_data_ungrouped = all_data_ungrouped.sort_values(by = ["Player_Name", "kampnr"])

all_data_ungrouped = all_data_ungrouped.drop(columns=["theme", "Player_Number"])

all_data_ungrouped.loc[all_data_ungrouped['Player_Name'] == 'Andreas Christensen', 'Player_Name'] = 'Ando'
all_data_ungrouped.loc[all_data_ungrouped['Player_Name'] == 'Lasse Nielsen', 'Player_Name'] = 'Lasse'
all_data_ungrouped.loc[all_data_ungrouped['Player_Name'] == 'Kristian Krag', 'Player_Name'] = 'Kristian'
all_data_ungrouped.loc[all_data_ungrouped['Player_Name'] == 'Mads Q', 'Player_Name'] = 'Mads'

all_data_ungrouped.to_csv("per_game_player_stat.csv")

all_data_ungrouped.to_json("per_game_player_stat.json", orient="records", indent=2)


  .apply(lambda x: x.ffill().bfill().iloc[0])


# ...to here