In [294]:
passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth"]


In [7]:
import pandas as pd

path_webreports = "data/web-reports/WebReports-Export-2025-8-29.csv"

In [142]:
import pandas as pd
import re

with open(path_webreports, "r", encoding="utf-8") as f:
    lines = f.readlines()

games_data = []
current_game = None
columns = []

for line in lines:
    line = line.strip()
    if line.startswith("GAME"):
        # Extract set number from GAME1, GAME2, etc.
        game_id, game_info = line.split(":", 1)
        set_number = int(re.search(r"GAME(\d+)", game_id).group(1))
        
        current_game = {"set_number": set_number}
        
        # Parse metadata (venue, opponent, date, score)
        info_parts = game_info.strip().split("-")
        venue = info_parts[0]
        opponent = info_parts[1]
        rest = "-".join(info_parts[2:])
        date_part, score = rest.rsplit("(", 1)
        date = date_part.strip()
        score = score.replace(")", "").strip()
        
        current_game["venue"] = venue
        current_game["opponent"] = opponent
        current_game["date"] = date
        current_game["set_score"] = score
        current_game["rows"] = []
        columns = []
    elif line.startswith("PLAYER_NUM") and current_game is not None:
        columns = line.split(",")
    elif current_game is not None and line:
        current_game["rows"].append(line.split(","))
    elif line == "" and current_game is not None and current_game["rows"]:
        df_game = pd.DataFrame(current_game["rows"], columns=columns)
        # Attach metadata
        for key, value in current_game.items():
            if key not in ["rows"]:
                df_game[key] = value
        games_data.append(df_game)
        current_game = None
        columns = []

# Last block handling
if current_game is not None and current_game["rows"]:
    df_game = pd.DataFrame(current_game["rows"], columns=columns)
    for key, value in current_game.items():
        if key not in ["rows"]:
            df_game[key] = value
    games_data.append(df_game)

# Combine all sets into one dataframe
df_all = pd.concat(games_data, ignore_index=True)

# Make sure date is proper datetime and season extracted
df_all["date"] = pd.to_datetime(df_all["date"], errors="coerce")

# --- 2. Add season ---
def get_season(date):
    if pd.isna(date):
        return None
    year = date.year
    month = date.month
    if month >= 9:
        return f"{year}-{year+1}"
    else:
        return f"{year-1}-{year}"

df_all["season"] = df_all["date"].apply(get_season)

df_all["kampnr"] = df_all["venue"].astype(int)
df_all = df_all.drop(columns=["venue"])

df_all.drop(columns=["date", "PLAYER_NUM"], inplace=True)

df_all = df_all[df_all["PLAYER_NAME"] != "(team)"]


In [None]:
df_all.to_csv("match-statistics.csv", index=False)

df_all.groupby("kampnr")[""]

Unnamed: 0,PLAYER_NAME,GAMES_PLAYED,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,PASS_ATTEMPTS,...,NET_BLOCKS,DIG_ERRORS,DIGS,DIG_ERROR_PERCENT,FAULTS,set_number,opponent,set_score,season,kampnr
0,Lasse Nielsen,1,1,25.0%,1,0,0.0%,1,,0,...,0,0,4,0.0%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
1,Kristian Krag,1,1,33.3%,0,0,0.0%,0,,0,...,0,0,1,0.0%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
2,Boerme,1,2,50.0%,0,1,25.0%,-1,2.67,3,...,-1,1,2,33.3%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
3,Bo,1,1,33.3%,0,0,0.0%,0,,0,...,1,0,0,,1,1,Gentofte Volley.2,25 - 19,2024-2025,141063
4,Vestbjerg,1,5,71.4%,0,0,0.0%,0,1.40,10,...,0,1,2,33.3%,0,1,Gentofte Volley.2,25 - 19,2024-2025,141063
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480,Nicola,1,1,33.3%,0,0,0.0%,0,2.20,5,...,0,0,1,0.0%,0,3,Amager,23 - 25,2025-2026,144591
481,Gustav,1,3,60.0%,1,1,20.0%,0,,0,...,0,0,0,,0,3,Amager,23 - 25,2025-2026,144591
482,Martin,1,0,0.0%,0,1,50.0%,-1,,0,...,0,0,3,0.0%,0,3,Amager,23 - 25,2025-2026,144591
483,Alex,1,0,0.0%,0,0,0.0%,0,,0,...,0,0,2,0.0%,0,3,Amager,23 - 25,2025-2026,144591


In [253]:
import pandas as pd

with open(path_webreports, "r", encoding="utf-8") as f:
    lines = f.readlines()

agg_data = []
current_block = None
columns = []

for line in lines:
    line = line.strip()
    
    if line.startswith("AGGREGATE"):
        # Start a new aggregate block
        current_block = {"rows": []}
        columns = []
        
    elif line.startswith("PLAYER_NUM") and current_block is not None:
        # Header line
        columns = line.split(",")
        
    elif current_block is not None and line:
        # Data rows
        current_block["rows"].append(line.split(","))
        
    elif line == "" and current_block is not None and current_block["rows"]:
        # End of block → convert to DataFrame
        df_agg = pd.DataFrame(current_block["rows"], columns=columns)
        agg_data.append(df_agg)
        current_block = None
        columns = []

# Handle last block if file doesn't end with blank line
if current_block is not None and current_block["rows"]:
    df_agg = pd.DataFrame(current_block["rows"], columns=columns)
    agg_data.append(df_agg)



# Combine all aggregate blocks
df_agg = pd.concat(agg_data, ignore_index=True)
df_agg = df_agg[df_agg["PLAYER_NAME"] != "(team)"]
df_agg = df_agg.drop(columns=["PLAYER_NUM"])
df_agg

passers = ["Nico Lang", "Bosse", "Boerme", "Mikkel", ""]

df_agg["PASS_ATTEMPTS"] = pd.to_numeric(df_agg["PASS_ATTEMPTS"], errors="coerce")
df_passing = df_agg[df_agg["PASS_ATTEMPTS"] > 10]
df_passing = df_passing[["PLAYER_NAME", "GAMES_PLAYED", "PASS_RATING", "PASS_ATTEMPTS", "SERVE_RCV_ERRORS"]]
df_passing

Unnamed: 0,PLAYER_NAME,GAMES_PLAYED,PASS_RATING,PASS_ATTEMPTS,SERVE_RCV_ERRORS
4,Bosse,28,1.73,43,3
5,Boerme,22,1.86,84,7
7,Mikkel,13,1.71,65,5
10,Nicola,41,1.94,219,8
11,Vestbjerg,37,1.69,227,21
15,Alex,47,1.92,235,16
16,Hjorth,3,2.43,21,0


In [173]:
from pathlib import Path

parent_dir = Path("/Users/alexandercappelen/Documents/GitHub/frederiksberg-elite-stats/data/web-reports")

# list everything inside
for f in parent_dir.iterdir():
    print(f.name, "(dir)" if f.is_dir() else "(file)")


.DS_Store (file)
Frederiksberg-player_ranking-2025-10-2.csv (file)
WebReports-Export-2025-8-29.csv (file)


In [None]:
from pathlib import Path
import pandas as pd
import re
from datetime import datetime

# folder containing the files
folder = Path("data/web-reports")

# pattern: starts with "Frederiksberg-player_ranking" and ends with "-YYYY-MM-DD.csv"
pattern = re.compile(r"Frederiksberg-player_ranking-(\d{4}-\d{1,2}-\d{1,2})\.csv$")

files_with_dates = []
for f in folder.iterdir():
    if f.is_file():
        match = pattern.match(f.name)
        if match:
            date_str = match.group(1)
            # parse date
            file_date = datetime.strptime(date_str, "%Y-%m-%d")
            files_with_dates.append((file_date, f))

if not files_with_dates:
    raise FileNotFoundError("No matching CSV files found in the folder")

# pick the file with the latest date
latest_file = max(files_with_dates, key=lambda x: x[0])[1]
latest_file


PosixPath('data/web-reports/Frederiksberg-player_ranking-2025-10-2.csv')

In [None]:
# Finding passing stats for player for combined seasons (no seasonal data)

import pandas as pd
from io import StringIO

# Assume latest_file is a string with the path to your CSV
with open(latest_file, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Find the start and end of the passing section
start_idx = None
end_idx = None
for i, line in enumerate(lines):
    if line.startswith("Player Number, Player Name, Games Played, Points Played, Got Aced/Game"):
        start_idx = i
    elif start_idx is not None and line.startswith("Player Number, Player Name, Games Played, Points Played, Kills/Game"):
        end_idx = i
        break

# Extract only the passing section
pass_lines = lines[start_idx:end_idx]

# Convert to DataFrame
pass_df = pd.read_csv(StringIO("".join(pass_lines)))

pass_df.columns = pass_df.columns.str.strip().str.replace(' ', '_')


pass_df = pass_df.rename(columns={
    "Player_Number" : "PLAYER_NUMBER",
    "Player_Name" : "player",
    "Games_Played" : "SETS_PLAYED",
    "Points_Played" : "POINTS_PLAYED",
    "Got_Aced/Game" : "GOT_ACED_PER_SET",
    "Passes/Game" : "PASSES_PER_SET",
    "Perfect_Passes/Game" : "PERFECT_PASSES_PER_SET",
    "Pass_Rating" : "average_pass_rating",
    "Total_Pass_Error_%" : "error_percentage",
    "3-pass_Percent": "perfect_percentage",
    "Pass_Attempts": "pass-attempt",
    "1-pass": "ONE_PASS",
    "2-pass": "two_pass",
    "3-pass": "three_pass",
    "Over_the_Net_Pass": "OVER_THE_NET_PASS",
    "Got_Aced": "GOT_ACED",
    "Total_Pass_Errors": "TOTAL_PASS_ERRORS",
    "First_Ball_Side_Out_%": "FIRST_BALL_SIDE_OUT_PERCENT"
})


pass_df = pass_df.drop(columns=["PLAYER_NUMBER"])

passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth"]

pass_df["player"] = (
    pass_df["player"]
    .astype(str)               # ensure strings
    .str.strip()               # remove leading/trailing spaces
    .str.replace('\xa0', ' ')  # remove non-breaking spaces
)

pass_df = pass_df[pass_df["player"].isin(passers)]

pass_df["positive_percentage"] = pd.to_numeric(pass_df["two_pass"], errors="coerce") + pd.to_numeric(pass_df["three_pass"], errors="coerce")
pass_df["positive_percentage"] = pass_df["two_pass"] / pd.to_numeric(pass_df["pass-attempt"], errors="coerce") * 100


pass_df = pass_df[["player", "pass-attempt", "error_percentage", "positive_percentage", "perfect_percentage", "average_pass_rating"]]

pass_df

pass_df.to_json("passing_data_total.json", orient="records", indent=2)


In [262]:
# Seasonal passing stats

# Based on match summaries:
# The match-statistics.csv file created above has seasonal data, but not the more granular passing stats (1-pass, 2-pass, 3-pass, etc.) - for this, i would have the export the individual match reports 
# --> Jeg eksporterer excel-fil for hver kamp, hvis jeg kan


import pandas as pd

path = "data/web-reports/Frederiksberg-player_ranking-gev-frb-141063.csv"


# Read the entire CSV file as raw text
with open(path, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Identify where each section starts
section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]

# Example: extract the 'passes' section (3rd in your file)
# You can also loop through these sections and search for the right one by header keywords
section_index = 2  # (0 = first table, 1 = second, 2 = third = passes)
start = section_starts[section_index]
end = section_starts[section_index + 1] if section_index + 1 < len(section_starts) else len(lines)

# Extract just the lines for that section
section_lines = lines[start:end]

# Save to a temporary string and read with pandas
from io import StringIO
passes_df = pd.read_csv(StringIO("".join(section_lines)))

# Done!
passes_df


Unnamed: 0,Player Number,Player Name,Games Played,Points Played,Got Aced/Game,Passes/Game,Perfect Passes/Game,Pass Rating,Total Pass Error %,3-pass Percent,Pass Attempts,1-pass,2-pass,3-pass,Over the Net Pass,Got Aced,Total Pass Errors,First Ball Side Out %,Unnamed: 19
0,1,Lasse Nielsen,5.0,269.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,-,
1,3,Kristian Krag,5.0,73.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,0.0%,
2,5,Bosse,2.0,3.0,0.0,0.5,0.0,2.00,0.0%,0.0%,1,0,1,0,0,0,0,0.0%,
3,6,Boerme,5.0,95.0,0.2,3.4,1.6,1.94,5.6%,44.4%,18,4,3,8,2,1,1,22.2%,
4,7,Bo,5.0,47.0,0.2,0.0,0.0,0.00,100.0%,0.0%,1,0,0,0,0,1,1,-,
5,9,Soeren,3.0,13.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,-,
6,11,Nicola,2.0,34.0,0.0,7.5,1.5,1.80,0.0%,20.0%,15,6,6,3,0,0,0,46.7%,
7,12,Vestbjerg,4.0,79.0,0.8,6.0,1.5,1.65,11.1%,22.2%,27,5,10,6,3,3,3,24.0%,
8,15,Martin,5.0,87.0,0.0,0.0,0.0,-,-,-,0,0,0,0,0,0,0,0.0%,
9,16,Alex,5.0,47.0,0.0,6.2,2.2,2.05,0.0%,35.5%,31,5,12,11,3,0,0,29.4%,


In [420]:
# Player passing data - cumulative all time stats and seasonal stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'Passes' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Passes/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)



# Combine everything into one big DataFrame
passes_all = pd.concat(all_dfs, ignore_index=True)
passes_all.columns = passes_all.columns.str.strip().str.replace(' ', '_')


passes_all = passes_all[passes_all["Player_Name"] != " (team)"]
passes_all = passes_all[passes_all["Player_Number"] != "Total"]
passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth"]
passes_all["Player_Name"] = passes_all["Player_Name"].str.strip()

passes_all = passes_all[passes_all["Player_Name"].isin(passers)]

#passes_all["match_number"] = passes_all["match_number"].astype(int)

passes_all = passes_all.rename(columns={
    "match_number": "kampnr"
})

passing_column_dictionary = {
    "Player_Number" : "PLAYER_NUMBER",
    "Player_Name" : "player",
    "Games_Played" : "SETS_PLAYED",
    "Points_Played" : "POINTS_PLAYED",
    "Got_Aced/Game" : "GOT_ACED_PER_SET",
    "Passes/Game" : "PASSES_PER_SET",
    "Perfect_Passes/Game" : "PERFECT_PASSES_PER_SET",
    "Pass_Rating" : "average_pass_rating",
    "Total_Pass_Error_%" : "error_percentage",
    "3-pass_Percent": "perfect_percentage",
    "Pass_Attempts": "pass-attempt",
    "1-pass": "one_pass",
    "2-pass": "two_pass",
    "3-pass": "three_pass",
    "Over_the_Net_Pass": "over_the_net_pass",
    "Got_Aced": "got_aced",
    "Total_Pass_Errors": "total_pass_errors",
    "First_Ball_Side_Out_%": "FIRST_BALL_SIDE_OUT_PERCENT"
}


passes_all = passes_all.rename(columns=passing_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

passes_all = passes_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)


# Add season based on match date
#def assign_season(date):
#    year = date.year
#    month = date.month
#    # If month is September (9) to December (12), season starts this year
#    if month >= 9:
#        return f"{year}/{year+1}"
#    else:  # January (1) to May (5), season started last year
#        return f"{year-1}/{year}"
#    
#passes_all["date"] = pd.to_datetime(passes_all["date"], errors="coerce")
#passes_all["season"] = passes_all["date"].apply(assign_season)


passes_all["positive_percentage"] = pd.to_numeric(passes_all["two_pass"], errors="coerce") + pd.to_numeric(passes_all["three_pass"], errors="coerce")
passes_all["positive_percentage"] = passes_all["two_pass"] / pd.to_numeric(passes_all["pass-attempt"], errors="coerce") * 100
#passes_all = passes_all[["player", "kampnr", "pass-attempt", "one_pass", "two_pass", "three_pass", "error_percentage", "positive_percentage","perfect_percentage", "average_pass_rating"]]
passes_all = passes_all[passes_all["pass-attempt"] > 0]


passes_all["positive_passes"] = passes_all["two_pass"] + passes_all["three_pass"]
passes_all["perfect_passes"] = passes_all["three_pass"]


passes_all["pct_error"] = passes_all["total_pass_errors"] / passes_all["pass-attempt"] * 100
passes_all["pct_perfect"] = passes_all["three_pass"] / passes_all["pass-attempt"] * 100
passes_all["pct_positive"] = (passes_all["two_pass"] + passes_all["three_pass"]) / passes_all["pass-attempt"] * 100
passes_all["average_pass_rating"] = pd.to_numeric(passes_all["average_pass_rating"], errors="coerce")


pass_statistics_2025_2026 = passes_all[passes_all["season"] == "2025/2026"]
pass_statistics_2025_2026 = pass_statistics_2025_2026[["player", "kampnr", "season", "pass-attempt", "got_aced", "over_the_net_pass", "total_pass_errors", "three_pass", "two_pass", "one_pass", "positive_passes", "perfect_passes", "average_pass_rating"]].copy()
pass_statistics_2024_2025 = passes_all[passes_all["season"] == "2024/2025"]
pass_statistics_2024_2025 = pass_statistics_2024_2025[["player", "kampnr", "season", "pass-attempt", "got_aced", "over_the_net_pass", "total_pass_errors", "three_pass", "two_pass", "one_pass", "positive_passes", "perfect_passes", "average_pass_rating"]].copy()
pass_statistics_all = passes_all[["player", "kampnr", "pass-attempt", "got_aced", "over_the_net_pass", "total_pass_errors", "three_pass", "two_pass", "one_pass", "positive_passes", "perfect_passes", "average_pass_rating"]].copy()


In [None]:
# Seasonal passing stats per player

agg_per_match_24_25 = pass_statistics_2024_2025.groupby(["player"]).agg({
    "pass-attempt": "sum",         # sum counts
    "total_pass_errors": "sum",
    "perfect_passes": "sum",
    "positive_passes": "sum",
    "average_pass_rating": "mean"          # take average for rating
}).reset_index()

agg_per_match_24_25["error_pct"] = agg_per_match_24_25["total_pass_errors"] / agg_per_match_24_25["pass-attempt"] * 100
agg_per_match_24_25["positive_pct"] = agg_per_match_24_25["positive_passes"] / agg_per_match_24_25["pass-attempt"] * 100
agg_per_match_24_25["perfect_pct"] = agg_per_match_24_25["perfect_passes"] / agg_per_match_24_25["pass-attempt"] * 100

agg_per_match_24_25[["player", "pass-attempt", "error_pct", "positive_pct", "perfect_pct", "average_pass_rating"]].to_json("2024_2025_player_passing.json", orient="records", indent=2)

Unnamed: 0,player,pass-attempt,total_pass_errors,perfect_passes,positive_passes,average_pass_rating,error_pct,positive_pct,perfect_pct
0,Alex,235,16,78,157,1.932727,6.808511,66.808511,33.191489
1,Boerme,84,7,26,56,1.658571,8.333333,66.666667,30.952381
2,Bosse,21,2,5,11,1.82,9.52381,52.380952,23.809524
3,Frederik,7,2,1,2,1.14,28.571429,28.571429,14.285714
4,Mikkel,65,5,9,42,1.6875,7.692308,64.615385,13.846154
5,Nico Lang,4,0,1,3,2.0,0.0,75.0,25.0
6,Nicola,200,7,58,138,1.908,3.5,69.0,29.0
7,Vestbjerg,227,21,50,135,1.616364,9.251101,59.471366,22.026432


In [410]:
# Seasonal passing stats per player

agg_per_match_25_26 = pass_statistics_2025_2026.groupby(["player"]).agg({
    "pass-attempt": "sum",         # sum counts
    "total_pass_errors": "sum",
    "perfect_passes": "sum",
    "positive_passes": "sum",
    "average_pass_rating": "mean"          # take average for rating
}).reset_index()

agg_per_match_25_26["error_pct"] = agg_per_match_25_26["total_pass_errors"] / agg_per_match_25_26["pass-attempt"] * 100
agg_per_match_25_26["positive_pct"] = agg_per_match_25_26["positive_passes"] / agg_per_match_25_26["pass-attempt"] * 100
agg_per_match_25_26["perfect_pct"] = agg_per_match_25_26["perfect_passes"] / agg_per_match_25_26["pass-attempt"] * 100

agg_per_match_25_26[["player", "pass-attempt", "error_pct", "positive_pct", "perfect_pct", "average_pass_rating"]].to_json("2025_2026_player_passing.json", orient="records", indent=2)

In [378]:
# All time passing stats


agg_per_match = pass_statistics_all.groupby(["player"]).agg({
    "pass-attempt": "sum",         # sum counts
    "total_pass_errors": "sum",
    "perfect_passes": "sum",
    "positive_passes": "sum",
    "average_pass_rating": "mean"          # take average for rating
}).reset_index()

agg_per_match["error_pct"] = agg_per_match["total_pass_errors"] / agg_per_match["pass-attempt"] * 100
agg_per_match["positive_pct"] = agg_per_match["positive_passes"] / agg_per_match["pass-attempt"] * 100
agg_per_match["perfect_pct"] = agg_per_match["perfect_passes"] / agg_per_match["pass-attempt"] * 100


agg_per_match[["player", "pass-attempt", "error_pct", "positive_pct", "perfect_pct", "average_pass_rating"]].to_json("all_time_player_passing.json", orient="records", indent=2)

In [None]:
# Player points data - cumulative all time stats and seasonal stats


import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Total Earned" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)



# Combine everything into one big DataFrame
points_all = pd.concat(all_dfs, ignore_index=True)
points_all.columns = points_all.columns.str.strip().str.replace(' ', '_')

points_all = points_all[points_all["Player_Name"] != " (team)"]
points_all = points_all[points_all["Player_Number"] != "Total"]
points_all["Player_Name"] = points_all["Player_Name"].str.strip()


points_all = points_all.rename(columns={
    "match_number": "kampnr"
})

points_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Total_Earned" : "total_earned",
    "Total_Errors" : "total_errors",
}


points_all = points_all.rename(columns=points_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

points_all = points_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)


matches_per_player = points_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
points_all = points_all.merge(matches_per_player, on="player", how="left")

points_all = points_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "total_earned", "total_errors"]]

points_all.loc[points_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'

points_2025_2026 = points_all[points_all["season"] == "2025/2026"].copy()
points_2024_2025 = points_all[points_all["season"] == "2024/2025"].copy()
points_all_time = points_all.copy()

In [525]:
# Seasonal points stats per player
points_2024_2025["points_played"] = pd.to_numeric(points_2024_2025["points_played"], errors="coerce")
points_2024_2025["sets_played"] = pd.to_numeric(points_2024_2025["sets_played"], errors="coerce")
points_2024_2025["total_earned"] = pd.to_numeric(points_2024_2025["total_earned"], errors="coerce")
points_2024_2025["total_errors"] = pd.to_numeric(points_2024_2025["total_errors"], errors="coerce")

points_agg_per_match_24_25 = points_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_per_match_24_25["points_per_set"] = points_agg_per_match_24_25["total_earned"] / points_agg_per_match_24_25["sets_played"]
points_agg_per_match_24_25["errors_per_set"] = points_agg_per_match_24_25["total_errors"] / points_agg_per_match_24_25["sets_played"]

points_agg_per_match_24_25["points_per_match"] = points_agg_per_match_24_25["total_earned"] / points_agg_per_match_24_25["matches_played"]
points_agg_per_match_24_25["errors_per_match"] = points_agg_per_match_24_25["total_errors"] / points_agg_per_match_24_25["matches_played"]


points_agg_per_match_24_25[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("2024_2025_top_scorer.json", orient="records", indent=2)

In [526]:
# Seasonal points stats per player
points_2025_2026["points_played"] = pd.to_numeric(points_2025_2026["points_played"], errors="coerce")
points_2025_2026["sets_played"] = pd.to_numeric(points_2025_2026["sets_played"], errors="coerce")
points_2025_2026["total_earned"] = pd.to_numeric(points_2025_2026["total_earned"], errors="coerce")
points_2025_2026["total_errors"] = pd.to_numeric(points_2025_2026["total_errors"], errors="coerce")

points_agg_per_match_25_26 = points_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_per_match_25_26["points_per_set"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["sets_played"]
points_agg_per_match_25_26["errors_per_set"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["sets_played"]

points_agg_per_match_25_26["points_per_match"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["matches_played"]
points_agg_per_match_25_26["errors_per_match"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["matches_played"]


points_agg_per_match_25_26[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("2025_2026_top_scorer.json", orient="records", indent=2)

In [527]:
# Seasonal points stats per player
points_2025_2026["points_played"] = pd.to_numeric(points_2025_2026["points_played"], errors="coerce")
points_2025_2026["sets_played"] = pd.to_numeric(points_2025_2026["sets_played"], errors="coerce")
points_2025_2026["total_earned"] = pd.to_numeric(points_2025_2026["total_earned"], errors="coerce")
points_2025_2026["total_errors"] = pd.to_numeric(points_2025_2026["total_errors"], errors="coerce")

points_agg_per_match_25_26 = points_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_per_match_25_26["points_per_set"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["sets_played"]
points_agg_per_match_25_26["errors_per_set"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["sets_played"]

points_agg_per_match_25_26["points_per_match"] = points_agg_per_match_25_26["total_earned"] / points_agg_per_match_25_26["matches_played"]
points_agg_per_match_25_26["errors_per_match"] = points_agg_per_match_25_26["total_errors"] / points_agg_per_match_25_26["matches_played"]


points_agg_per_match_25_26[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("2025_2026_top_scorer.json", orient="records", indent=2)

In [510]:


points_all_time["points_played"] = pd.to_numeric(points_all_time["points_played"], errors="coerce")
points_all_time["sets_played"] = pd.to_numeric(points_all_time["sets_played"], errors="coerce")
points_all_time["total_earned"] = pd.to_numeric(points_all_time["total_earned"], errors="coerce")
points_all_time["total_errors"] = pd.to_numeric(points_all_time["total_errors"], errors="coerce")


points_agg_all_time = points_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "total_earned": "sum",
    "total_errors": "sum",
}).reset_index()

points_agg_all_time["points_per_set"] = points_agg_all_time["total_earned"] / points_agg_all_time["sets_played"]
points_agg_all_time["errors_per_set"] = points_agg_all_time["total_errors"] / points_agg_all_time["sets_played"]

points_agg_all_time["points_per_match"] = points_agg_all_time["total_earned"] / points_agg_all_time["matches_played"]
points_agg_all_time["errors_per_match"] = points_agg_all_time["total_errors"] / points_agg_all_time["matches_played"]


points_agg_all_time[["player", "total_earned", "total_errors", "points_per_set", "errors_per_set", "points_per_match", "errors_per_match"]].to_json("all_time_top_scorer.json", orient="records", indent=2)

In [528]:
# Player blocking data - cumulative all time stats and seasonal stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Blocks Still" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)



# Combine everything into one big DataFrame
blocks_all = pd.concat(all_dfs, ignore_index=True)
blocks_all.columns = blocks_all.columns.str.strip().str.replace(' ', '_')

blocks_all = blocks_all[blocks_all["Player_Name"] != " (team)"]
blocks_all = blocks_all[blocks_all["Player_Number"] != "Total"]
blocks_all["Player_Name"] = blocks_all["Player_Name"].str.strip()




blocks_all = blocks_all.rename(columns={
    "match_number": "kampnr"
})

block_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Blocks" : "blocks",
    "Blocks_Still_in_Play" : "blocks_in_play",
}


blocks_all = blocks_all.rename(columns=block_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

blocks_all = blocks_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)


matches_per_player = blocks_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
blocks_all = blocks_all.merge(matches_per_player, on="player", how="left")

blocks_all = blocks_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "blocks", "blocks_in_play"]]

blocks_all.loc[blocks_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'

blocks_2025_2026 = blocks_all[blocks_all["season"] == "2025/2026"].copy()
blocks_2024_2025 = blocks_all[blocks_all["season"] == "2024/2025"].copy()
blocks_all_time = blocks_all.copy()

In [529]:
blocks_2025_2026["points_played"] = pd.to_numeric(blocks_2025_2026["points_played"], errors="coerce")
blocks_2025_2026["sets_played"] = pd.to_numeric(blocks_2025_2026["sets_played"], errors="coerce")
blocks_2025_2026["blocks"] = pd.to_numeric(blocks_2025_2026["blocks"], errors="coerce")
blocks_2025_2026["blocks_in_play"] = pd.to_numeric(blocks_2025_2026["blocks_in_play"], errors="coerce")

blocks_agg_2025_2026 = blocks_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "blocks": "sum",
    "blocks_in_play": "sum",
}).reset_index()

blocks_agg_2025_2026["blocks_per_set"] = blocks_agg_2025_2026["blocks"] / blocks_agg_2025_2026["sets_played"]
blocks_agg_2025_2026["blocks_in_play_per_set"] = blocks_agg_2025_2026["blocks_in_play"] / blocks_agg_2025_2026["sets_played"]

blocks_agg_2025_2026[["player", "blocks", "blocks_in_play", "blocks_per_set", "blocks_in_play_per_set"]].to_json("2025_2026_top_blocker.json", orient="records", indent=2)

In [530]:
blocks_2024_2025["points_played"] = pd.to_numeric(blocks_2024_2025["points_played"], errors="coerce")
blocks_2024_2025["sets_played"] = pd.to_numeric(blocks_2024_2025["sets_played"], errors="coerce")
blocks_2024_2025["blocks"] = pd.to_numeric(blocks_2024_2025["blocks"], errors="coerce")
blocks_2024_2025["blocks_in_play"] = pd.to_numeric(blocks_2024_2025["blocks_in_play"], errors="coerce")

blocks_agg_2024_2025 = blocks_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "blocks": "sum",
    "blocks_in_play": "sum",
}).reset_index()

blocks_agg_2024_2025["blocks_per_set"] = blocks_agg_2024_2025["blocks"] / blocks_agg_2024_2025["sets_played"]
blocks_agg_2024_2025["blocks_in_play_per_set"] = blocks_agg_2024_2025["blocks_in_play"] / blocks_agg_2024_2025["sets_played"]

blocks_agg_2024_2025[["player", "blocks", "blocks_in_play", "blocks_per_set", "blocks_in_play_per_set"]].to_json("2024_2025_top_blocker.json", orient="records", indent=2)

In [531]:
blocks_all_time["points_played"] = pd.to_numeric(blocks_all_time["points_played"], errors="coerce")
blocks_all_time["sets_played"] = pd.to_numeric(blocks_all_time["sets_played"], errors="coerce")
blocks_all_time["blocks"] = pd.to_numeric(blocks_all_time["blocks"], errors="coerce")
blocks_all_time["blocks_in_play"] = pd.to_numeric(blocks_all_time["blocks_in_play"], errors="coerce")

blocks_agg_all_time = blocks_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "blocks": "sum",
    "blocks_in_play": "sum",
}).reset_index()

blocks_agg_all_time["blocks_per_set"] = blocks_agg_all_time["blocks"] / blocks_agg_all_time["sets_played"]
blocks_agg_all_time["blocks_in_play_per_set"] = blocks_agg_all_time["blocks_in_play"] / blocks_agg_all_time["sets_played"]

blocks_agg_all_time[["player", "blocks", "blocks_in_play", "blocks_per_set", "blocks_in_play_per_set"]].to_json("all_time_top_blocker.json", orient="records", indent=2)

In [536]:
# Player digging stats


import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Dig Attempts/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["match_number"] = int(match_number)

    all_dfs.append(df)

# Combine everything into one big DataFrame
digs_all = pd.concat(all_dfs, ignore_index=True)
digs_all.columns = digs_all.columns.str.strip().str.replace(' ', '_')

digs_all = digs_all[digs_all["Player_Name"] != " (team)"]
digs_all = digs_all[digs_all["Player_Number"] != "Total"]
digs_all["Player_Name"] = digs_all["Player_Name"].str.strip()

digs_all = digs_all.rename(columns={
    "match_number": "kampnr"
})

dig_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Digs" : "digs",
}


digs_all = digs_all.rename(columns=dig_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

digs_all = digs_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

matches_per_player = digs_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
digs_all = digs_all.merge(matches_per_player, on="player", how="left")

digs_all = digs_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "digs"]]

digs_all.loc[digs_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'

digs_2025_2026 = digs_all[digs_all["season"] == "2025/2026"].copy()
digs_2024_2025 = digs_all[digs_all["season"] == "2024/2025"].copy()
digs_all_time = digs_all.copy()

In [537]:
digs_2025_2026["points_played"] = pd.to_numeric(digs_2025_2026["points_played"], errors="coerce")
digs_2025_2026["sets_played"] = pd.to_numeric(digs_2025_2026["sets_played"], errors="coerce")
digs_2025_2026["digs"] = pd.to_numeric(digs_2025_2026["digs"], errors="coerce")

digs_agg_2025_2026 = digs_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "digs": "sum",
}).reset_index()

digs_agg_2025_2026["digs_per_set"] = digs_agg_2025_2026["digs"] / digs_agg_2025_2026["sets_played"]
digs_agg_2025_2026["digs_per_match"] = digs_agg_2025_2026["digs"] / digs_agg_2025_2026["matches_played"]

digs_agg_2025_2026[["player", "digs", "digs_per_set", "digs_per_match"]].to_json("2025_2026_top_digger.json", orient="records", indent=2)

In [538]:
digs_2024_2025["points_played"] = pd.to_numeric(digs_2024_2025["points_played"], errors="coerce")
digs_2024_2025["sets_played"] = pd.to_numeric(digs_2024_2025["sets_played"], errors="coerce")
digs_2024_2025["digs"] = pd.to_numeric(digs_2024_2025["digs"], errors="coerce")

digs_agg_2024_2025 = digs_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "digs": "sum",
}).reset_index()

digs_agg_2024_2025["digs_per_set"] = digs_agg_2024_2025["digs"] / digs_agg_2024_2025["sets_played"]
digs_agg_2024_2025["digs_per_match"] = digs_agg_2024_2025["digs"] / digs_agg_2024_2025["matches_played"]

digs_agg_2024_2025[["player", "digs", "digs_per_set", "digs_per_match"]].to_json("2024_2025_top_digger.json", orient="records", indent=2)

In [539]:
digs_all_time["points_played"] = pd.to_numeric(digs_all_time["points_played"], errors="coerce")
digs_all_time["sets_played"] = pd.to_numeric(digs_all_time["sets_played"], errors="coerce")
digs_all_time["digs"] = pd.to_numeric(digs_all_time["digs"], errors="coerce")

digs_agg_all_time = digs_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "digs": "sum",
}).reset_index()

digs_agg_all_time["digs_per_set"] = digs_agg_all_time["digs"] / digs_agg_all_time["sets_played"]
digs_agg_all_time["digs_per_match"] = digs_agg_all_time["digs"] / digs_agg_all_time["matches_played"]

digs_agg_all_time[["player", "digs", "digs_per_set", "digs_per_match"]].to_json("all_time_top_digger.json", orient="records", indent=2)

In [532]:
# Player server stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Serve Attempts/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["kampnr"] = int(match_number)

    all_dfs.append(df)

# Combine everything into one big DataFrame
serve_all = pd.concat(all_dfs, ignore_index=True)
serve_all.columns = serve_all.columns.str.strip().str.replace(' ', '_')

serve_all = serve_all[serve_all["Player_Name"] != " (team)"]
serve_all = serve_all[serve_all["Player_Number"] != "Total"]
serve_all["Player_Name"] = serve_all["Player_Name"].str.strip()

serve_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Serve_Attempts" : "serve_attempts",
    "Serve_Rating" : "average_serve_rating",
    "1-Serve" : "one_serve",
    "2-Serve" : "two_serve",
    "3-Serve" : "three_serve",
    "Aces" : "aces",
    "Serve_Errors" : "serve_errors",
    "Points_Won_on_Serve" : "points_won_on_serve",
}

serve_all = serve_all.rename(columns=serve_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

serve_all = serve_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

matches_per_player = serve_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
serve_all = serve_all.merge(matches_per_player, on="player", how="left")

serve_all = serve_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "serve_attempts", "average_serve_rating", "one_serve", "two_serve", "three_serve", "aces", "serve_errors", "points_won_on_serve"]]

serve_all.loc[serve_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'

serve_2025_2026 = serve_all[serve_all["season"] == "2025/2026"].copy()
serve_2024_2025 = serve_all[serve_all["season"] == "2024/2025"].copy()
serve_all_time = serve_all.copy()

In [533]:

non_numeric_cols = ["player", "season"]

serve_2025_2026[serve_2025_2026.columns.difference(non_numeric_cols)] = serve_2025_2026[serve_2025_2026.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")


serve_agg_2025_2026 = serve_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "serve_attempts": "sum",
    "average_serve_rating": "mean",
    "one_serve": "sum",
    "two_serve": "sum",
    "three_serve": "sum",
    "aces": "sum",
    "serve_errors": "sum",
    "points_won_on_serve": "sum",
}).reset_index()

serve_agg_2025_2026["serve_efficiency"] = (serve_agg_2025_2026["aces"] - serve_agg_2025_2026["serve_errors"]) / serve_agg_2025_2026["serve_attempts"]

serve_agg_2025_2026["ace_percentage"] = serve_agg_2025_2026["aces"] / serve_agg_2025_2026["serve_attempts"] * 100
serve_agg_2025_2026["error_percentage"] = serve_agg_2025_2026["serve_errors"] / serve_agg_2025_2026["serve_attempts"] * 100

serve_agg_2025_2026["points_won_percentage"] = serve_agg_2025_2026["points_won_on_serve"] / serve_agg_2025_2026["serve_attempts"] * 100

serve_agg_2025_2026[["player", "average_serve_rating", "aces", "points_won_on_serve", "serve_efficiency", "ace_percentage", "error_percentage", "points_won_percentage"]].to_json("2025_2026_top_server.json", orient="records", indent=2)

In [534]:

non_numeric_cols = ["player", "season"]

serve_2024_2025[serve_2024_2025.columns.difference(non_numeric_cols)] = serve_2024_2025[serve_2024_2025.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")


serve_agg_2024_2025 = serve_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "serve_attempts": "sum",
    "average_serve_rating": "mean",
    "one_serve": "sum",
    "two_serve": "sum",
    "three_serve": "sum",
    "aces": "sum",
    "serve_errors": "sum",
    "points_won_on_serve": "sum",
}).reset_index()

serve_agg_2024_2025["serve_efficiency"] = (serve_agg_2024_2025["aces"] - serve_agg_2024_2025["serve_errors"]) / serve_agg_2024_2025["serve_attempts"]

serve_agg_2024_2025["ace_percentage"] = serve_agg_2024_2025["aces"] / serve_agg_2024_2025["serve_attempts"] * 100
serve_agg_2024_2025["error_percentage"] = serve_agg_2024_2025["serve_errors"] / serve_agg_2024_2025["serve_attempts"] * 100

serve_agg_2024_2025["points_won_percentage"] = serve_agg_2024_2025["points_won_on_serve"] / serve_agg_2024_2025["serve_attempts"] * 100

serve_agg_2024_2025[["player", "average_serve_rating", "aces", "points_won_on_serve", "serve_efficiency", "ace_percentage", "error_percentage", "points_won_percentage"]].to_json("2024_2025_top_server.json", orient="records", indent=2)

In [535]:

non_numeric_cols = ["player", "season"]

serve_all_time[serve_all_time.columns.difference(non_numeric_cols)] = serve_all_time[serve_all_time.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")


serve_agg_all_time = serve_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "serve_attempts": "sum",
    "average_serve_rating": "mean",
    "one_serve": "sum",
    "two_serve": "sum",
    "three_serve": "sum",
    "aces": "sum",
    "serve_errors": "sum",
    "points_won_on_serve": "sum",
}).reset_index()

serve_agg_all_time["serve_efficiency"] = (serve_agg_all_time["aces"] - serve_agg_all_time["serve_errors"]) / serve_agg_all_time["serve_attempts"]

serve_agg_all_time["ace_percentage"] = serve_agg_all_time["aces"] / serve_agg_all_time["serve_attempts"] * 100
serve_agg_all_time["error_percentage"] = serve_agg_all_time["serve_errors"] / serve_agg_all_time["serve_attempts"] * 100

serve_agg_all_time["points_won_percentage"] = serve_agg_all_time["points_won_on_serve"] / serve_agg_all_time["serve_attempts"] * 100

serve_agg_all_time[["player", "average_serve_rating", "aces", "points_won_on_serve", "serve_efficiency", "ace_percentage", "error_percentage", "points_won_percentage"]].to_json("all_time_top_server.json", orient="records", indent=2)

In [542]:
# Player offense stats

import glob
import re
import pandas as pd
from io import StringIO

all_files = glob.glob("data/web-reports/Frederiksberg-player_ranking-*.csv")
all_dfs = []

for file in all_files:
    with open(file, "r", encoding="utf-8") as f:
        lines = f.readlines()

    # Find the 'offense' section
    section_starts = [i for i, line in enumerate(lines) if line.startswith("Player Number")]
    for i in section_starts:
        if "Kills/Game" in lines[i]:
            start = i
            break
    end = next((j for j in section_starts if j > start), len(lines))

    section_lines = lines[start:end]
    df = pd.read_csv(StringIO("".join(section_lines)))

    # Extract team and match info
    match = re.search(r"Frederiksberg-player_ranking-([a-z0-9]+)-([a-z0-9]+)-(\d+)\.csv", file)
    if match:
        team1, team2, match_number = match.groups()
        df["team1"] = team1
        df["team2"] = team2
        df["kampnr"] = int(match_number)

    all_dfs.append(df)

# Combine everything into one big DataFrame
offense_all = pd.concat(all_dfs, ignore_index=True)
offense_all.columns = offense_all.columns.str.strip().str.replace(' ', '_')

offense_all = offense_all[offense_all["Player_Name"] != " (team)"]
offense_all = offense_all[offense_all["Player_Number"] != "Total"]
offense_all["Player_Name"] = offense_all["Player_Name"].str.strip()

offense_column_dictionary = {
    "Player_Number" : "player_number",
    "Player_Name" : "player",
    "Games_Played" : "sets_played",
    "Points_Played" : "points_played",
    "Attack_Attempts" : "attack_attempts",
    "Attack_Errors" : "attack_errors",
    "Kills" : "total_kills",
}


offense_all = offense_all.rename(columns=offense_column_dictionary)

# Kombiner med match schedules og resultater
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])

results_played = results[results["resultat"].notna()]

offense_all = offense_all.merge(
    results_played[["season", "kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

matches_per_player = offense_all.groupby("player")["kampnr"].nunique().reset_index(name="matches_played")
offense_all = offense_all.merge(matches_per_player, on="player", how="left")

offense_all = offense_all[["player", "kampnr", "season", "matches_played", "sets_played", "points_played", "attack_attempts", "attack_errors", "total_kills"]]

offense_all.loc[offense_all['player'] == 'Andreas Christensen', 'player'] = 'Ando'

offense_2025_2026 = offense_all[offense_all["season"] == "2025/2026"].copy()
offense_2024_2025 = offense_all[offense_all["season"] == "2024/2025"].copy()
offense_all_time = offense_all.copy()
offense_all_time

Unnamed: 0,player,kampnr,season,matches_played,sets_played,points_played,attack_attempts,attack_errors,total_kills
0,Lasse Nielsen,141144,2024/2025,11,3,172,7,3,3
1,Kristian Krag,141144,2024/2025,11,3,26,6,3,2
2,Bosse,141144,2024/2025,9,3,8,0,0,0
3,Soeren,141144,2024/2025,10,3,33,5,0,3
4,Nicola,141144,2024/2025,11,3,68,24,3,10
...,...,...,...,...,...,...,...,...,...
122,Vestbjerg,141099,2024/2025,11,5,102,33,3,10
123,Gustav,141099,2024/2025,8,4,34,12,1,9
124,Mads Q,141099,2024/2025,5,5,130,3,0,1
125,Martin,141099,2024/2025,10,2,20,13,6,5


In [544]:

non_numeric_cols = ["player", "season"]

offense_2025_2026[offense_2025_2026.columns.difference(non_numeric_cols)] = offense_2025_2026[offense_2025_2026.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")


offense_agg_2025_2026 = offense_2025_2026.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "attack_attempts" : "sum",
    "attack_errors": "sum",
    "total_kills": "sum",
}).reset_index()


offense_agg_2025_2026["error_percentage"] = offense_agg_2025_2026["attack_errors"] / offense_agg_2025_2026["attack_attempts"] * 100

offense_agg_2025_2026["kill_percentage"] = offense_agg_2025_2026["total_kills"] / offense_agg_2025_2026["attack_attempts"] * 100

offense_agg_2025_2026["hitting_efficiency"] = (offense_agg_2025_2026["total_kills"] - offense_agg_2025_2026["attack_errors"]) / offense_agg_2025_2026["attack_attempts"]

offense_agg_2025_2026[["player", "attack_attempts", "total_kills", "attack_errors", "kill_percentage", "error_percentage", "hitting_efficiency"]].to_json("2025_2026_top_offense.json", orient="records", indent=2)

In [545]:

non_numeric_cols = ["player", "season"]

offense_2024_2025[offense_2024_2025.columns.difference(non_numeric_cols)] = offense_2024_2025[offense_2024_2025.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")

offense_agg_2024_2025 = offense_2024_2025.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "attack_attempts" : "sum",
    "attack_errors": "sum",
    "total_kills": "sum",
}).reset_index()

offense_agg_2024_2025["error_percentage"] = offense_agg_2024_2025["attack_errors"] / offense_agg_2024_2025["attack_attempts"] * 100

offense_agg_2024_2025["kill_percentage"] = offense_agg_2024_2025["total_kills"] / offense_agg_2024_2025["attack_attempts"] * 100

offense_agg_2024_2025["hitting_efficiency"] = (offense_agg_2024_2025["total_kills"] - offense_agg_2024_2025["attack_errors"]) / offense_agg_2024_2025["attack_attempts"]

offense_agg_2024_2025[["player", "attack_attempts", "total_kills", "attack_errors", "kill_percentage", "error_percentage", "hitting_efficiency"]].to_json("2024_2025_top_offense.json", orient="records", indent=2)

In [547]:

non_numeric_cols = ["player", "season"]

offense_all_time[offense_all_time.columns.difference(non_numeric_cols)] = offense_all_time[offense_all_time.columns.difference(non_numeric_cols)].apply(pd.to_numeric, errors="coerce")

offense_agg_all_time = offense_all_time.groupby(["player"]).agg({
    "points_played": "sum",         # sum counts
    "sets_played": "sum",
    "matches_played": "max",
    "attack_attempts" : "sum",
    "attack_errors": "sum",
    "total_kills": "sum",
}).reset_index()

offense_agg_all_time["error_pct"] = offense_agg_all_time["attack_errors"] / offense_agg_all_time["attack_attempts"] * 100

offense_agg_all_time["kill_percentage"] = offense_agg_all_time["total_kills"] / offense_agg_all_time["attack_attempts"] * 100

offense_agg_all_time["hitting_efficiency"] = (offense_agg_all_time["total_kills"] - offense_agg_all_time["attack_errors"]) / offense_agg_all_time["attack_attempts"]

offense_agg_all_time[["player", "attack_attempts", "total_kills", "attack_errors", "kill_percentage", "error_pct", "hitting_efficiency"]].to_json("all_time_top_offense.json", orient="records", indent=2)

In [333]:
passes_all.columns

Index(['PLAYER_NUMBER', 'player', 'SETS_PLAYED', 'POINTS_PLAYED',
       'GOT_ACED_PER_SET', 'PASSES_PER_SET', 'PERFECT_PASSES_PER_SET',
       'average_pass_rating', 'error_percentage', 'perfect_percentage',
       'pass-attempt', 'one_pass', 'two_pass', 'three_pass',
       'OVER_THE_NET_PASS', 'GOT_ACED', 'TOTAL_PASS_ERRORS',
       'FIRST_BALL_SIDE_OUT_PERCENT', '', 'team1', 'team2', 'kampnr', 'season',
       'hjemme', 'ude', 'spillested', 'played', 'resultat', 'result_for_frb',
       'date', 'match_name', 'opponent', 'positive_percentage',
       'positive_passes', 'perfect_passes'],
      dtype='object')

In [323]:
results

Unnamed: 0,season,kampnr,dato_tid,hjemme,ude,spillested,resultat,result_for_frb,date,match_name,opponent,played
0,2025/26,141063,29-09-24 kl. 10:00,Gentofte Volley.2,Frederiksberg Volley,Kildeskovshal 1 1,3 - 2,lost,2024-09-29,gv2-frb,Gentofte Volley.2,yes
1,2025/26,141067,06-10-24 kl. 18:00,Frederiksberg Volley,KV 61,Bülowsvejhallen 1,3 - 0,won,2024-10-06,frb-kv,KV 61,yes
2,2025/26,141074,13-10-24 kl. 14:00,Frederiksberg Volley,Amager Volley.2,Kedelhallen 1,0 - 3,lost,2024-10-13,frb-avk,Amager Volley.2,yes
3,2025/26,141078,27-10-24 kl. 11:30,VLI,Frederiksberg Volley,Kedelhallen 1,3 - 0,lost,2024-10-27,vli-frb,VLI,yes
4,2025/26,141089,02-11-24 kl. 18:00,Frederiksberg Volley,Team Køge,Bülowsvejhallen 1,3 - 0,won,2024-11-02,frb-tkv,Team Køge,yes
5,2025/26,141094,16-11-24 kl. 14:30,DTU Volley,Frederiksberg Volley,Engelsborghallen 1,2 - 3,won,2024-11-16,dtu-frb,DTU Volley,yes
6,2025/26,141099,23-11-24 kl. 14:00,Frederiksberg Volley,Hvidovre VK.2,Bülowsvejhallen 1,2 - 3,lost,2024-11-23,frb-hv2,Hvidovre VK.2,yes
7,2025/26,141104,01-12-24 kl. 14:00,Grøndal EV,Frederiksberg Volley,Hillerødgades Sportssal 1,0 - 3,won,2024-12-01,grø-frb,Grøndal EV,yes
8,2025/26,141108,08-12-24 kl. 18:00,Frederiksberg Volley,Gentofte Volley.2,Bülowsvejhallen 1,3 - 1,won,2024-12-08,frb-gv2,Gentofte Volley.2,yes
9,2025/26,141119,12-01-25 kl. 12:00,Amager Volley.2,Frederiksberg Volley,Sundbyhal 1 1,3 - 2,lost,2025-01-12,avk-frb,Amager Volley.2,yes


In [309]:
results = pd.read_csv("match-results-dvbf.csv")

results["date"] = results["date"].str.strip()
results["date"] = pd.to_datetime(results["date"])


results_played = results[results["resultat"].notna()]

df_merged = passes_all.merge(
    results_played[["kampnr", "hjemme", "ude", "spillested", "played", "resultat", "result_for_frb", "date", "match_name", "opponent"]],
    on="kampnr",
    how="left"
)

df_merged

Unnamed: 0,Player_Number,Player_Name,Games_Played,Points_Played,Got_Aced/Game,Passes/Game,Perfect_Passes/Game,Pass_Rating,Total_Pass_Error_%,3-pass_Percent,...,kampnr,hjemme,ude,spillested,played,resultat,result_for_frb,date,match_name,opponent
0,5,Bosse,3,8,0.0,0.0,0.0,-,-,-,...,141144,Hvidovre VK.2,Frederiksberg Volley,Frihedens Idrætscenter 1,yes,0 - 3,won,2025-03-29,hv2-frb,Hvidovre VK.2
1,11,Nicola,3,68,0.3,5.3,2.3,2.09,5.9%,41.2%,...,141144,Hvidovre VK.2,Frederiksberg Volley,Frihedens Idrætscenter 1,yes,0 - 3,won,2025-03-29,hv2-frb,Hvidovre VK.2
2,12,Vestbjerg,3,69,0.7,6.7,1.0,1.52,9.1%,13.6%,...,141144,Hvidovre VK.2,Frederiksberg Volley,Frihedens Idrætscenter 1,yes,0 - 3,won,2025-03-29,hv2-frb,Hvidovre VK.2
3,16,Alex,3,30,0.3,5.3,2.7,2.18,5.9%,47.1%,...,141144,Hvidovre VK.2,Frederiksberg Volley,Frihedens Idrætscenter 1,yes,0 - 3,won,2025-03-29,hv2-frb,Hvidovre VK.2
4,5,Bosse,2,3,0.0,0.5,0.0,2.00,0.0%,0.0%,...,141063,Gentofte Volley.2,Frederiksberg Volley,Kildeskovshal 1 1,yes,3 - 2,lost,2024-09-29,gv2-frb,Gentofte Volley.2
5,6,Boerme,5,95,0.2,3.4,1.6,1.94,5.6%,44.4%,...,141063,Gentofte Volley.2,Frederiksberg Volley,Kildeskovshal 1 1,yes,3 - 2,lost,2024-09-29,gv2-frb,Gentofte Volley.2
6,11,Nicola,2,34,0.0,7.5,1.5,1.80,0.0%,20.0%,...,141063,Gentofte Volley.2,Frederiksberg Volley,Kildeskovshal 1 1,yes,3 - 2,lost,2024-09-29,gv2-frb,Gentofte Volley.2
7,12,Vestbjerg,4,79,0.8,6.0,1.5,1.65,11.1%,22.2%,...,141063,Gentofte Volley.2,Frederiksberg Volley,Kildeskovshal 1 1,yes,3 - 2,lost,2024-09-29,gv2-frb,Gentofte Volley.2
8,16,Alex,5,47,0.0,6.2,2.2,2.05,0.0%,35.5%,...,141063,Gentofte Volley.2,Frederiksberg Volley,Kildeskovshal 1 1,yes,3 - 2,lost,2024-09-29,gv2-frb,Gentofte Volley.2
9,2,Nico Lang,1,1,0.0,0.0,0.0,-,-,-,...,141089,Frederiksberg Volley,Team Køge,Bülowsvejhallen 1,yes,3 - 0,won,2024-11-02,frb-tkv,Team Køge


In [None]:

with open(path_webreports, "r", encoding="utf-8") as f:
    lines = f.readlines()

games_data = []
current_game = None
columns = []

for line in lines:
    line = line.strip()
    if line.startswith("GAME"):
        # Extract set number from GAME1, GAME2, etc.
        game_id, game_info = line.split(":", 1)
        set_number = int(re.search(r"GAME(\d+)", game_id).group(1))
        
        current_game = {"section": "set", "set_number": set_number}
        
        # Parse metadata (venue, opponent, date, score)
        info_parts = game_info.strip().split("-")
        venue = info_parts[0]
        opponent = info_parts[1]
        rest = "-".join(info_parts[2:])
        date_part, score = rest.rsplit("(", 1)
        date = date_part.strip()
        score = score.replace(")", "").strip()
        
        current_game["venue"] = venue
        current_game["opponent"] = opponent
        current_game["date"] = date
        current_game["set_score"] = score
        current_game["rows"] = []
        columns = []
    elif line.startswith("MATCH"):
        current_game = {"section": "match", "rows": []}
        columns = []
    elif line.startswith("AGGREGATE"):
        current_game = {"section": "aggregate", "rows": []}
        columns = []
    elif "PLAYER_NUM" in line:
        columns = line.split(",")
    elif current_game is not None and line:
        current_game["rows"].append(line.split(","))
    elif line == "" and current_game is not None and current_game["rows"]:
        df_game = pd.DataFrame(current_game["rows"], columns=columns)
        for key, value in current_game.items():
            if key not in ["rows"]:
                df_game[key] = value
        games_data.append(df_game)
        current_game = None
        columns = []

# Last block handling
if current_game is not None and current_game["rows"]:
    df_game = pd.DataFrame(current_game["rows"], columns=columns)
    for key, value in current_game.items():
        if key not in ["rows"]:
            df_game[key] = value
    games_data.append(df_game)

# Combine all sets into one dataframe
df_all = pd.concat(games_data, ignore_index=True)

# Make sure date is proper datetime and season extracted
df_all["date"] = pd.to_datetime(df_all["date"], errors="coerce")

# --- Split into three datasets ---
df_sets = df_all[df_all["section"] == "set"].copy()
df_matches = df_all[df_all["section"] == "match"].copy()
df_aggregate = df_all[df_all["section"] == "aggregate"].copy()

# Drop helper columns if you want
df_all.drop(columns=["section"], inplace=True)

# Optional: convert numeric columns
numeric_cols = ["POINTS_SCORED_ON_SERVE", "ACES", "SERVE_ERRORS", 
                "COMBINED_KILLS", "COMBINED_KILL_ERRORS", "ATTACK_NET_POINTS",
                "COMBINED_KILL_ATTEMPTS", "BLOCKS", "BLOCK_ERRORS", "DIGS",
                "DIG_ERROR_PERCENT", "FAULTS", "ASSISTS", "BALL_HANDLING",
                "SETTING_ERRORS", "NET_BLOCKS", "HITS_STILL_IN_PLAY", "PASS_ATTEMPTS"]

for df in [df_sets, df_matches, df_aggregate]:
    for col in numeric_cols:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')


In [137]:
df_sets
df_matches

df_aggregate.drop(columns=["PLAYER_NUM", "SETTING_ERRORS", "FAULTS", "ASSISTS", "DIG_ERROR_PERCENT", "GAMES_PLAYED", "NET_BLOCKS", "DIG_ERRORS", "BALL_HANDLING", "set_number", "venue", "opponent", "date", "set_score", "section"], inplace=True)
df_aggregate

Unnamed: 0,PLAYER_NAME,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,PASS_ATTEMPTS,SERVE_RCV_ERRORS,COMBINED_KILLS,COMBINED_KILL_ERRORS,ATTACK_NET_POINTS,HITTING_EFFICIENCY,COMBINED_KILL_PERCENT,HITS_STILL_IN_PLAY,COMBINED_KILL_ATTEMPTS,BLOCKS,BLOCK_ERRORS,DIGS
626,Lasse Nielsen,77,44.3%,12,17,9.8%,-5,1.0,1,0,22,8,14,0.25,39.3%,26,56,28,10,79
627,Nico Lang,7,63.6%,0,1,9.1%,-1,2.0,4,0,0,0,0,0.0,0.0%,1,1,0,0,1
628,Kristian Krag,49,48.5%,5,13,12.9%,-8,2.0,5,0,37,11,26,0.329,46.8%,31,79,13,10,21
629,Andreas Christensen,11,35.5%,2,3,9.7%,-1,,0,0,14,2,12,0.571,66.7%,5,21,8,2,5
630,Bosse,28,47.5%,8,6,10.2%,2,1.73,43,3,9,5,4,0.148,33.3%,13,27,7,1,17
631,Boerme,18,34.6%,1,18,34.6%,-17,1.86,84,7,46,20,26,0.22,39.0%,52,118,5,5,26
632,Bo,32,38.6%,7,11,13.3%,-4,1.0,2,1,14,7,7,0.189,37.8%,16,37,21,8,14
633,Mikkel,19,50.0%,1,2,5.3%,-1,1.71,65,5,28,14,14,0.219,43.8%,22,64,2,0,11
634,Soeren,36,40.9%,8,10,11.4%,-2,,0,0,61,32,29,0.153,32.1%,97,190,21,11,36
635,Frederik,6,75.0%,1,0,0.0%,1,1.14,7,2,1,1,0,0.0,25.0%,2,4,0,0,2


In [124]:
df_aggregate

Unnamed: 0,PLAYER_NAME,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,PASS_ATTEMPTS,SERVE_RCV_ERRORS,...,COMBINED_KILL_ATTEMPTS,ASSISTS,SETTING_ERRORS,BLOCKS,BLOCK_ERRORS,NET_BLOCKS,DIG_ERRORS,DIGS,FAULTS,section
626,Lasse Nielsen,77,44.3%,12,17,9.8%,-5,1.0,1,0,...,56,333,6,28,10,18,29,79,13,aggregate
627,Nico Lang,7,63.6%,0,1,9.1%,-1,2.0,4,0,...,1,0,0,0,0,0,2,1,0,aggregate
628,Kristian Krag,49,48.5%,5,13,12.9%,-8,2.0,5,0,...,79,3,1,13,10,3,1,21,5,aggregate
629,Andreas Christensen,11,35.5%,2,3,9.7%,-1,,0,0,...,21,2,0,8,2,6,1,5,1,aggregate
630,Bosse,28,47.5%,8,6,10.2%,2,1.73,43,3,...,27,5,1,7,1,6,3,17,0,aggregate
631,Boerme,18,34.6%,1,18,34.6%,-17,1.86,84,7,...,118,3,0,5,5,0,5,26,0,aggregate
632,Bo,32,38.6%,7,11,13.3%,-4,1.0,2,1,...,37,2,0,21,8,13,6,14,5,aggregate
633,Mikkel,19,50.0%,1,2,5.3%,-1,1.71,65,5,...,64,0,0,2,0,2,1,11,0,aggregate
634,Soeren,36,40.9%,8,10,11.4%,-2,,0,0,...,190,0,0,21,11,10,8,36,3,aggregate
635,Frederik,6,75.0%,1,0,0.0%,1,1.14,7,2,...,4,0,0,0,0,0,0,2,1,aggregate


In [82]:

# Convert numeric columns
numeric_cols = ["POINTS_SCORED_ON_SERVE", "COMBINED_KILLS", "COMBINED_KILL_ATTEMPTS", "COMBINED_KILL_ERRORS", "HITTING_EFFICIENCY", "ACES", "PASS_ATTEMPTS",
                "PASS_RATING", "BLOCKS", "DIGS"]  # extend as needed
for col in numeric_cols:
    if col in df_all.columns:
        df_all[col] = pd.to_numeric(df_all[col], errors="coerce")

# Extract season from date
df_all["date"] = pd.to_datetime(df_all["date"], errors="coerce")
#df_all["season"] = df_all["date"].dt.year
df_all

Unnamed: 0,PLAYER_NUM,PLAYER_NAME,GAMES_PLAYED,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,...,DIG_ERRORS,DIGS,DIG_ERROR_PERCENT,FAULTS,set_number,venue,opponent,date,set_score,season
0,1,Lasse Nielsen,1,1,25.0%,1,0,0.0%,1,,...,0,4,0.0%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
1,3,Kristian Krag,1,1,33.3%,0,0,0.0%,0,,...,0,1,0.0%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
2,6,Boerme,1,2,50.0%,0,1,25.0%,-1,2.67,...,1,2,33.3%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
3,7,Bo,1,1,33.3%,0,0,0.0%,0,,...,0,0,,1,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
4,12,Vestbjerg,1,5,71.4%,0,0,0.0%,0,1.40,...,1,2,33.3%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
481,13,Gustav,1,3,60.0%,1,1,20.0%,0,,...,0,0,,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026
482,15,Martin,1,0,0.0%,0,1,50.0%,-1,,...,0,3,0.0%,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026
483,16,Alex,1,0,0.0%,0,0,0.0%,0,,...,0,2,0.0%,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026
484,17,Hjorth,1,3,50.0%,1,1,16.7%,0,2.33,...,0,6,0.0%,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026


In [61]:
df_all.columns

Index(['PLAYER_NUM', 'PLAYER_NAME', 'GAMES_PLAYED', 'POINTS_SCORED_ON_SERVE',
       'PT_SCORE_PERCENT', 'ACES', 'SERVE_ERRORS', 'SERVE_ERROR_PERCENT',
       'SERVE_NET_POINTS', 'PASS_RATING', 'PASS_ATTEMPTS', 'SERVE_RCV_ERRORS',
       'COMBINED_KILLS', 'COMBINED_KILL_ERRORS', 'ATTACK_NET_POINTS',
       'HITTING_EFFICIENCY', 'COMBINED_KILL_PERCENT', 'HITS_STILL_IN_PLAY',
       'COMBINED_KILL_ATTEMPTS', 'ASSISTS', 'BALL_HANDLING', 'SETTING_ERRORS',
       'BLOCKS', 'BLOCK_ERRORS', 'NET_BLOCKS', 'DIG_ERRORS', 'DIGS',
       'DIG_ERROR_PERCENT', 'FAULTS', 'set_number', 'venue', 'opponent',
       'date', 'set_score', 'season'],
      dtype='object')

In [46]:
player_points = df_all.groupby("PLAYER_NAME")["ACES"].sum().reset_index()
player_points = player_points.sort_values(by="ACES", ascending=False)
print(player_points)


            PLAYER_NAME  ACES
10        Lasse Nielsen    12
16               Soeren     8
5                 Bosse     8
3                    Bo     7
9         Kristian Krag     5
7                Gustav     5
15               Nicola     4
12               Martin     3
11               Mads Q     3
17            Vestbjerg     2
8                Hjorth     2
2   Andreas Christensen     2
1                  Alex     1
6              Frederik     1
13               Mikkel     1
4                Boerme     1
14            Nico Lang     0
0                (team)     0


In [48]:
match_df = df_all[(df_all["date"] == "2025-09-28")]
player_points = match_df.groupby("PLAYER_NAME")["HITTING_EFFICIENCY"].sum().reset_index()
player_points = player_points.sort_values(by="HITTING_EFFICIENCY", ascending=False)
print(player_points)


           PLAYER_NAME  HITTING_EFFICIENCY
2  Andreas Christensen               2.333
6               Hjorth               1.025
5               Gustav               0.867
8               Nicola               0.800
7               Martin               0.710
0               (team)               0.000
1                 Alex               0.000
3                   Bo               0.000
4                Bosse               0.000


In [67]:
df_all

Unnamed: 0,PLAYER_NUM,PLAYER_NAME,GAMES_PLAYED,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,...,DIG_ERRORS,DIGS,DIG_ERROR_PERCENT,FAULTS,set_number,venue,opponent,date,set_score,season
0,1,Lasse Nielsen,1,1,25.0%,1,0,0.0%,1,,...,0,4,0.0%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
1,3,Kristian Krag,1,1,33.3%,0,0,0.0%,0,,...,0,1,0.0%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
2,6,Boerme,1,2,50.0%,0,1,25.0%,-1,2.67,...,1,2,33.3%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
3,7,Bo,1,1,33.3%,0,0,0.0%,0,,...,0,0,,1,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
4,12,Vestbjerg,1,5,71.4%,0,0,0.0%,0,1.40,...,1,2,33.3%,0,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2024-2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
481,13,Gustav,1,3,60.0%,1,1,20.0%,0,,...,0,0,,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026
482,15,Martin,1,0,0.0%,0,1,50.0%,-1,,...,0,3,0.0%,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026
483,16,Alex,1,0,0.0%,0,0,0.0%,0,,...,0,2,0.0%,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026
484,17,Hjorth,1,3,50.0%,1,1,16.7%,0,2.33,...,0,6,0.0%,0,3,Amager,Amager,2025-09-28,23 - 25,2025-2026


In [80]:
#kill_effic_df = 

match_df = df_all[(df_all["date"] == "2025-01-16")]

kill_efficiency_game = (
    df_all.groupby(["date", "PLAYER_NAME"])
    .apply(lambda g: (g["COMBINED_KILLS"].sum() - g["COMBINED_KILL_ERRORS"].sum())
                     / g["COMBINED_KILL_ATTEMPTS"].sum()
           if g["COMBINED_KILL_ATTEMPTS"].sum() > 0 else 0)
    .reset_index(name="KILL_EFFICIENCY")
)

kill_efficiency_game["KILL_EFFICIENCY"] = kill_efficiency_game["KILL_EFFICIENCY"].round(3)

kill_efficiency_game[(kill_efficiency_game["date"] == "2025-01-16")].sort_values(by="KILL_EFFICIENCY", ascending=False)

  .apply(lambda g: (g["COMBINED_KILLS"].sum() - g["COMBINED_KILL_ERRORS"].sum())


Unnamed: 0,date,PLAYER_NAME,KILL_EFFICIENCY
13,2025-01-16,(team),1.0
20,2025-01-16,Mads Q,1.0
24,2025-01-16,Vestbjerg,1.0
15,2025-01-16,Bo,0.5
16,2025-01-16,Boerme,0.25
22,2025-01-16,Nicola,0.171
21,2025-01-16,Martin,0.125
14,2025-01-16,Alex,0.0
17,2025-01-16,Gustav,0.0
18,2025-01-16,Kristian Krag,0.0


In [77]:
agg = (
    df_all.groupby(["season", "date", "PLAYER_NAME"])
    [["COMBINED_KILLS", "COMBINED_KILL_ERRORS", "COMBINED_KILL_ATTEMPTS"]]
    .sum()
    .reset_index()
)

agg["KILL_EFFICIENCY"] = (
    (agg["COMBINED_KILLS"] - agg["COMBINED_KILL_ERRORS"])
    / agg["COMBINED_KILL_ATTEMPTS"]
)

# format nicely
agg["KILL_EFFICIENCY"] = agg["KILL_EFFICIENCY"].round(3)

kill_efficiency_game = agg[["season", "date", "PLAYER_NAME", "KILL_EFFICIENCY"]]
kill_efficiency_game

Unnamed: 0,season,date,PLAYER_NAME,KILL_EFFICIENCY
0,2024-2025,2024-12-24,(team),
1,2024-2025,2024-12-24,Alex,
2,2024-2025,2024-12-24,Andreas Christensen,
3,2024-2025,2024-12-24,Boerme,0.273
4,2024-2025,2024-12-24,Bosse,
...,...,...,...,...
135,2025-2026,2025-09-28,Bosse,
136,2025-2026,2025-09-28,Gustav,0.273
137,2025-2026,2025-09-28,Hjorth,0.333
138,2025-2026,2025-09-28,Martin,0.229


In [26]:
# Offensive column stats:
# 'PLAYER_NAME', 'GAMES_PLAYED', 'COMBINED_KILLS', 'COMBINED_KILL_ERRORS', 'ATTACK_NET_POINTS', 'HITTING_EFFICIENCY', 'COMBINED_KILL_PERCENT', 'HITS_STILL_IN_PLAY' 'COMBINED_KILL_ATTEMPTS', 'opponent', 'date', 'score', 'season'

match_df

Unnamed: 0,PLAYER_NUM,PLAYER_NAME,GAMES_PLAYED,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,...,NET_BLOCKS,DIG_ERRORS,DIGS,DIG_ERROR_PERCENT,FAULTS,venue,opponent,date,score,season
193,1.0,Lasse Nielsen,1,1,33.3%,0,0,0.0%,0,,...,1,2,2,50.0%,2,Amager,Amager,2025-01-16,21 - 25,2025
194,6.0,Boerme,1,0,0.0%,0,1,50.0%,-1,2.0,...,1,0,0,,0,Amager,Amager,2025-01-16,21 - 25,2025
195,7.0,Bo,1,0,0.0%,0,0,0.0%,0,,...,1,0,0,,0,Amager,Amager,2025-01-16,21 - 25,2025
196,9.0,Soeren,1,2,66.7%,0,0,0.0%,0,,...,0,0,1,0.0%,0,Amager,Amager,2025-01-16,21 - 25,2025
197,11.0,Nicola,1,1,33.3%,0,0,0.0%,0,1.72,...,0,0,1,0.0%,0,Amager,Amager,2025-01-16,21 - 25,2025
198,12.0,Vestbjerg,1,0,,0,0,,0,0.88,...,0,0,1,0.0%,0,Amager,Amager,2025-01-16,21 - 25,2025
199,13.0,Gustav,1,6,75.0%,1,1,12.5%,0,,...,1,0,0,,1,Amager,Amager,2025-01-16,21 - 25,2025
200,14.0,Mads Q,1,0,0.0%,0,0,0.0%,0,,...,0,0,0,,0,Amager,Amager,2025-01-16,21 - 25,2025
201,15.0,Martin,1,0,,0,0,,0,,...,-1,0,0,,0,Amager,Amager,2025-01-16,21 - 25,2025
202,16.0,Alex,1,0,,0,0,,0,1.42,...,0,1,0,100.0%,0,Amager,Amager,2025-01-16,21 - 25,2025


In [None]:
# From aggregate:

# PLAYER_NAME
# GAMES_PLAYED 

# POINTS_SCORED_ON_SERVE
# PT_SCORE_PERCENT
# ACES
# SERVE_ERRORS
# SERVE_ERROR_PERCENT
# SERVE_NET_POINTS

# PASS_RATING
# PASS_ATTEMPTS
# SERVE_RCV_ERRORS

# COMBINED_KILLS
# COMBINED_KILL_ERRORS
# ATTACK_NET_POINTS
# HITTING_EFFICIENCY
# COMBINED_KILL_PERCENT
# HITS_STILL_IN_PLAY
# COMBINED_KILL_ATTEMPTS

# ASSISTS
# ALL_HANDLING
# SETTING_ERRORS

# BLOCKS
# BLOCK_ERRORS
# NET_BLOCKS

# DIG_ERRORS
# DIGS
# DIG_ERROR_PERCENT

# FAULTS

In [54]:
pass_rating_season = (df_all
    .groupby(['season','PLAYER_NAME'])
    .apply(lambda g: (g['PASS_RATING'] * g['PASS_ATTEMPTS']).sum() / g['PASS_ATTEMPTS'].sum()
                     if g['PASS_ATTEMPTS'].sum() > 0 else 0)
    .reset_index(name='PASS_RATING_SEASON'))

  .apply(lambda g: (g['PASS_RATING'] * g['PASS_ATTEMPTS']).sum() / g['PASS_ATTEMPTS'].sum()


In [56]:
passers = ["Alex", "Boerme", "Bosse", "Frederik", "Mikkel", "Nico Lang", "Nicola", "Vestbjerg", "Hjorth"]

pass_rating_season

pass_rating_season[pass_rating_season["PLAYER_NAME"].isin(passers)]


Unnamed: 0,season,PLAYER_NAME,PASS_RATING_SEASON
1,2024-2025,Alex,1.916936
4,2024-2025,Boerme,1.863452
5,2024-2025,Bosse,1.669524
6,2024-2025,Frederik,1.14
12,2024-2025,Mikkel,1.709231
13,2024-2025,Nico Lang,2.0
14,2024-2025,Nicola,1.9301
16,2024-2025,Vestbjerg,1.690837
18,2025-2026,Alex,0.0
21,2025-2026,Bosse,1.795455
