In [7]:
import csv

import pandas as pd

# Relative path of the WebReports CSV export that the parsing cell opens and reads.
path_webreports = "data/web-reports/WebReports-Export-2025-8-29.csv"

In [8]:
def _split_csv_line(line):
    """Split one non-empty CSV line into fields, honouring double-quoted fields.

    A plain ``line.split(",")`` would cut a quoted value such as
    ``"Nielsen, Lasse"`` in two and misalign the row against the header.
    """
    return next(csv.reader([line]))


def _parse_game_header(line):
    """Turn a ``GAME...: <venue>-<opponent>-<date> (<score>)`` line into a game record.

    Example payload: "Kildeskovshallen-Gentofte Volley.2-2025-Feb-04 (25 - 19)".

    Returns a dict with the keys ``venue``, ``opponent``, ``date``, ``score``
    and an empty ``rows`` list that the caller fills with data rows.

    Raises:
        IndexError: the line contains no ":".
        ValueError: the payload lacks the expected "-" or "(" separators.
    """
    # Split on the first ":" only, so a colon later in the payload is kept.
    game_info = line.split(":", 1)[1].strip()
    # NOTE(review): venue and opponent are taken as the first two "-"-separated
    # pieces, so a hyphen inside either name would shift the fields -- confirm
    # against the export format.
    venue, rest = game_info.split("-", 1)
    team, rest = rest.split("-", 1)
    date_part, score = rest.rsplit("(", 1)
    return {
        "venue": venue,
        "opponent": team,
        "date": date_part.strip(),
        "score": score.replace(")", "").strip(),
        "rows": [],
    }


def _game_to_frame(game, columns):
    """Build one game's DataFrame and attach the game metadata as columns."""
    df_game = pd.DataFrame(game["rows"], columns=columns)
    for key in ("venue", "opponent", "date", "score"):
        df_game[key] = game[key]
    return df_game


def parse_webreports(lines):
    """Parse the raw lines of a WebReports export into one DataFrame per game.

    The export is a sequence of tables. Each table starts with a line beginning
    with "GAME", followed by a header line beginning with "PLAYER_NUM", then
    the data rows. A table ends at a blank line, at the next "GAME" line, or at
    the end of the input. Lines outside any table are ignored.

    Args:
        lines: iterable of text lines (trailing newlines are fine).

    Returns:
        List of DataFrames, one per game that has at least one data row.
    """
    games = []
    current_game = None
    columns = []

    for raw_line in lines:
        line = raw_line.strip()
        if line.startswith("GAME"):
            # A new header directly after data rows (no blank separator) must
            # not discard the rows collected for the previous game.
            if current_game is not None and current_game["rows"]:
                games.append(_game_to_frame(current_game, columns))
            current_game = _parse_game_header(line)
            columns = []
        elif current_game is None:
            # Not inside a game table: nothing to collect.
            continue
        elif line.startswith("PLAYER_NUM"):
            # This line contains the column headers.
            columns = _split_csv_line(line)
        elif line:
            # Data row.
            current_game["rows"].append(_split_csv_line(line))
        elif current_game["rows"]:
            # Blank line after data rows: end of the current game table.
            games.append(_game_to_frame(current_game, columns))
            current_game = None
            columns = []

    # The last table may end at end-of-file without a trailing blank line.
    if current_game is not None and current_game["rows"]:
        games.append(_game_to_frame(current_game, columns))

    return games


# Read the entire CSV as raw lines. "utf-8-sig" also accepts plain UTF-8 but
# drops a leading byte-order mark, which would otherwise hide the first
# "GAME" header from the startswith() check.
with open(path_webreports, "r", encoding="utf-8-sig") as f:
    lines = f.readlines()

games_data = parse_webreports(lines)
if not games_data:
    # pd.concat([]) would raise a ValueError too, but without naming the file.
    raise ValueError(f"No game tables found in {path_webreports!r}")

# Combine all games into a single dataframe
df_all = pd.concat(games_data, ignore_index=True)

In [9]:
# Display the combined table: one row per player per game, plus the game metadata columns.
df_all

Unnamed: 0,PLAYER_NUM,PLAYER_NAME,GAMES_PLAYED,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,...,BLOCK_ERRORS,NET_BLOCKS,DIG_ERRORS,DIGS,DIG_ERROR_PERCENT,FAULTS,venue,opponent,date,score
0,1,Lasse Nielsen,1,1,25.0%,1,0,0.0%,1,,...,1,0,0,4,0.0%,0,Kildeskovshallen,Gentofte Volley.2,2025-Feb-04,25 - 19
1,3,Kristian Krag,1,1,33.3%,0,0,0.0%,0,,...,0,0,0,1,0.0%,0,Kildeskovshallen,Gentofte Volley.2,2025-Feb-04,25 - 19
2,6,Boerme,1,2,50.0%,0,1,25.0%,-1,2.67,...,1,-1,1,2,33.3%,0,Kildeskovshallen,Gentofte Volley.2,2025-Feb-04,25 - 19
3,7,Bo,1,1,33.3%,0,0,0.0%,0,,...,0,1,0,0,,1,Kildeskovshallen,Gentofte Volley.2,2025-Feb-04,25 - 19
4,12,Vestbjerg,1,5,71.4%,0,0,0.0%,0,1.40,...,0,0,1,2,33.3%,0,Kildeskovshallen,Gentofte Volley.2,2025-Feb-04,25 - 19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
481,13,Gustav,1,3,60.0%,1,1,20.0%,0,,...,0,0,0,0,,0,Amager,Amager,2025-Sep-28,23 - 25
482,15,Martin,1,0,0.0%,0,1,50.0%,-1,,...,0,0,0,3,0.0%,0,Amager,Amager,2025-Sep-28,23 - 25
483,16,Alex,1,0,0.0%,0,0,0.0%,0,,...,0,0,0,2,0.0%,0,Amager,Amager,2025-Sep-28,23 - 25
484,17,Hjorth,1,3,50.0%,1,1,16.7%,0,2.33,...,0,0,0,6,0.0%,0,Amager,Amager,2025-Sep-28,23 - 25


In [10]:

# Stat columns to turn from parsed strings into numbers.
numeric_cols = [
    "POINTS_SCORED_ON_SERVE",
    "COMBINED_KILLS",
    "HITTING_EFFICIENCY",
    "PASS_RATING",
    "BLOCKS",
    "DIGS",
]  # extend as needed

# Only touch columns that are actually present; values that cannot be parsed
# become NaN because of errors="coerce".
for col in [name for name in numeric_cols if name in df_all.columns]:
    df_all[col] = pd.to_numeric(df_all[col], errors="coerce")

# Parse the match date (unparseable values become NaT) and derive the season
# as the calendar year of that date.
parsed_dates = pd.to_datetime(df_all["date"], errors="coerce")
df_all["date"] = parsed_dates
df_all["season"] = parsed_dates.dt.year
df_all

Unnamed: 0,PLAYER_NUM,PLAYER_NAME,GAMES_PLAYED,POINTS_SCORED_ON_SERVE,PT_SCORE_PERCENT,ACES,SERVE_ERRORS,SERVE_ERROR_PERCENT,SERVE_NET_POINTS,PASS_RATING,...,NET_BLOCKS,DIG_ERRORS,DIGS,DIG_ERROR_PERCENT,FAULTS,venue,opponent,date,score,season
0,1,Lasse Nielsen,1,1,25.0%,1,0,0.0%,1,,...,0,0,4,0.0%,0,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2025
1,3,Kristian Krag,1,1,33.3%,0,0,0.0%,0,,...,0,0,1,0.0%,0,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2025
2,6,Boerme,1,2,50.0%,0,1,25.0%,-1,2.67,...,-1,1,2,33.3%,0,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2025
3,7,Bo,1,1,33.3%,0,0,0.0%,0,,...,1,0,0,,1,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2025
4,12,Vestbjerg,1,5,71.4%,0,0,0.0%,0,1.40,...,0,1,2,33.3%,0,Kildeskovshallen,Gentofte Volley.2,2025-02-04,25 - 19,2025
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
481,13,Gustav,1,3,60.0%,1,1,20.0%,0,,...,0,0,0,,0,Amager,Amager,2025-09-28,23 - 25,2025
482,15,Martin,1,0,0.0%,0,1,50.0%,-1,,...,0,0,3,0.0%,0,Amager,Amager,2025-09-28,23 - 25,2025
483,16,Alex,1,0,0.0%,0,0,0.0%,0,,...,0,0,2,0.0%,0,Amager,Amager,2025-09-28,23 - 25,2025
484,17,Hjorth,1,3,50.0%,1,1,16.7%,0,2.33,...,0,0,6,0.0%,0,Amager,Amager,2025-09-28,23 - 25,2025


In [12]:
# List every column on df_all: the stat columns from the export plus the added game metadata.
df_all.columns

Index(['PLAYER_NUM', 'PLAYER_NAME', 'GAMES_PLAYED', 'POINTS_SCORED_ON_SERVE',
       'PT_SCORE_PERCENT', 'ACES', 'SERVE_ERRORS', 'SERVE_ERROR_PERCENT',
       'SERVE_NET_POINTS', 'PASS_RATING', 'PASS_ATTEMPTS', 'SERVE_RCV_ERRORS',
       'COMBINED_KILLS', 'COMBINED_KILL_ERRORS', 'ATTACK_NET_POINTS',
       'HITTING_EFFICIENCY', 'COMBINED_KILL_PERCENT', 'HITS_STILL_IN_PLAY',
       'COMBINED_KILL_ATTEMPTS', 'ASSISTS', 'BALL_HANDLING', 'SETTING_ERRORS',
       'BLOCKS', 'BLOCK_ERRORS', 'NET_BLOCKS', 'DIG_ERRORS', 'DIGS',
       'DIG_ERROR_PERCENT', 'FAULTS', 'venue', 'opponent', 'date', 'score',
       'season'],
      dtype='object')

In [13]:
# Total COMBINED_KILLS per player across all games, highest first.
player_points = (
    df_all
    .groupby("PLAYER_NAME")["COMBINED_KILLS"]
    .sum()
    .reset_index()
    .sort_values(by="COMBINED_KILLS", ascending=False)
)
print(player_points)


            PLAYER_NAME  COMBINED_KILLS
12               Martin             122
15               Nicola             118
16               Soeren              61
17            Vestbjerg              57
4                Boerme              46
9         Kristian Krag              37
7                Gustav              31
13               Mikkel              28
10        Lasse Nielsen              22
3                    Bo              14
2   Andreas Christensen              14
8                Hjorth              11
5                 Bosse               9
11               Mads Q               4
0                (team)               2
6              Frederik               1
1                  Alex               1
14            Nico Lang               0


In [18]:
# Restrict to the rows of the match played on 2025-09-28.
played_on_day = df_all["date"] == "2025-09-28"
match_df = df_all.loc[played_on_day]

# Total COMBINED_KILLS per player in that match, highest first.
player_points = (
    match_df
    .groupby("PLAYER_NAME")["COMBINED_KILLS"]
    .sum()
    .reset_index()
    .sort_values(by="COMBINED_KILLS", ascending=False)
)
print(player_points)


           PLAYER_NAME  COMBINED_KILLS
7               Martin              13
6               Hjorth              11
2  Andreas Christensen               6
5               Gustav               4
8               Nicola               4
1                 Alex               1
0               (team)               0
3                   Bo               0
4                Bosse               0
