In [1]:
import pandas as pd
import os

# Raw input and cleaned output locations (relative to the notebook's working
# directory). The cleaned data goes to a separate file, so the raw CSV is
# never modified and this cell can be re-run safely.
input_path = "team_stats/nrl_2025_team_stats.csv"
output_path = "team_stats/nrl_2025_team_stats_cleaned.csv"

# Load the raw team stats.
df = pd.read_csv(input_path)

# Collapse every run of two or more underscores in the column names to a
# single underscore. A plain replace("__", "_") is a single pass, so a name
# like "a___b" would still come out as "a__b".
df.columns = df.columns.str.replace(r"_{2,}", "_", regex=True)

# Strip a trailing " Club Profile" (plus any whitespace before it) from the
# team names. Skipped when the file has no "team" column.
if "team" in df.columns:
    df["team"] = df["team"].str.replace(r"\s*Club Profile$", "", regex=True)

# Write the cleaned frame without the pandas row index.
df.to_csv(output_path, index=False)

print(f"Cleaned file saved to: {output_path}")

Cleaned file saved to: team_stats/nrl_2025_team_stats_cleaned.csv


In [None]:
import os
import pandas as pd

# Folder whose CSV files are cleaned IN PLACE (each file is overwritten).
input_folder = "nrl_stats_2025"

for filename in os.listdir(input_folder):
    if not filename.endswith(".csv"):
        continue

    file_path = os.path.join(input_folder, filename)
    print(f"Processing {filename}...")

    # Plain read: the first row is used as the header.
    df = pd.read_csv(file_path)

    # Idempotence guard. Because the file is overwritten below, re-running
    # this cell would strip two MORE columns on every run. The combine step
    # later in this notebook keys on a 'Team' column, so if 'Team' is already
    # one of the first two columns, dropping them would destroy that key.
    # NOTE(review): assumes every stats file carries a 'Team' column once
    # cleaned -- confirm against the raw exports.
    if "Team" in df.columns[:2]:
        print(f"Skipping {filename}: already cleaned ('Team' is in the first two columns)")
        continue

    # Drop the first two columns (positions 0 and 1).
    df_clean = df.iloc[:, 2:]

    # Overwrite the original file with the trimmed frame, without the row index.
    save_path = file_path
    df_clean.to_csv(save_path, index=False)

    print(f"Saved cleaned file to {save_path}")


Processing nrl_2025_all_receipts.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_all_receipts.csv
Processing nrl_2025_all_runs.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_all_runs.csv
Processing nrl_2025_all_run_metres.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_all_run_metres.csv
Processing nrl_2025_charge_downs.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_charge_downs.csv
Processing nrl_2025_conversion_%.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_conversion_%.csv
Processing nrl_2025_decoy_runs.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_decoy_runs.csv
Processing nrl_2025_dummy_half_runs.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_dummy_half_runs.csv
Processing nrl_2025_errors.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_errors.csv
Processing nrl_2025_goals.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_goals.csv
Processing nrl_2025_handling_errors.csv...
Saved cleaned file to nrl_stats_2025\nrl_2025_handling_err

In [2]:
import os

# Directory containing the CSV files to rename.
folder_path = '../data/last_5_results'

# Add the 'last_5_' prefix to every CSV that does not carry it yet. Files
# that already start with the prefix are left alone, so re-running the cell
# never double-prefixes a name.
for filename in os.listdir(folder_path):
    already_prefixed = filename.startswith('last_5_')
    if already_prefixed or not filename.endswith('.csv'):
        continue

    new_filename = f'last_5_{filename}'

    # Rename within the same folder.
    os.rename(
        os.path.join(folder_path, filename),
        os.path.join(folder_path, new_filename),
    )
    print(f'Renamed "{filename}" to "{new_filename}"')


Renamed "Brisbane_Broncos.csv" to "last_5_Brisbane_Broncos.csv"
Renamed "Canberra_Raiders.csv" to "last_5_Canberra_Raiders.csv"
Renamed "Canterbury_Bulldogs.csv" to "last_5_Canterbury_Bulldogs.csv"
Renamed "Cronulla_Sharks.csv" to "last_5_Cronulla_Sharks.csv"
Renamed "Dolphins.csv" to "last_5_Dolphins.csv"
Renamed "Gold_Coast_Titans.csv" to "last_5_Gold_Coast_Titans.csv"
Renamed "Manly_Sea_Eagles.csv" to "last_5_Manly_Sea_Eagles.csv"
Renamed "Melbourne_Storm.csv" to "last_5_Melbourne_Storm.csv"
Renamed "Newcastle_Knights.csv" to "last_5_Newcastle_Knights.csv"
Renamed "New_Zealand_Warriors.csv" to "last_5_New_Zealand_Warriors.csv"
Renamed "North_Queensland_Cowboys.csv" to "last_5_North_Queensland_Cowboys.csv"
Renamed "Parramatta_Eels.csv" to "last_5_Parramatta_Eels.csv"
Renamed "Penrith_Panthers.csv" to "last_5_Penrith_Panthers.csv"
Renamed "South_Sydney_Rabbitohs.csv" to "last_5_South_Sydney_Rabbitohs.csv"
Renamed "St_George_Illawarra_Dragons.csv" to "last_5_St_George_Illawarra_Dragons

In [1]:
import pandas as pd
import os

# Folder holding one CSV per statistic, named 'nrl_2025_<stat>.csv'.
stats_folder = '../data/nrl_stats_2025'

STAT_PREFIX = 'nrl_2025_'
STAT_SUFFIX = '.csv'

# Every per-stat file in the folder, in directory-listing order (this order
# determines the column order of the combined frame).
files = [
    f for f in os.listdir(stats_folder)
    if f.startswith(STAT_PREFIX) and f.endswith(STAT_SUFFIX)
]

combined_df = None

for filename in files:
    filepath = os.path.join(stats_folder, filename)
    df = pd.read_csv(filepath)

    # Derive the stat name by slicing off the known prefix and extension:
    # 'nrl_2025_all_receipts.csv' -> 'all_receipts'. Slicing (rather than
    # str.replace) only touches the two ends, so a stat name that happens to
    # contain 'nrl_2025_' or '.csv' in the middle is left intact.
    stat_name = filename[len(STAT_PREFIX):-len(STAT_SUFFIX)]

    # The stat column is whatever remains after excluding 'Team' and 'Played'.
    stat_cols = [col for col in df.columns if col not in ['Team', 'Played']]

    # Files that do not have exactly one stat column are reported and skipped.
    if len(stat_cols) != 1:
        print(f"Warning: File {filename} expected exactly one stat column, found {stat_cols}")
        continue

    stat_col = stat_cols[0]

    # Keep only Team + the stat, renaming the stat column after the file.
    df_renamed = df[['Team', stat_col]].rename(columns={stat_col: stat_name})

    # Outer-merge on 'Team' so a team missing from one file is kept (as NaN).
    if combined_df is None:
        combined_df = df_renamed
    else:
        combined_df = pd.merge(combined_df, df_renamed, on='Team', how='outer')

# Fail with a clear message when nothing was combined (no matching files, or
# every file was skipped above) instead of an AttributeError on None below.
if combined_df is None:
    raise ValueError(
        f"No usable '{STAT_PREFIX}*{STAT_SUFFIX}' files found in {stats_folder!r}; nothing to combine"
    )

combined_df = combined_df.sort_values('Team').reset_index(drop=True)

# NOTE(review): some stats (e.g. all_receipts) hold thousands-separated text
# such as "5,124", so those columns are presumably strings rather than
# numbers. Passing thousands=',' to pd.read_csv would parse them as numbers;
# confirm what downstream consumers expect before changing the output.
print(combined_df.head())

# Written to the notebook's working directory, without the row index.
combined_df.to_csv('nrl_2025_combined_team_stats.csv', index=False)

       Team all_receipts all_runs all_run_metres  charge_downs  conversion_%  \
0   Broncos        5,124    2,332         20,824           1.0            83   
1  Bulldogs        5,887    2,536         22,638           6.0            84   
2   Cowboys        5,508    2,393         22,460           2.0            71   
3  Dolphins        6,164    2,768         24,788           2.0            88   
4   Dragons        5,424    2,330         21,520           2.0            76   

   decoy_runs  dummy_half_runs  errors  goals  ...  points  \
0         554              110     145     54  ...     338   
1         419               80     132     53  ...     330   
2         761              129     162     39  ...     278   
3         778              109     146     68  ...     408   
4         645              134     146     42  ...     272   

   post_contact_metres  short_dropouts supports  tackles  tackle_breaks  \
0                6,832              10      584    4,423            441