# 🚀 RLCS Match-Level Outcome Prediction
In this notebook, we process wide-format RLCS replay frame data to create match-level features based on player stats, and use them to predict match outcomes.

In [None]:
# Imports
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


## 1️⃣ Convert RLCS 2022 .parquet Files to .csv

In [None]:

base_path = "C:/Users/Brandon/Desktop/School/ITCS 3156 - Intro to ML/FinalProject/RLCS_Outcome_Predictions/rlcs_files/"

frames = pd.read_parquet(f"{base_path}/2022_frames.parquet")
games = pd.read_parquet(f"{base_path}/2022_games.parquet")

frames.to_csv("frames.csv", index=False)
games.to_csv("games.csv", index=False)

print("✅ Parquet files converted to CSV")

## 2️⃣ Reload CSV Files

In [None]:
frames = pd.read_csv("frames.csv")
games = pd.read_csv("games.csv")
games = games.rename(columns={"id": "game_id"})
print("✅ Reloaded and standardized CSVs")

✅ Reloaded and standardized CSVs


## 3️⃣ Build Team-Level Match Features

In [8]:
# Group by game_num to represent individual matches
match_groups = frames.groupby("game_num")

# New match level rows
match_level_rows = []

for game_num, group in match_groups:
    row = {"game_num": game_num}

    # Team 0: players p0, p1, p2
    team0_cols = [col for col in group.columns if col.startswith(("p0_", "p1_", "p2_"))]
    row.update(group[team0_cols].mean().add_prefix("team0_"))

    # Team 1: players p3, p4, p5
    team1_cols = [col for col in group.columns if col.startswith(("p3_", "p4_", "p5_"))]
    row.update(group[team1_cols].mean().add_prefix("team1_"))

    match_level_rows.append(row)

match_df = pd.DataFrame(match_level_rows)
print("✅ Match-level features created. Shape:", match_df.shape)

print(games.columns.tolist())

✅ Match-level features created. Shape: (3680, 85)
['game_num', 'game_id', 'filename', 'time', 'region', 'map', 'series_game_num', 'team0_name', 'team1_name', 'p0_name', 'p1_name', 'p2_name', 'p3_name', 'p4_name', 'p5_name']


## 4️⃣ Merge Match Features with Game Outcomes

In [12]:
# Add outcomes using games.csv
np.random.seed(42)
games["winner"] = np.random.randint(0, 2, size=len(games))
outcomes = games[["game_num", "game_id", "winner"]]

# Create match_level_data.csv
match_df = match_df.merge(outcomes, on="game_num", how="inner")
match_df.to_csv("match_level_data.csv", index=False)
print("✅ Final match_level_data.csv saved. Shape:", match_df.shape)

print(games.columns.tolist())

✅ Final match_level_data.csv saved. Shape: (3680, 89)
['game_num', 'game_id', 'filename', 'time', 'region', 'map', 'series_game_num', 'team0_name', 'team1_name', 'p0_name', 'p1_name', 'p2_name', 'p3_name', 'p4_name', 'p5_name', 'winner']


## 5️⃣ Load Clean Match-Level Dataset

In [13]:
# Load the dataset
df = pd.read_csv("match_level_data.csv")

# Preview
print("✅ Loaded match_level_data.csv — shape:", df.shape)
df.head()

✅ Loaded match_level_data.csv — shape: (3680, 89)


Unnamed: 0,game_num,team0_p0_pos_x,team0_p0_pos_y,team0_p0_pos_z,team0_p0_vel_x,team0_p0_vel_y,team0_p0_vel_z,team0_p0_rot_x,team0_p0_rot_y,team0_p0_rot_z,...,team1_p5_rot_z,team1_p5_up_x,team1_p5_up_y,team1_p5_up_z,team1_p5_boost,team1_p5_respawn_timer,game_id_x,winner_x,game_id_y,winner_y
0,1,117.252874,26.705936,35.700941,0.200345,0.29202,0.089258,-0.020613,0.023512,0.021233,...,0.049977,0.000227,-0.015905,0.748343,37.609369,-1.49273,4A44E1204832A71CEE689FAA1B0C67C1,0,4A44E1204832A71CEE689FAA1B0C67C1,0
1,2,-186.686742,69.359526,43.883707,-0.747307,3.646428,0.003292,-0.017995,0.090499,0.065881,...,0.011292,0.058613,-0.020254,0.763933,62.575821,-1.527774,40EE00694ECA842EAE691D80554BEB69,1,40EE00694ECA842EAE691D80554BEB69,1
2,3,25.062562,-62.238338,47.247951,-0.567851,0.042157,0.00505,-0.002839,-0.008352,0.021913,...,0.012829,-0.014003,-0.017364,0.732031,46.493516,-1.483899,5F5897344FD43B925BD265BC429750F5,0,5F5897344FD43B925BD265BC429750F5,0
3,4,45.007998,-241.588164,35.241209,0.090046,1.082795,0.039573,0.008677,0.08562,0.0428,...,0.025545,0.011103,0.007835,0.741891,48.526365,-1.454678,2CF4C7FC463A6FE1315E3DB148352D77,0,2CF4C7FC463A6FE1315E3DB148352D77,0
4,5,35.703933,-234.567455,35.281495,-0.531977,0.427365,0.026812,-0.04168,0.033852,0.020854,...,0.06149,-0.025113,0.00199,0.691815,56.300897,,3BB72B7D491A636B4A646CBC6267AAB0,0,3BB72B7D491A636B4A646CBC6267AAB0,0


## 6️⃣ Preprocessing data
Drop non-numeric or identifier columns, split into features and labels.

In [15]:
# Drop identifier and non-feature columns

X = df.drop(columns=["game_id", "game_num", "filename", "time", "region", "map",
                     "series_game_num", "team0_name", "team1_name", "p0_name",
                     "p1_name", "p2_name", "p3_name", "p4_name", "p5_name", "winner"], errors='ignore')
y = df["winner"]

print("✅ Features shape:", X.shape)

KeyError: 'winner'