In [207]:
# Dependencies
import pandas as pd
import json
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [208]:
# Load every season's per-match export
def _read_season(season_name):
    """Read one season's semicolon-delimited CSV from csv/per_match/ into a DataFrame."""
    return pd.read_csv(f'csv/per_match/{season_name}.csv', delimiter=';')


# One DataFrame per season (files are read in the same order as before)
Fall2022 = _read_season('Fall2022')
Fall2023 = _read_season('Fall2023')
Spring2023 = _read_season('Spring2023')
Spring2024 = _read_season('Spring2024')
Summer2023 = _read_season('Summer2023')
Winter2023 = _read_season('Winter2023')
Winter2024 = _read_season('Winter2024')

# Season frames in the order the wrangling step iterates over them
seasons = [
    Fall2022,
    Winter2023,
    Spring2023,
    Summer2023,
    Fall2023,
    Winter2024,
    Spring2024,
]

In [209]:
# Data Wrangling
#
# Turns the raw per-season frames in `seasons` into `matches_list`: one DataFrame
# per replay, with unused columns dropped, player names normalised and `result`
# encoded as 0 (loss) / 1 (win).

# Columns that are not needed for the analysis
UNUSED_COLUMNS = ['replay id', 'map', 'date', 'team name', 'opposing team name', 'car id', 'car name']

# Minimum number of player rows a replay needs in order to be kept
MIN_PLAYERS_PER_MATCH = 6

# Load the username mapping (name -> replacement name) from JSON.
# The encoding is given explicitly so that non-ASCII usernames are decoded the
# same way on every platform instead of depending on the locale default.
with open('json/username_mapping.json', 'r', encoding='utf-8') as file:
    username_mapping = json.load(file)

# We will store all our matches here
matches_list = []

for season in seasons:
    # drop() returns a new frame, so the raw season frames are never modified
    # and this cell can be re-run without side effects on `seasons`
    season = season.drop(UNUSED_COLUMNS, axis=1)

    # Make all player names lowercase
    season['player name'] = season['player name'].str.lower()

    # Replace mapped names with their dictionary value; names without an entry
    # in the mapping are kept unchanged via fillna
    season['player name'] = season['player name'].map(username_mapping).fillna(season['player name'])

    # Convert 'result' to binary (any value other than 'loss'/'win' becomes NaN)
    season['result'] = season['result'].map({'loss': 0, 'win': 1})

    # Group by replay name and collect one frame per replay
    matches_list.extend(match for _, match in season.groupby('replay title'))

# Nuke any matches which had less than 6 players (i.e. there was a 2v3).
# Replays with more than 6 rows are kept as well.
matches_list = [match for match in matches_list if len(match) >= MIN_PLAYERS_PER_MATCH]

# Fail here with a clear message instead of much later inside model fitting:
# a NaN in 'result' means the raw data held something other than 'win'/'loss'.
unlabelled_rows = sum(int(match['result'].isna().sum()) for match in matches_list)
if unlabelled_rows:
    raise ValueError(
        f"{unlabelled_rows} player rows have a 'result' that is neither 'win' nor 'loss'"
    )

In [219]:
# Stack the per-replay frames into one long table (one row per player per replay)
all_matches = pd.concat(matches_list)

# Preview the first six rows
display(all_matches.head(6))

# Optional export of the combined dataset (disabled):
#all_matches.to_csv('results/dataset.csv', index=False)

Unnamed: 0,replay title,result,player name,score,goals,assists,saves,shots,shots conceded,goals conceded,...,time neutral third,percentage neutral third,time offensive third,percentage offensive third,avg distance to ball,avg distance to ball has possession,avg distance to ball no possession,avg distance to team mates,demos inflicted,demos taken
1790,10588_Game1.replay,1,scootleboot,363,1,0,1,3,5,0,...,96.85,30.85,49.2,15.67,2807,2954,2692,3524,1,3
1791,10588_Game1.replay,1,gabefrfx,130,0,0,0,0,5,0,...,103.57,32.67,42.12,13.29,2311,2134,2425,3338,1,2
1792,10588_Game1.replay,1,commanderboy,165,0,0,1,1,5,0,...,85.5,26.73,47.31,14.79,3098,3095,3032,3484,2,1
1793,10588_Game1.replay,0,jbassfox,220,0,0,2,2,4,1,...,106.72,33.38,96.82,30.28,3173,2951,3468,4239,3,1
1794,10588_Game1.replay,0,terminator,190,0,0,0,2,4,1,...,109.83,34.66,45.97,14.51,3211,3291,3108,4109,2,2
1795,10588_Game1.replay,0,klosty,186,0,0,1,1,4,1,...,108.02,33.77,77.98,24.38,3511,3564,3438,4321,1,1


In [220]:
# Per-player statistics used as model inputs (add other relevant features here if any)
feature_columns = [
    'goals',
    'shots',
    'assists',
    'saves',
    'demos inflicted',
    'demos taken',
]

# Features
X = all_matches[feature_columns]

# Labels: 1 = win, 0 = loss
y = all_matches['result']

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
# NOTE(review): the split is made per row, so players from the same replay can end
# up on both sides of the split -- confirm this is acceptable for the analysis.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit a logistic regression with scikit-learn's default settings
model = LogisticRegression()
model.fit(X_train, y_train)

In [221]:
# Feature importance
#
# One row per model term, intercept first. The intercept is stored as a float
# (not a formatted string) so the 'Coefficient' column stays numeric and can be
# sorted, rounded or plotted like the other coefficients.
feature_importance = pd.DataFrame({
    'Feature': ['intercept', *X.columns],
    'Coefficient': [model.intercept_[0], *model.coef_[0]],
})

display(feature_importance)

# For a classifier, `score` returns the mean accuracy on the given data (it is
# not an R^2 value), and the mean squared difference between hard 0/1
# predictions and 0/1 labels is simply the share of misclassified rows.
accuracy = model.score(X_test, y_test)
misclassification_rate = np.mean(model.predict(X_test) != y_test)

print("Misclassification rate: \t", misclassification_rate)
print("Accuracy: \t", accuracy)

Unnamed: 0,Feature,Coefficient
0,intercept,-0.8832085439825244
1,goals,1.01031
2,shots,-0.050167
3,assists,1.071832
4,saves,-0.121554
5,demos inflicted,-0.054036
6,demos taken,-0.025039


MSE: 	 0.32447691796745265
R2: 	 0.6755230820325473
