In [56]:
import pandas as pd
import numpy as np
#from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier  # stronger model
from sklearn.metrics import classification_report, confusion_matrix

# Step 1: Import and Preview Data

In [2]:
participants=pd.read_csv('smoothcomp_participants.csv')

In [3]:
results=pd.read_csv('smoothcomp_results.csv')

In [4]:
participants.head()

Unnamed: 0,First Name,Last Name,Age,Birth Year,Gender,Country,Club,Affiliation,Bracket
0,Damon,Ball,16.0,2008.0,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs
1,Lincoln,Donaldson,16.0,2008.0,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs
2,Noah,Hingco,18.0,2007.0,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs
3,Caden,Parker,16.0,2008.0,M,Canada,Big Country MMA,,Juvenile Boys Gi / White / -118 lbs
4,Iker,Arellano Gomesainz,16.0,2008.0,M,Mexico,Octa BJJ,,Juvenile Boys Gi / White / -118 lbs


In [5]:
results.head()

Unnamed: 0,Bracket,Placement,First Name,Last Name,Country,Club
0,Juvenile Boys Gi / White / -118 lbs,1,Noah,Hingco,Canada,Pound4Pound - Resolute Jiu-Jitsu
1,Juvenile Boys Gi / White / -118 lbs,2,Caden,Parker,Canada,Big Country MMA
2,Juvenile Boys Gi / White / -118 lbs,3,Damon,Ball,Canada,Paradigm Martial Arts
3,Juvenile Boys Gi / White / -118 lbs,3,Iker,Arellano Gomesainz,Mexico,Octa BJJ
4,Juvenile Boys Gi / White / -129 lbs,1,Juan,Rios,Venezuela,Octa BJJ


# Step 2: EDA

## 2.1 Participants

In [6]:
participants.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 937 entries, 0 to 936
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   First Name   937 non-null    object 
 1   Last Name    936 non-null    object 
 2   Age          936 non-null    float64
 3   Birth Year   936 non-null    float64
 4   Gender       936 non-null    object 
 5   Country      937 non-null    object 
 6   Club         937 non-null    object 
 7   Affiliation  734 non-null    object 
 8   Bracket      937 non-null    object 
dtypes: float64(2), object(7)
memory usage: 66.0+ KB


### 2.1.1 Null Values

In [7]:
participants.isnull().sum()

First Name       0
Last Name        1
Age              1
Birth Year       1
Gender           1
Country          0
Club             0
Affiliation    203
Bracket          0
dtype: int64

In [8]:
participants[participants[['Last Name', 'Age', 'Birth Year', 'Gender']].isnull().any(axis=1)]

Unnamed: 0,First Name,Last Name,Age,Birth Year,Gender,Country,Club,Affiliation,Bracket
788,Unknown user,,,,,Canada,"Battle Arts Academy, Mississauga",Cicero Costha Canada,Female Gi / Adult / Blue / Super Heavy


In [9]:
participants = participants.dropna(subset=['Last Name', 'Age', 'Birth Year', 'Gender'])

In [10]:
participants.info()

<class 'pandas.core.frame.DataFrame'>
Index: 936 entries, 0 to 936
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   First Name   936 non-null    object 
 1   Last Name    936 non-null    object 
 2   Age          936 non-null    float64
 3   Birth Year   936 non-null    float64
 4   Gender       936 non-null    object 
 5   Country      936 non-null    object 
 6   Club         936 non-null    object 
 7   Affiliation  733 non-null    object 
 8   Bracket      936 non-null    object 
dtypes: float64(2), object(7)
memory usage: 73.1+ KB


In [11]:
participants.loc[:, 'Affiliation'] = participants['Affiliation'].fillna('No Affiliate')

In [12]:
participants.info()

<class 'pandas.core.frame.DataFrame'>
Index: 936 entries, 0 to 936
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   First Name   936 non-null    object 
 1   Last Name    936 non-null    object 
 2   Age          936 non-null    float64
 3   Birth Year   936 non-null    float64
 4   Gender       936 non-null    object 
 5   Country      936 non-null    object 
 6   Club         936 non-null    object 
 7   Affiliation  936 non-null    object 
 8   Bracket      936 non-null    object 
dtypes: float64(2), object(7)
memory usage: 73.1+ KB


### 2.1.2 Feature Engineering

In [13]:
# Drop birth year column to avoid multicollinearity (age & birth year)

participants=participants.drop(columns=['Birth Year'])

In [14]:
participants

Unnamed: 0,First Name,Last Name,Age,Gender,Country,Club,Affiliation,Bracket
0,Damon,Ball,16.0,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs
1,Lincoln,Donaldson,16.0,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs
2,Noah,Hingco,18.0,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs
3,Caden,Parker,16.0,M,Canada,Big Country MMA,No Affiliate,Juvenile Boys Gi / White / -118 lbs
4,Iker,Arellano Gomesainz,16.0,M,Mexico,Octa BJJ,No Affiliate,Juvenile Boys Gi / White / -118 lbs
...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,42.0,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Med...
933,Patricia,VanderMeer,40.0,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Sup...
934,Patricia,VanderMeer,40.0,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Ope...
935,Alejandra,Ordaz,42.0,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Ope...


In [15]:
# Step 1: Get youngest age per bracket
youngest = participants.groupby('Bracket')['Age'].transform('min')

# Step 2: Subtract bracket's youngest age from each participant's age
participants['Age Diff from Youngest'] = participants['Age'] - youngest

In [16]:
# Drop age column

participants=participants.drop(columns=['Age'])

In [17]:
participants

Unnamed: 0,First Name,Last Name,Gender,Country,Club,Affiliation,Bracket,Age Diff from Youngest
0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs,0.0
1,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs,0.0
2,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs,2.0
3,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0
4,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0
...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Med...,0.0
933,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Sup...,0.0
934,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Ope...,0.0
935,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Ope...,2.0


In [18]:
participants['bracket_parts'] = participants['Bracket'].str.split(' / ')
participants['num_parts'] = participants['bracket_parts'].apply(len)
participants['num_parts'].value_counts()

num_parts
4    825
3    111
Name: count, dtype: int64

In [19]:
def parse_bracket(parts):
    group = division = belt = weight = None

    if len(parts) == 3:
        # Juvenile divisions: Group / Belt / Weight
        group, belt, weight = parts
    elif len(parts) == 4:
        # Adult/Master: Group / Division / Belt / Weight
        group, division, belt, weight = parts
    elif len(parts) == 5:
        # e.g., Female Gi / Master 2 (40+) / Brown/Black / Open Weight
        group, division, belt, weight = parts[0], parts[1], parts[2], parts[3] + ' / ' + parts[4]  # combine weight
    else:
        group = parts[0]  # fallback for malformed rows

    return pd.Series([group, division, belt, weight])

In [20]:
participants[['Group', 'Division', 'Belt', 'Weight']] = participants['bracket_parts'].apply(parse_bracket)
participants.drop(columns=['bracket_parts', 'num_parts'], inplace=True)

In [21]:
participants

Unnamed: 0,First Name,Last Name,Gender,Country,Club,Affiliation,Bracket,Age Diff from Youngest,Group,Division,Belt,Weight
0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,,White,-118 lbs
1,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,,White,-118 lbs
2,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs,2.0,Juvenile Boys Gi,,White,-118 lbs
3,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,,White,-118 lbs
4,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,,White,-118 lbs
...,...,...,...,...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Med...,0.0,Female Gi,Master 2 (40+),Brown/Black,Medium Heavy
933,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Sup...,0.0,Female Gi,Master 2 (40+),Brown/Black,Super Heavy
934,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Ope...,0.0,Female Gi,Master 2 (40+),Brown/Black,Open Weight
935,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Ope...,2.0,Female Gi,Master 2 (40+),Brown/Black,Open Weight


In [22]:
participants.loc[participants['Division'].isna(), 'Division'] = 'Juvenile'

In [23]:
participants

Unnamed: 0,First Name,Last Name,Gender,Country,Club,Affiliation,Bracket,Age Diff from Youngest,Group,Division,Belt,Weight
0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,Juvenile,White,-118 lbs
1,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,Juvenile,White,-118 lbs
2,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs,2.0,Juvenile Boys Gi,Juvenile,White,-118 lbs
3,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,Juvenile,White,-118 lbs
4,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile Boys Gi,Juvenile,White,-118 lbs
...,...,...,...,...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Med...,0.0,Female Gi,Master 2 (40+),Brown/Black,Medium Heavy
933,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Sup...,0.0,Female Gi,Master 2 (40+),Brown/Black,Super Heavy
934,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Ope...,0.0,Female Gi,Master 2 (40+),Brown/Black,Open Weight
935,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Ope...,2.0,Female Gi,Master 2 (40+),Brown/Black,Open Weight


In [25]:
participants['Weight'].unique()

array(['-118 lbs', '-129 lbs', '-141,6 lbs', '-152,6 lbs', '-163,6 lbs',
       '-175 lbs', '-186 lbs', '-197 lbs', 'Open Weight', '-125 lbs',
       '-133,6 lbs', '-106,6 lbs', '-152 lbs', '+152 lbs', 'Open weight',
       'Light Feather', 'Feather', 'Light', 'Middle', 'Medium Heavy',
       'Heavy', 'Super Heavy', 'Ultra Heavy', 'Open Weight - Light',
       'Open Weight - Heavy', 'Rooster'], dtype=object)

In [26]:
weight_order = {
    '-106,6 lbs': 1,
    '-118 lbs': 1,
    '-125 lbs': 2,
    '-129 lbs': 2,
    '-133,6 lbs': 3,
    '-141,6 lbs': 3,
    '-152 lbs': 4,
    '-152,6 lbs': 4,
    '-163,6 lbs': 5,
    '-175 lbs': 6,
    '-186 lbs': 7,
    '-197 lbs': 8,
    '+152 lbs': 9,

    # Adult named classes (aligned roughly with juveniles)
    'Rooster': 1,
    'Light Feather': 1,
    'Feather': 2,
    'Light': 3,
    'Middle': 4,
    'Medium Heavy': 5,
    'Heavy': 6,
    'Super Heavy': 7,
    'Ultra Heavy': 8,

    # Open classes (assigned highest or neutral rank)
    'Open Weight': 9,
    'Open weight': 9,
    'Open Weight - Light': 9,
    'Open Weight - Heavy': 9
}

In [27]:
participants['Weight Ordinal'] = participants['Weight'].map(weight_order)

In [28]:
participants

Unnamed: 0,First Name,Last Name,Gender,Country,Club,Affiliation,Bracket,Age Diff from Youngest,Division,Belt,Weight,Weight Ordinal
0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1
1,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1
2,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs,2.0,Juvenile,White,-118 lbs,1
3,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1
4,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1
...,...,...,...,...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Med...,0.0,Master 2 (40+),Brown/Black,Medium Heavy,5
933,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Sup...,0.0,Master 2 (40+),Brown/Black,Super Heavy,7
934,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Ope...,0.0,Master 2 (40+),Brown/Black,Open Weight,9
935,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Ope...,2.0,Master 2 (40+),Brown/Black,Open Weight,9


In [29]:
participants['Division'].unique()

array(['Juvenile', 'Adult', 'Master 1 (30+)', 'Master 2 (40+)',
       'Master 3 (50+)'], dtype=object)

In [30]:
division_order = {
    'Juvenile': 1,
    'Adult': 2,
    'Master 1 (30+)': 3,
    'Master 2 (40+)': 4,
    'Master 3 (50+)': 5
}

In [31]:
participants['Division Ordinal'] = participants['Division'].map(division_order)

In [32]:
participants

Unnamed: 0,First Name,Last Name,Gender,Country,Club,Affiliation,Bracket,Age Diff from Youngest,Division,Belt,Weight,Weight Ordinal,Division Ordinal
0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
1,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
2,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs,2.0,Juvenile,White,-118 lbs,1,1
3,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
4,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Med...,0.0,Master 2 (40+),Brown/Black,Medium Heavy,5,4
933,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Sup...,0.0,Master 2 (40+),Brown/Black,Super Heavy,7,4
934,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Ope...,0.0,Master 2 (40+),Brown/Black,Open Weight,9,4
935,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Ope...,2.0,Master 2 (40+),Brown/Black,Open Weight,9,4


In [33]:
participants.loc[participants['Division']!='Juvenile','Division']='Adults'

In [34]:
participants

Unnamed: 0,First Name,Last Name,Gender,Country,Club,Affiliation,Bracket,Age Diff from Youngest,Division,Belt,Weight,Weight Ordinal,Division Ordinal
0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
1,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
2,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile Boys Gi / White / -118 lbs,2.0,Juvenile,White,-118 lbs,1,1
3,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
4,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,Juvenile Boys Gi / White / -118 lbs,0.0,Juvenile,White,-118 lbs,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Med...,0.0,Adults,Brown/Black,Medium Heavy,5,4
933,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Sup...,0.0,Adults,Brown/Black,Super Heavy,7,4
934,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Female Gi / Master 2 (40+) / Brown/Black / Ope...,0.0,Adults,Brown/Black,Open Weight,9,4
935,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Female Gi / Master 2 (40+) / Brown/Black / Ope...,2.0,Adults,Brown/Black,Open Weight,9,4


In [35]:
participants['Belt'].unique()

array(['White', 'Blue', 'Purple', 'Brown', 'Black', 'Brown/Black'],
      dtype=object)

In [38]:
participants.drop(columns=['Bracket','Group','Weight'])

Unnamed: 0,First Name,Last Name,Gender,Country,Club,Affiliation,Age Diff from Youngest,Division,Belt,Weight Ordinal,Division Ordinal
0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,0.0,Juvenile,White,1,1
1,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,0.0,Juvenile,White,1,1
2,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,2.0,Juvenile,White,1,1
3,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,0.0,Juvenile,White,1,1
4,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,0.0,Juvenile,White,1,1
...,...,...,...,...,...,...,...,...,...,...,...
932,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,0.0,Adults,Brown/Black,5,4
933,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,0.0,Adults,Brown/Black,7,4
934,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,0.0,Adults,Brown/Black,9,4
935,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,2.0,Adults,Brown/Black,9,4


## 2.2 Results

In [39]:
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 551 entries, 0 to 550
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Bracket     551 non-null    object
 1   Placement   551 non-null    int64 
 2   First Name  551 non-null    object
 3   Last Name   551 non-null    object
 4   Country     551 non-null    object
 5   Club        550 non-null    object
dtypes: int64(1), object(5)
memory usage: 26.0+ KB


### 2.2.1 Null Values

In [40]:
results[results.isnull().any(axis=1)]

Unnamed: 0,Bracket,Placement,First Name,Last Name,Country,Club
9,"Juvenile Boys Gi / White / -141,6 lbs",2,Liam,Bell,Canada,


In [41]:
results.loc[(results['First Name'] == 'Liam') & (results['Last Name'] == 'Bell'), 'Club'] = 'United Brazilian Jiu Jitsu'


In [42]:
results.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 551 entries, 0 to 550
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Bracket     551 non-null    object
 1   Placement   551 non-null    int64 
 2   First Name  551 non-null    object
 3   Last Name   551 non-null    object
 4   Country     551 non-null    object
 5   Club        551 non-null    object
dtypes: int64(1), object(5)
memory usage: 26.0+ KB


In [43]:
results=results.drop(columns=['Country','Club'])

In [44]:
participant_results = results.merge(
    participants,
    on=['First Name', 'Last Name', 'Bracket'],
    how='right'
)


In [45]:
participant_results

Unnamed: 0,Bracket,Placement,First Name,Last Name,Gender,Country,Club,Affiliation,Age Diff from Youngest,Division,Belt,Weight,Weight Ordinal,Division Ordinal
0,Juvenile Boys Gi / White / -118 lbs,3.0,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,0.0,Juvenile,White,-118 lbs,1,1
1,Juvenile Boys Gi / White / -118 lbs,,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,0.0,Juvenile,White,-118 lbs,1,1
2,Juvenile Boys Gi / White / -118 lbs,1.0,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,2.0,Juvenile,White,-118 lbs,1,1
3,Juvenile Boys Gi / White / -118 lbs,2.0,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,0.0,Juvenile,White,-118 lbs,1,1
4,Juvenile Boys Gi / White / -118 lbs,3.0,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,0.0,Juvenile,White,-118 lbs,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,Female Gi / Master 2 (40+) / Brown/Black / Med...,1.0,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,0.0,Adults,Brown/Black,Medium Heavy,5,4
932,Female Gi / Master 2 (40+) / Brown/Black / Sup...,,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,0.0,Adults,Brown/Black,Super Heavy,7,4
933,Female Gi / Master 2 (40+) / Brown/Black / Ope...,1.0,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,0.0,Adults,Brown/Black,Open Weight,9,4
934,Female Gi / Master 2 (40+) / Brown/Black / Ope...,2.0,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,2.0,Adults,Brown/Black,Open Weight,9,4


In [46]:
participant_results['Placement'].unique()

array([ 3., nan,  1.,  2.])

In [47]:
podium={
    1.:'First',
    2.:'Second',
    3.:'Third',
    np.nan:'None'
}

In [48]:
participant_results['Podium'] = participant_results['Placement'].map(podium)

In [49]:
participant_results=participant_results.drop(columns='Placement')

In [77]:
participant_results

Unnamed: 0,Bracket,First Name,Last Name,Gender,Country,Club,Affiliation,Division,Belt,Weight Ordinal,Division Ordinal,Podium,Age Diff from Youngest (Scaled)
0,Juvenile Boys Gi / White / -118 lbs,Damon,Ball,M,Canada,Paradigm Martial Arts,Renzo Gracie Ottawa,Juvenile,White,1,1,Third,-0.954411
1,Juvenile Boys Gi / White / -118 lbs,Lincoln,Donaldson,M,Canada,Academia Brazilian JiuJitsu,Subconscious BJJ,Juvenile,White,1,1,,-0.954411
2,Juvenile Boys Gi / White / -118 lbs,Noah,Hingco,M,Canada,Pound4Pound - Resolute Jiu-Jitsu,Body of Four Team,Juvenile,White,1,1,First,-0.443207
3,Juvenile Boys Gi / White / -118 lbs,Caden,Parker,M,Canada,Big Country MMA,No Affiliate,Juvenile,White,1,1,Second,-0.954411
4,Juvenile Boys Gi / White / -118 lbs,Iker,Arellano Gomesainz,M,Mexico,Octa BJJ,No Affiliate,Juvenile,White,1,1,Third,-0.954411
...,...,...,...,...,...,...,...,...,...,...,...,...,...
931,Female Gi / Master 2 (40+) / Brown/Black / Med...,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Adults,Brown/Black,5,4,First,-0.954411
932,Female Gi / Master 2 (40+) / Brown/Black / Sup...,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Adults,Brown/Black,7,4,,-0.954411
933,Female Gi / Master 2 (40+) / Brown/Black / Ope...,Patricia,VanderMeer,F,Canada,5th Dimension Training Academy,Zenith,Adults,Brown/Black,9,4,First,-0.954411
934,Female Gi / Master 2 (40+) / Brown/Black / Ope...,Alejandra,Ordaz,F,Mexico,Action & Reaction MMA,Cicero Costha Canada,Adults,Brown/Black,9,4,Second,-0.443207


In [53]:
scaler = StandardScaler()
participant_results['Age Diff from Youngest (Scaled)'] = scaler.fit_transform(participant_results[['Age Diff from Youngest']])


In [73]:
participant_results=participant_results.drop(columns=['Weight','Age Diff from Youngest'])

In [57]:
model = LogisticRegression(class_weight='balanced')

In [82]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# 1. Load and prepare data
df = participant_results.copy()
df = df.dropna(subset=['Podium'])  # Drop rows with missing actual placements

# 2. Define features and label
target = 'Podium'

numeric_features = ['Age Diff from Youngest (Scaled)', 'Weight Ordinal', 'Division Ordinal']
categorical_features = ['Gender', 'Country', 'Club', 'Affiliation', 'Division', 'Belt']

X = df[numeric_features + categorical_features]
label_encoder = LabelEncoder()
df[target] = label_encoder.fit_transform(df[target])  # Encode actual podium
y = df[target]

# 3. Preprocessing
preprocessor = ColumnTransformer([
    ('num', StandardScaler(), numeric_features),
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
])

# 4. Pipeline: Random Forest with preprocessing
pipeline = Pipeline([
    ('preprocessing', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, class_weight='balanced', random_state=42))
])

# 5. Train/test split and model training
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.2, random_state=42)
pipeline.fit(X_train, y_train)

# 6. Evaluate
y_pred = pipeline.predict(X_test)
print("Classification Report:\n", classification_report(y_test, y_pred, target_names=label_encoder.classes_))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# 7. Predict scores for full dataset
df['Podium_Score'] = pipeline.predict_proba(X)[:, label_encoder.transform(['First'])[0]]  # use "First" class probability

# 8. Bracket ranking
df['Bracket Rank'] = df.groupby('Bracket')['Podium_Score'].rank(method='first', ascending=False)
df['Predicted Podium'] = 'None'

# 9. Rule-based podium assignment
def assign_podium(group):
    group = group.sort_values('Bracket Rank')
    idx = group.index
    
    if len(group) >= 1:
        group.loc[idx[0], 'Predicted Podium'] = 'First'
    if len(group) >= 2:
        group.loc[idx[1], 'Predicted Podium'] = 'Second'
    if len(group) >= 3:
        group.loc[idx[2], 'Predicted Podium'] = 'Third'
    if len(group) >= 4:
        group.loc[idx[3], 'Predicted Podium'] = 'Third'
    
    return group

df = df.groupby('Bracket', group_keys=False).apply(assign_podium)

# 10. Decode actual podium if needed
df['Podium'] = label_encoder.inverse_transform(df['Podium'])

# 11. Final result preview
final = df[['First Name', 'Last Name', 'Bracket', 'Club', 'Belt', 'Division', 'Podium', 'Predicted Podium']]
print(final.head(10))


Classification Report:
               precision    recall  f1-score   support

       First       0.26      0.22      0.24        32
        None       0.51      0.66      0.58        77
      Second       0.15      0.06      0.09        31
       Third       0.33      0.33      0.33        48

    accuracy                           0.40       188
   macro avg       0.31      0.32      0.31       188
weighted avg       0.36      0.40      0.38       188

Confusion Matrix:
 [[ 7 14  4  7]
 [ 8 51  2 16]
 [ 8 12  2  9]
 [ 4 23  5 16]]
    First Name        Last Name                             Bracket  \
759     Lauren       Engelhardt  Female Gi / Adult / Blue / Feather   
761     Alexis  Da silva cabral  Female Gi / Adult / Blue / Feather   
760     Danika           Katona  Female Gi / Adult / Blue / Feather   
762      Gabby           Aumont  Female Gi / Adult / Blue / Feather   
780  Ekaterina        Lachilova    Female Gi / Adult / Blue / Heavy   
781    arielle            lopes    

In [84]:
final.to_csv('Predictions.csv')