### Cleaning Fighters Data

In [17]:
import pandas as pd
import numpy as np

In [18]:
fighters_df = pd.read_csv('data/fighters.csv')

In [None]:
fighters_df.head()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt
0,Tom,Aaron,,--,155 lbs.,--,,5,3,0,
1,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4,6,0,
2,Nariman,Abbasov,Bayraktar,"5' 8""",155 lbs.,"66.0""",Orthodox,28,4,0,
3,Darion,Abbey,,"6' 2""",265 lbs.,"80.0""",Orthodox,9,5,0,
4,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10,15,0,


In [79]:
# print(len("================ Number of missing values for each columns ================"))
def print_missing_values(df):
    print("================ Number of missing values for each columns ================")
    for col in df.columns.tolist():
        print(f'{col} : {df[col].isna().sum()}')
    print("="*75)

In [80]:
print_missing_values(fighters_df)

First : 11
Last : 0
Nickname : 1590
Ht. : 315
Wt. : 74
Reach : 1618
Stance : 734
W : 0
L : 0
D : 0
Belt : 3567


In [83]:
def check_types(df):
    print(df.dtypes)

In [85]:
check_types(fighters_df)

First         object
Last          object
Nickname      object
Ht.          float32
Wt.          float32
Reach        float32
Stance      category
W              int32
L              int32
D              int32
Belt         float64
dtype: object


In [21]:
# Ht. column
def format_height(height=""):
    if height == '--':
        return np.nan
    height = height.rstrip('"')
    height = height.replace("' ", ".")
    return height


fighters_df['Ht.'] = fighters_df['Ht.'].apply(format_height)
fighters_df['Ht.'] = fighters_df['Ht.'].astype('float32')

In [27]:
# Wt. column
def format_weight(height=""):
    if height == '--':
        return np.nan
    height = height.rstrip(' lbs.')
    return height


fighters_df['Wt.'] = fighters_df['Wt.'].apply(format_weight)
fighters_df['Wt.'] = fighters_df['Wt.'].astype('float32')

In [None]:
# Reach column
def format_reach(height=""):
    if height == '--':
        return np.nan
    height = height.rstrip('"')
    return height


fighters_df['Reach'] = fighters_df['Reach'].apply(format_reach)
fighters_df['Reach'] = fighters_df['Reach'].astype('float32')

In [42]:
# Making stance column categorical
fighters_df['Stance'] = fighters_df['Stance'].astype('category')

In [37]:
# W - L - D can not be so big.
# It is better to make them only 32-bits
for col in ['W','L','D'] : 
    fighters_df[col] = fighters_df[col].astype('int32')

In [41]:
fighters_df.head()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt
0,Tom,Aaron,,,155.0,,,5,3,0,
1,Danny,Abbadi,The Assassin,5.11,155.0,,Orthodox,4,6,0,
2,Nariman,Abbasov,Bayraktar,5.8,155.0,66.0,Orthodox,28,4,0,
3,Darion,Abbey,,6.2,265.0,80.0,Orthodox,9,5,0,
4,David,Abbott,Tank,6.0,265.0,,Switch,10,15,0,


I think this is enough for cleaning fighters data

In [52]:
fighters_df.to_csv('data/Fighters.csv')

### CLeaning Events Data


In [87]:
events_df = pd.read_csv('raw_data/raw_events.csv')
events_df.head()

Unnamed: 0,Event_Id,Name,Date,Location
0,754968e325d6f60d,UFC Fight Night: Walker vs. Zhang,"August 23, 2025","Shanghai, Hebei, China"
1,421ccfc6ddb17958,UFC 319: Du Plessis vs. Chimaev,"August 16, 2025","Chicago, Illinois, USA"
2,6cd3dfc54f01287f,UFC Fight Night: Dolidze vs. Hernandez,"August 09, 2025","Las Vegas, Nevada, USA"
3,f2c934689243fe4e,UFC Fight Night: Taira vs. Park,"August 02, 2025","Las Vegas, Nevada, USA"
4,28d8638ea0a71908,UFC Fight Night: Whittaker vs. De Ridder,"July 26, 2025","Abu Dhabi, Abu Dhabi, United Arab Emirates"


In [84]:
check_types(events_df)

Event_Id    object
Name        object
Date        object
Location    object
dtype: object


In [86]:
print_missing_values(events_df)

Event_Id : 0
Name : 0
Date : 0
Location : 0


! Perfect

In [92]:
events_df['Date'] = pd.to_datetime(events_df['Date'], format="%B %d, %Y")
events_df.head()

Unnamed: 0,Event_Id,Name,Date,Location
0,754968e325d6f60d,UFC Fight Night: Walker vs. Zhang,2025-08-23,"Shanghai, Hebei, China"
1,421ccfc6ddb17958,UFC 319: Du Plessis vs. Chimaev,2025-08-16,"Chicago, Illinois, USA"
2,6cd3dfc54f01287f,UFC Fight Night: Dolidze vs. Hernandez,2025-08-09,"Las Vegas, Nevada, USA"
3,f2c934689243fe4e,UFC Fight Night: Taira vs. Park,2025-08-02,"Las Vegas, Nevada, USA"
4,28d8638ea0a71908,UFC Fight Night: Whittaker vs. De Ridder,2025-07-26,"Abu Dhabi, Abu Dhabi, United Arab Emirates"
