In [1]:
import pandas as pd
from unidecode import unidecode

In [2]:
df_fight = pd.read_csv("../data/raw/ufc_fights_historical_data.csv")
df_event = pd.read_csv("../data/raw/ufc_event_date.csv")

In [3]:
df_event.rename(columns={"Event":"event", "Date":"date"}, inplace=True)
df_event.reset_index(drop=True)
df_event = df_event.iloc[:,1:]
print(df_event)

                                        event                date
0               UFC 313: Pereira vs. Ankalaev      March 08, 2025
1         UFC Fight Night: Kape vs. Almabayev      March 01, 2025
2            UFC Fight Night: Cejudo vs. Song   February 22, 2025
3    UFC Fight Night: Cannonier vs. Rodrigues   February 15, 2025
4        UFC 312: Du Plessis vs. Strickland 2   February 08, 2025
..                                        ...                 ...
718                UFC 6: Clash of the Titans       July 14, 1995
719            UFC 5: The Return of the Beast      April 07, 1995
720            UFC 4: Revenge of the Warriors   December 16, 1994
721                 UFC 3: The American Dream  September 09, 1994
722                         UFC 2: No Way Out      March 11, 1994

[723 rows x 2 columns]


In [4]:
def normalizetext(text):
    return unidecode(text)
df_event["event"] = df_event["event"].apply(normalizetext)
df_fight["event"] = df_fight["event"].apply(normalizetext)
df_fight["fighter_1"] = df_fight["fighter_1"].apply(normalizetext)
df_fight["fighter_2"] = df_fight["fighter_2"].apply(normalizetext)

In [5]:
merged = df_fight.merge(df_event, how="left", on="event")
merged["method"] = merged["method"].str.split("\n").str[0]
merged["result"] = merged["result"].str.split("\n").str[0]
merged["date"].fillna("November 12, 1993", inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  merged["date"].fillna("November 12, 1993", inplace=True)


In [6]:
new_order = ["event", "date", "fighter_1", "fighter_2", "result", "method", "round", "time"]
merged = merged[new_order]

In [7]:
merged["date"] = pd.to_datetime(merged["date"]).dt.strftime("%Y-%m-%d")
def lowercase(text):
    return text.lower()
merged["result"] = merged["result"].apply(lowercase)

In [8]:
merged.to_csv("../data/processed/processed_ufc.csv")