# Feature Engineering

### Get data from DB

In [1]:
import pandas as pd
import numpy as np
import sqlite3

# Show every column when a wide frame is displayed
pd.set_option("display.max_columns", None)

# One row per bout from BOUTS_OVERALL, with each corner's height, reach, stance and
# date of birth joined in from FIGHTERS. LEFT JOINs keep bouts whose fighter ID has
# no matching FIGHTERS row; those attribute columns come back NULL.
conn = sqlite3.connect("../data/ufc.db", detect_types=sqlite3.PARSE_DECLTYPES)
query = """
        SELECT
            a.*,
            b.HEIGHT_INCHES AS RED_HEIGHT_INCHES,
            b.REACH_INCHES AS RED_REACH_INCHES,
            b.FIGHTING_STANCE AS RED_FIGHTING_STANCE,
            b.DATE_OF_BIRTH AS RED_DATE_OF_BIRTH,
            c.HEIGHT_INCHES AS BLUE_HEIGHT_INCHES,
            c.REACH_INCHES AS BLUE_REACH_INCHES,
            c.FIGHTING_STANCE AS BLUE_FIGHTING_STANCE,
            c.DATE_OF_BIRTH AS BLUE_DATE_OF_BIRTH
        FROM
            BOUTS_OVERALL a
        LEFT JOIN
            FIGHTERS b
        ON
            a.RED_FIGHTER_ID = b.FIGHTER_ID
        LEFT JOIN
            FIGHTERS c
        ON
            a.BLUE_FIGHTER_ID = c.FIGHTER_ID
        """

try:
    df = pd.read_sql_query(query, conn)
finally:
    # Always release the SQLite handle, even if the query fails; the frame is fully
    # materialised in memory so nothing below needs the open connection.
    conn.close()

### Create a stacked dataset so that features can be engineered by fighter

In [16]:
def get_weight_class(bout_type):
    """Return the weight-class name contained in a bout-type string.

    Parameters
    ----------
    bout_type : str
        Bout description, e.g. "UFC Women's Bantamweight Title Bout".

    Returns
    -------
    str or None
        The first matching weight class, or None when no listed class
        appears in the string (e.g. "Open Weight Bout", "Catch Weight Bout").
    """
    # Order matters: matching is by substring, so the women's divisions and
    # "Light Heavyweight" must be tested before the shorter names they contain.
    ordered_classes = (
        "Women's Strawweight",
        "Women's Flyweight",
        "Women's Bantamweight",
        "Women's Featherweight",
        "Light Heavyweight",
        "Heavyweight",
        "Middleweight",
        "Welterweight",
        "Lightweight",
        "Featherweight",
        "Bantamweight",
        "Flyweight",
    )
    for weight_class in ordered_classes:
        if weight_class in bout_type:
            return weight_class
    return None
    
def is_female(bout_type):
    """Return 1 when the bout-type string contains "Women's", otherwise 0."""
    return int("Women's" in bout_type)

def is_title_bout(bout_type):
    """Return 1 for a title bout, otherwise 0.

    A bout counts as a title bout when its description contains "Title Bout",
    unless it also contains "Tournament Title Bout", "Ultimate Fighter" or
    "TUF", in which case it is reported as 0. Interim title bouts count as 1.
    """
    excluded_markers = ("Tournament Title Bout", "Ultimate Fighter", "TUF")
    for marker in excluded_markers:
        if marker in bout_type:
            return 0
    return 1 if "Title Bout" in bout_type else 0

def is_undisputed_title_bout(bout_type):
    """Return 1 for a non-interim title bout, otherwise 0.

    Descriptions containing "Tournament Title Bout", "Ultimate Fighter" or
    "TUF" are always reported as 0. Of the remaining descriptions, only those
    containing "Title Bout" but not "Interim" are reported as 1.
    """
    if (
        "Tournament Title Bout" in bout_type
        or "Ultimate Fighter" in bout_type
        or "TUF" in bout_type
    ):
        return 0
    has_title = "Title Bout" in bout_type
    has_interim = "Interim" in bout_type
    return int(has_title and not has_interim)

In [2]:
def _swap_corner_prefix(column, fighter_prefix, opponent_prefix):
    """Rename a corner-prefixed column to the fighter/opponent convention.

    A column starting with ``fighter_prefix`` gets that prefix replaced by
    ``f_``; one starting with ``opponent_prefix`` gets ``o_``; anything else is
    returned unchanged. Only the leading prefix is rewritten, so a name that
    happens to contain the prefix text again later (e.g. "RED_SCORED_X") is
    not mangled the way a plain ``str.replace`` would mangle it.
    """
    if column.startswith(fighter_prefix):
        return "f_" + column[len(fighter_prefix):]
    if column.startswith(opponent_prefix):
        return "o_" + column[len(opponent_prefix):]
    return column


# TEMP holds each bout's original row position so the two stacked rows of a bout
# can be sorted back next to each other after the concat.
temp = df.copy().rename_axis("TEMP").reset_index()
common = [
    "TEMP",
    "BOUT_ID",
    "DATE",
    "LOCATION",
    "BOUT_TYPE",
    "OUTCOME_METHOD",
    "END_ROUND",
    "BOUT_TIME_FORMAT",
    "TOTAL_TIME_MINUTES",
]
red_cols = [col for col in temp.columns if col.startswith("RED_")]
blue_cols = [col for col in temp.columns if col.startswith("BLUE_")]

# Red-corner perspective: red is the fighter (f_), blue is the opponent (o_).
reds = temp[common + red_cols + blue_cols]
reds = reds.rename(columns=lambda x: _swap_corner_prefix(x, "RED_", "BLUE_"))
reds["TEMP2"] = 1

# Blue-corner perspective: the same bout with the roles swapped.
blues = temp[common + blue_cols + red_cols]
blues = blues.rename(columns=lambda x: _swap_corner_prefix(x, "BLUE_", "RED_"))
blues["TEMP2"] = 2

# Two rows per bout (red view first, then blue view); the helper sort keys and the
# fighter-name columns are dropped once the ordering is fixed.
feature_df = (
    pd.concat([reds, blues])
    .sort_values(["TEMP", "TEMP2"])
    .drop(["TEMP", "TEMP2", "f_FIGHTER_NAME", "o_FIGHTER_NAME"], axis=1)
)
feature_df["DATE"] = pd.to_datetime(feature_df["DATE"])
feature_df["f_DATE_OF_BIRTH"] = pd.to_datetime(feature_df["f_DATE_OF_BIRTH"])
feature_df["o_DATE_OF_BIRTH"] = pd.to_datetime(feature_df["o_DATE_OF_BIRTH"])
# Only the fighter-side outcome (f_OUTCOME) is kept; the opponent's copy is dropped.
feature_df = feature_df.drop("o_OUTCOME", axis=1)
feature_df

Unnamed: 0,BOUT_ID,DATE,LOCATION,BOUT_TYPE,OUTCOME_METHOD,END_ROUND,BOUT_TIME_FORMAT,TOTAL_TIME_MINUTES,f_FIGHTER_ID,f_OUTCOME,f_KNOCKDOWNS,f_TOTAL_STRIKES_LANDED,f_TOTAL_STRIKES_ATTEMPTED,f_TAKEDOWNS_LANDED,f_TAKEDOWNS_ATTEMPTED,f_SUBMISSION_ATTEMPTS,f_REVERSALS,f_CONTROL_TIME_MINUTES,f_SIGNIFICANT_STRIKES_LANDED,f_SIGNIFICANT_STRIKES_ATTEMPTED,f_SIGNIFICANT_STRIKES_HEAD_LANDED,f_SIGNIFICANT_STRIKES_HEAD_ATTEMPTED,f_SIGNIFICANT_STRIKES_BODY_LANDED,f_SIGNIFICANT_STRIKES_BODY_ATTEMPTED,f_SIGNIFICANT_STRIKES_LEG_LANDED,f_SIGNIFICANT_STRIKES_LEG_ATTEMPTED,f_SIGNIFICANT_STRIKES_DISTANCE_LANDED,f_SIGNIFICANT_STRIKES_DISTANCE_ATTEMPTED,f_SIGNIFICANT_STRIKES_CLINCH_LANDED,f_SIGNIFICANT_STRIKES_CLINCH_ATTEMPTED,f_SIGNIFICANT_STRIKES_GROUND_LANDED,f_SIGNIFICANT_STRIKES_GROUND_ATTEMPTED,f_HEIGHT_INCHES,f_REACH_INCHES,f_FIGHTING_STANCE,f_DATE_OF_BIRTH,o_FIGHTER_ID,o_KNOCKDOWNS,o_TOTAL_STRIKES_LANDED,o_TOTAL_STRIKES_ATTEMPTED,o_TAKEDOWNS_LANDED,o_TAKEDOWNS_ATTEMPTED,o_SUBMISSION_ATTEMPTS,o_REVERSALS,o_CONTROL_TIME_MINUTES,o_SIGNIFICANT_STRIKES_LANDED,o_SIGNIFICANT_STRIKES_ATTEMPTED,o_SIGNIFICANT_STRIKES_HEAD_LANDED,o_SIGNIFICANT_STRIKES_HEAD_ATTEMPTED,o_SIGNIFICANT_STRIKES_BODY_LANDED,o_SIGNIFICANT_STRIKES_BODY_ATTEMPTED,o_SIGNIFICANT_STRIKES_LEG_LANDED,o_SIGNIFICANT_STRIKES_LEG_ATTEMPTED,o_SIGNIFICANT_STRIKES_DISTANCE_LANDED,o_SIGNIFICANT_STRIKES_DISTANCE_ATTEMPTED,o_SIGNIFICANT_STRIKES_CLINCH_LANDED,o_SIGNIFICANT_STRIKES_CLINCH_ATTEMPTED,o_SIGNIFICANT_STRIKES_GROUND_LANDED,o_SIGNIFICANT_STRIKES_GROUND_ATTEMPTED,o_HEIGHT_INCHES,o_REACH_INCHES,o_FIGHTING_STANCE,o_DATE_OF_BIRTH
0,567a09fd200cfa05,1993-11-12,"Denver, Colorado, USA",Open Weight Bout,KO/TKO,1,No Time Limit,0.433333,279093302a6f44b3,W,0.0,3.0,5.0,0.0,0.0,0.0,0.0,,3.0,5.0,3.0,5.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,2.0,2.0,77.0,,Orthodox,1959-03-30,96eff1a628adcc7f,0.0,0.0,1.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,72.0,,Orthodox,1969-06-14
0,567a09fd200cfa05,1993-11-12,"Denver, Colorado, USA",Open Weight Bout,KO/TKO,1,No Time Limit,0.433333,96eff1a628adcc7f,L,0.0,0.0,1.0,0.0,1.0,0.0,0.0,,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,72.0,,Orthodox,1969-06-14,279093302a6f44b3,0.0,3.0,5.0,0.0,0.0,0.0,0.0,,3.0,5.0,3.0,5.0,0.0,0.0,0.0,0.0,1.0,3.0,0.0,0.0,2.0,2.0,77.0,,Orthodox,1959-03-30
1,2d2bbc86e941e05c,1993-11-12,"Denver, Colorado, USA",Open Weight Bout,KO/TKO,1,No Time Limit,4.333333,598a58db87b890ee,W,2.0,38.0,53.0,0.0,0.0,0.0,0.0,,15.0,27.0,12.0,23.0,3.0,4.0,0.0,0.0,4.0,10.0,4.0,9.0,7.0,8.0,76.0,,Orthodox,NaT,d3711d3784b76255,0.0,13.0,29.0,0.0,0.0,0.0,0.0,,12.0,28.0,7.0,19.0,3.0,6.0,2.0,3.0,0.0,7.0,10.0,19.0,2.0,2.0,77.0,,Orthodox,NaT
1,2d2bbc86e941e05c,1993-11-12,"Denver, Colorado, USA",Open Weight Bout,KO/TKO,1,No Time Limit,4.333333,d3711d3784b76255,L,0.0,13.0,29.0,0.0,0.0,0.0,0.0,,12.0,28.0,7.0,19.0,3.0,6.0,2.0,3.0,0.0,7.0,10.0,19.0,2.0,2.0,77.0,,Orthodox,NaT,598a58db87b890ee,2.0,38.0,53.0,0.0,0.0,0.0,0.0,,15.0,27.0,12.0,23.0,3.0,4.0,0.0,0.0,4.0,10.0,4.0,9.0,7.0,8.0,76.0,,Orthodox,NaT
2,cecdc0da584274b9,1993-11-12,"Denver, Colorado, USA",Open Weight Bout,Submission,1,No Time Limit,2.300000,429e7d3725852ce9,W,0.0,4.0,7.0,1.0,1.0,0.0,0.0,,0.0,3.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,3.0,0.0,0.0,0.0,0.0,73.0,,Southpaw,1966-12-12,a5c53b3ddb31cc7d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0,,Orthodox,1963-08-04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7273,2c54be25b09fce50,2023-08-05,"Nashville, Tennessee, USA",Light Heavyweight Bout,KO/TKO,1,3 Rnd (5-5-5),1.366667,8667caa0451d245b,L,0.0,4.0,18.0,0.0,0.0,0.0,0.0,0.000000,4.0,18.0,3.0,14.0,0.0,3.0,1.0,1.0,4.0,18.0,0.0,0.0,0.0,0.0,77.0,83.0,Southpaw,1992-06-13,e4277e87a789d687,1.0,10.0,33.0,0.0,0.0,0.0,0.0,0.066667,10.0,33.0,5.0,28.0,0.0,0.0,5.0,5.0,10.0,26.0,0.0,0.0,0.0,7.0,75.0,76.0,Orthodox,1988-04-04
7274,ec2358459ffbe7de,2023-08-05,"Nashville, Tennessee, USA",Women's Strawweight Bout,Submission,2,3 Rnd (5-5-5),6.516667,6a1901c62ab3870f,L,0.0,13.0,31.0,0.0,0.0,0.0,0.0,0.283333,11.0,29.0,2.0,14.0,5.0,8.0,4.0,7.0,8.0,23.0,3.0,6.0,0.0,0.0,61.0,62.0,Orthodox,1991-09-25,b08012bbe542592a,0.0,25.0,51.0,3.0,3.0,1.0,0.0,2.366667,23.0,49.0,3.0,20.0,17.0,26.0,3.0,3.0,13.0,36.0,10.0,13.0,0.0,0.0,65.0,66.0,Southpaw,1990-12-19
7274,ec2358459ffbe7de,2023-08-05,"Nashville, Tennessee, USA",Women's Strawweight Bout,Submission,2,3 Rnd (5-5-5),6.516667,b08012bbe542592a,W,0.0,25.0,51.0,3.0,3.0,1.0,0.0,2.366667,23.0,49.0,3.0,20.0,17.0,26.0,3.0,3.0,13.0,36.0,10.0,13.0,0.0,0.0,65.0,66.0,Southpaw,1990-12-19,6a1901c62ab3870f,0.0,13.0,31.0,0.0,0.0,0.0,0.0,0.283333,11.0,29.0,2.0,14.0,5.0,8.0,4.0,7.0,8.0,23.0,3.0,6.0,0.0,0.0,61.0,62.0,Orthodox,1991-09-25
7275,554163b1c8c05522,2023-08-05,"Nashville, Tennessee, USA",Catch Weight Bout,Decision - Unanimous,5,5 Rnd (5-5-5-5-5),25.000000,65f09bacd3957381,W,0.0,132.0,211.0,7.0,7.0,1.0,0.0,19.633333,34.0,84.0,30.0,79.0,2.0,3.0,2.0,2.0,10.0,47.0,0.0,1.0,24.0,36.0,71.0,70.0,Switch,1992-04-20,05339613bf8e9808,0.0,25.0,69.0,1.0,2.0,0.0,0.0,1.133333,9.0,49.0,5.0,43.0,2.0,3.0,2.0,3.0,8.0,44.0,0.0,3.0,1.0,2.0,68.0,71.0,Orthodox,1987-06-25


In [18]:
# Spot check of is_title_bout: store the flag on the raw frame in a scratch column,
# then display only the bouts it marks as title bouts.
df["temp temp"] = df["BOUT_TYPE"].apply(is_title_bout)
df.loc[df["temp temp"].eq(1)]

Unnamed: 0,BOUT_ID,EVENT_ID,EVENT_NAME,DATE,LOCATION,RED_FIGHTER_ID,BLUE_FIGHTER_ID,RED_FIGHTER_NAME,BLUE_FIGHTER_NAME,RED_OUTCOME,BLUE_OUTCOME,BOUT_TYPE,OUTCOME_METHOD,OUTCOME_METHOD_DETAILS,END_ROUND,END_ROUND_TIME_MINUTES,BOUT_TIME_FORMAT,TOTAL_TIME_MINUTES,RED_KNOCKDOWNS,BLUE_KNOCKDOWNS,RED_TOTAL_STRIKES_LANDED,RED_TOTAL_STRIKES_ATTEMPTED,BLUE_TOTAL_STRIKES_LANDED,BLUE_TOTAL_STRIKES_ATTEMPTED,RED_TAKEDOWNS_LANDED,RED_TAKEDOWNS_ATTEMPTED,BLUE_TAKEDOWNS_LANDED,BLUE_TAKEDOWNS_ATTEMPTED,RED_SUBMISSION_ATTEMPTS,BLUE_SUBMISSION_ATTEMPTS,RED_REVERSALS,BLUE_REVERSALS,RED_CONTROL_TIME_MINUTES,BLUE_CONTROL_TIME_MINUTES,RED_SIGNIFICANT_STRIKES_LANDED,RED_SIGNIFICANT_STRIKES_ATTEMPTED,BLUE_SIGNIFICANT_STRIKES_LANDED,BLUE_SIGNIFICANT_STRIKES_ATTEMPTED,RED_SIGNIFICANT_STRIKES_HEAD_LANDED,RED_SIGNIFICANT_STRIKES_HEAD_ATTEMPTED,BLUE_SIGNIFICANT_STRIKES_HEAD_LANDED,BLUE_SIGNIFICANT_STRIKES_HEAD_ATTEMPTED,RED_SIGNIFICANT_STRIKES_BODY_LANDED,RED_SIGNIFICANT_STRIKES_BODY_ATTEMPTED,BLUE_SIGNIFICANT_STRIKES_BODY_LANDED,BLUE_SIGNIFICANT_STRIKES_BODY_ATTEMPTED,RED_SIGNIFICANT_STRIKES_LEG_LANDED,RED_SIGNIFICANT_STRIKES_LEG_ATTEMPTED,BLUE_SIGNIFICANT_STRIKES_LEG_LANDED,BLUE_SIGNIFICANT_STRIKES_LEG_ATTEMPTED,RED_SIGNIFICANT_STRIKES_DISTANCE_LANDED,RED_SIGNIFICANT_STRIKES_DISTANCE_ATTEMPTED,BLUE_SIGNIFICANT_STRIKES_DISTANCE_LANDED,BLUE_SIGNIFICANT_STRIKES_DISTANCE_ATTEMPTED,RED_SIGNIFICANT_STRIKES_CLINCH_LANDED,RED_SIGNIFICANT_STRIKES_CLINCH_ATTEMPTED,BLUE_SIGNIFICANT_STRIKES_CLINCH_LANDED,BLUE_SIGNIFICANT_STRIKES_CLINCH_ATTEMPTED,RED_SIGNIFICANT_STRIKES_GROUND_LANDED,RED_SIGNIFICANT_STRIKES_GROUND_ATTEMPTED,BLUE_SIGNIFICANT_STRIKES_GROUND_LANDED,BLUE_SIGNIFICANT_STRIKES_GROUND_ATTEMPTED,RED_HEIGHT_INCHES,RED_REACH_INCHES,RED_FIGHTING_STANCE,RED_DATE_OF_BIRTH,BLUE_HEIGHT_INCHES,BLUE_REACH_INCHES,BLUE_FIGHTING_STANCE,BLUE_DATE_OF_BIRTH,temp temp
130,3e813ba25895913d,96eff1a628adcc7f,UFC 12: Judgement Day,1997-02-07,"Dothan, Alabama, USA",21b8a0f5c231096f,c670aa48827d6be6,Mark Coleman,Dan Severn,W,L,UFC Heavyweight Title Bout,Submission,Scarf Hold From Side Control,1,2.950000,1 Rnd + 2OT (15-3-3),2.950000,0.0,0.0,12.0,21.0,16.0,18.0,0.0,0.0,0.0,3.0,2.0,0.0,0.0,0.0,,,4.0,12.0,2.0,4.0,4.0,12.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,8.0,2.0,4.0,0.0,0.0,0.0,0.0,2.0,4.0,0.0,0.0,73.0,75.0,Orthodox,1964-12-20,74.0,,Southpaw,1958-06-08,1
148,1a5171802bf3bce6,dc950d59dc590aca,UFC 14: Showdown,1997-07-27,"Birmingham, Alabama, USA",33e33d51f289d2a1,21b8a0f5c231096f,Maurice Smith,Mark Coleman,W,L,UFC Heavyweight Title Bout,Decision - Unanimous,,3,3.000000,1 Rnd + 2OT (15-3-3),21.000000,0.0,0.0,129.0,139.0,47.0,80.0,0.0,0.0,2.0,7.0,0.0,2.0,1.0,0.0,,,55.0,65.0,19.0,43.0,34.0,44.0,19.0,43.0,1.0,1.0,0.0,0.0,20.0,20.0,0.0,0.0,36.0,44.0,2.0,6.0,0.0,0.0,0.0,0.0,19.0,21.0,17.0,37.0,74.0,,Orthodox,1961-12-13,73.0,75.0,Orthodox,1964-12-20,1
155,b3cafb4e6e1b2ad0,07a18ae55dfc3cd9,UFC 15: Collision Course,1997-10-17,"Bay St. Louis, Mississippi, USA",33e33d51f289d2a1,b361180739bed4b0,Maurice Smith,David Abbott,W,L,UFC Heavyweight Title Bout,Submission,Other \n Fatigue,1,8.133333,1 Rnd + 2OT (15-3-3),8.133333,0.0,1.0,38.0,39.0,17.0,39.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,,,12.0,13.0,5.0,22.0,7.0,8.0,5.0,22.0,0.0,0.0,0.0,0.0,5.0,5.0,0.0,0.0,5.0,5.0,1.0,8.0,1.0,1.0,2.0,3.0,6.0,7.0,2.0,11.0,74.0,,Orthodox,1961-12-13,72.0,,Switch,,1
159,e0c636beed345e1d,29f935654825331b,UFC - Ultimate Japan,1997-12-21,"Yokohama, Kanagawa, Japan",fcaae0385b514f11,13b2f59210dda9cc,Frank Shamrock,Kevin Jackson,W,L,UFC Light Heavyweight Title Bout,Submission,Armbar From Bottom Guard,1,0.266667,1 Rnd + 2OT (15-3-3),0.266667,0.0,0.0,1.0,1.0,2.0,3.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,,,1.0,1.0,2.0,3.0,1.0,1.0,1.0,2.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,70.0,,Orthodox,1972-12-08,70.0,,Switch,,1
162,748657d9ba9d7c71,29f935654825331b,UFC - Ultimate Japan,1997-12-21,"Yokohama, Kanagawa, Japan",0aa92558424ced9e,33e33d51f289d2a1,Randy Couture,Maurice Smith,W,L,UFC Heavyweight Title Bout,Decision - Majority,,3,3.000000,1 Rnd + 2OT (15-3-3),21.000000,0.0,0.0,54.0,66.0,18.0,26.0,3.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,,,19.0,31.0,9.0,17.0,19.0,31.0,5.0,11.0,0.0,0.0,1.0,2.0,0.0,0.0,3.0,4.0,0.0,0.0,7.0,14.0,0.0,0.0,1.0,2.0,19.0,31.0,1.0,1.0,73.0,75.0,Orthodox,1963-06-22,74.0,,Orthodox,1961-12-13,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7079,fe143cb868adfded,3dc3022232b79c7a,UFC 287: Pereira vs. Adesanya 2,2023-04-08,"Miami, Florida, USA",e5549c82bfb5582d,1338e2c7480bdf9e,Alex Pereira,Israel Adesanya,L,W,UFC Middleweight Title Bout,KO/TKO,Punch to Head At Distance,2,4.350000,5 Rnd (5-5-5-5-5),9.350000,0.0,1.0,49.0,85.0,41.0,96.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.050000,49.0,85.0,41.0,96.0,8.0,28.0,18.0,61.0,15.0,21.0,14.0,24.0,26.0,36.0,9.0,11.0,48.0,83.0,40.0,93.0,1.0,2.0,0.0,2.0,0.0,0.0,1.0,1.0,76.0,79.0,Orthodox,1987-07-07,76.0,80.0,Switch,1989-07-22,1
7127,e3aad51099a23ba4,3c6976f8182d9527,UFC 288: Sterling vs. Cejudo,2023-05-06,"Newark, New Jersey, USA",cb696ebfb6598724,056c493bbd76a918,Aljamain Sterling,Henry Cejudo,W,L,UFC Bantamweight Title Bout,Decision - Split,Derek Cleary \n\n \n \n ...,5,5.000000,5 Rnd (5-5-5-5-5),25.000000,0.0,0.0,186.0,261.0,143.0,219.0,4.0,15.0,3.0,8.0,0.0,0.0,0.0,0.0,4.000000,5.100000,135.0,210.0,99.0,174.0,51.0,112.0,35.0,99.0,50.0,60.0,30.0,37.0,34.0,38.0,34.0,38.0,103.0,174.0,82.0,152.0,24.0,28.0,16.0,21.0,8.0,8.0,1.0,1.0,67.0,71.0,Orthodox,1989-07-31,64.0,64.0,Orthodox,1987-02-09,1
7174,69bc7ca8ce831731,b6c6d1731ff00eeb,UFC 289: Nunes vs. Aldana,2023-06-10,"Vancouver, British Columbia, Canada",80fa8218c99f9c58,578ef12674df1e6a,Amanda Nunes,Irene Aldana,W,L,UFC Women's Bantamweight Title Bout,Decision - Unanimous,Sal D'amato \n\n \n \n ...,5,5.000000,5 Rnd (5-5-5-5-5),25.000000,0.0,0.0,196.0,323.0,57.0,159.0,6.0,13.0,0.0,0.0,0.0,0.0,0.0,0.0,7.000000,0.166667,142.0,267.0,41.0,143.0,97.0,198.0,25.0,118.0,28.0,49.0,3.0,7.0,17.0,20.0,13.0,18.0,115.0,234.0,36.0,133.0,5.0,8.0,3.0,3.0,22.0,25.0,2.0,7.0,68.0,69.0,Orthodox,1988-05-30,69.0,68.0,Orthodox,1988-03-26,1
7223,0f24f3bc1b39ea45,6085ceb59087514b,UFC 290: Volkanovski vs. Rodriguez,2023-07-08,"Las Vegas, Nevada, USA",792be9a24df82ed6,a0f0004aadf10b71,Brandon Moreno,Alexandre Pantoja,L,W,UFC Flyweight Title Bout,Decision - Split,Ben Cartlidge \n\n \n \n...,5,5.000000,5 Rnd (5-5-5-5-5),25.000000,0.0,1.0,167.0,265.0,161.0,323.0,2.0,4.0,6.0,11.0,0.0,0.0,3.0,0.0,4.016667,8.433333,147.0,245.0,129.0,274.0,133.0,231.0,106.0,247.0,13.0,13.0,20.0,22.0,1.0,1.0,3.0,5.0,128.0,220.0,104.0,239.0,12.0,14.0,13.0,21.0,7.0,11.0,12.0,14.0,67.0,70.0,Orthodox,1993-12-07,65.0,67.0,Orthodox,1990-04-16,1


In [None]:
# cumulative
# rolling 3 fight window
# lag 1 fight

# accuracy: f_landed / f_attempted
# avg landed per 15 min: f_landed / total_time * 15
# defense: (o_attempted - o_landed) / o_attempted
# avg absorbed per 15 min: o_landed / total_time * 15

# win pct (count draws as 0.5)
# winning and losing streaks
# finish pct
# wins by ko/tko, sub, dec

# avg landed per 15 min / avg absorbed per 15 min
# accuracy * defense
# total landed - total absorbed
# ground_strikes / control_time * 15
# (ground_strikes / control_time) / (takedowns_landed + reversals)
# control_time / total_time
# control_time / (takedowns_landed + reversals)
# (ko/tko_wins + knockdowns) / significant strikes landed
# head / total sig, body / total sig, leg / total sig, distance / total sig, clinch / total sig, ground / total sig
# number of days since last fight
# cte score: age * head_absorbed * (1 + ko/tko_losses)

# custom elo rating system
# elo score, elo probability

# mins and maxes
# deltas in stats between bouts

# title fight indicator
# champ indicator
# number of title fights
# number of 5-round fights participated in
# title wins

# finally, ratios/differences in each stat between the two fighters

# at the end, drop all number of attempted columns

### Coalesce the features into a single dataframe that will serve as input

In [None]:
# Keep bouts dated on or after 2010-03-21 whose red-corner outcome is a win or a loss
on_or_after_cutoff = pd.to_datetime(df["DATE"]) >= pd.to_datetime("2010-03-21")
red_won_or_lost = df["RED_OUTCOME"].isin(["W", "L"])
data = pd.DataFrame({"BOUT_ID": df.loc[on_or_after_cutoff & red_won_or_lost, "BOUT_ID"]})

# Map back features

# Reset index
data = data.reset_index(drop=True)
data

In [None]:
# Split into train and test sets
split_index = int(len(data) * 0.8)
train = data[:split_index]
test = data[split_index:]

X_train, y_train = train.drop("TARGET", axis=1), train["TARGET"]
X_test, y_test = test.drop("TARGET", axis=1), test["TARGET"]
len(test)

## AutoML Benchmarking

In [None]:
import evalml
from evalml.preprocessing.data_splitters import TimeSeriesSplit

In [None]:
# Configure the AutoML benchmark: binary classification scored on log loss, with the
# test split passed as the holdout set and ensembling enabled.
# NOTE(review): TimeSeriesSplit is presumably chosen so folds respect row order
# rather than shuffling — confirm against the evalml docs.
cv_splitter = TimeSeriesSplit(n_splits=5)
automl = evalml.AutoMLSearch(
    X_train=X_train,
    y_train=y_train,
    X_holdout=X_test,
    y_holdout=y_test,
    problem_type="binary",
    objective="log loss binary",
    data_splitter=cv_splitter,
    ensembling=True,
)