In [190]:
pip install catboost




In [191]:
import pandas as pd
from catboost import CatBoostClassifier
from catboost import Pool

def add_event_info(df_result: pd.DataFrame, df_events: pd.DataFrame,
                   event_col: str = 'EVENT',
                   date_col: str = 'DATE',
                   location_col: str = 'LOCATION') -> pd.DataFrame:
    """
    Attach each event's date and location to ``df_result`` by matching on ``event_col``.

    Parameters:
    - df_result: main table; must contain ``event_col``.
    - df_events: event table with ``event_col``, ``date_col`` and ``location_col``.
    - event_col: name of the join column (default 'EVENT').
    - date_col: name of the date column in ``df_events`` (default 'DATE').
    - location_col: name of the location column in ``df_events`` (default 'LOCATION').

    Returns:
    - A copy of ``df_result`` with ``date_col`` (parsed to datetime, unparseable
      values become NaT) and ``location_col`` appended. Rows whose event is not
      found keep NaT / NaN. The row count and row order of ``df_result`` are
      preserved. Neither input frame is modified.
    """
    df_result = df_result.copy()
    df_events = df_events.copy()

    # Event names are compared as whitespace-trimmed strings in both tables.
    df_result[event_col] = df_result[event_col].astype(str).str.strip()
    df_events[event_col] = df_events[event_col].astype(str).str.strip()

    df_events[date_col] = pd.to_datetime(df_events[date_col], errors='coerce')

    # Keep one row per event so the left merge can never multiply result rows.
    event_lookup = (
        df_events[[event_col, date_col, location_col]]
        .drop_duplicates(subset=event_col, keep='first')
    )

    # If the caller's frame already carries date/location columns, replace them
    # instead of letting merge() emit '_x' / '_y' suffixed duplicates.
    stale = [c for c in (date_col, location_col) if c in df_result.columns]
    if stale:
        df_result = df_result.drop(columns=stale)

    return df_result.merge(event_lookup, on=event_col, how='left')

In [192]:
def split_fighters(df, bout_column='BOUT'):
    """
    Split a "<fighter 1> vs. <fighter 2>" bout label into two name columns.

    Parameters:
    - df: frame containing ``bout_column``.
    - bout_column: name of the column holding the bout label (default 'BOUT').

    Returns:
    - A copy of ``df`` with ``bout_column`` cast to str and two new columns,
      'Fighter_1' and 'Fighter_2'. Names carry no leading/trailing whitespace.
      When a label has no " vs. " separator, 'Fighter_2' is missing (NaN/None)
      for that row. The caller's frame is not modified.
    """
    df = df.copy()

    # Make sure the values are strings (note: NaN becomes the string 'nan').
    df[bout_column] = df[bout_column].astype(str)

    # Trim the outer whitespace, then split on 'vs.' surrounded by whitespace.
    fighters = df[bout_column].str.strip().str.split(r'\s+vs\.\s+', expand=True)

    # Guarantee both columns exist even if no label contained the separator.
    fighters = fighters.reindex(columns=[0, 1])

    df['Fighter_1'] = fighters[0]
    df['Fighter_2'] = fighters[1]

    return df

In [193]:
import re
import numpy as np

def height_to_inches(height_str):
    """
    Convert a height value to inches.

    Accepts feet/inches text such as ``5' 11"`` or ``6'`` (returned as an int
    number of inches) or anything ``float()`` can parse, e.g. ``"70"`` or ``70``
    (returned as a float). Returns None for missing values and for input that
    cannot be interpreted.
    """
    if pd.isna(height_str):
        return None

    if isinstance(height_str, str):
        height_str = height_str.strip()
        match = re.match(r"(\d+)' ?(\d+)?\"?", height_str)
        if match:
            feet = int(match.group(1))
            # The inches part is optional: "6'" means 6 ft 0 in.
            inches = int(match.group(2)) if match.group(2) else 0
            return feet * 12 + inches

    # Not feet/inches text: fall back to a plain number. Only conversion
    # failures are treated as "unparseable"; anything else propagates.
    try:
        return float(height_str)
    except (TypeError, ValueError):
        return None

def add_diff_features_to_main(df_main, df_tott,
                              main_fighter1_col='Fighter_1',
                              main_fighter2_col='Fighter_2',
                              tots_name_col='FIGHTER',
                              tots_height_col='HEIGHT',
                              tots_dob_col='DOB'):
    """
    Add per-fighter height / date-of-birth columns and their differences.

    Fighter names in both frames are trimmed and lower-cased before matching.
    Heights from ``df_tott`` are converted with ``height_to_inches``. When a
    name occurs more than once in ``df_tott`` the last occurrence is used.

    Returns a copy of ``df_main`` (with the normalised name columns) extended by
    'Fighter_1_HEIGHT', 'Fighter_2_HEIGHT', 'Fighter_1_DOB', 'Fighter_2_DOB',
    'diff_HEIGHT' (fighter 1 minus fighter 2) and 'diff_DOB_days' (fighter 1
    DOB minus fighter 2 DOB, in days). Inputs are not modified.
    """
    bouts = df_main.copy()
    profiles = df_tott.copy()

    # Normalise the name columns on both sides of the lookup.
    for name_col in (main_fighter1_col, main_fighter2_col):
        bouts[name_col] = bouts[name_col].str.strip().str.lower()
    profiles[tots_name_col] = profiles[tots_name_col].str.strip().str.lower()

    # Heights become numbers (inches) before they are looked up.
    profiles[tots_height_col] = profiles[tots_height_col].apply(height_to_inches)

    # name -> attribute dictionaries
    by_name = profiles.set_index(tots_name_col)
    height_by_name = by_name[tots_height_col].to_dict()
    dob_by_name = by_name[tots_dob_col].to_dict()

    bouts['Fighter_1_HEIGHT'] = bouts[main_fighter1_col].map(height_by_name)
    bouts['Fighter_2_HEIGHT'] = bouts[main_fighter2_col].map(height_by_name)

    # Look up the DOBs and parse them to datetime (unparseable -> NaT).
    bouts['Fighter_1_DOB'] = pd.to_datetime(
        bouts[main_fighter1_col].map(dob_by_name), errors='coerce')
    bouts['Fighter_2_DOB'] = pd.to_datetime(
        bouts[main_fighter2_col].map(dob_by_name), errors='coerce')

    # Signed differences (no abs): fighter 1 minus fighter 2.
    bouts['diff_HEIGHT'] = bouts['Fighter_1_HEIGHT'] - bouts['Fighter_2_HEIGHT']
    dob_gap = bouts['Fighter_1_DOB'] - bouts['Fighter_2_DOB']
    bouts['diff_DOB_days'] = dob_gap.dt.days

    # The intermediate per-fighter columns are intentionally kept.
    return bouts

In [194]:
import pandas as pd
import numpy as np
import re

def aggregate_fighter_stats_preserve_order(df_stats):
    """
    Collapse per-round fight statistics into one row per fighter per bout.

    Expects the columns 'ROUND', 'EVENT', 'BOUT', 'FIGHTER', 'DATE' and
    'LOCATION'. All statistic columns are optional and are only processed when
    present:

    - 'X of Y' columns (HEAD, BODY, LEG, DISTANCE, CLINCH, GROUND, SIG.STR.,
      TOTAL STR., TD) are split into '<col>_landed' / '<col>_total' and summed.
    - 'SIG.STR. %' and 'TD %' are converted to fractions in [0, 1] (missing or
      unparseable values count as 0) and averaged.
    - 'KD', 'SUB.ATT', 'REV.', 'CTRL' are coerced to integers (non-numeric
      values count as 0) and summed.

    The result also contains 'rounds_count' and, for every '<col>_landed' /
    '<col>_total' column except the TD ones, '<col>_first_last_ratio' = last
    round value / first round value (1 when the ratio is undefined or infinite).
    Groups appear in order of first appearance. The input is not modified.
    """
    df = df_stats.copy()

    # Numeric round number, e.g. 'Round 2' -> 2.0
    df['Round_num'] = df['ROUND'].str.extract(r'(\d+)').astype(float)

    # 'X of Y' columns to split into landed / attempted counts.
    cols_to_split = ['HEAD', 'BODY', 'LEG', 'DISTANCE', 'CLINCH', 'GROUND',
                     'SIG.STR.', 'TOTAL STR.', 'TD']
    for col in cols_to_split:
        if col in df.columns:
            split_vals = df[col].str.split(' of ', expand=True)
            df[col + '_landed'] = pd.to_numeric(split_vals[0], errors='coerce').fillna(0).astype(np.uint16)
            df[col + '_total'] = pd.to_numeric(split_vals[1], errors='coerce').fillna(0).astype(np.uint16)

    # Percentages -> fractions between 0 and 1 ('---' and other junk become 0).
    pct_cols = [c for c in ['SIG.STR. %', 'TD %'] if c in df.columns]
    for pct_col in pct_cols:
        as_number = pd.to_numeric(
            df[pct_col].str.replace('%', '', regex=False), errors='coerce')
        df[pct_col] = (as_number.fillna(0) / 100).astype(np.float32)

    # Other counters. Only the ones actually present are aggregated; listing a
    # missing column in the aggregation below would raise a KeyError.
    # NOTE(review): CTRL goes through pd.to_numeric like the other counters, so
    # a clock-style value (e.g. 'm:ss') would be counted as 0 — confirm format.
    numeric_cols = [c for c in ['KD', 'SUB.ATT', 'REV.', 'CTRL'] if c in df.columns]
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0).astype(np.uint16)

    group_cols = ['EVENT', 'BOUT', 'FIGHTER', 'DATE', 'LOCATION']

    sum_cols = [c for c in df.columns
                if c.endswith('_landed') or c.endswith('_total')] + numeric_cols

    # Sums, means, and the number of rounds per group.
    agg_dict = {col: 'sum' for col in sum_cols}
    agg_dict.update({col: 'mean' for col in pct_cols})
    agg_dict['Round_num'] = 'count'

    # sort=False keeps groups in order of first appearance.
    df_agg = df.groupby(group_cols, sort=False).agg(agg_dict).reset_index()
    df_agg = df_agg.rename(columns={'Round_num': 'rounds_count'})

    # Last-round / first-round ratio for the summed columns (row order within
    # each group decides which round is "first" and "last").
    first_vals = df.groupby(group_cols)[sum_cols].first()
    last_vals = df.groupby(group_cols)[sum_cols].last()
    ratio_df = (last_vals / first_vals).replace([np.inf, -np.inf], np.nan).fillna(1).reset_index()
    ratio_df = ratio_df.rename(columns={c: c + '_first_last_ratio' for c in sum_cols})

    df_final = pd.merge(df_agg, ratio_df, on=group_cols, how='left')

    # Ratios that are not kept as features.
    columns_to_drop = [
        'TD_landed_first_last_ratio',
        'TD_total_first_last_ratio',
        'KD_first_last_ratio',
        'SUB.ATT_first_last_ratio',
        'REV._first_last_ratio',
        'CTRL_first_last_ratio',
    ]
    return df_final.drop(columns=columns_to_drop, errors='ignore')

In [195]:
def add_historical_stats_rowwise_fast(df_stat_fighter):
    """
    Add, for every numeric column, statistics taken from the fighter's earlier rows.

    Rows are ordered per fighter by 'DATE'; for each numeric column ``c``
    (except 'BOUT_ORDER', 'ROUND', 'WINNER') three columns are appended:

    - ``c_prev``:      value from the fighter's previous row,
    - ``c_last3_avg``: mean over up to the three previous rows,
    - ``c_all_avg``:   mean over all previous rows.

    A fighter's first row gets NaN in all three. The result is returned in the
    original row order with a fresh 0..n-1 index and 'DATE' parsed to datetime.
    The input frame is not modified.
    """
    import pandas as pd
    import numpy as np

    work = df_stat_fighter.copy()
    work['original_index'] = work.index  # remembered so the order can be restored

    work['DATE'] = pd.to_datetime(work['DATE'])
    work = work.sort_values(['FIGHTER', 'DATE']).reset_index(drop=True)

    excluded = ['BOUT_ORDER', 'ROUND', 'WINNER', 'original_index']
    stat_cols = [name for name in work.select_dtypes(include='number').columns.tolist()
                 if name not in excluded]

    # One derivation per output suffix; each only looks at earlier rows because
    # of the shift(1).
    derivations = {
        '_prev': lambda block: block.shift(1).astype(float),
        '_last3_avg': lambda block: block.rolling(window=3, min_periods=1).mean().shift(1),
        '_all_avg': lambda block: block.expanding(min_periods=1).mean().shift(1),
    }
    history = {suffix: pd.DataFrame(index=work.index) for suffix in derivations}

    for _, fighter_rows in work.groupby('FIGHTER', sort=False):
        row_ids = fighter_rows.index
        stats = fighter_rows[stat_cols]
        for suffix, derive in derivations.items():
            derived = derive(stats)
            history[suffix].loc[row_ids, derived.columns + suffix] = derived.values

    combined = pd.concat(
        [work, history['_prev'], history['_last3_avg'], history['_all_avg']],
        axis=1,
    )
    combined = combined.sort_values('original_index')
    combined = combined.drop(columns=['original_index'])
    return combined.reset_index(drop=True)



In [196]:
def merge_fighter_features_with_sign_check(df_main, df_features, feature_cols, fighter1_col='Fighter_1', fighter2_col='Fighter_2', date_col='DATE'):
    """
    Append fighter-1-minus-fighter-2 differences of per-fighter features.

    Parameters:
    - df_main: one row per bout with ``fighter1_col``, ``fighter2_col`` and ``date_col``.
    - df_features: one row per fighter per date with 'FIGHTER', 'DATE' and ``feature_cols``.
    - feature_cols: feature columns to difference; values are coerced to numbers
      (non-numeric values become NaN).

    Features are looked up by (trimmed, lower-cased fighter name, date). When a
    (name, date) pair occurs more than once the last row wins. If either fighter
    has no feature row for the bout's date, every difference for that bout is NaN.

    Returns ``df_main`` (index reset) with one float column 'diff_<col>' per
    feature column. Neither input frame is modified.
    """
    feature_cols = list(feature_cols)

    # Work on a copy: the caller's feature table must keep its original values.
    features = df_features.copy()
    features['DATE'] = pd.to_datetime(features['DATE'])
    features[feature_cols] = features[feature_cols].apply(pd.to_numeric, errors='coerce')

    def _name_key(name):
        # Same normalisation on both sides of the lookup.
        return str(name).strip().lower()

    feature_matrix = features[feature_cols].to_numpy(dtype=float, na_value=np.nan)
    feature_dict = {}
    for fighter, date, values in zip(features['FIGHTER'], features['DATE'], feature_matrix):
        if pd.isna(fighter):
            continue  # a nameless row cannot be matched to any bout
        feature_dict[(_name_key(fighter), date)] = values

    no_match = np.full(len(feature_cols), np.nan)
    diffs = []
    for f1, f2, raw_date in zip(df_main[fighter1_col], df_main[fighter2_col], df_main[date_col]):
        date = pd.to_datetime(raw_date)
        f1_feat = feature_dict.get((_name_key(f1), date))
        f2_feat = feature_dict.get((_name_key(f2), date))

        if (f1_feat is not None) and (f2_feat is not None):
            diffs.append(f1_feat - f2_feat)
        else:
            diffs.append(no_match)

    diffs_df = pd.DataFrame(diffs, columns=[f'diff_{c}' for c in feature_cols])
    return pd.concat([df_main.reset_index(drop=True), diffs_df], axis=1)

In [197]:
def add_prev_fight_results_and_diff(df_new, df_full,
                                   fighter1_col='Fighter_1',
                                   fighter2_col='Fighter_2',
                                   date_col='EVENT_DATE',
                                   outcome_col='OUTCOME',
                                   method_col='METHOD'):
    """
    Add each fighter's most recent earlier result to every bout in ``df_new``.

    For both fighters of a bout, the latest fight in ``df_full`` dated strictly
    before the bout is looked up (names compared trimmed and lower-cased):

    - '<Fighter_n>_prev_result': 1 for a win, -1 for a loss, None when the
      outcome is neither 'W/L' nor 'L/W' or when there is no earlier fight.
    - '<Fighter_n>_prev_KO': True when that fight was a win whose method text
      contains 'KO'; False otherwise; None when there is no earlier fight.
    - 'Prev_Result_Diff': fighter 1 result minus fighter 2 result, None when
      either is unknown.

    Returns a copy of ``df_new`` (with normalised names and parsed dates) plus
    these columns. Inputs are not modified.
    """
    def _clean(name):
        return "" if pd.isna(name) else str(name).strip().lower()

    history = df_full.copy()
    upcoming = df_new.copy()

    for frame in (history, upcoming):
        frame[fighter1_col] = frame[fighter1_col].apply(_clean)
        frame[fighter2_col] = frame[fighter2_col].apply(_clean)
        frame[date_col] = pd.to_datetime(frame[date_col])

    def _last_result(fighter, cutoff):
        # Fights involving this fighter that happened before the cutoff, newest first.
        took_part = (history[fighter1_col] == fighter) | (history[fighter2_col] == fighter)
        earlier = history[took_part & (history[date_col] < cutoff)]
        earlier = earlier.sort_values(by=date_col, ascending=False)
        if earlier.empty:
            return None, None

        latest = earlier.iloc[0]
        outcome = latest[outcome_col]
        method = latest[method_col]

        # 'W/L' means the first-listed fighter won, 'L/W' the second-listed one.
        if outcome == 'W/L':
            result = 1 if latest[fighter1_col] == fighter else -1
        elif outcome == 'L/W':
            result = 1 if latest[fighter2_col] == fighter else -1
        else:
            result = None

        # 'TKO' contains 'KO', so a single substring test covers both.
        ko_win = result == 1 and isinstance(method, str) and 'KO' in method.upper()
        return result, ko_win

    results_1, kos_1, results_2, kos_2 = [], [], [], []
    for f1, f2, fight_date in zip(upcoming[fighter1_col], upcoming[fighter2_col], upcoming[date_col]):
        r1, k1 = _last_result(f1, fight_date)
        r2, k2 = _last_result(f2, fight_date)
        results_1.append(r1)
        kos_1.append(k1)
        results_2.append(r2)
        kos_2.append(k2)

    upcoming['Fighter_1_prev_result'] = results_1
    upcoming['Fighter_1_prev_KO'] = kos_1
    upcoming['Fighter_2_prev_result'] = results_2
    upcoming['Fighter_2_prev_KO'] = kos_2

    upcoming['Prev_Result_Diff'] = [
        None if (a is None or b is None) else a - b
        for a, b in zip(results_1, results_2)
    ]

    return upcoming

In [198]:
def add_stance_diff(df_input, df_tott,
                    fighter1_col='Fighter_1',
                    fighter2_col='Fighter_2',
                    fighter_tott_col='FIGHTER',
                    stance_col='STANCE'):
    """
    Add a 'STANCE_DIFF' label of the form "<stance 1> - <stance 2>" to each bout.

    Stances come from ``df_tott`` (trimmed, lower-cased) and are matched on the
    trimmed, lower-cased fighter name. A fighter that is missing, not a string,
    or not present in ``df_tott`` gets the stance 'unknown'. When a name occurs
    more than once in ``df_tott`` the last occurrence is used.

    Returns a copy of ``df_input`` with the new column; inputs are not modified.
    """
    def _norm(name):
        return name.lower().strip() if isinstance(name, str) else ""

    df_out = df_input.copy()

    # name -> stance lookup. Building a plain dict makes duplicate names resolve
    # to a single stance instead of a multi-row selection.
    names = df_tott[fighter_tott_col].apply(_norm)
    stances = df_tott[stance_col].str.strip().str.lower()
    stance_lookup = dict(zip(names, stances))
    # Rows without a usable name must never match bouts without a usable name.
    stance_lookup.pop("", None)

    first = df_out[fighter1_col].apply(_norm)
    second = df_out[fighter2_col].apply(_norm)
    df_out['STANCE_DIFF'] = [
        f"{stance_lookup.get(a, 'unknown')} - {stance_lookup.get(b, 'unknown')}"
        for a, b in zip(first, second)
    ]

    return df_out

In [199]:
import pandas as pd
import numpy as np

def fill_missing_values(df):
    """
    Fill missing values column by column and return a new frame.

    A value counts as missing when it is NaN/NaT or the text 'unknown' in any
    letter case. For every column that has missing values:

    - datetime columns: NaT is replaced by the mean timestamp (dtype is kept);
    - columns whose known values are all numeric: the column is converted to
      numbers and missing entries get the mean of the known values;
    - anything else: missing entries get the most frequent known value.

    Columns with no known value at all are left untouched. The input frame is
    not modified.
    """
    df = df.copy()
    for col in df.columns:
        series = df[col]
        is_unknown = series.astype(str).str.lower().eq('unknown')

        if not (series.isna().any() or is_unknown.any()):
            continue

        # Known values only; these decide what to fill with.
        known = series[~is_unknown].dropna()
        if known.empty:
            continue  # nothing to infer a fill value from

        # Datetimes would otherwise be turned into integer nanoseconds by
        # pd.to_numeric below, so handle them first and keep their dtype.
        if pd.api.types.is_datetime64_any_dtype(series):
            df[col] = series.fillna(known.mean())
            continue

        try:
            fill_value = pd.to_numeric(known).mean()
        except (ValueError, TypeError):
            # Not numeric: blank out every spelling of 'unknown' (the same
            # case-insensitive test used for detection), then use the mode.
            df[col] = series.mask(is_unknown).fillna(known.mode().iloc[0])
        else:
            df[col] = pd.to_numeric(series, errors='coerce').fillna(fill_value)

    return df


In [206]:
# --- Load the source tables (absolute /content/... paths) --------------------
df_stat_fighter = pd.read_csv("/content/ufc_fight_stats.csv")
#df_main = pd.read_csv("/content/ufc_fight_details.csv")
df_result = pd.read_csv("/content/ufc_fight_results.csv")
df_tott = pd.read_csv("/content/ufc_fighter_tott.csv")
df_event = pd.read_csv("/content/ufc_event_details.csv")
df_result4 = pd.read_csv("/content/df_result4.csv")

# --- The single bout to score (data check / prediction vector) ---------------
# The BOUT label must not carry stray leading/trailing whitespace: further down
# the fighter names extracted from it are compared to FIGHTER by exact equality.
query_bout = {
    "EVENT": ["UFC 318: Holloway vs. Poirier 3"],
    "BOUT": ["Ilia Topuria vs. Islam Makhachev"],
    "OUTCOME": ["W/L"],
    "WEIGHTCLASS": ["Lightweight Bout"],
    "METHOD": ["Decision - Unanimous"],
    "ROUND": [3],
    "TIME": ["5:00"],
    "TIME FORMAT": ["3 Rnd (5-5-5)"],
    "REFEREE": ["Kerry Hatley"],
    "DETAILS": ["Derek Cleary 28 - 29.Eric Colon 28 - 29.Junich..."],
    "URL": ["http://ufcstats.com/fight-details/a95b03e9b5a9"]
}

# --- Fighter table: '--' marks a missing value -------------------------------
df_tott = df_tott.replace("--", np.nan)

# Missing STANCE / HEIGHT / DOB are filled with the most frequent value of the
# column (heights are still raw strings at this point).
most_common_stance = df_tott['STANCE'].mode().iloc[0]
most_common_height = df_tott['HEIGHT'].mode().iloc[0]
most_common_dob = df_tott['DOB'].mode().iloc[0]
df_tott['STANCE'] = df_tott['STANCE'].fillna(most_common_stance)
df_tott['HEIGHT'] = df_tott['HEIGHT'].fillna(most_common_height)
df_tott['DOB'] = df_tott['DOB'].fillna(most_common_dob)

# One-row frame that the preprocessing steps below operate on.
data = pd.DataFrame(query_bout)
# Preprocessing: turn the one-row query frame `data` into the feature row `data7`.
# Step 1: attach the event's date and location.
data1 = add_event_info(data, df_event)

# NOTE(review): df_result3 is not referenced again in this cell — confirm it is
# needed; it merges the whole stats table with the events.
df_result3 = add_event_info(df_stat_fighter, df_event)

# Step 2: split the bout label into the two fighter names.
data2 = split_fighters(data1)

# Step 3: height / date-of-birth features and their differences.
data3 = add_diff_features_to_main(data2, df_tott)



# Names of the two fighters and a bout label rebuilt from them.
fighter_1 = data2.loc[0, 'Fighter_1']
fighter_2 = data2.loc[0, 'Fighter_2']

bout_name = f"{fighter_1} vs. {fighter_2}"

# Template row: the first row of the stats table. Everything except FIGHTER
# and BOUT (including EVENT and all statistics) is inherited from it.
# NOTE(review): the date later attached to the synthetic rows therefore comes
# from this row's EVENT, not from the query's EVENT — confirm they coincide,
# otherwise the (fighter, date) feature lookup below cannot match.
base_row = df_stat_fighter.iloc[0]

# Two synthetic rows, one per fighter.
row_1 = base_row.copy()
row_1['FIGHTER'] = fighter_1
row_1['BOUT'] = bout_name


row_2 = base_row.copy()
row_2['FIGHTER'] = fighter_2
row_2['BOUT'] = bout_name
#row_2['Fighter_1'] = fighter_1
#row_2['Fighter_2'] = fighter_2

# Frame holding the two synthetic rows.
df_new_rows = pd.DataFrame([row_1, row_2])

# Append them to the stats table.
df_stat_fighter_extended = pd.concat([df_stat_fighter, df_new_rows], ignore_index=True)

# Keep only the rows of the two fighters. The comparison is exact string
# equality, so the names must match FIGHTER character for character.
filtered_fighter_1 = df_stat_fighter_extended[(df_stat_fighter_extended['FIGHTER'] == fighter_1)|(df_stat_fighter_extended['FIGHTER'] == fighter_2)]

# Attach event date/location, then collapse rounds to one row per fighter per bout.
filtered_fighter_1 = add_event_info(filtered_fighter_1, df_event)

filtered_fighter_2 = aggregate_fighter_stats_preserve_order(filtered_fighter_1)

# Per-fighter historical statistics, then fill whatever is still missing.
filtered_fighter_3 = add_historical_stats_rowwise_fast(filtered_fighter_2)
filtered_fighter_3 = fill_missing_values(filtered_fighter_3)






# Every column except the lookup keys is treated as a feature.
feature_cols = [col for col in filtered_fighter_3.columns if col not in ['FIGHTER', 'DATE']]



# Fighter-1-minus-fighter-2 differences of those features.
data4 = merge_fighter_features_with_sign_check(data3, filtered_fighter_3, feature_cols)
#data5 = data4.copy()


# Most recent earlier result of each fighter, taken from df_result4.
data6 = add_prev_fight_results_and_diff(data4, df_result4, date_col='DATE')
# Step 9: stance pairing label (STANCE_DIFF).
data7 = add_stance_diff(data6, df_tott)



data7 = fill_missing_values(data7)



# Columns filled with their mean (the DOB columns are datetimes).
numeric_cols = [
    'diff_HEIGHT', 'diff_DOB_days',
    'Fighter_2_HEIGHT', 'Fighter_2_DOB',
    'Fighter_1_HEIGHT', 'Fighter_1_DOB'
]

for col in numeric_cols:
    data7[col] = data7[col].fillna(data7[col].mean())

# Columns filled with their most frequent value.
text_cols = ['Prev_Result_Diff', 'Fighter_2_prev_result', 'Fighter_2_prev_KO',
             'Fighter_1_prev_result', 'Fighter_1_prev_KO', 'DETAILS', 'REFEREE']

for col in text_cols:
    modes = data7[col].mode()
    # An entirely empty column has no mode; fall back to the text 'Unknown'.
    data7[col] = data7[col].fillna('Unknown' if modes.empty else modes[0])
import pandas as pd
import numpy as np

# Columns that have exactly 2121 missing values.
# NOTE(review): 2121 is a hard-coded row count — confirm it is meaningful for
# the frame processed here.
columns_to_fill = [
    col for col in data7.columns
    if data7[col].isna().sum() == 2121
]

for col in columns_to_fill:
    column = data7[col]

    if pd.api.types.is_numeric_dtype(column):
        observed = set(column.dropna().unique())
        if observed <= {0, 1}:
            # Binary column: use the mode (0 when there is none).
            modes = column.mode()
            replacement = 0 if modes.empty else modes[0]
        else:
            # Ordinary numeric column: use the mean.
            replacement = column.mean()
    elif pd.api.types.is_string_dtype(column) or column.dtype == "object":
        modes = column.mode()
        replacement = "Unknown" if modes.empty else modes[0]
    else:
        continue  # other dtypes are left as they are

    data7[col] = column.fillna(replacement)


# Copy of the feature row without identifier / free-text columns.
df_result14 = data7.drop(
    columns=['EVENT', 'BOUT', 'URL', 'DATE','METHOD','TIME','DETAILS','ROUND','diff_EVENT','diff_BOUT','diff_LOCATION'],
    errors='ignore',
)

# Maps every raw WEIGHTCLASS label to a base weight class (or 'other').
# NOTE(review): title bouts map to "Women's Featherweight Bout", but that label
# itself has no entry here — confirm whether the plain label should be listed.
mapping_WEIGHTCLASS = {
    # Simple direct mappings
    'Lightweight Bout': 'Lightweight Bout',
    'Middleweight Bout': 'Middleweight Bout',
    'Welterweight Bout': 'Welterweight Bout',
    'Featherweight Bout': 'Featherweight Bout',
    'Bantamweight Bout': 'Bantamweight Bout',
    'Light Heavyweight Bout': 'Light Heavyweight Bout',
    'Heavyweight Bout': 'Heavyweight Bout',
    "Women's Flyweight Bout": "Women's Flyweight Bout",
    "Women's Strawweight Bout": "Women's Strawweight Bout",
    'Flyweight Bout': 'Flyweight Bout',
    'Catch Weight Bout': 'Catch Weight Bout',
    "Women's Bantamweight Bout": "Women's Bantamweight Bout",
    'Open Weight Bout': 'Open Weight Bout',

    # UFC Title Bouts mapped to base categories
    'UFC Lightweight Title Bout': 'Lightweight Bout',
    'UFC Flyweight Title Bout': 'Flyweight Bout',
    'UFC Bantamweight Title Bout': 'Bantamweight Bout',
    "UFC Women's Bantamweight Title Bout": "Women's Bantamweight Bout",
    'UFC Welterweight Title Bout': 'Welterweight Bout',
    "UFC Women's Flyweight Title Bout": "Women's Flyweight Bout",
    'UFC Featherweight Title Bout': 'Featherweight Bout',
    'UFC Light Heavyweight Title Bout': 'Light Heavyweight Bout',
    'UFC Middleweight Title Bout': 'Middleweight Bout',
    "UFC Women's Strawweight Title Bout": "Women's Strawweight Bout",
    'UFC Heavyweight Title Bout': 'Heavyweight Bout',
    'UFC Interim Heavyweight Title Bout': 'Heavyweight Bout',
    'UFC Interim Featherweight Title Bout': 'Featherweight Bout',
    'UFC Interim Flyweight Title Bout': 'Flyweight Bout',
    'UFC Interim Bantamweight Title Bout': 'Bantamweight Bout',
    "UFC Women's Featherweight Title Bout": "Women's Featherweight Bout",
    'UFC Interim Lightweight Title Bout': 'Lightweight Bout',
    'UFC Interim Middleweight Title Bout': 'Middleweight Bout',
    'UFC Interim Welterweight Title Bout': 'Welterweight Bout',
    'UFC Interim Light Heavyweight Title Bout': 'Light Heavyweight Bout',

    # Ultimate Fighter mappings (map to base category)
    'Ultimate Fighter 28 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    "Ultimate Fighter 28 Women's Featherweight Tournament Title Bout": "Women's Featherweight Bout",
    'Ultimate Fighter 27 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter 27 Featherweight Tournament Title Bout': 'Featherweight Bout',
    'Ultimate Fighter 25 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter Latin America 3 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter 23 Light Heavyweight Tournament Title Bout': 'Light Heavyweight Bout',
    "Ultimate Fighter 23 Women's Strawweight Tournament Title Bout": "Women's Strawweight Bout",
    'Ultimate Fighter 22 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter Latin America 2 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter Latin America 2 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter Brazil 4 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter Brazil 4 Bantamweight Tournament Title Bout': 'Bantamweight Bout',
    'Ultimate Fighter 21 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter Latin America Featherweight Tournament Title Bout': 'Featherweight Bout',
    'Ultimate Fighter Latin America Bantamweight Tournament Title Bout': 'Bantamweight Bout',
    'Ultimate Fighter China Featherweight Tournament Title Bout': 'Featherweight Bout',
    'Ultimate Fighter 19 Light Heavyweight Tournament Title Bout': 'Light Heavyweight Bout',
    'Ultimate Fighter 19 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter Brazil 3 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'Ultimate Fighter Brazil 3 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter China Welterweight Tournament Title Bout': 'Welterweight Bout',
    "Ultimate Fighter 18 Women's Bantamweight Tournament Title Bout": "Women's Bantamweight Bout",
    'Ultimate Fighter 18 Bantamweight Tournament Title Bout': 'Bantamweight Bout',
    'Ultimate Fighter Brazil 2 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter 17 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter 16 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter Australia vs. UK Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter Australia vs. UK Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter Brazil 1 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter Brazil 1 Featherweight Tournament Title Bout': 'Featherweight Bout',
    'Ultimate Fighter 15 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter 14 Featherweight Tournament Title Bout': 'Featherweight Bout',
    'Ultimate Fighter 14 Bantamweight Tournament Title Bout': 'Bantamweight Bout',
    'Ultimate Fighter 13 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter 12 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter 11 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter 10 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'Ultimate Fighter 9 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter 9 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter 8 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter 8 Light Heavyweight Tournament Title Bout': 'Light Heavyweight Bout',
    'Ultimate Fighter 7 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter 6 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter 5 Lightweight Tournament Title Bout': 'Lightweight Bout',
    'Ultimate Fighter 4 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter 4 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter 3 Light Heavyweight Tournament Title Bout': 'Light Heavyweight Bout',
    'Ultimate Fighter 3 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'Ultimate Fighter 2 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'Ultimate Fighter 2 Welterweight Tournament Title Bout': 'Welterweight Bout',
    'Ultimate Fighter 1 Light Heavyweight Tournament Title Bout': 'Light Heavyweight Bout',
    'Ultimate Fighter 1 Middleweight Tournament Title Bout': 'Middleweight Bout',

    # Other explicit mappings
    # ('Open Weight Bout' is already listed under the direct mappings; a second
    # identical key here would only silently overwrite the first.)
    'Super Heavyweight Bout': 'Heavyweight Bout',
    'Ultimate Japan 2 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'Ultimate Japan Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'UFC 17 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'UFC 15 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'UFC 14 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'UFC 14 Middleweight Tournament Title Bout': 'Middleweight Bout',
    'UFC 13 Heavyweight Tournament Title Bout': 'Heavyweight Bout',
    'UFC 13 Lightweight Tournament Title Bout': 'Lightweight Bout',
    "Ultimate Ultimate '96 Tournament Title Bout": 'other',
    'UFC 10 Tournament Title Bout': 'other',
    'UFC Superfight Championship Bout': 'other',
    'UFC 8 Tournament Title Bout': 'other',
    "Ultimate Ultimate '95 Tournament Title Bout": 'other',
    'UFC 7 Tournament Title Bout': 'other',
    'UFC 6 Tournament Title Bout': 'other',
    'UFC 5 Tournament Title Bout': 'other',
    'UFC 4 Tournament Title Bout': 'other',
    'UFC 3 Tournament Title Bout': 'other',
    'UFC 2 Tournament Title Bout': 'other',
}

# Collapse raw weight-class labels to base classes; unmapped labels become 'other'.
data7['WEIGHTCLASS'] = data7['WEIGHTCLASS'].map(mapping_WEIGHTCLASS).fillna('other')

# Fighter ages in days, measured from one shared "now" so that both fighters
# use the same reference instant.
# NOTE(review): age is relative to the run date rather than the bout date, so
# the value changes from run to run — confirm this matches how the model was trained.
today = pd.to_datetime('today')
data7['Fighter_1_AGE'] = (today - data7['Fighter_1_DOB']).dt.days
data7['Fighter_2_AGE'] = (today - data7['Fighter_2_DOB']).dt.days

# The raw date-of-birth columns are no longer needed.
data7 = data7.drop(columns=['Fighter_1_DOB', 'Fighter_2_DOB'])


cat_cols = [
    'OUTCOME',
    'WEIGHTCLASS',
    'TIME FORMAT',
    'REFEREE',
    'LOCATION',
    'STANCE_DIFF',
    'Fighter_1',
    'Fighter_2'
]

# Mark the categorical features as such.
for col in cat_cols:
    data7[col] = data7[col].astype('category')

# Mean-fill the numeric columns of df_result14. The result of fillna() has to be
# assigned back; calling it without assignment leaves the frame unchanged.
# NOTE(review): df_result14 is not used for the prediction below.
numeric_cols = df_result14.select_dtypes(include=['number']).columns

for col in numeric_cols:
    mean_val = df_result14[col].mean()
    df_result14[col] = df_result14[col].fillna(mean_val)

data7.loc[0, 'red_corner'] = 0
from catboost import CatBoostClassifier

model = CatBoostClassifier()
model.load_model("/content/catboost_model.cbm")

# Keep exactly the columns the model was trained on, in the model's order.
expected_features = model.feature_names_
X2 = data7[expected_features].copy()

# Categorical columns among the selected features only.
cat_features = X2.select_dtypes(include=['category']).columns.tolist()

# Pool bundling the data with its categorical feature names.
pool = Pool(data=X2, cat_features=cat_features)

# Predict.
y_pred = model.predict(pool)

y_pred

array([0.])