# Setting Up Environment

In [41]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

import time
import pandas as pd
import re
from fuzzywuzzy import fuzz
from bs4 import BeautifulSoup
import datetime as dt
import numpy as np

# Fixing Bet Recommender

In [48]:
# Tuning knobs for the bet recommender.
# Minimum edge (model win probability minus the odds-implied probability)
# required before a GB bet is recommended; passed as `diff` to calculate_bets_gb.
best_diff = 0.05
# A GB prediction is only used when at least one fighter's wins + losses
# is strictly greater than this value.
best_fight_number = 0
# Same wins + losses threshold, applied to the LGBM prediction.
best_fight_number_lgbm = 20

In [50]:
def calculate_bets_gb(row, diff):
    """Recommend a flat 100 bet from the GB model for one fight.

    Parameters
    ----------
    row : object exposing ``Prediction_GB_Winner``, ``Fighter_1``,
        ``Fighter_2``, ``Fighter_1_Odds`` and ``Fighter_2_Odds``.
    diff : minimum amount by which the model probability must exceed the
        odds-implied probability (from ``calculate_odds``) to place a bet.

    Returns
    -------
    str
        ``'Bet 100 on <fighter>'`` or ``'No bet'``.
    """
    model_prob = row.Prediction_GB_Winner
    # A prediction of exactly 0 marks "no prediction available".
    if not (model_prob != 0):
        return 'No bet'

    picks = []
    edge_fighter_1 = model_prob - calculate_odds(row.Fighter_1_Odds)
    if edge_fighter_1 >= diff:
        picks.append(row.Fighter_1)
    edge_fighter_2 = (1.0 - model_prob) - calculate_odds(row.Fighter_2_Odds)
    if edge_fighter_2 >= diff:
        picks.append(row.Fighter_2)

    if not picks:
        return 'No bet'
    # If both sides clear the threshold, the second fighter wins out.
    return f'Bet 100 on {picks[-1]}'

def calculate_bets_lgbm(row):
    """Recommend a flat 100 bet from the LGBM model for one fight.

    ``row`` must expose ``Prediction_LGBM_Winner``, ``Fighter_1`` and
    ``Fighter_2``. A prediction of exactly 0 means "no prediction" and
    yields ``'No bet'``; otherwise the bet goes on ``Fighter_1`` when the
    prediction is strictly above 0.5 and on ``Fighter_2`` in every other case.
    """
    win_prob = row.Prediction_LGBM_Winner
    if win_prob != 0:
        pick = row.Fighter_1 if win_prob > 0.5 else row.Fighter_2
        return f'Bet 100 on {pick}'
    return 'No bet'
def calculate_odds(odds):
    """Convert moneyline odds into the implied win probability.

    Negative odds map to ``|odds| / (|odds| + 100)`` and positive odds to
    ``100 / (odds + 100)``. Returns ``None`` when ``odds`` is neither
    below nor above zero (e.g. exactly 0).
    """
    if odds < 0:
        magnitude = abs(odds)
        return magnitude / (magnitude + 100)
    return 100 / (odds + 100) if odds > 0 else None

In [42]:
# Scrape the UFC odds table from Action Network and build `odds_df`, one row
# per fight with columns Fighter_1, Fighter_2, Fighter_1_Odds, Fighter_2_Odds.

# Instantiating webdriver
driver = webdriver.Safari()
driver.get('https://www.actionnetwork.com/ufc/odds')
# Wait for a <table> to be present so page_source is not read before the odds render
WebDriverWait(driver, 15).until(EC.presence_of_element_located((By.TAG_NAME, 'table')))
# NOTE(review): the driver is left open here; call driver.quit() once no later cell needs it.

# Getting odds table and formatting
html = driver.page_source
tables = pd.read_html(html)
# Keep every other row of the first table and renumber from 0
# (chained so no in-place mutation happens on a slice)
odds = tables[0].iloc[::2].reset_index(drop = True)

# Iterating through to get each fighter's odds
odds_df = pd.DataFrame(columns = ['Fighter_1', 'Fighter_2', 'Fighter_1_Odds', 'Fighter_2_Odds'])
# NOTE(review): the two fighter regexes are not used in this cell; kept in case other cells rely on them.
fighter_2_regex = r'^[A-Za-z]+\s[A-Za-z]+'
fighter_1_regex = r'[A-Za-z]+\s[A-Za-z]+(?=[A-Za-z]*\.)'
# Matches any non-ASCII character; used to strip them from the `Scheduled` cell
flag_regex = r'[^\x00-\x7F]'
for index, row in odds.iterrows():
    # Reset per row so names from a previous fight can never leak into this one
    fighter_1 = None
    fighter_2 = None
    # Getting fighter names
    # NOTE(review): the heuristics below assume the `Scheduled` cell concatenates
    # full and abbreviated names without separators (e.g. 'LastF.' / 'LastFirst'),
    # hence the splitting on capital letters - confirm against the live page.
    names_string = re.sub(flag_regex, '', row.Scheduled)
    names_split = names_string.split()
    if len(names_split) == 5:
        fighter_2 = names_split[0] + ' ' + names_split[1][:-2]
        # Splitting middle part to get fighter 1 first name
        need_to_split = names_split[2]
        split = re.findall('[A-Z][^A-Z]*', need_to_split)
        fighter_1 = split[1] + ' ' + names_split[-1]
    else:
        # Case where first name is two names
        try:
            need_to_split = names_split[1]
            split = re.findall('[A-Z][^A-Z]*', need_to_split)
            fighter_2 = names_split[0] + ' ' + split[0]
            if re.findall('[A-Z][^A-Z]*', names_split[1])[1][1] == '.':
                # Case where second name is three names
                if len(re.findall('[A-Z][^A-Z]*', names_split[2])) > 1:
                    need_to_split = names_split[2]
                    split = re.findall('[A-Z][^A-Z]*', need_to_split)
                    fighter_1 = split[1] + ' ' + names_split[3] + ' ' + names_split[-1]
        except IndexError:
            # Case where first name is three names
            if len(re.findall('[A-Z][^A-Z]*', names_split[2])) > 1:
                need_to_split = names_split[2]
                split = re.findall('[A-Z][^A-Z]*', need_to_split)
                fighter_2 = names_split[0] + ' ' + names_split[1] + ' ' + split[0]
                # Case where second name is two names
                try:
                    if len(names_split) == 7:
                        if re.findall('[A-Z][^A-Z]*', names_split[-2])[1][1] == '.':
                            need_to_split = names_split[4]
                            split = re.findall('[A-Z][^A-Z]*', need_to_split)
                            fighter_1 = split[1] + ' ' + names_split[-1]
                    else:
                        if re.findall('[A-Z][^A-Z]*', names_split[-2])[1][1] == '.':
                            need_to_split = names_split[3]
                            split = re.findall('[A-Z][^A-Z]*', need_to_split)
                            fighter_1 = split[1] + ' ' + names_split[-1]
                except IndexError:
                    # Case where second name is three names
                    if len(re.findall('[A-Z][^A-Z]*', names_split[6])) > 1:
                        need_to_split = names_split[4]
                        split = re.findall('[A-Z][^A-Z]*', need_to_split)
                        fighter_1 = split[1] + ' ' + names_split[5] + ' ' + names_split[-1]
                    # Case where second name is four names
                    else:
                        need_to_split = names_split[4]
                        split = re.findall('[A-Z][^A-Z]*', need_to_split)
                        fighter_1 = split[1] + ' ' + names_split[5] + ' ' + names_split[6] + ' ' + names_split[-1]
            # Case where first name is four names
            else:
                need_to_split = names_split[3]
                split = re.findall('[A-Z][^A-Z]*', need_to_split)
                fighter_2 = names_split[0] + ' ' + names_split[1] + ' ' + names_split[2] + ' ' + split[0]
                # Case where second name is two names
                try:
                    if re.findall('[A-Z][^A-Z]*', names_split[-2])[1][1] == '.':
                        need_to_split = names_split[-3]
                        split = re.findall('[A-Z][^A-Z]*', need_to_split)
                        fighter_1 = split[1] +  ' ' + names_split[-1]
                except IndexError:
                    # Case where second name is three names
                    # NOTE(review): this branch splits names_split[4] but then reads
                    # names_split[6]; the sibling branches use consecutive indices - verify.
                    if len(re.findall('[A-Z][^A-Z]*', names_split[7])) > 1:
                        need_to_split = names_split[4]
                        split = re.findall('[A-Z][^A-Z]*', need_to_split)
                        fighter_1 = split[1] + ' ' + names_split[6] + ' ' + names_split[-1]
                    # Case where second name is four names
                    else:
                        need_to_split = names_split[5]
                        split = re.findall('[A-Z][^A-Z]*', need_to_split)
                        fighter_1 = split[1] + ' ' + names_split[6] + ' ' + names_split[7] + ' ' + names_split[-1]
    # Skip fights whose names matched none of the layouts above; otherwise the
    # previous fight's names would be paired with this row's odds.
    if fighter_1 is None or fighter_2 is None:
        continue
    # Getting fighter odds
    # The cell holds both moneylines back to back; each is a sign plus 3 or 4
    # digits, so the total length (8, 9 or 10) tells where to cut.
    ml_string = row['Best Odds']
    if len(ml_string) == 8:
        ml_fighter_2 = ml_string[:4]
        ml_fighter_1 = ml_string[-4:]
    elif len(ml_string) == 9:
        # A sign at position 4 means the first moneyline is the 4-character one
        if ml_string[4] in ('+', '-'):
            ml_fighter_2 = ml_string[:4]
            ml_fighter_1 = ml_string[-5:]
        else:
            ml_fighter_2 = ml_string[:5]
            ml_fighter_1 = ml_string[-4:]
    elif len(ml_string) == 10:
        ml_fighter_2 = ml_string[:5]
        ml_fighter_1 = ml_string[-5:]
    else:
        continue
    # Rows whose odds are not numeric are skipped
    try:
        ml_fighter_2 = float(ml_fighter_2)
    except ValueError:
        continue
    try:
        ml_fighter_1 = float(ml_fighter_1)
    except ValueError:
        continue
    # Adding data to odds df
    new_data = [fighter_1, fighter_2, ml_fighter_1, ml_fighter_2]
    new_df = pd.DataFrame([new_data])
    new_df.columns = odds_df.columns
    odds_df = pd.concat([odds_df, new_df], ignore_index = True)

In [80]:
# Load the model predictions; the first CSV column is used as the index.
# The bet-calculation cell reads fighter_1, Prediction_GB_Winner,
# Prediction_LGBM_Winner, wins_1/losses_1 and wins_2/losses_2 from this frame.
prediction_df = pd.read_csv('mma_data_predictions.csv', index_col = 0)

In [90]:
def _predicted_win_probability(fight, predictions, prediction_col, min_fights, min_ratio = 50):
    """Return the model's probability that ``fight.Fighter_1`` wins, or None.

    Parameters
    ----------
    fight : row with ``Fighter_1`` and ``Fighter_2`` names (from ``odds_df``).
    predictions : prediction frame with ``fighter_1``, ``wins_1``, ``losses_1``,
        ``wins_2``, ``losses_2`` and ``prediction_col`` columns. Its ``FUZZ_1``
        and ``FUZZ_2`` columns are (re)written on every call.
    prediction_col : name of the probability column to read.
    min_fights : the prediction is used only if at least one fighter's
        wins + losses is strictly greater than this.
    min_ratio : fuzzy-match score a name must exceed to count as a match.

    Returns
    -------
    The probability oriented towards ``fight.Fighter_1``, or ``None`` when no
    prediction row matches or neither fighter has enough recorded fights.
    """
    # Both scores compare against predictions.fighter_1: FUZZ_1 means the
    # prediction row is in the same order as the odds row, FUZZ_2 means swapped.
    predictions['FUZZ_1'] = predictions.fighter_1.apply(lambda name: fuzz.ratio(name, fight.Fighter_1))
    predictions['FUZZ_2'] = predictions.fighter_1.apply(lambda name: fuzz.ratio(name, fight.Fighter_2))
    matches = predictions.loc[(predictions.FUZZ_1 > min_ratio) | (predictions.FUZZ_2 > min_ratio)]
    if matches.empty:
        return None
    # NOTE(review): the LAST row above the threshold is used, not the best-scoring
    # one, so a later row for a different fighter with a similar name can win.
    match = matches.iloc[-1]
    probability = match[prediction_col]
    if match['FUZZ_1'] <= min_ratio:
        # Matched through Fighter_2, so the stored probability is for the opponent
        probability = 1.0 - probability
    fights_1 = match['wins_1'] + match['losses_1']
    fights_2 = match['wins_2'] + match['losses_2']
    if (fights_1 > min_fights) or (fights_2 > min_fights):
        return probability
    return None


# Calculating GB bets
# Initialised as float 0.0 (not int 0) so writing probabilities below does not
# rely on a silent int -> float upcast; 0 still means "no prediction".
odds_df['Prediction_GB_Winner'] = 0.0
for index, row in odds_df.iterrows():
    gb = _predicted_win_probability(row, prediction_df, 'Prediction_GB_Winner', best_fight_number)
    if gb is not None:
        odds_df.loc[index, 'Prediction_GB_Winner'] = gb
odds_df['Bet_GB'] = odds_df.apply(calculate_bets_gb, diff = best_diff, axis = 1)
# Calculating LGBM bets
odds_df['Prediction_LGBM_Winner'] = 0.0
for index, row in odds_df.iterrows():
    lgbm = _predicted_win_probability(row, prediction_df, 'Prediction_LGBM_Winner', best_fight_number_lgbm)
    if lgbm is not None:
        odds_df.loc[index, 'Prediction_LGBM_Winner'] = lgbm
odds_df['Bet_LGBM'] = odds_df.apply(calculate_bets_lgbm, axis = 1)

In [61]:
# Notebook inspection cell: displays whatever `row` currently holds.
# NOTE(review): the captured output below is a multi-row table - confirm what
# `row` was bound to when this cell was run.
row

Unnamed: 0,fighter_1,weight_1,reach_1,age_1,slpm_1,sapm_1,td_avg_1,sub_avg_1,strk_acc_1,strk_def_1,...,Date,strike_diff_1,strike_diff_2,strike_diff,td_diff_1,td_diff_2,td_diff,Prediction_LGBM_Winner,FUZZ_1,FUZZ_2
15,GregoryRodrigues,185.0,75.0,31,6.19,5.87,2.39,0.6,55.0,51.0,...,2023-01-20,0.32,-6.21,6.53,50.0,0.0,50.0,0.627944,22,60
35,YairRodriguez,145.0,11.0,31,4.67,4.12,0.83,0.7,45.0,53.0,...,2023-02-11,0.55,-0.01,0.56,-34.0,-18.0,-16.0,0.647392,12,96
64,AlexanderVolkov,250.0,80.0,35,4.88,3.04,0.51,0.1,57.0,53.0,...,2023-03-10,1.84,1.92,-0.08,-1.0,42.0,-43.0,0.471621,83,7
148,MarinaRodriguez,115.0,65.0,36,4.8,3.05,0.19,0.2,47.0,57.0,...,2023-05-05,1.75,-0.74,2.49,-32.0,-35.0,3.0,0.564427,17,76
153,DanielRodriguez,170.0,74.0,37,7.42,5.22,0.63,0.1,50.0,56.0,...,2023-05-12,2.2,2.6,-0.4,-13.0,-13.0,0.0,0.531065,17,76
178,AndreiArlovski,240.0,77.0,44,3.79,3.2,0.4,0.2,45.0,57.0,...,2023-06-02,0.59,-0.39,0.98,-40.0,-6.0,-34.0,0.588789,57,21
225,AlexandrRomanov,262.0,75.0,33,3.62,2.0,4.96,1.1,50.0,46.0,...,2023-06-30,1.62,-0.72,2.34,32.0,-30.0,62.0,0.613964,72,21
226,AlexanderVolkanovski,145.0,71.0,35,6.35,3.4,1.52,0.2,56.0,59.0,...,2023-07-07,2.95,0.75,2.2,-35.0,-35.0,0.0,0.956551,98,6
