# Setting Up Environment

In [2]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

import time
import pandas as pd
import re
from fuzzywuzzy import fuzz
from bs4 import BeautifulSoup
import datetime as dt
import numpy as np



# Fixing Odds Scraper

In [39]:
# Scrape the UFC odds table from Action Network and build `odds_df`: one row
# per fight holding both fighter names and each fighter's moneyline odds.
from io import StringIO

# Splits a token at every capital letter, e.g. 'MaddalenaJosiah' ->
# ['Maddalena', 'Josiah'] and 'HarrellJ.' -> ['Harrell', 'J.'].
_CAPS_CHUNK = re.compile('[A-Z][^A-Z]*')


def _parse_fighter_names(names_split):
    """Recover both fighter names from the whitespace-split 'Scheduled' cell.

    The cell text appears to be each fighter's full name immediately followed
    by an abbreviated form, with both fighters run together without a
    separator (e.g. 'Jack Della MaddalenaJ. Della MaddalenaJosiah HarrellJ.
    Harrell') -- format inferred from the parsing rules below; confirm against
    the live page.

    Args:
        names_split: list of whitespace-separated tokens from the cell.

    Returns:
        (fighter_1, fighter_2). Either entry is None when no rule matched.

    Raises:
        IndexError: when the tokens do not fit any supported layout.
    """
    fighter_1 = fighter_2 = None

    # Both fighters have two-word names
    if len(names_split) == 5:
        fighter_2 = names_split[0] + ' ' + names_split[1][:-2]
        # Splitting middle part to get fighter 1 first name
        split = _CAPS_CHUNK.findall(names_split[2])
        fighter_1 = split[1] + ' ' + names_split[-1]
        return fighter_1, fighter_2

    # The IndexError handlers below are deliberate control flow: a failed
    # lookup means "this layout does not apply, try the next one".
    try:
        # Case where first name is two names
        split = _CAPS_CHUNK.findall(names_split[1])
        fighter_2 = names_split[0] + ' ' + split[0]
        if _CAPS_CHUNK.findall(names_split[1])[1][1] == '.':
            # Case where second name is three names
            if len(_CAPS_CHUNK.findall(names_split[2])) > 1:
                split = _CAPS_CHUNK.findall(names_split[2])
                fighter_1 = split[1] + ' ' + names_split[3] + ' ' + names_split[-1]
    except IndexError:
        # Case where first name is three names
        if len(_CAPS_CHUNK.findall(names_split[2])) > 1:
            split = _CAPS_CHUNK.findall(names_split[2])
            fighter_2 = names_split[0] + ' ' + names_split[1] + ' ' + split[0]
            # Case where second name is two names
            try:
                if len(names_split) == 7:
                    if _CAPS_CHUNK.findall(names_split[-2])[1][1] == '.':
                        split = _CAPS_CHUNK.findall(names_split[4])
                        fighter_1 = split[1] + ' ' + names_split[-1]
                else:
                    if _CAPS_CHUNK.findall(names_split[-2])[1][1] == '.':
                        split = _CAPS_CHUNK.findall(names_split[3])
                        fighter_1 = split[1] + ' ' + names_split[-1]
            except IndexError:
                # Case where second name is three names
                if len(_CAPS_CHUNK.findall(names_split[6])) > 1:
                    split = _CAPS_CHUNK.findall(names_split[4])
                    fighter_1 = split[1] + ' ' + names_split[5] + ' ' + names_split[-1]
                # Case where second name is four names
                else:
                    split = _CAPS_CHUNK.findall(names_split[4])
                    fighter_1 = (split[1] + ' ' + names_split[5] + ' '
                                 + names_split[6] + ' ' + names_split[-1])
        # Case where first name is four names
        else:
            split = _CAPS_CHUNK.findall(names_split[3])
            fighter_2 = (names_split[0] + ' ' + names_split[1] + ' '
                         + names_split[2] + ' ' + split[0])
            # Case where second name is two names
            try:
                if _CAPS_CHUNK.findall(names_split[-2])[1][1] == '.':
                    split = _CAPS_CHUNK.findall(names_split[-3])
                    fighter_1 = split[1] + ' ' + names_split[-1]
            except IndexError:
                # Case where second name is three names
                # NOTE(review): the token indices here (4 and 6) are kept
                # exactly as originally written -- verify against real data.
                if len(_CAPS_CHUNK.findall(names_split[7])) > 1:
                    split = _CAPS_CHUNK.findall(names_split[4])
                    fighter_1 = split[1] + ' ' + names_split[6] + ' ' + names_split[-1]
                # Case where second name is four names
                else:
                    split = _CAPS_CHUNK.findall(names_split[5])
                    fighter_1 = (split[1] + ' ' + names_split[6] + ' '
                                 + names_split[7] + ' ' + names_split[-1])
    return fighter_1, fighter_2


def _parse_moneylines(ml_string):
    """Split a concatenated 'Best Odds' cell into two moneyline floats.

    Only 8-, 9- and 10-character strings are supported, i.e. two signed odds
    of four or five characters each (e.g. '+130-143', '+1300-2100').

    Args:
        ml_string: raw cell value; non-string values (e.g. NaN) are rejected.

    Returns:
        (ml_fighter_1, ml_fighter_2) as floats, or None when the cell cannot
        be parsed. Fighter 2's odds come first in the cell.
    """
    if not isinstance(ml_string, str):
        return None
    if len(ml_string) == 8:
        raw_2, raw_1 = ml_string[:4], ml_string[-4:]
    elif len(ml_string) == 9:
        # A sign at position 4 means the first odds value is the short one.
        if ml_string[4] in ('+', '-'):
            raw_2, raw_1 = ml_string[:4], ml_string[-5:]
        else:
            raw_2, raw_1 = ml_string[:5], ml_string[-4:]
    elif len(ml_string) == 10:
        raw_2, raw_1 = ml_string[:5], ml_string[-5:]
    else:
        return None
    try:
        return float(raw_1), float(raw_2)
    except ValueError:
        return None


# Instantiating webdriver
driver = webdriver.Safari()
try:
    driver.get('https://www.actionnetwork.com/ufc/odds')
    html = driver.page_source
finally:
    # Release the browser session even if the page load fails, so re-running
    # this cell does not leave stale sessions behind.
    driver.quit()

# Getting odds table and formatting
# Wrapped in StringIO because newer pandas deprecates passing literal HTML.
tables = pd.read_html(StringIO(html))
odds = tables[0]
# Keep every other row (presumably the rows in between are not fight rows --
# confirm against the page) and renumber from zero.
odds = odds.iloc[::2].reset_index(drop=True)

# Iterating through to get each fighter's odds
# NOTE: the two fighter_*_regex patterns are not used below; they are kept in
# case other cells reference them.
fighter_2_regex = r'^[A-Za-z]+\s[A-Za-z]+'
fighter_1_regex = r'[A-Za-z]+\s[A-Za-z]+(?=[A-Za-z]*\.)'
# NOTE(review): this strips every non-ASCII character, so accented letters in
# fighter names are dropped along with the flag emoji.
flag_regex = r'[^\x00-\x7F]'

odds_rows = []
for _, row in odds.iterrows():
    # Getting fighter names
    scheduled = row.Scheduled
    if not isinstance(scheduled, str):
        # Empty/NaN cell: nothing to parse
        continue
    names_string = re.sub(flag_regex, '', scheduled)
    names_split = names_string.split()
    try:
        fighter_1, fighter_2 = _parse_fighter_names(names_split)
    except IndexError:
        # Layout not covered by any rule: skip the row, keep scraping
        fighter_1 = fighter_2 = None
    if fighter_1 is None or fighter_2 is None:
        # Never fall through with a name left over from a previous row
        continue

    # Getting fighter odds
    moneylines = _parse_moneylines(row['Best Odds'])
    if moneylines is None:
        continue
    ml_fighter_1, ml_fighter_2 = moneylines

    # Adding data to odds df
    odds_rows.append([fighter_1, fighter_2, ml_fighter_1, ml_fighter_2])

# Build the frame once instead of concatenating inside the loop
odds_df = pd.DataFrame(
    odds_rows,
    columns=['Fighter_1', 'Fighter_2', 'Fighter_1_Odds', 'Fighter_2_Odds'],
)

In [40]:
# Display the scraped odds table (one row per fight)
odds_df

Unnamed: 0,Fighter_1,Fighter_2,Fighter_1_Odds,Fighter_2_Odds
0,Kamuela Kirk,Esteban Ribovics,130.0,-143.0
1,Shannon Ross,Jesus Aguilar,125.0,-140.0
2,Cameron Saaiman,Terrence Mitchell,-550.0,425.0
3,Vitor Petrino,Marcin Prachnio,-230.0,200.0
4,Tatsuro Taira,Edgar Chairez,-975.0,675.0
5,Jim Crute,Alonzo Menifield,-130.0,120.0
6,Yazmin Jauregui,Denise Gomes,-369.0,300.0
7,Josiah Harrell,Jack Della Maddalena,675.0,-900.0
8,Robbie Lawler,Niko Price,200.0,-225.0
9,Val Woodburn,BO Nickal,1300.0,-2100.0
