# Setting Up Environment

In [1]:
import datetime as dt
import re
import time
from io import StringIO

import pandas as pd
from bs4 import BeautifulSoup
from fuzzywuzzy import fuzz
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager



# Adding New Fights to Odds Data

In [2]:
# Fights whose odds were written out by a previous run of this notebook.
data_filled = pd.read_csv('mma_data_odds.csv', index_col=0)

# Full fight list, keeping only rows with a non-negative `result`.
# .copy() detaches the filtered frame so the two column assignments below
# write to an independent frame instead of a slice (avoids pandas'
# SettingWithCopyWarning / chained-assignment ambiguity).
data_all = pd.read_csv('mma_data.csv', index_col=0)
data_all = data_all[data_all.result >= 0].copy()
# 0 is the "odds not looked up yet" placeholder tested by the scraping loop.
data_all['Fighter_1_Odds'] = 0
data_all['Fighter_2_Odds'] = 0

# The last row of the odds file marks how far the previous run got.
last_row_filled = data_filled.tail(1)
fighter_1_last = last_row_filled.fighter_1.values[0]
fighter_2_last = last_row_filled.fighter_2.values[0]

# Renumber the full list 0..n-1 so the matching label doubles as a position.
data_all_copied = data_all.reset_index(drop=True)
last_filled_matches = data_all_copied.index[
    (data_all_copied.fighter_1 == fighter_1_last)
    & (data_all_copied.fighter_2 == fighter_2_last)
]
if len(last_filled_matches) == 0:
    # Same exception type the original `.index[0]` raised, but with context.
    raise IndexError(
        'Last fight in mma_data_odds.csv (%s vs %s) was not found in mma_data.csv'
        % (fighter_1_last, fighter_2_last)
    )
# NOTE(review): the first matching row is used; if the same two fighters appear
# together more than once in mma_data.csv this may pick an earlier meeting and
# re-append fights that already have odds -- confirm against the data.
cutoff_unfilled = last_filled_matches[0]
data_all_new = data_all_copied.iloc[cutoff_unfilled + 1:]

# ignore_index gives every row a unique label, so the per-row
# `data.loc[index, ...]` writes in the scraping loop touch exactly one row.
data = pd.concat([data_filled, data_all_new], ignore_index=True)

# Filling In Odds

In [6]:
# Look up closing odds on bestfightodds.com for every row of `data` whose
# Fighter_1_Odds is still the 0 placeholder, and write both fighters' odds
# back into `data`.  A failure on any row is reported, the browser is
# restarted on the archive page, and the loop moves on to the next row.

# Matches one capital letter plus everything up to the next capital letter;
# compiled once because it is applied twice per row.
_NAME_PART = re.compile('[A-Z][^A-Z]*')


def _chrome_options():
    """Build the Chrome options used for every scraping session."""
    chrome_options = Options()
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument("user-data-dir=/Users/hsinger24/Library/Application Support/Google/Chrome/Default1")
    chrome_options.add_argument("--start-maximized")
    chrome_options.add_argument('--disable-web-security')
    chrome_options.add_argument('--allow-running-insecure-content')
    chrome_options.add_argument("--disable-setuid-sandbox")
    return chrome_options


def _open_archive(chrome_options):
    """Start Chrome with *chrome_options* and load the odds archive page.

    The options are passed to the driver explicitly; previously they were
    built but never handed to ``webdriver.Chrome``, so none of the flags
    took effect.
    """
    browser = webdriver.Chrome(
        service=Service(ChromeDriverManager().install()),
        options=chrome_options,
    )
    browser.get('https://www.bestfightodds.com/archive')
    return browser


def _spaced_name(raw_name):
    """Split *raw_name* at capital letters, prefixing each piece with a space.

    For example ``'JonJones'`` becomes ``' Jon Jones'``.  The leading space is
    kept deliberately so the search text and fuzzy scores stay the same as
    before this helper was extracted.
    """
    return ''.join(' ' + part for part in _NAME_PART.findall(str(raw_name)))


options = _chrome_options()
driver = _open_archive(options)
time.sleep(1)

for index, row in data.iterrows():

    try:

        # Anything other than the 0 placeholder means the row is already done.
        if row.Fighter_1_Odds != 0:
            continue

        fighter_name = _spaced_name(row.fighter_1)
        fighter_name_2 = _spaced_name(row.fighter_2)

        # Searching for fights w/ fighter 1
        search_bar = driver.find_elements(By.XPATH, '//*[@id="page-content"]/form/p/input[1]')[0]
        search_bar.send_keys(fighter_name)
        driver.find_elements(By.XPATH, '//*[@id="page-content"]/form/p/input[2]')[0].click()

        # Clicking on the first search result.  Best effort: when there is no
        # such link the current page is parsed as-is.
        try:
            driver.find_elements(By.XPATH, '//*[@id="page-content"]/table[1]/tbody/tr[1]/td[2]/a')[0].click()
            time.sleep(1)
        except Exception:
            pass

        # Getting odds.  StringIO: passing literal HTML to read_html is
        # deprecated in recent pandas.  .copy() so the Fuzzy_* columns are
        # added to an independent frame rather than a column slice.
        html = driver.page_source
        table = pd.read_html(StringIO(html))[0]
        table = table[['Matchup', 'Closing range']].copy()
        table['Fuzzy_1'] = table.Matchup.apply(lambda x: fuzz.ratio(x, fighter_name))
        table['Fuzzy_2'] = table.Matchup.apply(lambda x: fuzz.ratio(x, fighter_name_2))
        table = table[(table.Fuzzy_2 > 50) | (table.Fuzzy_1 > 50)].reset_index(drop = True)
        # First row resembling fighter 2; the row directly above it is taken
        # as fighter 1's side of the same matchup.
        index_opp = table[table.Fuzzy_2 > 50].index[0]
        table_matchup = table.loc[index_opp-1:index_opp, :].reset_index(drop = True)

        # Read both values before writing either one, so a failed lookup
        # never leaves the row half filled.
        odds_1 = table_matchup.loc[0, 'Closing range']
        odds_2 = table_matchup.loc[1, 'Closing range']
        data.loc[index, 'Fighter_1_Odds'] = odds_1
        data.loc[index, 'Fighter_2_Odds'] = odds_2

        # Navigating back and clearing text box.
        # implicitly_wait() sets the element-lookup timeout for the session;
        # it does not pause execution by itself.
        driver.back()
        driver.implicitly_wait(10)
        driver.back()
        driver.implicitly_wait(10)
        driver.find_elements(By.XPATH, '//*[@id="page-content"]/form/p/input[1]')[0].clear()

    except Exception as exc:
        # `except Exception` (not a bare except) so Ctrl+C / kernel interrupt
        # still stops the loop instead of triggering a browser restart.
        print('Odds lookup failed for row %s: %r' % (index, exc))

        try:
            driver.quit()
        except Exception:
            # The session may already be gone; a fresh one is started below.
            pass
        time.sleep(1)
        options = _chrome_options()
        driver = _open_archive(options)

# Keep only fights where both odds were filled in: drop rows still holding the
# 0 placeholder, then rows with missing odds, and renumber from 0.
has_both_odds = (data['Fighter_1_Odds'] != 0) & (data['Fighter_2_Odds'] != 0)
data = data.loc[has_both_odds]
data = data.dropna(subset=['Fighter_1_Odds', 'Fighter_2_Odds']).reset_index(drop=True)

In [9]:
# Overwrite the odds file with the updated table (the index is written as the first column).
data.to_csv(path_or_buf='mma_data_odds.csv')