# Setting Up Environment

In [3]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

import time
import pandas as pd
import re
from fuzzywuzzy import fuzz
import requests
from bs4 import BeautifulSoup
import datetime as dt
import numpy as np

# Automating Results Scraping

In [211]:
# Instantiating webdriver
# Open a Safari session and load the ESPN MMA fight center page.
driver = webdriver.Safari()
driver.get('https://www.espn.com/mma/fightcenter')

# One empty list per output column; the scraping steps below append to these
# lists and they are later turned into a DataFrame.
result_columns = (
    'fighter_1',
    'fighter_2',
    'wins_fighter_1',
    'losses_fighter_1',
    'wins_fighter_2',
    'losses_fighter_2',
    'method',
    'womens_ind',
)
result_dict = {column: [] for column in result_columns}


# Getting fighters
# Elements at even positions are stored as fighter_1 and those at odd
# positions as fighter_2 (assumes the page lists the two fighters of each bout
# consecutively -- TODO confirm against the live markup).
fighters = driver.find_elements(By.CSS_SELECTOR, "[class*='truncate tc db']")
fighter_names = [element.text for element in fighters]
result_dict['fighter_1'].extend(fighter_names[0::2])
result_dict['fighter_2'].extend(fighter_names[1::2])

# Getting wins and losses for each fighter
# Each record's text is split on '-'; the first piece is stored as wins and
# the second as losses (any further pieces are ignored).
# Fighter 1
records_fighter_1 = driver.find_elements(
    By.CSS_SELECTOR,
    "[class*='flex items-center n9 nowrap justify-end clr-gray-04']",
)
for record in records_fighter_1:
    record_parts = record.text.split('-')
    result_dict['wins_fighter_1'].append(float(record_parts[0]))
    result_dict['losses_fighter_1'].append(float(record_parts[1]))
# Fighter 2
records_fighter_2 = driver.find_elements(
    By.CSS_SELECTOR,
    "[class*='flex items-center n9 nowrap clr-gray-04']",
)
for record in records_fighter_2:
    record_parts = record.text.split('-')
    result_dict['wins_fighter_2'].append(float(record_parts[0]))
    result_dict['losses_fighter_2'].append(float(record_parts[1]))

# Determining method of victory
# Keep the text of every matched element except the literal 'Final' label.
fight_methods = driver.find_elements(By.CSS_SELECTOR, "[class*='h8']")
method_labels = (element.text for element in fight_methods)
result_dict['method'].extend(label for label in method_labels if label != 'Final')
        
# Determining if fight is women's
def _womens_indicator(weight_class_text):
    """Return 1 when the weight-class label denotes a women's bout, else 0.

    A label counts as women's only when "Women's" is its first
    space-separated word and at least one more word follows it.
    """
    words = weight_class_text.split(' ')
    return int(len(words) > 1 and words[0] == "Women's")


# Selector for the weight-class label shown inside an expanded fight panel.
WEIGHT_CLASS_SELECTOR = "[class*='tc h9 clr-gray-03']"

# Getting main event (already open), so its label can be read straight away
weight_class = driver.find_element(By.CSS_SELECTOR, WEIGHT_CLASS_SELECTOR).text
result_dict['womens_ind'].append(_womens_indicator(weight_class))

# Clicking the main event so it is closed before the other fights are opened
fight_click = driver.find_elements(
    By.CSS_SELECTOR,
    "[class*='MMAFightCard__Gamestrip br-5 mh4 relative']",
)
fight_click[0].click()

# Iterating through rest of fights: open, read the label, close again
for fight in fight_click[1:]:
    time.sleep(1)
    fight.click()
    # The wait returns the located element, so no second lookup is needed.
    weight_class = WebDriverWait(driver, 30).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, WEIGHT_CLASS_SELECTOR))
    ).text
    result_dict['womens_ind'].append(_womens_indicator(weight_class))
    fight.click()

# Putting result in table
# pd.DataFrame raises ValueError when the scraped column lists differ in
# length; the finally clause makes sure the browser session is still closed in
# that case instead of being left open.
try:
    result_table = pd.DataFrame(result_dict)
finally:
    driver.quit()

In [213]:
# Determining result and method from past week's fights

for index, row in fights.iterrows():
    # Only rows whose result is still the -10 placeholder are processed
    if row['result'] != -10:
        continue

    # Score every scraped fighter_1 name against this row's fighter_1 and keep
    # the scraped rows scoring above 70
    name_scores = result_table['fighter_1'].apply(
        lambda scraped_name: fuzz.ratio(scraped_name, row['fighter_1'])
    )
    temp_results = result_table.assign(FUZZ=name_scores)
    result_row = temp_results[temp_results['FUZZ'] > 70]

    # Rows with no match or an ambiguous match are left untouched
    if len(result_row) != 1:
        continue
    matched_bout = result_row.iloc[0]

    # Passing over women's fights (marked with -5)
    if matched_bout['womens_ind'] == 1:
        fights.loc[index, 'result'] = -5
        continue

    # Determining winner by record: whichever fighter's win count went up
    if matched_bout['wins_fighter_1'] > row['wins_1']:
        fights.loc[index, 'result'] = 1
    elif matched_bout['wins_fighter_2'] > row['wins_2']:
        fights.loc[index, 'result'] = 0

    # Filling in method
    finish = matched_bout['method']
    if finish == 'KO/TKO':
        fights.loc[index, 'KO_OVR'] = 1
    if finish == 'Sub':
        fights.loc[index, 'SUB_OVR'] = 1

In [215]:
# Persist the updated fights table.
# NOTE(review): with pandas defaults the index is written as the first
# column -- confirm the code that reloads this file expects that.
fights.to_csv(path_or_buf='mma_data.csv')