# Setting Up Environment

In [3]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

import time
import pandas as pd
import re
from fuzzywuzzy import fuzz
import requests
from bs4 import BeautifulSoup
import datetime as dt
import numpy as np

# Automating Results Scraping

In [127]:
# Scrape the ESPN MMA fight center and collect one row per bout in `result_table`.
FIGHTCENTER_URL = 'https://www.espn.com/mma/fightcenter'


def _element_texts(active_driver, css_selector):
    """Return the visible text of every element matching `css_selector`."""
    return [element.text for element in active_driver.find_elements(By.CSS_SELECTOR, css_selector)]


def _parse_record(record_text):
    """Split a dash-separated record string into (wins, losses) as floats.

    Only the first two dash-separated fields are read; any further fields are ignored.
    Text that cannot be parsed yields (nan, nan) so that a single odd element neither
    aborts the scrape nor shifts the remaining rows out of alignment.
    """
    fields = record_text.split('-')
    try:
        return float(fields[0]), float(fields[1])
    except (IndexError, ValueError):
        return float('nan'), float('nan')


# Instantiating webdriver
driver = webdriver.Safari()
try:
    driver.get(FIGHTCENTER_URL)

    # Element text has to be read while the session is still alive.
    fighter_names = _element_texts(driver, "[class*='truncate tc db']")
    records_fighter_1 = [
        _parse_record(text)
        for text in _element_texts(driver, "[class*='flex items-center n9 nowrap justify-end clr-gray-04']")
    ]
    records_fighter_2 = [
        _parse_record(text)
        for text in _element_texts(driver, "[class*='flex items-center n9 nowrap clr-gray-04']")
    ]
    method_texts = _element_texts(driver, "[class*='h8']")
finally:
    # Always close the browser, even if scraping fails part-way,
    # so that no WebDriver session is left open.
    driver.quit()

result_dict = {
    # Name elements alternate: even positions are fighter 1, odd positions are fighter 2.
    'fighter_1': fighter_names[0::2],
    'fighter_2': fighter_names[1::2],
    'wins_fighter_1': [record[0] for record in records_fighter_1],
    'losses_fighter_1': [record[1] for record in records_fighter_1],
    'wins_fighter_2': [record[0] for record in records_fighter_2],
    'losses_fighter_2': [record[1] for record in records_fighter_2],
    # The 'h8' selector also matches elements whose text is 'Final'; those are skipped.
    'method': [text for text in method_texts if text != 'Final'],
}

# Fail with an explicit message when the selectors returned inconsistent counts;
# pandas would raise a ValueError here as well, but without saying which column is off.
column_lengths = {column: len(values) for column, values in result_dict.items()}
if len(set(column_lengths.values())) > 1:
    raise ValueError(f'Scraped columns have mismatched lengths: {column_lengths}')

# Putting result in table
result_table = pd.DataFrame(result_dict)

In [150]:
# Load the fight dataset; the first CSV column becomes the index.
fights = pd.read_csv('mma_data.csv', index_col = 0)

# Rows from this position onward are flagged with the sentinel value that the
# later result-matching cell looks for ("no result recorded yet").
FIRST_PENDING_POSITION = 955
NO_RESULT = -10

# One vectorised assignment instead of a row-by-row iterrows() loop.
pending_labels = fights.index[FIRST_PENDING_POSITION:]
if len(pending_labels) > 0:
    fights.loc[pending_labels, 'result'] = NO_RESULT

Unnamed: 0,fighter_1,weight_1,reach_1,age_1,slpm_1,sapm_1,td_avg_1,sub_avg_1,strk_acc_1,strk_def_1,...,sub_avg_2,strk_acc_2,strk_def_2,td_acc_2,td_def_2,wins_2,losses_2,result,SUB_OVR,KO_OVR
953,JoshuaVan,125.0,65.0,22,8.0,6.87,0.0,0.0,51.0,64.0,...,0.0,44.0,55.0,0.0,58.0,9.0,1.0,1,0,0
954,DennisBuzukja,145.0,70.0,26,3.29,4.71,1.0,0.3,43.0,41.0,...,0.2,47.0,57.0,40.0,90.0,19.0,7.0,0,0,1
955,BeneilDariush,155.0,10.0,34,3.79,2.63,1.91,0.9,49.0,58.0,...,0.0,48.0,54.0,36.0,75.0,20.0,3.0,-10,0,0
956,JalinTurner,155.0,77.0,28,5.82,4.61,0.91,1.3,48.0,41.0,...,0.3,52.0,62.0,37.0,74.0,31.0,14.0,-10,0,0
957,RobFont,135.0,71.0,36,5.71,3.7,0.96,0.4,45.0,57.0,...,1.8,55.0,51.0,42.0,57.0,21.0,3.0,-10,0,0
958,SeanBrady,170.0,10.0,31,4.17,3.86,2.8,0.8,53.0,62.0,...,0.1,41.0,58.0,30.0,62.0,18.0,8.0,-10,0,0
959,ClayGuida,155.0,70.0,42,2.63,3.01,3.16,0.6,33.0,61.0,...,0.5,40.0,52.0,50.0,58.0,12.0,5.0,-10,0,0
960,PunaheleSoriano,185.0,11.0,31,4.04,4.22,0.9,0.0,45.0,48.0,...,0.5,41.0,46.0,35.0,46.0,14.0,5.0,-10,0,0
961,MieshaTate,135.0,65.0,37,2.61,3.1,1.88,1.3,44.0,51.0,...,0.3,44.0,55.0,33.0,61.0,9.0,2.0,-10,0,0
962,ZachReese,185.0,77.0,29,6.49,8.92,0.0,2.2,61.0,31.0,...,1.1,54.0,43.0,50.0,73.0,9.0,5.0,-10,0,0


In [151]:
# Determining result and method
# For every fight still carrying the "no result" sentinel, find the matching scraped
# bout by fuzzy-matching fighter 1's name and infer the winner from whichever
# fighter's win count went up. Only `result` is written here; the scraped `method`
# column is not used in this cell.

NO_RESULT = -10           # sentinel marking fights without a recorded result
MIN_NAME_SIMILARITY = 70  # fuzz.ratio score a scraped name must exceed to count as a match

# Loop-invariant: the scraped fighter-1 names are the same for every row.
scraped_fighter_1_names = result_table.fighter_1

# NOTE: `index` and `row` are kept as loop variable names because a later cell inspects `row`.
for index, row in fights.iterrows():
    # Just fights for which there is no result
    if row.result != NO_RESULT:
        continue

    # Finding right row of results table
    name_similarity = scraped_fighter_1_names.apply(
        lambda scraped_name, target_name=row.fighter_1: fuzz.ratio(scraped_name, target_name)
    )
    result_row = result_table[name_similarity > MIN_NAME_SIMILARITY]

    # Zero matches or an ambiguous match: leave the sentinel in place.
    if len(result_row) != 1:
        continue

    # Determining winner by record
    wins_fighter_1_before = row.wins_1
    wins_fighter_2_before = row.wins_2
    wins_fighter_1_after = result_row.wins_fighter_1.values[0]
    wins_fighter_2_after = result_row.wins_fighter_2.values[0]

    if wins_fighter_1_after > wins_fighter_1_before:
        fights.loc[index, 'result'] = 1
    elif wins_fighter_2_after > wins_fighter_2_before:
        fights.loc[index, 'result'] = 0
    # If neither win count increased, the row keeps the sentinel value.

In [152]:
# Display the fights table after the result-matching cell above has run.
fights

Unnamed: 0,fighter_1,weight_1,reach_1,age_1,slpm_1,sapm_1,td_avg_1,sub_avg_1,strk_acc_1,strk_def_1,...,sub_avg_2,strk_acc_2,strk_def_2,td_acc_2,td_def_2,wins_2,losses_2,result,SUB_OVR,KO_OVR
0,DerrickLewis,260.0,79.0,36,2.59,2.16,0.52,0.0,50.0,44.0,...,0.3,54.0,63.0,21.0,0.0,9.0,0.0,0,0,1
1,JoseAldo,135.0,70.0,35,3.45,3.52,0.57,0.1,45.0,61.0,...,0.8,43.0,58.0,21.0,80.0,19.0,5.0,1,0,0
2,MichaelChiesa,170.0,75.0,34,1.89,1.71,3.60,0.9,40.0,54.0,...,0.9,54.0,52.0,50.0,65.0,20.0,7.0,0,1,0
3,TeciaTorres,115.0,60.0,32,4.30,3.37,0.68,0.1,47.0,62.0,...,0.1,50.0,64.0,36.0,76.0,13.0,9.0,-2,0,0
4,SongYadong,135.0,67.0,24,4.35,3.64,0.59,0.4,42.0,57.0,...,0.5,42.0,59.0,39.0,59.0,16.0,3.0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
963,DrakkarKlose,155.0,70.0,35,4.38,3.44,1.80,0.0,55.0,52.0,...,1.6,55.0,52.0,50.0,50.0,13.0,3.0,1,0,0
964,SteveGarcia,145.0,75.0,31,4.85,2.61,1.51,0.4,56.0,46.0,...,0.6,45.0,64.0,37.0,42.0,20.0,6.0,-10,0,0
965,RodolfoBellato,205.0,77.0,27,5.39,4.90,0.82,0.8,62.0,45.0,...,0.0,56.0,42.0,0.0,33.0,19.0,4.0,1,0,0
966,WellingtonTurman,170.0,72.0,27,3.04,3.87,1.49,0.8,49.0,46.0,...,0.0,47.0,48.0,0.0,68.0,22.0,9.0,0,0,0


In [145]:
# Show the last 10 rows of the fights table.
fights.tail(10)

Unnamed: 0,fighter_1,weight_1,reach_1,age_1,slpm_1,sapm_1,td_avg_1,sub_avg_1,strk_acc_1,strk_def_1,...,strk_acc_2,strk_def_2,td_acc_2,td_def_2,wins_2,losses_2,result,SUB_OVR,KO_OVR,Favorite
958,SeanBrady,170.0,10.0,31,4.17,3.86,2.8,0.8,53.0,62.0,...,41.0,58.0,30.0,62.0,18.0,8.0,-10,0,0,1.0
959,ClayGuida,155.0,70.0,42,2.63,3.01,3.16,0.6,33.0,61.0,...,40.0,52.0,50.0,58.0,12.0,5.0,-10,0,0,0.0
960,PunaheleSoriano,185.0,11.0,31,4.04,4.22,0.9,0.0,45.0,48.0,...,41.0,46.0,35.0,46.0,14.0,5.0,-10,0,0,0.0
961,MieshaTate,135.0,65.0,37,2.61,3.1,1.88,1.3,44.0,51.0,...,44.0,55.0,33.0,61.0,9.0,2.0,-10,0,0,1.0
962,ZachReese,185.0,77.0,29,6.49,8.92,0.0,2.2,61.0,31.0,...,54.0,43.0,50.0,73.0,9.0,5.0,-10,0,0,0.0
963,DrakkarKlose,155.0,70.0,35,4.38,3.44,1.8,0.0,55.0,52.0,...,55.0,52.0,50.0,50.0,13.0,3.0,-10,0,0,1.0
964,SteveGarcia,145.0,75.0,31,4.85,2.61,1.51,0.4,56.0,46.0,...,45.0,64.0,37.0,42.0,20.0,6.0,-10,0,0,
965,RodolfoBellato,205.0,77.0,27,5.39,4.9,0.82,0.8,62.0,45.0,...,56.0,42.0,0.0,33.0,19.0,4.0,-10,0,0,
966,WellingtonTurman,170.0,72.0,27,3.04,3.87,1.49,0.8,49.0,46.0,...,47.0,48.0,0.0,68.0,22.0,9.0,-10,0,0,
967,VeronicaHardy,125.0,64.0,28,3.35,4.38,1.34,0.5,46.0,45.0,...,73.0,58.0,0.0,75.0,6.0,0.0,-10,0,0,


In [146]:
# Inspect `row`, the loop variable left behind by the most recent `iterrows()` loop.
# This cell only works if such a loop has already run in the current kernel.
row

fighter_1         SteveGarcia
weight_1                145.0
reach_1                  75.0
age_1                      31
slpm_1                   4.85
sapm_1                   2.61
td_avg_1                 1.51
sub_avg_1                 0.4
strk_acc_1               56.0
strk_def_1               46.0
td_acc_1                 44.0
td_def_1                 94.0
wins_1                   14.0
losses_1                  5.0
fighter_2     MelquizaelCosta
weight_2                145.0
reach_2                  10.0
age_2                      27
slpm_2                   5.11
sapm_2                   2.53
td_avg_2                 1.87
sub_avg_2                 0.6
strk_acc_2               45.0
strk_def_2               64.0
td_acc_2                 37.0
td_def_2                 42.0
wins_2                   20.0
losses_2                  6.0
result                    -10
SUB_OVR                     0
KO_OVR                      0
Name: 964, dtype: object