# Setting Up Environment

In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager

import time
import pandas as pd
import re
from fuzzywuzzy import fuzz
from bs4 import BeautifulSoup
import datetime as dt
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample



In [41]:
prediction_df = pd.read_csv('mma_data_predictions.csv', index_col = 0)
prediction_df = predictions_df[-7:]

best_diff = 0.05
best_fight_number = 5

def calculate_bets(row, diff):
    bet = 0
    fighter = ''
    if (row.Prediction_GB_Winner != 0):
        if row.Prediction_GB_Winner - calculate_odds(row.Fighter_1_Odds) >= diff:
            bet = 100
            fighter = row.Fighter_1
        if (1.0 - row.Prediction_GB_Winner) - calculate_odds(row.Fighter_2_Odds) >= diff:
            bet = 100
            fighter = row.Fighter_2
    if bet > 0:
        rec = f'Bet 100 on {fighter}'
    else:
        rec = 'No bet'
    return rec
def calculate_odds(odds):
    if odds<0:
        return (abs(odds)/(abs(odds)+100))
    if odds>0:
        return (100/(odds+100))

In [42]:
# Instantiating webdriver
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.get('https://www.actionnetwork.com/ufc/odds')

# Getting odds table and formatting
html = driver.page_source
tables = pd.read_html(html)
odds = tables[0]
odds = odds.iloc[::2]
odds.reset_index(drop = True, inplace = True)

# Iterating through to get each fighter's odds
odds_df = pd.DataFrame(columns = ['Fighter_1', 'Fighter_2', 'Fighter_1_Odds', 'Fighter_2_Odds'])
fighter_2_regex = r'^[A-Za-z]+\s[A-Za-z]+'
fighter_1_regex = r'[A-Za-z]+\s[A-Za-z]+(?=[A-Za-z]*\.)'
flag_regex = r'[^\x00-\x7F]'
for index, row in odds.iterrows():
    # Getting fighter names
    names_string = re.sub(flag_regex, '', row.Scheduled)
    names_split = names_string.split()
    if len(names_split) == 5:
        fighter_2 = names_split[0] + ' ' + names_split[1][:-2]
        # Splitting middle part to get fighter 1 first name
        need_to_split = names_split[2]
        split = re.findall('[A-Z][^A-Z]*', need_to_split)
        fighter_1 = split[1] + ' ' + names_split[-1]
    else:
        # Case where first name is three names
        if len(re.findall('[A-Z][^A-Z]*', names_split[2])) > 1:
            need_to_split = names_split[2]
            split = re.findall('[A-Z][^A-Z]*', need_to_split)
            fighter_2 = names_split[0] + ' ' + names_split[1] + ' ' + split[0]
            # Case where second name is three names
            if len(re.findall('[A-Z][^A-Z]*', names_split[6])) > 1:
                need_to_split = names_split[4]
                split = re.findall('[A-Z][^A-Z]*', need_to_split)
                fighter_1 = split[1] + ' ' + names_split[5] + ' ' + names_split[-1]
            # Case where second name is four names
            else:
                need_to_split = names_split[4]
                split = re.findall('[A-Z][^A-Z]*', need_to_split)
                fighter_1 = split[1] + ' ' + names_split[5] + ' ' + names_split[6] + ' ' + names_split[-1]
        # Case where first name is four names
        else:
            need_to_split = names_split[3]
            split = re.findall('[A-Z][^A-Z]*', need_to_split)
            fighter_2 = names_split[0] + ' ' + names_split[1] + ' ' + names_split[2] + ' ' + split[0]
            # Case where second name is three names
            if len(re.findall('[A-Z][^A-Z]*', names_split[7])) > 1:
                need_to_split = names_split[4]
                split = re.findall('[A-Z][^A-Z]*', need_to_split)
                fighter_1 = split[1] + ' ' + names_split[6] + ' ' + names_split[-1]
            # Case where second name is four names
            else:
                need_to_split = names_split[5]
                split = re.findall('[A-Z][^A-Z]*', need_to_split)
                fighter_1 = split[1] + ' ' + names_split[6] + ' ' + names_split[7] + ' ' + names_split[-1]
    # Getting fighter odds
    ml_string = row['Unnamed: 3']
    if len(ml_string) == 8:
        ml_fighter_2 = ml_string[:4]
        ml_fighter_1 = ml_string[-4:]
    elif len(ml_string) == 9:
        if (ml_string[4] == '+') | (ml_string[4]=='-'):
            ml_fighter_2 = ml_string[:4]
            ml_fighter_1 = ml_string[-5:]
        else:
            ml_fighter_2 = ml_string[:5]
            ml_fighter_1 = ml_string[-4:]
    elif len(ml_string) == 10:
            ml_fighter_2 = ml_string[:5]
            ml_fighter_1 = ml_string[-5:]
    else:
        continue
    try:
        ml_fighter_2 = float(ml_fighter_2)
    except:
        continue
    try:
        ml_fighter_1 = float(ml_fighter_1)
    except:
        continue
    # Adding data to odds df
    new_data = [fighter_1, fighter_2, ml_fighter_1, ml_fighter_2]
    new_df = pd.DataFrame([new_data])
    new_df.columns = odds_df.columns
    odds_df = pd.concat([odds_df, new_df], ignore_index = True)

# Calculating bets
odds_df['Prediction_GB_Winner'] = 0
for index, row in odds_df.iterrows():
    prediction_df['FUZZ_1'] = prediction_df.fighter_1.apply(lambda x: fuzz.ratio(x, row.Fighter_1))
    prediction_df['FUZZ_2'] = prediction_df.fighter_1.apply(lambda x: fuzz.ratio(x, row.Fighter_2))
    try:
        row = prediction_df.loc[(prediction_df.FUZZ_1 > 50) | (prediction_df.FUZZ_2 > 50)]
        gb = row['Prediction_GB_Winner'].values[0]
        if row['FUZZ_1'].values[0] > 50:
            pass
        else:
            gb = 1.0 - gb
        fights_1 = row['wins_1'].values[0] + row['losses_1'].values[0]
        fights_2 = row['wins_2'].values[0] + row['losses_2'].values[0]
        if (fights_1 > best_fight_number) | (fights_2 > best_fight_number):
            odds_df.loc[index, 'Prediction_GB_Winner'] = gb
        else:
            continue
    except:
        continue
odds_df['Bet'] = odds_df.apply(calculate_bets, diff = best_diff, axis = 1)
odds_df = odds_df.iloc[:, :6]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_df['FUZZ_1'] = prediction_df.fighter_1.apply(lambda x: fuzz.ratio(x, row.Fighter_1))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_df['FUZZ_2'] = prediction_df.fighter_1.apply(lambda x: fuzz.ratio(x, row.Fighter_2))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  prediction_df