# What we want to do is create a dataframe for all the fights in ufc-master.csv


* r_fighter
* b_fighter
* r_prob
* b_prob
* r_ev
* b_ev
* bet
* winner
* profit

We are training and testing on the same set.  So this shouldn't be used in production.  I'm making this for a Coursera Tableau course

In [1]:
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [2]:
import sys
sys.path.append('../automated_model_creation') #We need to access the function file

In [3]:
import pandas as pd
import numpy as np
from functions import custom_cv_eval, get_ev_from_df, get_bet_ev, get_bet_return
import random
import csv

In [4]:
df = pd.read_csv("../data/kaggle_data/ufc-master.csv")
df_upcoming = pd.read_csv('../data/kaggle_data/upcoming-event.csv')
len(df)



4747

In [5]:
#Let's put all the labels in a dataframe
df['label'] = ''
#If the winner is not Red or Blue we can remove it.
mask = df['Winner'] == 'Red'
df['label'][mask] = 0
mask = df['Winner'] == 'Blue'
df['label'][mask] = 1

#df["Winner"] = df["Winner"].astype('category')
#df = df[(df['Winner'] != 'Blue') | (df['Winner'] == 'Red') ]


#Make sure lable is numeric
df['label'] = pd.to_numeric(df['label'], errors='coerce')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'][mask] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['label'][mask] = 1


In [6]:
#Let's fix the date
df['date'] = pd.to_datetime(df['date'])

In [7]:
#Create a label df:
label_df = df['label']

#Let's create an odds df too:
odds_df = df[['R_odds', 'B_odds']]

In [8]:
df_train = df.copy()
odds_train = odds_df.copy()
label_train = label_df.copy()

df_test = df.copy()
odds_test = odds_df.copy()
label_test = label_df.copy()


In [9]:
#Unranked fighters fix
weightclass_list = ['B_match_weightclass_rank', 'R_match_weightclass_rank', "R_Women's Flyweight_rank", "R_Women's Featherweight_rank", "R_Women's Strawweight_rank", "R_Women's Bantamweight_rank", 'R_Heavyweight_rank', 'R_Light Heavyweight_rank', 'R_Middleweight_rank', 'R_Welterweight_rank', 'R_Lightweight_rank', 'R_Featherweight_rank', 'R_Bantamweight_rank', 'R_Flyweight_rank', 'R_Pound-for-Pound_rank', "B_Women's Flyweight_rank", "B_Women's Featherweight_rank", "B_Women's Strawweight_rank", "B_Women's Bantamweight_rank", 'B_Heavyweight_rank', 'B_Light Heavyweight_rank', 'B_Middleweight_rank', 'B_Welterweight_rank', 'B_Lightweight_rank', 'B_Featherweight_rank', 'B_Bantamweight_rank', 'B_Flyweight_rank', 'B_Pound-for-Pound_rank']
df_train[weightclass_list] = df_train[weightclass_list].fillna(17)
df_test[weightclass_list] = df_test[weightclass_list].fillna(17)

In [10]:
def get_bets(input_model, input_features, input_ev, train_df, train_labels, train_odds, test_df, test_labels,
             test_odds, verbose=True):
    fighters_test = df[['R_ev', 'B_ev','date', 'R_fighter', 'B_fighter']]
    #Train model.  
    df_train = train_df[input_features].copy()
    df_test = test_df[input_features].copy()
    df_train = df_train.dropna()
    df_test = df_test.dropna()
        
    df_train = pd.get_dummies(df_train)
    df_test = pd.get_dummies(df_test)
    df_train, df_test = df_train.align(df_test, join='left', axis=1)    #Ensures both sets are dummified the same
    df_test = df_test.fillna(0)

    #LOOK AT get_ev and prepare the labels and odds
    
    labels_train = train_labels[train_labels.index.isin(df_train.index)]
    odds_train = train_odds[train_odds.index.isin(df_train.index)] 
    labels_test = test_labels[test_labels.index.isin(df_test.index)]
    odds_test = test_odds[test_odds.index.isin(df_test.index)] 
    fighters_test = fighters_test[fighters_test.index.isin(df_test.index)]

    input_model.fit(df_train, labels_train)
    
    probs = input_model.predict_proba(df_test)
    odds_test = np.array(odds_test)  
    fighters_test = np.array(fighters_test)
    #display(fighters_test)
    prepped_test = list(zip(fighters_test[:, -2], fighters_test[:, -1], fighters_test[:, -3], odds_test[:, -2], odds_test[:, -1], 
                            fighters_test[:, -5], fighters_test[:, -4], probs[:, 0], probs[:, 1], labels_test))
    ev_df = pd.DataFrame(prepped_test, columns=['t1_name', 't2_name', 'date', 't1_odds', 't2_odds',
                                                        'R_ev', 'B_ev','t1_prob', 't2_prob', 
                                                         'winner'])
    
    #display(ev_prepped_df)
    min_ev = input_ev
    #display_bets(ev_prepped_df, input_ev)
    bet_list = []
    for index, row in ev_df.iterrows():
        t1_bet_ev = get_bet_ev(row['t1_odds'], row['t1_prob'])
        t2_bet_ev = get_bet_ev(row['t2_odds'], row['t2_prob'])
        #print(f"ODDS:{row['t2_odds']} PROB: {row['t2_prob']} EV: {t2_bet_ev}")
        t1_bet_return = get_bet_return(row['t1_odds'])
        t2_bet_return = get_bet_return(row['t2_odds'])
        print(f"{row['t1_name']} vs. {row['t2_name']}")
        if t1_bet_ev > min_ev:
            print(f"{row['t1_name']} has an EV of {round(t1_bet_ev, 2)} on odds of {round(row['t1_odds'],2)}.  They have {round(row['t1_prob']*100,2)}", 
                  "% chance of winning.")
            bet_list.append('Red')
        elif t2_bet_ev > min_ev:
            print(f"{row['t2_name']} has an EV of {round(t2_bet_ev, 2)} on odds of {round(row['t2_odds'],2)}.  They have {round(row['t2_prob']*100,2)}", 
                  "% chance of winning.")
            bet_list.append('Blue')
        else:
            print(f"No bets.  EV1:{t1_bet_ev}. EV2: {t2_bet_ev}")
            print(f"{row['t1_name']} has an EV of {round(t1_bet_ev, 2)} on odds of {round(row['t1_odds'],2)}.  They have {round(row['t1_prob']*100,2)}", 
                  "% chance of winning.")
            bet_list.append(None)
        print()    
    ev_df['bet'] = bet_list
    return(ev_df)

In [11]:
with open('../automated_model_creation/models.csv', newline='') as f:
    reader = csv.reader(f)
    models = list(reader)
    
#print(len(models))

#set the model choice:
model_choice = 7

#print(models)



model = eval(models[1][model_choice])
features = eval(models[2][model_choice])
ev = eval(models[3][model_choice])
#print()
#print(model, features, ev)

In [12]:
output_df = get_bets(model, features, ev, df_train, label_train, odds_train, df_test, label_test,
                         odds_test, verbose = True)

Israel Adesanya vs. Marvin Vettori
Marvin Vettori has an EV of 185.0 on odds of 185.  They have 100.0 % chance of winning.

Deiveson Figueiredo vs. Brandon Moreno
Brandon Moreno has an EV of 150.0 on odds of 150.  They have 100.0 % chance of winning.

Leon Edwards vs. Nate Diaz
Leon Edwards has an EV of 8.26 on odds of -500.  They have 90.22 % chance of winning.

Demian Maia vs. Belal Muhammad
Demian Maia has an EV of 50.11 on odds of 170.  They have 55.6 % chance of winning.

Paul Craig vs. Jamahal Hill
Paul Craig has an EV of 67.62 on odds of 210.  They have 54.07 % chance of winning.

Drew Dober vs. Brad Riddell
Brad Riddell has an EV of 1.06 on odds of 120.  They have 45.94 % chance of winning.

Eryk Anders vs. Darren Stewart
Eryk Anders has an EV of 0.09 on odds of -150.  They have 60.06 % chance of winning.

Lauren Murphy vs. Joanne Calderwood
Joanne Calderwood has an EV of 19.05 on odds of -148.  They have 71.04 % chance of winning.

Movsar Evloev vs. Hakeem Dawodu
Hakeem Dawodu

Irwin Rivera vs. Ali Qaisi
Irwin Rivera has an EV of 2.79 on odds of -186.  They have 66.85 % chance of winning.

Derek Brunson vs. Edmen Shahbazyan
Derek Brunson has an EV of 9.04 on odds of 275.  They have 29.08 % chance of winning.

Joanne Calderwood vs. Jennifer Maia
Joanne Calderwood has an EV of 16.28 on odds of -162.  They have 71.9 % chance of winning.

Vicente Luque vs. Randy Brown
Vicente Luque has an EV of 6.76 on odds of -200.  They have 71.18 % chance of winning.

Lando Vannata vs. Bobby Green
Bobby Green has an EV of 130.0 on odds of 130.  They have 100.0 % chance of winning.

Frankie Saenz vs. Jonathan Martinez
Frankie Saenz has an EV of 8.21 on odds of 200.  They have 36.07 % chance of winning.

Johnny Munoz vs. Nate Maness
Nate Maness has an EV of 1.14 on odds of -137.  They have 58.46 % chance of winning.

Jamall Emmers vs. Vince Cachero
Jamall Emmers has an EV of 20.64 on odds of -360.  They have 94.42 % chance of winning.

Robert Whittaker vs. Darren Till
Darren Til


Molly McCann vs. Diana Belbita
Molly McCann has an EV of 10.16 on odds of -640.  They have 95.27 % chance of winning.

Kyle Bochniak vs. Sean Woodson
Kyle Bochniak has an EV of 51.56 on odds of -115.  They have 81.07 % chance of winning.

Randy Costa vs. Boston Salmon
Randy Costa has an EV of 40.74 on odds of 130.  They have 61.19 % chance of winning.

Court McGee vs. Sean Brady
Court McGee has an EV of 0.76 on odds of 150.  They have 40.3 % chance of winning.

Brendan Allen vs. Kevin Holland
Brendan Allen has an EV of 132.41 on odds of 135.  They have 98.9 % chance of winning.

Daniel Spitz vs. Tanner Boser
Daniel Spitz has an EV of 89.0 on odds of 125.  They have 84.0 % chance of winning.

Demian Maia vs. Ben Askren
Demian Maia has an EV of 50.06 on odds of 140.  They have 62.53 % chance of winning.

Michael Johnson vs. Stevie Ray
Stevie Ray has an EV of 12.39 on odds of 235.  They have 33.55 % chance of winning.

Frank Camacho vs. Beneil Dariush
Beneil Dariush has an EV of 54.05 on

Katlyn Chookagian has an EV of 31.06 on odds of -160.  They have 80.65 % chance of winning.

Dustin Ortiz vs. Matheus Nicolau
Matheus Nicolau has an EV of 42.92 on odds of -200.  They have 95.28 % chance of winning.

Randa Markos vs. Nina Ansaroff
Nina Ansaroff has an EV of 4.0 on odds of -140.  They have 60.67 % chance of winning.

Devin Powell vs. Alvaro Herrera
Devin Powell has an EV of 17.59 on odds of -105.  They have 60.23 % chance of winning.

Mauricio Rua vs. Anthony Smith
Anthony Smith has an EV of 25.37 on odds of 130.  They have 54.51 % chance of winning.

Glover Teixeira vs. Corey Anderson
Corey Anderson has an EV of 97.93 on odds of 100.  They have 98.97 % chance of winning.

Marcin Tybura vs. Stefan Struve
Marcin Tybura has an EV of 37.01 on odds of -190.  They have 89.77 % chance of winning.

Nasrat Haqparast vs. Marc Diakiese
Nasrat Haqparast has an EV of 26.83 on odds of 140.  They have 52.85 % chance of winning.

Nick Hein vs. Damir Hadzovic
Nick Hein has an EV of 14.

Claudia Gadelha vs. Karolina Kowalkiewicz
Claudia Gadelha has an EV of 14.33 on odds of -280.  They have 84.24 % chance of winning.

Vitor Belfort vs. Nate Marquardt
Nate Marquardt has an EV of 152.81 on odds of 158.  They have 97.99 % chance of winning.

Paulo Costa vs. Oluwale Bamgbose
Paulo Costa has an EV of 12.58 on odds of -265.  They have 81.74 % chance of winning.

Erick Silva vs. Yancy Medeiros
No bets.  EV1:-8.022460279835045. EV2: -0.24223874298595405
Erick Silva has an EV of -8.02 on odds of 120.  They have 41.81 % chance of winning.

Johnny Eduardo vs. Matthew Lopez
Matthew Lopez has an EV of 5.58 on odds of -110.  They have 55.3 % chance of winning.

Viviane Pereira vs. Jamie Moyle
Jamie Moyle has an EV of 8.37 on odds of 158.  They have 42.0 % chance of winning.

Luan Chagas vs. Jimmy Wallhead
Luan Chagas has an EV of 13.29 on odds of -260.  They have 81.82 % chance of winning.

Alexander Gustafsson vs. Glover Teixeira
Alexander Gustafsson has an EV of 17.41 on odds of -

Court McGee vs. Santiago Ponzinibbio
Court McGee has an EV of 8.54 on odds of 100.  They have 54.27 % chance of winning.

Randy Brown vs. Michael Graves
Randy Brown has an EV of 58.69 on odds of 125.  They have 70.53 % chance of winning.

John Dodson vs. Manvel Gamburyan
Manvel Gamburyan has an EV of 359.47 on odds of 400.  They have 91.89 % chance of winning.

Oluwale Bamgbose vs. Cezar Ferreira
No bets.  EV1:-2.0860048036245473. EV2: -5.2513392612509975
Oluwale Bamgbose has an EV of -2.09 on odds of -185.  They have 63.56 % chance of winning.

Elizeu Zaleski dos Santos vs. Omari Akhmedov
Elizeu Zaleski dos Santos has an EV of 2.0 on odds of 125.  They have 45.34 % chance of winning.

Ben Rothwell vs. Junior Dos Santos
Ben Rothwell has an EV of 45.34 on odds of -105.  They have 74.44 % chance of winning.

Derrick Lewis vs. Gabriel Gonzaga
Derrick Lewis has an EV of 23.1 on odds of -150.  They have 73.86 % chance of winning.

Igor Pokrajac vs. Jan Blachowicz
Igor Pokrajac has an EV of 

Michel Prazeres has an EV of 34.22 on odds of 145.  They have 54.78 % chance of winning.

Ray Borg vs. Chris Kelades
Ray Borg has an EV of 9.69 on odds of -975.  They have 99.49 % chance of winning.

Efrain Escudero vs. Rodrigo de Lima
Efrain Escudero has an EV of 50.92 on odds of 155.  They have 59.19 % chance of winning.

Chas Skelly vs. Jim Alers
Chas Skelly has an EV of 24.64 on odds of -230.  They have 86.87 % chance of winning.

Zach Makovsky vs. Tim Elliott
No bets.  EV1:-3.7114274661717914. EV2: -0.7281188787390249
Zach Makovsky has an EV of -3.71 on odds of 110.  They have 45.85 % chance of winning.

Tyron Woodley vs. Kelvin Gastelum
Kelvin Gastelum has an EV of 87.03 on odds of 170.  They have 69.27 % chance of winning.

Joe Lauzon vs. Al Iaquinta
Al Iaquinta has an EV of 52.22 on odds of -185.  They have 98.81 % chance of winning.

Thales Leites vs. Tim Boetsch
Thales Leites has an EV of 6.5 on odds of -300.  They have 79.87 % chance of winning.

Jordan Mein vs. Thiago Alves

Ryan Jimmo has an EV of 140.0 on odds of 140.  They have 100.0 % chance of winning.

Norman Parke vs. Jon Tuck
Norman Parke has an EV of 32.07 on odds of -175.  They have 84.05 % chance of winning.

Phil Harris vs. John Lineker
Phil Harris has an EV of 34.4 on odds of 375.  They have 28.29 % chance of winning.

Al Iaquinta vs. Piotr Hallmann
Al Iaquinta has an EV of 1.98 on odds of -160.  They have 62.76 % chance of winning.

Luke Barnatt vs. Andrew Craig
Luke Barnatt has an EV of 132.51 on odds of 185.  They have 81.58 % chance of winning.

Rosi Sexton vs. Jessica Andrade
Jessica Andrade has an EV of 15.04 on odds of -120.  They have 62.75 % chance of winning.

Andy Ogle vs. Cole Miller
Andy Ogle has an EV of 105.03 on odds of 185.  They have 71.94 % chance of winning.

Bradley Scott vs. Michael Kuiper
Bradley Scott has an EV of 96.83 on odds of 295.  They have 49.83 % chance of winning.

Cain Velasquez vs. Junior Dos Santos
Cain Velasquez has an EV of 14.57 on odds of -200.  They hav

Alexander Gustafsson has an EV of 19.73 on odds of -260.  They have 86.47 % chance of winning.

Brian Stann vs. Alessio Sakara
No bets.  EV1:-1.0553883675747393. EV2: -4.657063852948141
Brian Stann has an EV of -1.06 on odds of -380.  They have 78.33 % chance of winning.

Dennis Siver vs. Diego Nunes
Dennis Siver has an EV of 41.37 on odds of 170.  They have 52.36 % chance of winning.

DaMarques Johnson vs. John Maguire
DaMarques Johnson has an EV of 64.7 on odds of 170.  They have 61.0 % chance of winning.

Brad Pickett vs. Damacio Page
Damacio Page has an EV of 8.25 on odds of 255.  They have 30.49 % chance of winning.

Papy Abedi vs. James Head
James Head has an EV of 24.13 on odds of 170.  They have 45.98 % chance of winning.

Jason Young vs. Eric Wisely
Jason Young has an EV of 27.75 on odds of -170.  They have 80.44 % chance of winning.

Thiago Alves vs. Martin Kampmann
Martin Kampmann has an EV of 119.86 on odds of 135.  They have 93.56 % chance of winning.

Court McGee vs. Cost

Nate Diaz vs. Rory Markham
Nate Diaz has an EV of 31.9 on odds of -235.  They have 92.53 % chance of winning.

Ricardo Almeida vs. Matt Brown
Matt Brown has an EV of 22.53 on odds of 160.  They have 47.13 % chance of winning.

Rousimar Palhares vs. Tomasz Drwal
Rousimar Palhares has an EV of 8.73 on odds of -220.  They have 74.76 % chance of winning.

Rodney Wallace vs. Jared Hamman
Rodney Wallace has an EV of 41.25 on odds of -160.  They have 86.92 % chance of winning.

Brandon Vera vs. Jon Jones
Brandon Vera has an EV of 20.23 on odds of 215.  They have 38.17 % chance of winning.

Junior Dos Santos vs. Gabriel Gonzaga
Junior Dos Santos has an EV of 3.14 on odds of -250.  They have 73.67 % chance of winning.

Cheick Kongo vs. Paul Buentello
Paul Buentello has an EV of 261.6 on odds of 315.  They have 87.13 % chance of winning.

Alessio Sakara vs. James Irvin
Alessio Sakara has an EV of 7.82 on odds of -120.  They have 58.81 % chance of winning.

Clay Guida vs. Shannon Gugerty
Shannon 

In [13]:
output_df

Unnamed: 0,t1_name,t2_name,date,t1_odds,t2_odds,R_ev,B_ev,t1_prob,t2_prob,winner,bet
0,Israel Adesanya,Marvin Vettori,2021-06-12,-235,185,42.553191,185.000000,6.818231e-07,0.999999,0,Blue
1,Deiveson Figueiredo,Brandon Moreno,2021-06-12,-195,150,51.282051,150.000000,4.546628e-28,1.000000,1,Blue
2,Leon Edwards,Nate Diaz,2021-06-12,-500,355,20.000000,355.000000,9.021747e-01,0.097825,0,Red
3,Demian Maia,Belal Muhammad,2021-06-12,170,-220,170.000000,45.454545,5.559506e-01,0.444049,1,Red
4,Paul Craig,Jamahal Hill,2021-06-12,210,-275,210.000000,36.363636,5.407258e-01,0.459274,0,Red
...,...,...,...,...,...,...,...,...,...,...,...
3910,Alessio Sakara,James Irvin,2010-03-21,-120,100,83.333333,100.000000,5.881206e-01,0.411879,0,Red
3911,Clay Guida,Shannon Gugerty,2010-03-21,-420,335,23.809524,335.000000,6.866108e-01,0.313389,0,Blue
3912,Eliot Marshall,Vladimir Matyushenko,2010-03-21,145,-165,145.000000,60.606061,5.215786e-01,0.478421,1,Red
3913,Brendan Schaub,Chase Gormley,2010-03-21,-260,220,38.461538,220.000000,6.861802e-01,0.313820,0,Blue


In [14]:
def get_profit(bet, R_ev, B_ev, winner): 
    if bet == None:
        return 0
    if bet == 'Red' and winner == 0:
        return R_ev
    if bet == 'Blue' and winner == 1:
        return B_ev
    else:
        return -100 

In [15]:
output_df['net_profit'] = output_df.apply(lambda x: get_profit(x['bet'], x['R_ev'], x['B_ev'], x['winner']), axis=1)

In [16]:
output_df

Unnamed: 0,t1_name,t2_name,date,t1_odds,t2_odds,R_ev,B_ev,t1_prob,t2_prob,winner,bet,net_profit
0,Israel Adesanya,Marvin Vettori,2021-06-12,-235,185,42.553191,185.000000,6.818231e-07,0.999999,0,Blue,-100.000000
1,Deiveson Figueiredo,Brandon Moreno,2021-06-12,-195,150,51.282051,150.000000,4.546628e-28,1.000000,1,Blue,150.000000
2,Leon Edwards,Nate Diaz,2021-06-12,-500,355,20.000000,355.000000,9.021747e-01,0.097825,0,Red,20.000000
3,Demian Maia,Belal Muhammad,2021-06-12,170,-220,170.000000,45.454545,5.559506e-01,0.444049,1,Red,-100.000000
4,Paul Craig,Jamahal Hill,2021-06-12,210,-275,210.000000,36.363636,5.407258e-01,0.459274,0,Red,210.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
3910,Alessio Sakara,James Irvin,2010-03-21,-120,100,83.333333,100.000000,5.881206e-01,0.411879,0,Red,83.333333
3911,Clay Guida,Shannon Gugerty,2010-03-21,-420,335,23.809524,335.000000,6.866108e-01,0.313389,0,Blue,-100.000000
3912,Eliot Marshall,Vladimir Matyushenko,2010-03-21,145,-165,145.000000,60.606061,5.215786e-01,0.478421,1,Red,-100.000000
3913,Brendan Schaub,Chase Gormley,2010-03-21,-260,220,38.461538,220.000000,6.861802e-01,0.313820,0,Blue,-100.000000


In [17]:
output_df.to_csv('bet_result_df.csv')