In [3]:
import requests
import pandas as pd
from io import StringIO
import numpy as np

## UFC Fighter Dataset

In [4]:
#scrape from ufc stats
# Download the fighter listing page (last names starting with "a", all rows on one page).
# A timeout keeps the kernel from hanging on an unresponsive server, and
# raise_for_status() stops an HTTP error page from being parsed as fighter data.
response = requests.get(
    "http://ufcstats.com/statistics/fighters?char=a&page=all",
    timeout=30,
)
response.raise_for_status()
webpage_source = response.text

In [5]:
# Parse every <table> in the downloaded HTML; the fighter listing is the first one.
data = pd.read_html(StringIO(webpage_source))

#remove empty row and rename columns
# drop() returns a new frame, so the parsed table in data[0] is left untouched.
raw_colnames = ['First', 'Last', 'Nickname', 'Height', 'Weight', 'Reach', 'Stance', 'Win', 'Loss', 'Draw', 'Belt']
fighter_raw = data[0].drop(index=0)
fighter_raw.columns = raw_colnames

#replace values
# The site writes missing values as "--"; turn them into NaN before any parsing.
fighter_clean = fighter_raw.replace('--', np.nan)
# regex=False makes the replacements literal regardless of the pandas version's default.
weight_text = fighter_clean['Weight'].str.replace(" lbs.", "", regex=False)
reach_text = fighter_clean['Reach'].str.replace("\"", "", regex=False)

#convert feet to inches
# Heights look like 5' 11" -- capture feet and inches in one pass; rows without a
# height (NaN) stay NaN in the result.
height_parts = fighter_clean['Height'].str.extract(r"(?P<feet>\d+)'\s*(?P<inches>\d+)").astype(float)
height_inches = height_parts['feet'] * 12 + height_parts['inches']

#create primary key, retype, and clean up
# PK is First + Last + weight text (e.g. "TomAaron155"), built before Weight becomes a float.
# NOTE(review): PK is NaN whenever First, Last or Weight is missing, and is not
# guaranteed unique -- confirm this is acceptable for a primary key.
# Columns are selected by name (not position) so a change in the scraped layout
# cannot silently mislabel them. Nickname and Belt are dropped; Height is in inches.
colnames = ['PK', 'First', 'Last', 'Height', 'Weight', 'Reach', 'Stance', 'Win', 'Loss', 'Draw']
fighter_df = fighter_clean.assign(
    PK=fighter_clean['First'] + fighter_clean['Last'] + weight_text,
    Height=height_inches,
    Weight=weight_text.astype(float),
    # Reach is made numeric like Weight; unparseable values become NaN instead of raising.
    Reach=pd.to_numeric(reach_text, errors='coerce'),
)[colnames]

fighter_df

Unnamed: 0,PK,First,Last,Height,Weight,Reach,Stance,Win,Loss,Draw
1,TomAaron155,Tom,Aaron,,155.0,,,5.0,3.0,0.0
2,DannyAbbadi155,Danny,Abbadi,71.0,155.0,,Orthodox,4.0,6.0,0.0
3,NarimanAbbasov155,Nariman,Abbasov,68.0,155.0,66.0,Orthodox,28.0,4.0,0.0
4,DavidAbbott265,David,Abbott,72.0,265.0,,Switch,10.0,15.0,0.0
5,HamdyAbdelwahab264,Hamdy,Abdelwahab,74.0,264.0,72.0,Southpaw,5.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
216,AbuAzaitar185,Abu,Azaitar,69.0,185.0,76.0,Orthodox,14.0,4.0,1.0
217,OttmanAzaitar155,Ottman,Azaitar,68.0,155.0,71.0,Switch,13.0,2.0,0.0
218,LuizAzeredo154,Luiz,Azeredo,69.0,154.0,,Orthodox,15.0,10.0,0.0
219,LucianoAzevedo161,Luciano,Azevedo,75.0,161.0,,Orthodox,17.0,9.0,1.0


## Master Dataset

In [6]:
# Load the master fight dataset; the relative path is resolved against the
# kernel's current working directory.
ufc = pd.read_csv("ufc-master.csv")


In [16]:
# Columns kept from the master dataset: fighter names, odds/expected value,
# event metadata, per-corner (B_ = blue, R_ = red) streaks, physical attributes
# and records, plus the 'Winner' label.
master_columns = [
    'R_fighter', 'B_fighter', 'R_odds', 'B_odds', 'R_ev', 'B_ev',
    'country', 'weight_class', 'gender',
    'B_current_lose_streak', 'B_current_win_streak', 'B_Stance',
    'B_Height_cms', 'B_Reach_cms', 'B_Weight_lbs', 'B_wins', 'B_losses',
    'R_current_lose_streak', 'R_current_win_streak', 'R_Stance',
    'R_Height_cms', 'R_Reach_cms', 'R_Weight_lbs', 'R_losses', 'R_wins',
    'Winner',
]
# .copy() makes the subset an independent frame, so assigning 'Winner' below
# does not raise SettingWithCopyWarning.
ufc = ufc[master_columns].copy()
# Encode the winning corner as text: "Blue" -> "0", "Red" -> "1".
# regex=False keeps the replacement literal regardless of the pandas default.
ufc['Winner'] = (
    ufc['Winner']
    .str.replace("Blue", "0", regex=False)
    .str.replace("Red", "1", regex=False)
)
ufc

Unnamed: 0,R_fighter,B_fighter,R_odds,B_odds,R_ev,B_ev,country,weight_class,gender,B_current_lose_streak,...,B_losses,R_current_lose_streak,R_current_win_streak,R_Stance,R_Height_cms,R_Reach_cms,R_Weight_lbs,R_losses,R_wins,Winner
0,Thiago Santos,Johnny Walker,-150.0,130,66.666667,130.000000,USA,Light Heavyweight,MALE,0,...,2,3,0,Orthodox,187.96,193.04,205,8,13,1
1,Alex Oliveira,Niko Price,170.0,-200,170.000000,50.000000,USA,Welterweight,MALE,2,...,5,2,0,Orthodox,180.34,193.04,170,8,11,0
2,Misha Cirkunov,Krzysztof Jotko,110.0,-130,110.000000,76.923077,USA,Middleweight,MALE,1,...,5,1,0,Orthodox,190.50,195.58,205,4,6,0
3,Alexander Hernandez,Mike Breeden,-675.0,475,14.814815,475.000000,USA,Lightweight,MALE,1,...,1,1,0,Orthodox,175.26,182.88,155,3,4,1
4,Joe Solecki,Jared Gordon,-135.0,115,74.074074,115.000000,USA,Lightweight,MALE,0,...,3,0,4,Orthodox,175.26,177.80,155,0,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4891,Duane Ludwig,Darren Elkins,-155.0,135,64.516129,135.000000,USA,Lightweight,MALE,0,...,0,1,0,Orthodox,177.80,177.80,170,1,2,0
4892,John Howard,Daniel Roberts,-210.0,175,47.619048,175.000000,USA,Welterweight,MALE,0,...,0,0,3,Orthodox,170.18,180.34,170,0,3,1
4893,Brendan Schaub,Chase Gormley,-260.0,220,38.461538,220.000000,USA,Heavyweight,MALE,1,...,1,1,0,Orthodox,193.04,198.12,245,1,0,1
4894,Mike Pierce,Julio Paulino,-420.0,335,23.809524,335.000000,USA,Welterweight,MALE,0,...,0,1,0,Orthodox,172.72,177.80,170,1,1,1


In [22]:
# Keep only fights with no missing value in any column of `ufc`.
# NOTE(review): this also drops rows whose only gaps are in columns the model
# does not use -- consider dropna(subset=features + ['Winner']) if more
# training rows are wanted.
ufc2 = ufc.dropna()

features = ['R_odds', 'B_odds', 'R_ev', 'B_ev', 'R_losses', 'R_wins', 'B_losses', 'B_wins']
# X and y are taken from the same already-complete frame so their rows stay
# aligned; a further dropna() on X alone would be a no-op here and could
# desynchronise X from y if it ever removed rows.
X = ufc2[features]
# 'Winner' holds 0/1 codes (possibly as text); cast so the target is numeric.
y = ufc2['Winner'].astype(int)

## Rudimentary Model

In [25]:
from sklearn.tree import DecisionTreeRegressor

# Fit a decision tree on the selected features; fit() returns the estimator
# itself, so construction and training are chained. The fixed random_state
# keeps the fitted tree reproducible.
# NOTE(review): the target is a binary 0/1 label, so a classifier may be a
# better fit than a regressor -- confirm intent.
ufc_model = DecisionTreeRegressor(random_state=32).fit(X, y)

# NOTE(review): these are predictions on the same rows the model was trained
# on, so they do not measure out-of-sample performance.
predictions = ufc_model.predict(X)
predictions

array([1. , 0. , 0. , ..., 0.5, 1. , 0. ])