In [1]:
import pandas as pd
import numpy as np
import string

In [2]:
# The fighter listing is split into one page per starting letter; fetch the
# first HTML table from each page and key it by that letter.
fighters = {
    initial: pd.read_html(
        f"http://ufcstats.com/statistics/fighters?char={initial}&page=all"
    )[0]
    for initial in string.ascii_lowercase
}

In [3]:
# Stack the per-letter tables into one DataFrame with a fresh 0..n-1 index
per_letter_tables = list(fighters.values())
all_fighters = pd.concat(per_letter_tables, ignore_index=True)

In [4]:
# Abbreviated source headers -> descriptive column names
column_renames = {
    "Ht.": "Height (cm)",
    "Wt.": "Weight",
    "W": "Wins",
    "L": "Losses",
    "D": "Draws",
    "Reach": "Reach (cm)",
}

# Drop rows in which every cell is empty, rename the columns, then renumber
# the rows so the index is contiguous again.
all_fighters = (
    all_fighters
    .dropna(how='all')
    .rename(columns=column_renames)
    .reset_index(drop=True)
)

In [5]:
# Replace name columns with one combined column.
# A missing first or last name is treated as an empty string so that a fighter
# listed under a single name keeps it; plain `First + ' ' + Last` would turn
# the whole name into NaN as soon as either part is missing.
first_names = all_fighters['First'].fillna('')
last_names = all_fighters['Last'].fillna('')
name_col = (
    (first_names + ' ' + last_names)
    .str.strip()             # remove the stray separator left by a missing part
    .replace('', np.nan)     # both parts missing -> still reported as missing
)
all_fighters.drop(columns=['First', 'Last', 'Belt'], inplace=True)
all_fighters.insert(0, 'Name', name_col)

In [6]:
# Cells holding the '--' placeholder become proper missing values (NaN)
all_fighters = all_fighters.replace(to_replace='--', value=np.nan)

In [7]:
# Clean weight column and add weight category.
# Extract the leading number from each weight string instead of cutting a
# fixed three characters, so the value is parsed correctly whatever its width.
# Missing weights stay NaN.
weight_col = (
    all_fighters['Weight']
    .str.extract(r'^\s*(\d+(?:\.\d+)?)', expand=False)
    .astype(float)
)
all_fighters.drop(columns=['Weight'], inplace=True)
all_fighters.insert(3, 'Weight (lbs)', weight_col)

# Inclusive upper weight limit (lbs) of every category except the last,
# ordered from lightest to heaviest.
weight_limits = [115.0, 125.0, 135.0, 145.0, 155.0, 170.0, 185.0, 205.0, 265.0]
category_names = [
    'Strawweight',
    'Flyweight',
    'Bantamweight',
    'Featherweight',
    'Lightweight',
    'Welterweight',
    'Middleweight',
    'Light Heavyweight',
    'Heavyweight',
    'Super Heavyweight',   # everything above the final limit
]

# Bin the weights into right-closed intervals: (-inf, 115], (115, 125], ...,
# (205, 265], (265, inf). Rows without a weight get no category (NaN).
weights = all_fighters['Weight (lbs)']
all_fighters['Weight Category'] = pd.cut(
    weights,
    bins=[-np.inf, *weight_limits, np.inf],
    labels=category_names,
    right=True,
).astype(object)   # plain labels rather than a categorical column

In [8]:
# Total fights is the sum of the three result counts; a missing count in any
# of them leaves the total missing as well.
decided_fights = all_fighters['Wins'].add(all_fighters['Losses'])
all_fighters['Total Fights'] = decided_fights.add(all_fighters['Draws'])

In [9]:
# Final column layout: identity, physical attributes, then fight record
column_order = [
    'Name',
    'Nickname',
    'Height (cm)',
    'Weight (lbs)',
    'Weight Category',
    'Reach (cm)',
    'Stance',
    'Total Fights',
    'Wins',
    'Losses',
    'Draws',
]
all_fighters = all_fighters[column_order]

In [10]:
# Convert heights to cm.
# Heights are feet/inches strings such as 6' 0". They are parsed rather than
# looked up in a fixed table, so every height is converted and none is left
# behind as a raw string. Values that do not match the pattern, including
# missing ones, become NaN.
height_parts = (
    all_fighters['Height (cm)']
    .str.extract(r"^\s*(\d+)'\s*(\d+)\"\s*$")   # column 0: feet, column 1: inches
    .astype(float)
)
total_inches = height_parts[0] * 12 + height_parts[1]

# 1 inch = 2.54 cm. Scale by 254 and floor-divide by 100 after adding 50 so
# the result is rounded half-up using exact whole-number arithmetic
# (e.g. 6' 3" = 190.5 cm -> 191).
all_fighters['Height (cm)'] = (total_inches * 254 + 50) // 100

In [11]:
# Convert reach to cm.
# Extract the leading number of each reach string instead of slicing a fixed
# four characters. Missing reaches stay NaN.
reach_inches = (
    all_fighters['Reach (cm)']
    .str.extract(r'^\s*(\d+(?:\.\d+)?)', expand=False)
    .astype(float)
)

# 1 inch = 2.54 cm, rounded half-up to whole centimetres. `.round(0)` rounds
# exact halves to the nearest even number, which turned a 75" reach into
# 190 cm although a 75" (6' 3") height is reported as 191 cm.
reach_col = (reach_inches * 254 + 50) // 100
all_fighters['Reach (cm)'] = reach_col

In [12]:
# The row index doubles as the fighter ID; expose it as the first column
all_fighters.insert(loc=0, column='Fighter ID', value=all_fighters.index)

In [13]:
# Write the cleaned table to disk without the index (the ID column covers it)
output_path = "ufc_fighter_data.csv"
all_fighters.to_csv(output_path, index=False)