# Jonathan Halverson
# Monday, March 6, 2017
# Clean and conform the three data sources

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.style.use('halverson')
%matplotlib inline

In [2]:
# Load the four inputs: FightMetric fight cards, FightMetric fighters,
# UFC.com fighters and the Wikipedia birthday/height/reach table.
# Date-like columns are parsed while reading.
fights = pd.read_csv(
    'data/fightmetric_cards/fightmetric_fights_CLEAN_3-6-2017.csv',
    header=0,
    parse_dates=['Date'],
)
fighters_fm = pd.read_csv(
    'data/fightmetric_fighters/fightmetric_fighters_CLEAN_3-6-2017.csv',
    header=0,
    parse_dates=['Dob'],
)
fighters_ufc = pd.read_csv(
    'data/ufc_dot_com_fighter_data_CLEAN_28Feb2017.csv',
    header=0,
)
fighters_wiki = pd.read_csv(
    'data/wikipedia_bdays_height_reach.csv',
    header=0,
    parse_dates=['Dob'],
)

In [3]:
# Preview the first rows of the FightMetric fight-card table.
fights.head()

Unnamed: 0,Winner,Outcome,Loser,WeightClass,Method,MethodNotes,Round,Time,Event,Date,Location
0,Germaine de Randamie,def.,Holly Holm,Women's Featherweight,U-DEC,,5,5:00,UFC 208: Holm vs. De Randamie,2017-02-11,"New York, New York, USA"
1,Anderson Silva,def.,Derek Brunson,Middleweight,U-DEC,,3,5:00,UFC 208: Holm vs. De Randamie,2017-02-11,"New York, New York, USA"
2,Jacare Souza,def.,Tim Boetsch,Middleweight,SUB,Kimura,1,3:41,UFC 208: Holm vs. De Randamie,2017-02-11,"New York, New York, USA"
3,Glover Teixeira,def.,Jared Cannonier,Light Heavyweight,U-DEC,,3,5:00,UFC 208: Holm vs. De Randamie,2017-02-11,"New York, New York, USA"
4,Dustin Poirier,def.,Jim Miller,Lightweight,M-DEC,,3,5:00,UFC 208: Holm vs. De Randamie,2017-02-11,"New York, New York, USA"


In [4]:
# Age in years as of the moment the notebook is run (so it changes between runs).
# The elapsed time is converted through total seconds using a 365.2425-day year,
# the same year length np.timedelta64(1, 'Y') stands for; dividing by the 'Y'
# unit directly is rejected by newer pandas releases as ambiguous.
# A missing Dob (NaT) yields NaN.
SECONDS_PER_YEAR = 365.2425 * 24 * 3600
elapsed = pd.to_datetime('today') - fighters_fm.Dob
fighters_fm['Age'] = elapsed.dt.total_seconds() / SECONDS_PER_YEAR
fighters_fm.head()

Unnamed: 0,Name,Nickname,Dob,Age,Weight,Height,Reach,Stance,Win,Loss,Draw
0,Tom Aaron,,1978-07-13,38.653771,155.0,,,,5.0,3.0,0.0
1,Danny Abbadi,The Assassin,1983-07-03,33.681732,155.0,71.0,,Orthodox,4.0,6.0,0.0
2,David Abbott,Tank,NaT,,265.0,72.0,,Switch,10.0,14.0,0.0
3,Shamil Abdurakhimov,Abrek,1981-09-02,35.513392,235.0,75.0,76.0,Orthodox,16.0,4.0,0.0
4,Hiroyuki Abe,Abe Ani,NaT,,145.0,66.0,,Orthodox,8.0,14.0,3.0


In [5]:
# Preview the first rows of the UFC.com fighter table.
fighters_ufc.head()

Unnamed: 0,Name,Nickname,Active,Record,Age,Height,Weight,Reach,LegReach,OutOf,From,College,Degree,Summary
0,Danny Abbadi,The Assassin,0,,33.0,71.0,185.0,,,,Amman Jordan,,,Striking and kicks
1,Tank Abbott,Tank,0,,51.0,72.0,250.0,,,,"Orange, California USA",,,"Punching power, strength"
2,Shamil Abdurakhimov,Abrek,1,17-4-0,35.0,75.0,235.0,76.0,41.0,Dagestan Russia,Dagestan Russia,,,Wrestling and boxing
3,Papy Abedi,Makambo,0,,38.0,71.0,185.0,74.0,,Stockholm Sweden,Kinshasa Democratic Republic of Congo,,,"Hits hard, strong, always in good shape, great..."
4,Ricardo Abreu,Demente,1,5-1-0,32.0,71.0,185.0,73.0,,"Huntington Beach, California USA","Dourados, Mato Grosso do Sul Brazil",,,Jiu-Jitsu


In [6]:
# Age in years as of the moment the notebook is run (so it changes between runs).
# The elapsed time is converted through total seconds using a 365.2425-day year,
# the same year length np.timedelta64(1, 'Y') stands for; dividing by the 'Y'
# unit directly is rejected by newer pandas releases as ambiguous.
# A missing Dob (NaT) yields NaN.
SECONDS_PER_YEAR = 365.2425 * 24 * 3600
elapsed = pd.to_datetime('today') - fighters_wiki.Dob
fighters_wiki['Age'] = elapsed.dt.total_seconds() / SECONDS_PER_YEAR
fighters_wiki.head()

Unnamed: 0,Name,Dob,Height,Reach,Age
0,Frank Mir,1979-05-24,75.0,79.0,37.79133
1,Andrei Arlovski,1979-02-04,75.0,77.0,38.089762
2,Stefan Struve,1988-02-19,84.0,84.0,29.049193
3,Roy Nelson,1976-06-20,72.0,72.0,40.715415
4,Cain Velasquez,1982-07-28,73.0,77.0,34.61262


## Match names between FightMetric and UFC.com 

### The general idea is to improve the FightMetric data using the UFC.com and Wikipedia data. We begin by forming a list of everyone on the UFC cards as determined by the FightMetric fight card data of winners and losers:

In [7]:
# Every name that appears on a FightMetric fight card, as winner or loser.
# pd.concat is used instead of Series.append, which is deprecated and absent
# from newer pandas releases; the resulting set is the same.
win_lose = set(pd.concat([fights.Winner, fights.Loser], ignore_index=True))

### Which fighters (names) fought three or more times in the UFC but are not in the UFC.com data set?

In [8]:
# Number of card appearances (wins plus losses) per fighter name.
# pd.concat is used instead of Series.append, which is deprecated and absent
# from newer pandas releases.
s = pd.concat([fights.Winner, fights.Loser], ignore_index=True).value_counts()
# Names with three or more appearances.
three_fights_fm = s[s >= 3].index

In [9]:
# Names with 3+ fights that have no exact match in the UFC.com table.
# NOTE: the comparison is case-sensitive. Names ought to be lowercased before
# matching, but that is not done here, so differences in capitalization alone
# show up as mismatches.
set(three_fights_fm) - set(fighters_ufc.Name)

{'Benji Radach',
 'David Abbott',
 'Edwin DeWees',
 'Jacare Souza',
 'Joshua Sampo',
 'Luiz Cane',
 'Manvel Gamburyan',
 'Marcio Alexandre Junior',
 'Marcos Rogerio de Lima',
 'Miguel Torres',
 'Mike de la Torre',
 'Mike van Arsdale',
 'Mostapha Al-Turk',
 'Philip De Fries',
 'Polo Reyes',
 'Rafael Feijao',
 'Rameau Thierry Sokoudjou',
 'Richard Walsh',
 'Robert Peralta',
 'Rogerio Nogueira',
 'Scott Smith',
 'Tim Johnson',
 'Tito Ortiz',
 'Tony Fryklund',
 'Tsuyoshi Kohsaka',
 'William Patolino'}

In [10]:
# Conform UFC.com names to the FightMetric spelling for fighters with 3+ fights.
# Note that several UFC fighters are not in the UFC.com database at all
# (e.g., Benji Radach, Scott Smith, Tito Ortiz), so they cannot be fixed here.
#
# Each entry is (UFC.com name, UFC.com nickname or None, FightMetric name).
# When a nickname is given, the row must match on both name and nickname.
UFC_NAME_FIXES = [
    ('Tank Abbott', 'Tank', 'David Abbott'),
    ('Edwin Dewees', 'Babyface', 'Edwin DeWees'),
    ('Ronaldo Souza', 'Jacare', 'Jacare Souza'),
    ('Josh Sampo', 'The Gremlin', 'Joshua Sampo'),
    ('Manny Gamburyan', 'The Anvil', 'Manvel Gamburyan'),
    ('Marcio Alexandre', 'Lyoto', 'Marcio Alexandre Junior'),
    ('Marcos Rogerio De Lima', None, 'Marcos Rogerio de Lima'),
    ('Miguel Angel Torres', None, 'Miguel Torres'),
    ('Mike De La Torre', None, 'Mike de la Torre'),
    ('Mike Van Arsdale', None, 'Mike van Arsdale'),
    ('Mostapha Al Turk', None, 'Mostapha Al-Turk'),
    ('Phil De Fries', None, 'Philip De Fries'),
    ('Marco Polo Reyes', 'El Toro', 'Polo Reyes'),
    ('Rafael Cavalcante', 'Feijao', 'Rafael Feijao'),
    ('Rameau Sokoudjou', 'The African Assassin', 'Rameau Thierry Sokoudjou'),
    ('Rich Walsh', 'Filthy', 'Richard Walsh'),
    ('Robbie Peralta', 'Problems', 'Robert Peralta'),
    ('Antonio Rogerio Nogueira', None, 'Rogerio Nogueira'),
    ('Timothy Johnson', None, 'Tim Johnson'),
    ('Tony Frycklund', 'The Freak', 'Tony Fryklund'),
    ('Tsuyoshi Kosaka', 'TK', 'Tsuyoshi Kohsaka'),
    ('William Macario', 'Patolino', 'William Patolino'),
]


def _rename_fighter(frame, old_name, nickname, new_name):
    """Rename rows of ``frame`` whose Name is ``old_name`` to ``new_name``.

    If ``nickname`` is not None the row's Nickname must equal it as well.
    The frame is modified in place.  Returns the number of rows renamed.
    """
    mask = frame.Name == old_name
    if nickname is not None:
        mask = mask & (frame.Nickname == nickname)
    # Boolean-mask assignment replaces the deprecated DataFrame.set_value call.
    frame.loc[mask, 'Name'] = new_name
    return int(mask.sum())


for old_name, nickname, new_name in UFC_NAME_FIXES:
    if _rename_fighter(fighters_ufc, old_name, nickname, new_name) == 0:
        # Surface fixes that no longer apply instead of skipping them silently.
        print('no UFC.com row matched %r' % (old_name,))

### Let's check if the name changes were correctly applied:

In [11]:
# Names with 3+ fights that still have no exact match in the UFC.com table.
set(three_fights_fm) - set(fighters_ufc.Name)

{'Benji Radach', 'Luiz Cane', 'Scott Smith', 'Tito Ortiz'}

### Now use fuzzy matching with remaining cases (should have done this above)

In [12]:
from fuzzywuzzy import process

# FightMetric fighters with exactly 1 or 2 UFC fights.
# pd.concat is used instead of the deprecated Series.append.
s = pd.concat([fights.Winner, fights.Loser], ignore_index=True).value_counts()
two_fights_fm = s[(s == 2) | (s == 1)].index

# Of those, the names not found in the UFC.com table.
not_found = set(two_fights_fm) - set(fighters_ufc.Name)

# These names have no true match in the UFC.com table, so they are skipped
# rather than being renamed to an unrelated fighter.
wrong_match = ['Nate Loughran', 'Julian Sanchez', 'Kit Cope', 'Edilberto de Oliveira',
               'Kevin Ferguson', 'Eddie Mendez', 'Danillo Villefort', 'Masutatsu Yano',
               'Joao Pierini', 'Saeed Hosseini']

# Each rename changes the pool of candidate names for later iterations, so the
# names are visited in sorted order to make the result reproducible.
for fighter in sorted(not_found):
    if fighter in wrong_match:
        continue
    # A plain list is passed as choices: newer fuzzywuzzy releases treat a
    # dict-like object such as a Series as {key: choice} and return a
    # 3-tuple, which would break the two-value unpacking below.
    candidates = fighters_ufc.Name.dropna().tolist()
    # The score is not used; every best match outside wrong_match is accepted.
    best_match, score = process.extractOne(query=fighter, choices=candidates)
    # Formatted string prints the same text under Python 2 and Python 3.
    print('%s <-- %s' % (fighter, best_match))
    # Boolean-mask assignment replaces the deprecated DataFrame.set_value call.
    fighters_ufc.loc[fighters_ufc.Name == best_match, 'Name'] = fighter

Dmitri Stepanov <-- Dmitrei Stepanov
Joe Moreira <-- Joe Moriera
Marcelo Mello <-- Marcello Mello
Josh Stansbury <-- Joshua Stansbury
Flavio Luiz Moura <-- Flavio Moura
Sean Daugherty <-- Sean Daughtery
Josh Rafferty <-- Josh Raferty
Jack Nilson <-- Jack Nilsson
Antonio Dos Santos <-- Antonio dos Santos
Saparbeg Safarov <-- Saparbek Safarov
An Ying Wang <-- Wang Anying
Richard Crunkilton <-- Richard Crunkilton Jr.
Carlos Barreto <-- Carlos Baretto
Eldo Xavier Dias <-- Eldo Dias Xavier
Ron van Clief <-- Ron Van Clief
Josh Stewart <-- Josh Stuart
Kristof Midoux <-- Christophe Midoux
Ryan McGillivray <-- Ryan McGilivray
JC Cottrell <-- J.C. Cottrell
Sai Wang <-- Wang Sai
Jon Olav Einemo <-- John-Olav Einemo
Sanae Kikuta <-- Sinae Kikuta
Shane del Rosario <-- Shane Del Rosario
Ebenezer Fontes Braga <-- Ebenezer Braga
Mark Robinson <-- Mark David Robinson
Jimmy Wallhead <-- Jim Wallhead
Emmanuel Yarborough <-- Emmanuel Yarbrough
Chris Liguori <-- Chris Ligouri
Reza Nasri <-- Reza Nazri
Cesa

### Are there any active UFC fighters not in the FightMetric fight cards?

In [13]:
# Fighters flagged Active == 1 in the UFC.com table whose names never appear
# as a winner or loser on the FightMetric cards.
set(fighters_ufc[fighters_ufc.Active == 1].Name) - win_lose

{'Abdul-Kerim Edilov',
 'Adam Khaliev',
 'Aiemann Zahabi',
 'Alex Enlund',
 'Andre Soukhamthath',
 'Cindy Dandois',
 'Cynthia Calvillo',
 'Daniel Spitz',
 'Desmond Green',
 'Dmitriy Sosnovskiy',
 'Gavin Tucker',
 'Gina Mazany',
 'Jarred Brooks',
 'John Phillips',
 'Justin Willis',
 'Martin Buschkamp',
 'Michel Quinones',
 'Poliana Botelho'}

These must be recent hires since they don't have any UFC fights.

## Now match names between FightMetric and Wikipedia

In [14]:
# Number of distinct names in the Wikipedia table.
len(set(fighters_wiki.Name))

1382

In [15]:
# Number of distinct names on the FightMetric fight cards.
len(win_lose)

1641

In [16]:
# Number of fight-card names with no exact match in the Wikipedia table.
len(win_lose - set(fighters_wiki.Name))

601

In [17]:
# Only these FightMetric names are fuzzy-matched against the Wikipedia table.
matches = ['Emil Meek', 'Joe Duffy', 'Rogerio Nogueira']
not_found = win_lose - set(fighters_wiki.Name)
# Restrict to the listed names and visit them in sorted order so the
# printed log is reproducible.
for fighter in sorted(not_found.intersection(matches)):
    # A plain list is passed as choices: newer fuzzywuzzy releases treat a
    # dict-like object such as a Series as {key: choice} and return a
    # 3-tuple, which would break the two-value unpacking below.
    candidates = fighters_wiki.Name.dropna().tolist()
    best_match, score = process.extractOne(query=fighter, choices=candidates)
    # Formatted string prints the same text under Python 2 and Python 3.
    print('%s <-- %s' % (fighter, best_match))
    # Boolean-mask assignment replaces the deprecated DataFrame.set_value call.
    fighters_wiki.loc[fighters_wiki.Name == best_match, 'Name'] = fighter
# 'Dan Kelly' is renamed directly, without fuzzy matching.
fighters_wiki.loc[fighters_wiki.Name == 'Dan Kelly', 'Name'] = 'Daniel Kelly'

Emil Meek <-- Emil Weber Meek
Joe Duffy <-- Joseph Duffy
Rogerio Nogueira <-- Antonio Rogerio Nogueira


In [18]:
# Recount the fight-card names with no exact match in the Wikipedia table.
len(win_lose - set(fighters_wiki.Name))

597

Since the Wikipedia data was generated by scanning the names of inactive fighters from the FightMetric list and active fighters from Wikipedia, the only chance of new matches comes from name differences among the active fighters. So it makes sense that only a few names were found.

# Part 3: Improve on the FM data using UFC.com and Wikipedia

In [19]:
# Row count of the FightMetric fighter table, as a baseline for the merges below.
fighters_fm.shape[0]

2832

In [20]:
# Left-join the UFC.com columns onto the FightMetric fighters by Name.
# Columns present in both tables receive a '_fm' / '_ufc' suffix.  'Dob' is
# not among the UFC.com columns previewed above, so it gets no suffix from the
# merge and is renamed by hand to match the other FightMetric columns.
s = ('_fm', '_ufc')
tmp = fighters_fm.merge(fighters_ufc, how='left', on='Name', suffixes=s)
tmp = tmp.rename(columns={'Dob': 'Dob_fm'})
tmp.head()

Unnamed: 0,Name,Nickname_fm,Dob_fm,Age_fm,Weight_fm,Height_fm,Reach_fm,Stance,Win,Loss,...,Age_ufc,Height_ufc,Weight_ufc,Reach_ufc,LegReach,OutOf,From,College,Degree,Summary
0,Tom Aaron,,1978-07-13,38.653771,155.0,,,,5.0,3.0,...,,,,,,,,,,
1,Danny Abbadi,The Assassin,1983-07-03,33.681732,155.0,71.0,,Orthodox,4.0,6.0,...,33.0,71.0,185.0,,,,Amman Jordan,,,Striking and kicks
2,David Abbott,Tank,NaT,,265.0,72.0,,Switch,10.0,14.0,...,51.0,72.0,250.0,,,,"Orange, California USA",,,"Punching power, strength"
3,Shamil Abdurakhimov,Abrek,1981-09-02,35.513392,235.0,75.0,76.0,Orthodox,16.0,4.0,...,35.0,75.0,235.0,76.0,41.0,Dagestan Russia,Dagestan Russia,,,Wrestling and boxing
4,Hiroyuki Abe,Abe Ani,NaT,,145.0,66.0,,Orthodox,8.0,14.0,...,,,,,,,,,,


In [21]:
# Row count after the left merge with UFC.com, to compare against the
# FightMetric row count (a left merge adds rows only when a Name is repeated
# in the right-hand table).
tmp.shape[0]

2832

In [22]:
# Tag the Wikipedia columns with a '_wiki' suffix by name before merging,
# instead of relabelling "the last four columns" afterwards, so the result
# does not depend on column order.
wiki_columns = {'Dob': 'Dob_wiki', 'Height': 'Height_wiki',
                'Reach': 'Reach_wiki', 'Age': 'Age_wiki'}
tmp = pd.merge(tmp, fighters_wiki.rename(columns=wiki_columns), on='Name', how='left')

# Absolute disagreement between the FightMetric and UFC.com values;
# NaN wherever either source is missing.
tmp['ReachDiff'] = (tmp.Reach_fm - tmp.Reach_ufc).abs()
tmp['HeightDiff'] = (tmp.Height_fm - tmp.Height_ufc).abs()
tmp['AgeDiff'] = (tmp.Age_fm - tmp.Age_ufc).abs()

In [23]:
# How many fighters fall at each size of FightMetric-vs-UFC.com reach disagreement.
tmp.ReachDiff.value_counts().sort_index()

0.0    824
1.0     19
2.0     18
3.0      4
5.0      1
Name: ReachDiff, dtype: int64

In [24]:
# How many fighters fall at each size of FightMetric-vs-UFC.com height disagreement.
tmp.HeightDiff.value_counts().sort_index()

0.0     1351
1.0      131
2.0       36
3.0        8
4.0        3
5.0        1
6.0        1
7.0        2
9.0        1
11.0       2
12.0       1
Name: HeightDiff, dtype: int64

In [25]:
# Row count after the left merge with Wikipedia, to compare against the
# FightMetric row count.
tmp.shape[0]

2832

In [26]:
# Fighters with the largest reach disagreement between FightMetric and
# UFC.com, with the Wikipedia value shown alongside for comparison.
tmp[['Name', 'Reach_fm', 'Reach_ufc', 'Reach_wiki', 'ReachDiff']].sort_values('ReachDiff', ascending=False).head(20)

Unnamed: 0,Name,Reach_fm,Reach_ufc,Reach_wiki,ReachDiff
272,Jarred Brooks,62.0,67.0,,5.0
310,Nah-Shon Burrell,73.0,76.0,73.0,3.0
1151,Rampage Jackson,73.0,76.0,73.0,3.0
2751,Justin Willis,78.0,81.0,,3.0
2788,Jianping Yang,65.0,68.0,,3.0
172,Alan Belcher,73.0,75.0,75.0,2.0
2148,Jesse Ronson,70.0,72.0,,2.0
1086,Darrell Horcher,70.0,72.0,72.0,2.0
751,Spencer Fisher,70.0,68.0,68.0,2.0
753,Jon Fitch,76.0,74.0,74.0,2.0


Jarred Brooks is a recent hire. His height is 63 inches.

In [27]:
# Fighters with the largest height disagreement between FightMetric and
# UFC.com, with the Wikipedia value shown alongside for comparison.
tmp[['Name', 'Active', 'Height_fm', 'Height_ufc', 'Height_wiki', 'HeightDiff']].sort_values('HeightDiff', ascending=False).head(20)

Unnamed: 0,Name,Active,Height_fm,Height_ufc,Height_wiki,HeightDiff
29,Alfonso Alcarez,0.0,63.0,75.0,,12.0
2324,Wes Shivers,0.0,80.0,69.0,80.0,11.0
1896,Gary Padilla,0.0,71.0,60.0,,11.0
1708,Homer Moore,0.0,70.0,61.0,70.0,9.0
1405,Alberta Cerra Leon,0.0,68.0,75.0,,7.0
2230,Art Santore,0.0,74.0,67.0,,7.0
854,Joey Gilbert,0.0,71.0,65.0,,6.0
1352,Jason Lambert,0.0,75.0,70.0,70.0,5.0
496,Jeff Cox,0.0,70.0,66.0,,4.0
501,Alberto Crane,0.0,65.0,69.0,69.0,4.0


Wes Shivers is 80 inches tall, so the UFC value is not correct. Alfonso Alcarez is only 63 inches tall, so the UFC height must be wrong. UFC lists Padilla's height as 60 inches versus 71 for FM, so UFC must be wrong -- both list his weight as 205.

In [28]:
# Fighters with the largest age disagreement between FightMetric and UFC.com.
# Age_ufc appears as whole years in the UFC.com preview, so differences below
# one year are to be expected.
tmp[['Name', 'Active', 'Age_fm', 'Age_ufc', 'Age_wiki', 'Dob_fm', 'Dob_wiki', 'AgeDiff']].sort_values('AgeDiff', ascending=False).head(40)

Unnamed: 0,Name,Active,Age_fm,Age_ufc,Age_wiki,Dob_fm,Dob_wiki,AgeDiff
9,Sam Adkins,0.0,51.86691,37.0,,1965-04-26,NaT,14.86691
2571,Ronys Torres,0.0,41.569642,31.0,,1975-08-13,NaT,10.569642
2515,Evan Tanner,0.0,46.070761,37.0,46.070761,1971-02-11,1971-02-11,9.070761
58,Amilcar Alves,0.0,28.581011,37.0,,1988-08-08,NaT,8.418989
1050,Richie Hightower,0.0,42.281498,35.0,,1974-11-26,NaT,7.281498
854,Joey Gilbert,0.0,40.756484,46.0,,1976-06-05,NaT,5.243516
421,Dan Christison,0.0,44.866082,40.0,44.866082,1972-04-26,1972-04-26,4.866082
278,Todd Brown,0.0,40.233543,45.0,,1976-12-13,NaT,4.766457
946,Leonardo Augusto Leleco,0.0,34.897363,31.0,,1982-04-15,NaT,3.897363
566,Shane del Rosario,0.0,33.457224,30.0,33.457224,1983-09-23,1983-09-23,3.457224


Several of the differences above are due to the data being a few days old.

In [29]:
def _fill_then_override(frame, target, fallback, override, use_override):
    """Correct column ``target`` of ``frame`` in place.

    1. Where ``target`` is missing, take the value from ``fallback``.
    2. Where ``use_override`` is True and ``override`` is present, replace
       ``target`` with the ``override`` value.

    ``use_override`` is a boolean Series aligned with ``frame``.
    """
    frame[target] = frame[target].fillna(frame[fallback])
    replace = use_override & frame[override].notnull()
    frame.loc[replace, target] = frame.loc[replace, override]


# Vectorized replacement for the former per-name loop; it handles all rows at
# once and does not depend on names being unique.
# Fighters absent from the UFC.com table have Active == NaN after the left
# merge; they are treated as not active.
is_active = tmp.Active.fillna(0) != 0

# Reach and height: fill gaps from Wikipedia, then prefer the UFC.com value
# for active fighters.
_fill_then_override(tmp, 'Reach_fm', 'Reach_wiki', 'Reach_ufc', is_active)
_fill_then_override(tmp, 'Height_fm', 'Height_wiki', 'Height_ufc', is_active)

# Date of birth: fill gaps from Wikipedia only.
tmp['Dob_fm'] = tmp.Dob_fm.fillna(tmp.Dob_wiki)

In [30]:
# Inspect the corrected reach values.  ReachDiff was computed before the
# corrections and is not recomputed, so it reflects the original Reach_fm.
tmp[['Name', 'Active', 'Reach_fm', 'Reach_ufc', 'Reach_wiki', 'ReachDiff']].head(20)

Unnamed: 0,Name,Active,Reach_fm,Reach_ufc,Reach_wiki,ReachDiff
0,Tom Aaron,,,,,
1,Danny Abbadi,0.0,,,,
2,David Abbott,0.0,,,,
3,Shamil Abdurakhimov,1.0,76.0,76.0,76.0,0.0
4,Hiroyuki Abe,,,,,
5,Papy Abedi,0.0,74.0,74.0,74.0,0.0
6,Ricardo Abreu,1.0,73.0,73.0,,0.0
7,Daniel Acacio,,73.0,,73.0,
8,Scott Adams,0.0,,,,
9,Sam Adkins,0.0,,,,


In [31]:
# Inspect the corrected height values.  HeightDiff was computed before the
# corrections and is not recomputed, so it reflects the original Height_fm.
tmp[['Name', 'Active', 'Height_fm', 'Height_ufc', 'Height_wiki', 'HeightDiff']].head(20)

Unnamed: 0,Name,Active,Height_fm,Height_ufc,Height_wiki,HeightDiff
0,Tom Aaron,,,,,
1,Danny Abbadi,0.0,71.0,71.0,,0.0
2,David Abbott,0.0,72.0,72.0,,0.0
3,Shamil Abdurakhimov,1.0,75.0,75.0,75.0,0.0
4,Hiroyuki Abe,,66.0,,,
5,Papy Abedi,0.0,71.0,71.0,71.0,0.0
6,Ricardo Abreu,1.0,71.0,71.0,,0.0
7,Daniel Acacio,,68.0,,70.0,
8,Scott Adams,0.0,72.0,72.0,,0.0
9,Sam Adkins,0.0,75.0,72.0,,3.0


### Write out the final dataframe

In [32]:
# Build the final table from the corrected FightMetric-side columns plus
# LegReach from UFC.com.  Columns are selected by name rather than position,
# and .copy() gives an independent frame so the assignments below do not
# write into a slice of tmp.
source_cols = ['Name', 'Nickname_fm', 'Dob_fm', 'Age_fm', 'Weight_fm', 'Height_fm',
               'Reach_fm', 'Stance', 'Win', 'Loss', 'Draw', 'LegReach']
fnl = tmp[source_cols].copy()

# Drop the '_fm' suffixes.
cols = ['Name', 'Nickname', 'Dob', 'Age', 'Weight', 'Height', 'Reach', 'Stance', 'Win', 'Loss', 'Draw', 'LegReach']
fnl.columns = cols

# Round age to one decimal; missing ages stay NaN.
fnl['Age'] = fnl['Age'].round(1)

# Age is kept in fnl but is not written to the CSV.
cols = ['Name', 'Nickname', 'Dob', 'Weight', 'Height', 'Reach', 'LegReach', 'Stance', 'Win', 'Loss', 'Draw']
fnl[cols].to_csv('data/fightmetric_fighters_with_corrections_from_UFC_Wikipedia_CLEAN.csv', index=False)