# Step 1: Importing Libraries

In [1]:
# Scraping
from bs4 import BeautifulSoup
import requests

# Data Manipulation
import numpy as np
import pandas as pd

# Other
import string
import time

# Step 2: Scraping Data

In [2]:
alphabet = string.ascii_lowercase

# Maps each letter to the list of <td class="b-statistics__table-col"> cells
# found on that letter's fighter listing page.
fighter_details = {}

# One session for all requests so the underlying connection is reused.
with requests.Session() as session:
    for letter in alphabet:
        response = session.get(
            "http://www.ufcstats.com/statistics/fighters",
            params={"char": letter, "page": "all"},
            # Without a timeout a stalled connection would hang the notebook forever.
            timeout=30,
        )
        # Fail loudly on an HTTP error instead of parsing an error page into
        # an empty (and silently wrong) list of cells.
        response.raise_for_status()

        soup = BeautifulSoup(response.text, "lxml")

        fighter_details[letter] = soup.find_all("td", {"class": "b-statistics__table-col"})

### Source: http://ufcstats.com/statistics/fighters

### Data Dictionary 
| Column | Description |
| --- | --- |
| First name | First name of the fighter |
| Last name | Last name of the fighter |
| Nick name | Nick name of the fighter |
| Height | Height of the fighter (ft'inch") |
| Weight | Weight of the fighter (lbs) |
| Reach | Total length of a fighter's arms from one fingertip to the other (inches) |
| Stance | Position of a fighter's feet and body |
| Wins | # Fights won by a fighter in their MMA career (not only UFC) |
| Losses | # Fights lost by a fighter in their MMA career (not only UFC) |
| Draw | # Fights the fighter neither won nor lost in their MMA career (not only UFC) |

In [4]:
# Collect the first name of every fighter across all letter pages.
# Takes every 11th cell starting at offset 0; the stride of 11 presumably
# equals the number of cells per table row on the page (verify if the layout changes).
fighter_first_names = [
    cells[idx].text.split("\n")[1]  # second line of the cell text holds the value
    for cells in fighter_details.values()
    for idx in range(0, len(cells), 11)
]

In [5]:
# Collect the last name of every fighter: every 11th cell starting at offset 1.
# NOTE: the stop bound is len + offset; when the cell count is a multiple of 11
# this visits the same indices as a stop bound of len would.
fighter_last_names = [
    cells[idx].text.split("\n")[1]  # second line of the cell text holds the value
    for cells in fighter_details.values()
    for idx in range(1, len(cells) + 1, 11)
]

In [6]:
# Collect the nickname of every fighter: every 11th cell starting at offset 2.
# NOTE: the stop bound is len + offset; when the cell count is a multiple of 11
# this visits the same indices as a stop bound of len would.
fighter_nick_names = [
    cells[idx].text.split("\n")[1]  # second line of the cell text holds the value
    for cells in fighter_details.values()
    for idx in range(2, len(cells) + 2, 11)
]

In [8]:
# Collect the height of every fighter: every 11th cell starting at offset 3.
# The first 10 characters of the value line are dropped (presumably leading
# indentation in the page markup — confirm against the raw HTML).
fighter_heights = [
    cells[idx].text.split("\n")[1][10:]
    for cells in fighter_details.values()
    for idx in range(3, len(cells) + 3, 11)
]

In [9]:
# Collect the weight of every fighter: every 11th cell starting at offset 4.
# Only characters 10-12 of the value line are kept (presumably the digits
# without the unit suffix — confirm against the raw HTML).
fighter_weights = [
    cells[idx].text.split("\n")[1][10:13]
    for cells in fighter_details.values()
    for idx in range(4, len(cells) + 4, 11)
]

In [10]:
# Collect the reach of every fighter: every 11th cell starting at offset 5.
# Only characters 10-13 of the value line are kept (presumably the number
# without the trailing inch mark — confirm against the raw HTML).
fighter_reaches = [
    cells[idx].text.split("\n")[1][10:14]
    for cells in fighter_details.values()
    for idx in range(5, len(cells) + 5, 11)
]

In [11]:
# Collect the stance of every fighter: every 11th cell starting at offset 6.
# The first 10 characters of the value line are dropped (presumably leading
# indentation in the page markup — confirm against the raw HTML).
fighter_stances = [
    cells[idx].text.split("\n")[1][10:]
    for cells in fighter_details.values()
    for idx in range(6, len(cells) + 6, 11)
]

In [12]:
# Collect the win count of every fighter: every 11th cell starting at offset 7.
# Values stay as strings; the first 10 characters of the value line are dropped
# (presumably leading indentation in the page markup — confirm against the raw HTML).
fighter_wins = [
    cells[idx].text.split("\n")[1][10:]
    for cells in fighter_details.values()
    for idx in range(7, len(cells) + 7, 11)
]

In [13]:
# Collect the loss count of every fighter: every 11th cell starting at offset 8.
# Values stay as strings; the first 10 characters of the value line are dropped
# (presumably leading indentation in the page markup — confirm against the raw HTML).
fighter_losses = [
    cells[idx].text.split("\n")[1][10:]
    for cells in fighter_details.values()
    for idx in range(8, len(cells) + 8, 11)
]

In [14]:
# Collect the draw count of every fighter: every 11th cell starting at offset 9.
# Values stay as strings; the first 10 characters of the value line are dropped
# (presumably leading indentation in the page markup — confirm against the raw HTML).
fighter_draws = [
    cells[idx].text.split("\n")[1][10:]
    for cells in fighter_details.values()
    for idx in range(9, len(cells) + 9, 11)
]

In [21]:
# Assemble every scraped column in one constructor call instead of creating a
# one-column frame, renaming it in place and bolting columns on one at a time.
# Dict insertion order fixes the column order (last_name first, as before), and
# pandas raises ValueError if the lists ended up with different lengths.
# Unlike the incremental build, this also keeps the last_name column when the
# scrape returned no fighters at all.
fighter_df = pd.DataFrame(
    {
        "last_name": fighter_last_names,
        "first_name": fighter_first_names,
        "nick_name": fighter_nick_names,
        "height": fighter_heights,
        "weight": fighter_weights,
        "reach": fighter_reaches,
        "stance": fighter_stances,
        "wins": fighter_wins,
        "losses": fighter_losses,
        "draws": fighter_draws,
    }
)

In [22]:
# Preview the first five rows to eyeball that the columns line up.
fighter_df.head(n=5)

Unnamed: 0,last_name,first_name,nick_name,height,weight,reach,stance,wins,losses,draws
0,Aaron,Tom,,--,155,--,,5,3,0
1,Abbadi,Danny,The Assassin,"5' 11""",155,--,Orthodox,4,6,0
2,Abbott,David,Tank,"6' 0""",265,--,Switch,10,15,0
3,Abdurakhimov,Shamil,Abrek,"6' 3""",235,76.0,Orthodox,20,6,0
4,Abe,Hiroyuki,Abe Ani,"5' 6""",145,--,Orthodox,8,15,3


In [23]:
# Summarise row count, dtypes and non-null counts of the assembled frame.
fighter_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3740 entries, 0 to 3739
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   last_name   3740 non-null   object
 1   first_name  3740 non-null   object
 2   nick_name   3740 non-null   object
 3   height      3740 non-null   object
 4   weight      3740 non-null   object
 5   reach       3740 non-null   object
 6   stance      3740 non-null   object
 7   wins        3740 non-null   object
 8   losses      3740 non-null   object
 9   draws       3740 non-null   object
dtypes: object(10)
memory usage: 292.3+ KB


In [24]:
# Element-wise null mask for the stance column.
# NOTE: blank stances were scraped as empty strings, not NaN, so they are
# reported as False (not null) here.
fighter_df["stance"].isna()

0       False
1       False
2       False
3       False
4       False
        ...  
3735    False
3736    False
3737    False
3738    False
3739    False
Name: stance, Length: 3740, dtype: bool