In [1]:
import pandas as pd
import numpy as np
import json
import time
import re
import requests
from bs4 import BeautifulSoup
import itertools
import datetime

# Gather list of past events

In [None]:
completed = 'http://ufcstats.com/statistics/events/completed?page=all'
url_request = requests.get(completed).text
soup = BeautifulSoup(url_request, 'html.parser')

In [None]:
events_data = []
links_data = []
table = soup.find('table',{'class':'b-statistics__table-events'})
table_rows = table.find_all('tr')
for row in table_rows[3:]:
    events_data.append([t.text.strip() for t in row.find_all('td')]) 
    links_data.append(row.find('a')['href'])

In [None]:
events_df = pd.DataFrame(events_data, columns=['Name-Date', 'Location'])
events_df['url'] = links_data
events_df

In [None]:
events_df['Event'] = events_df['Name-Date'].str.split("\n",expand=True)[0].str.strip()
events_df['Date'] = events_df['Name-Date'].str.split("\n",expand=True)[3].str.strip()

In [None]:
events_df = events_df[['Date', 'Event', 'Location', 'url']]

In [None]:
events_df.to_csv("data_ufcstats/ufc-stats-events-list.csv", index=False)

# Gather event detail from each event

In [None]:
events_df = pd.read_csv("data_ufcstats/ufc-stats-events-list.csv")
events_df.head()

In [None]:
# Get event details from each event
matches_df = pd.DataFrame()
for index, row in events_df.iterrows():
    event_name = row['Event']
    event_url = row['url']
    event_date = row['Date']
    print(str(index) + "\t" + event_date + "\t" + event_name)
    print(str(event_url))
    try:
        url_request = requests.get(event_url).text
        soup = BeautifulSoup(url_request, 'html.parser')
        table_rows = soup.find_all('tr',{'class':'b-fight-details__table-row'})
        data = []
        for row in table_rows[1:]:
            data.append([t.text.strip() for t in row.find_all('td')]) 
        df = pd.DataFrame(data, columns=['Results', 'Fighters', 'STR', 'TD', 'SUB', 'PASS', 'WEIGHT CLASS', 'METHOD', 'ROUND', 'TIME'])
        df.insert(0, "Date", event_date) 
        df.insert(1, "Event", event_name)
    except:
        print("Error")
    matches_df = matches_df.append(df, ignore_index=True)


In [None]:
matches_df = matches_df[matches_df['Results'].notna()]
matches_df = matches_df.drop_duplicates()
matches_df['Date']= pd.to_datetime(matches_df['Date'].str.strip(),format= '%B %d, %Y')

In [None]:
print("Total number of matches: " + str(len(matches_df)))
print("Total number of events: " +  str(len(matches_df['Event'].unique())))
print("Total number of dates: " +  str(len(matches_df['Date'].unique())))

In [None]:
matches_df['Results_1'] = matches_df['Results'].str.split("\n",expand=True)[0]
matches_df['Results_2'] = matches_df['Results'].str.split("\n",expand=True)[3]
matches_df['Fighter_1'] = matches_df['Fighters'].str.split("\n",expand=True)[0]
matches_df['Fighter_2'] = matches_df['Fighters'].str.split("\n",expand=True)[5]
matches_df['STR_1'] = matches_df['STR'].str.split("\n",expand=True)[0]
matches_df['STR_2'] = matches_df['STR'].str.split("\n",expand=True)[6]
matches_df['TD_1'] = matches_df['TD'].str.split("\n",expand=True)[0]
matches_df['TD_2'] = matches_df['TD'].str.split("\n",expand=True)[4]
matches_df['SUB_1'] = matches_df['SUB'].str.split("\n",expand=True)[0]
matches_df['SUB_2'] = matches_df['SUB'].str.split("\n",expand=True)[4]
matches_df['PASS_1'] = matches_df['PASS'].str.split("\n",expand=True)[0]
matches_df['PASS_2'] = matches_df['PASS'].str.split("\n",expand=True)[4]
matches_df['METHOD_result'] = matches_df['METHOD'].str.split("\n",expand=True)[0]
matches_df['METHOD_note'] = matches_df['METHOD'].str.split("\n",expand=True)[4]

matches_df = matches_df[['Date', 'Event', 'ROUND', 'TIME', 'Results_1', 'Results_2',
       'Fighter_1', 'Fighter_2', 'WEIGHT CLASS', 'STR_1', 'STR_2', 'TD_1', 'TD_2', 'SUB_1',
       'SUB_2', 'PASS_1', 'PASS_2', 'METHOD_result', 'METHOD_note']]

matches_df = matches_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

In [None]:
matches_df.to_csv("data_ufcstats/ufc-stats-matches-overview.csv", index=False)
matches_df.columns

# Collect links to detailed match data for each event

In [None]:
matches_data_df = pd.DataFrame()
for index, row in events_df.iterrows():
    event_name = row['Event']
    event_url = row['url']
    event_date = row['Date']
#     print(str(index) + "\t" + event_date + "\t" + event_name)
#     print(str(event_url))
    try:
        url_request = requests.get(event_url).text
        soup = BeautifulSoup(url_request, 'html.parser')
        table_rows = soup.find_all('tr',{'class':'js-fight-details-click'})
        data = []
        for row in table_rows:
            data.append(row['data-link'])
        df = pd.DataFrame(data, columns=['match_url'])
        df.insert(0, "Date", event_date) 
        df.insert(1, "Event", event_name)
    except:
        print("Error")
    matches_data_df = matches_data_df.append(df, ignore_index=True)


In [None]:
matches_data_df.to_csv("data_ufcstats/ufc-stats-matches-links.csv", index=False)

# Gather detailed match data 

In [12]:
matches_data_df = pd.read_csv("data_ufcstats/ufc-stats-matches-links.csv")
matches_data_df.head()

Unnamed: 0,Date,Event,match_url
0,"October 16, 1998",UFC - Ultimate Brazil,http://ufcstats.com/fight-details/635fbf570018...
1,"May 15, 1998",UFC 17: Redemption,http://ufcstats.com/fight-details/1d3c1dedfba6...
2,"May 15, 1998",UFC 17: Redemption,http://ufcstats.com/fight-details/a4d8e991ec42...
3,"May 15, 1998",UFC 17: Redemption,http://ufcstats.com/fight-details/17526cb9fb60...
4,"May 15, 1998",UFC 17: Redemption,http://ufcstats.com/fight-details/1e14e79a477f...


In [13]:
# Get event details from each event
full_matches_df = pd.DataFrame()
for index, row in matches_data_df.iterrows():
    event_name = row['Event']
    match_url = row['match_url']
    event_date = row['Date']
    print(str(index) + "\t" + event_date + "\t" + event_name)
    print(str(match_url))
    try:
        url_request = requests.get(match_url).text
        soup = BeautifulSoup(url_request, 'html.parser')
        
        fight_details = soup.find('div',{'class':'b-fight-details__fight'})
        description = [t.text.strip() for t in fight_details.find_all('p')][0]
        description = ' '.join(description.split())
        item_df = pd.DataFrame([description], columns=['description'])

        p = re.compile(r'\S+: ')
        item_df['METHOD'] = p.split(description)[1]
        item_df['ROUND'] = p.split(description)[2].strip()
        item_df['TIME'] = p.split(description)[3].split(" ")[0]
        item_df['TIME_FORMAT'] = p.split(description)[4].strip()
        if len(p.split(description)) < 6:
            item_df['REFEREE'] = ""
        else:
            item_df['REFEREE'] = p.split(description)[5]
        item_df = item_df.drop(columns=['description'])

        item_df.insert(0, "Date", event_date) 
        item_df.insert(1, "Event", event_name)
        
        table_rows = soup.find_all('table',{'cellpadding':None})

        lines = []
        for row in table_rows[0::2]:
            lines.append([t.text.strip() for t in row.find_all('td')])

        # FIGHTER	KD	SIG. STR.	SIG. STR. %	TOTAL STR.	TD	TD %	SUB. ATT	PASS	REV.
        df = pd.DataFrame(lines[0]).transpose()

        item_df['Fighter_1'] = df[0].str.split("\n",expand=True)[0]
        item_df['Fighter_2'] = df[0].str.split("\n",expand=True)[3]
        item_df['KD_1'] = df[1].str.split("\n",expand=True)[0]
        item_df['KD_2'] = df[1].str.split("\n",expand=True)[3]
        item_df['SIG_STR_1'] = df[2].str.split("\n",expand=True)[0]
        item_df['SIG_STR_2'] = df[2].str.split("\n",expand=True)[3]
        item_df['pSIG_STR_1'] = df[3].str.split("\n",expand=True)[0]
        item_df['pSIG_STR_2'] = df[3].str.split("\n",expand=True)[3]
        item_df['TOTAL_STR_1'] = df[4].str.split("\n",expand=True)[0]
        item_df['TOTAL_STR_2'] = df[4].str.split("\n",expand=True)[3]
        item_df['TD_1'] = df[5].str.split("\n",expand=True)[0]
        item_df['TD_2'] = df[5].str.split("\n",expand=True)[3]
        item_df['pTD_1'] = df[6].str.split("\n",expand=True)[0]
        item_df['pTD_2'] = df[6].str.split("\n",expand=True)[3]
        item_df['SUB_ATT_1'] = df[7].str.split("\n",expand=True)[0]
        item_df['SUB_ATT_2'] = df[7].str.split("\n",expand=True)[3]
        item_df['PASS_1'] = df[8].str.split("\n",expand=True)[0]
        item_df['PASS_2'] = df[8].str.split("\n",expand=True)[3]
        item_df['REV_1'] = df[9].str.split("\n",expand=True)[0]
        item_df['REV_2'] = df[9].str.split("\n",expand=True)[3]

        # FIGHTER	SIG. STR	SIG. STR. %	HEAD	BODY	LEG	DISTANCE	CLINCH	GROUND
        df = pd.DataFrame(lines[1]).transpose()

        item_df['HEAD_1'] = df[3].str.split("\n",expand=True)[0]
        item_df['HEAD_2'] = df[3].str.split("\n",expand=True)[3]
        item_df['BODY_1'] = df[4].str.split("\n",expand=True)[0]
        item_df['BODY_2'] = df[4].str.split("\n",expand=True)[3]
        item_df['LEG_1'] = df[5].str.split("\n",expand=True)[0]
        item_df['LEG_2'] = df[5].str.split("\n",expand=True)[3]
        item_df['DISTANCE_1'] = df[6].str.split("\n",expand=True)[0]
        item_df['DISTANCE_2'] = df[6].str.split("\n",expand=True)[3]
        item_df['CLINCH_1'] = df[7].str.split("\n",expand=True)[0]
        item_df['CLINCH_2'] = df[7].str.split("\n",expand=True)[3]
        item_df['GROUND_1'] = df[8].str.split("\n",expand=True)[0]
        item_df['GROUND_2'] = df[8].str.split("\n",expand=True)[3]
        
    except:
        print("Error")
        item_df['Fighter_1'] = match_url
    full_matches_df = full_matches_df.append(item_df, ignore_index=True)

0	October 16, 1998	UFC - Ultimate Brazil
http://ufcstats.com/fight-details/635fbf57001897c7
Error
1	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/1d3c1dedfba6eddc


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


2	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/a4d8e991ec42b048
3	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/17526cb9fb6072ef
4	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/1e14e79a477f2357
5	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/74dcccd3d2f487a1
6	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/8d83354075a937f2
7	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/21aa163b80e504f9
8	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/8b258bbb37f74a66
Error
9	May 15, 1998	UFC 17: Redemption
http://ufcstats.com/fight-details/ea535b46e0f69ad3
10	March 13, 1998	UFC 16: Battle in the Bayou
http://ufcstats.com/fight-details/f32f823658478705
11	March 13, 1998	UFC 16: Battle in the Bayou
http://ufcstats.com/fight-details/b55481006297ba05
12	March 13, 1998	UFC 16: Battle in the Bayou
http://ufcstats.com/fight-details/16f25f0808db6f77
13	March 13, 1998	UFC 1

89	May 17, 1996	UFC 9: Motor City Madness
http://ufcstats.com/fight-details/ca62a52ab0bd9108
90	May 17, 1996	UFC 9: Motor City Madness
http://ufcstats.com/fight-details/c89017008da245be
91	May 17, 1996	UFC 9: Motor City Madness
http://ufcstats.com/fight-details/31fc4d5d5d1a6126
92	May 17, 1996	UFC 9: Motor City Madness
http://ufcstats.com/fight-details/e92abaf81ad5949b
93	February 16, 1996	UFC 8: David vs Goliath
http://ufcstats.com/fight-details/8d2e99599124a16f
94	February 16, 1996	UFC 8: David vs Goliath
http://ufcstats.com/fight-details/16b4a0b06427f1ac
95	February 16, 1996	UFC 8: David vs Goliath
http://ufcstats.com/fight-details/f5156ca4dcd2a0e3
96	February 16, 1996	UFC 8: David vs Goliath
http://ufcstats.com/fight-details/90bce37a788afeaa
97	February 16, 1996	UFC 8: David vs Goliath
http://ufcstats.com/fight-details/8312985b75241b06
98	February 16, 1996	UFC 8: David vs Goliath
http://ufcstats.com/fight-details/ab7390e549e28893
99	February 16, 1996	UFC 8: David vs Goliath
http://

173	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/64139d1d505e46c5
174	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/00b0796724ec1c09
175	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/ffd16691c4c4aafc
176	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/ac7ca2ec38b96c1a
177	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/46acd54cc0c905fb
178	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/cecdc0da584274b9
179	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/2d2bbc86e941e05c
180	November 12, 1993	UFC 1: The Beginning
http://ufcstats.com/fight-details/567a09fd200cfa05


In [34]:
full_matches_df.fillna("--", inplace=True)

In [25]:
# full_matches_df.loc[0]['Fighter_1'] = "Cesar Marscucci"
# full_matches_df.loc[8]['Fighter_1'] = "Andre Roberts"
# full_matches_df.loc[16]['Fighter_1'] = "Chris Brennan"
# full_matches_df.loc[17]['Fighter_1'] = "Laverne Clark"
# full_matches_df.loc[57]['Fighter_1'] = "Justin Martin"
# full_matches_df.loc[58]['Fighter_1'] = "Nick Sanzo"
# full_matches_df.loc[66]['Fighter_1'] = "Tai Bowden"
# full_matches_df.loc[67]['Fighter_1'] = "Steve Nelmark"
# full_matches_df.loc[68]['Fighter_1'] = "Mark Hall"
# full_matches_df.loc[75]['Fighter_1'] = "Roberto Traven"
# full_matches_df.loc[76]['Fighter_1'] = "Scott Ferrozzo"
# full_matches_df.loc[84]['Fighter_1'] = "Sam Adkins"
# full_matches_df.loc[101]['Fighter_1'] = "Sam Adkins"
# full_matches_df.loc[109]['Fighter_1'] = "Mark Hall"
# full_matches_df.loc[110]['Fighter_1'] = "Joe Charles"
# full_matches_df.loc[120]['Fighter_1'] = "Onassis Parungao"
# full_matches_df.loc[121]['Fighter_1'] = "Joel Sutton"
# full_matches_df.loc[130]['Fighter_1'] = "Anthony Macias"
# full_matches_df.loc[131]['Fighter_1'] = "Joel Sutton"
# full_matches_df.loc[150]['Fighter_1'] = "Marcus Bossett"
# full_matches_df.loc[151]['Fighter_1'] = "Joe Charles"      

# full_matches_df.loc[0]['Fighter_2'] = "Paulo Santos"
# full_matches_df.loc[8]['Fighter_2'] = "Harry Moskowitz"
# full_matches_df.loc[16]['Fighter_2'] = "Courtney Turner"
# full_matches_df.loc[17]['Fighter_2'] = "Josh Stuart"
# full_matches_df.loc[57]['Fighter_2'] = "Eric Martin"
# full_matches_df.loc[58]['Fighter_2'] = "Jackie Lee"
# full_matches_df.loc[66]['Fighter_2'] = "Jack Nilson"
# full_matches_df.loc[67]['Fighter_2'] = "Marcus Bossett"
# full_matches_df.loc[68]['Fighter_2'] = "Felix Lee Mitchell"
# full_matches_df.loc[75]['Fighter_2'] = "Dave Berry"
# full_matches_df.loc[76]['Fighter_2'] = "Sam Fulton"
# full_matches_df.loc[84]['Fighter_2'] = "Felix Lee Mitchell"
# full_matches_df.loc[101]['Fighter_2'] = "Keith Mielke"
# full_matches_df.loc[109]['Fighter_2'] = "Trent Jenkins"
# full_matches_df.loc[110]['Fighter_2'] = "Scott Bessac"
# full_matches_df.loc[120]['Fighter_2'] = "Francesco Maturi"
# full_matches_df.loc[121]['Fighter_2'] = "Geza Kalman"
# full_matches_df.loc[130]['Fighter_2'] = "He-Man Gipson"
# full_matches_df.loc[131]['Fighter_2'] = "Jack McGlaughlin"
# full_matches_df.loc[150]['Fighter_2'] = "Eldo Xavier Dias" 
# full_matches_df.loc[151]['Fighter_2'] = "Kevin Rosier"

In [47]:
full_matches_df = full_matches_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

In [48]:
full_matches_df.to_csv("data_ufcstats/ufc-stats-matches-detailed.csv", index=False)

# For adding new event and match data

In [125]:
event_date = "May 30, 2020"
event_name = "UFC Fight Night: Woodley vs. Burns"
event_location = "Las Vegas, Nevada, USA"
event_url = 'http://ufcstats.com/event-details/14b9e0f2679a2205'

In [127]:
# ufc-stats-events-list
single_df = pd.DataFrame(np.array([[event_date, event_name, event_location, event_url]]), 
                         columns=['Date', 'Event', 'Location', 'url'])
single_df.to_csv("data_ufcstats/ufc-stats-events-list.csv", mode='a', index = False, header=False)

In [128]:
url_request = requests.get(event_url).text
soup = BeautifulSoup(url_request, 'html.parser')
rows = soup.find_all('tr',{'class':'b-fight-details__table-row'})
matches_data = []
for row in rows[1:]:
    matches_data.append([t.text.strip() for t in row.find_all('td')]) 
matches_df = pd.DataFrame(matches_data, columns=['Results', 'Fighters', 'STR', 'TD', 'SUB', 'PASS', 'WEIGHT CLASS', 'METHOD', 'ROUND', 'TIME'])

matches_df['Results_1'] = matches_df['Results'].str.split("\n",expand=True)[0]
try:
    matches_df['Results_2'] = matches_df['Results'].str.split("\n",expand=True)[3]
except:
    matches_df['Results_2'] = ""
matches_df['Fighter_1'] = matches_df['Fighters'].str.split("\n",expand=True)[0]
matches_df['Fighter_2'] = matches_df['Fighters'].str.split("\n",expand=True)[5]
matches_df['STR_1'] = matches_df['STR'].str.split("\n",expand=True)[0]
matches_df['STR_2'] = matches_df['STR'].str.split("\n",expand=True)[6]
matches_df['TD_1'] = matches_df['TD'].str.split("\n",expand=True)[0]
matches_df['TD_2'] = matches_df['TD'].str.split("\n",expand=True)[4]
matches_df['SUB_1'] = matches_df['SUB'].str.split("\n",expand=True)[0]
matches_df['SUB_2'] = matches_df['SUB'].str.split("\n",expand=True)[4]
matches_df['PASS_1'] = matches_df['PASS'].str.split("\n",expand=True)[0]
matches_df['PASS_2'] = matches_df['PASS'].str.split("\n",expand=True)[4]
matches_df['METHOD_result'] = matches_df['METHOD'].str.split("\n",expand=True)[0]
matches_df['METHOD_note'] = matches_df['METHOD'].str.split("\n",expand=True)[4]

matches_df.insert(0, "Date", event_date) 
matches_df.insert(1, "Event", event_name)
matches_df['Date']= pd.to_datetime(event_date,format= '%B %d, %Y')
matches_df = matches_df[['Date', 'Event', 'ROUND', 'TIME', 'Results_1', 'Results_2',
       'Fighter_1', 'Fighter_2', 'WEIGHT CLASS', 'STR_1', 'STR_2', 'TD_1', 'TD_2', 'SUB_1',
       'SUB_2', 'PASS_1', 'PASS_2', 'METHOD_result', 'METHOD_note']]

matches_df = matches_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)

In [129]:
# ufc-stats-matches
matches_df.to_csv("data_ufcstats/ufc-stats-matches-all.csv", mode='a', index = False, header=False)
matches_df.head()

Unnamed: 0,Date,Event,ROUND,TIME,Results_1,Results_2,Fighter_1,Fighter_2,WEIGHT CLASS,STR_1,STR_2,TD_1,TD_2,SUB_1,SUB_2,PASS_1,PASS_2,METHOD_result,METHOD_note
0,2020-05-30,UFC Fight Night: Woodley vs. Burns,5,5:00,win,,Gilbert Burns,Tyron Woodley,Welterweight,83,28,2,0,1,0,3,0,U-DEC,
1,2020-05-30,UFC Fight Night: Woodley vs. Burns,3,5:00,win,,Augusto Sakai,Blagoy Ivanov,Heavyweight,78,66,0,1,0,0,0,1,S-DEC,
2,2020-05-30,UFC Fight Night: Woodley vs. Burns,3,5:00,win,,Billy Quarantillo,Spike Carlyle,Catch Weight,49,37,2,3,2,0,2,4,U-DEC,
3,2020-05-30,UFC Fight Night: Woodley vs. Burns,2,3:26,win,,Roosevelt Roberts,Brok Weaver,Lightweight,40,14,1,0,3,0,3,0,SUB,Rear Naked Choke
4,2020-05-30,UFC Fight Night: Woodley vs. Burns,1,2:36,win,,Mackenzie Dern,Hannah Cifers,Women's Strawweight,5,7,0,0,1,0,0,0,SUB,Kneebar


In [130]:
matches_data_df = pd.DataFrame()
try:
    url_request = requests.get(event_url).text
    soup = BeautifulSoup(url_request, 'html.parser')
    table_rows = soup.find_all('tr',{'class':'js-fight-details-click'})
    data = []
    for row in table_rows:
        data.append(row['data-link'])
    df = pd.DataFrame(data, columns=['match_url'])
    df.insert(0, "Date", event_date) 
    df.insert(1, "Event", event_name)
except:
    print("Error")
matches_data_df = matches_data_df.append(df, ignore_index=True)

In [131]:
# ufc-stats-matches-links
matches_data_df.to_csv("data_ufcstats/ufc-stats-matches-links.csv", mode='a', index = False, header=False)
matches_data_df

Unnamed: 0,Date,Event,match_url
0,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/e9eab0fa03ee...
1,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/a6743d41b0b9...
2,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/1e9a2b197ebf...
3,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/03e5127d4832...
4,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/3640defb8294...
5,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/a41384da7001...
6,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/767819f32143...
7,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/0fcf42b68f2f...
8,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/0170db5fad04...
9,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,http://ufcstats.com/fight-details/57dec2b07b75...


In [132]:
full_matches_df = pd.DataFrame()
for index, row in matches_data_df.iterrows():
    event_name = row['Event']
    match_url = row['match_url']
    event_date = row['Date']
    print(str(index) + "\t" + event_date + "\t" + event_name)
    print(str(match_url))
    try:
        url_request = requests.get(match_url).text
        soup = BeautifulSoup(url_request, 'html.parser')
        
        fight_details = soup.find('div',{'class':'b-fight-details__fight'})
        description = [t.text.strip() for t in fight_details.find_all('p')][0]
        description = ' '.join(description.split())
        item_df = pd.DataFrame([description], columns=['description'])

        p = re.compile(r'\S+: ')
        item_df['METHOD'] = p.split(description)[1]
        item_df['ROUND'] = p.split(description)[2].strip()
        item_df['TIME'] = p.split(description)[3].split(" ")[0]
        item_df['TIME_FORMAT'] = p.split(description)[4].strip()
        if len(p.split(description)) < 6:
            item_df['REFEREE'] = ""
        else:
            item_df['REFEREE'] = p.split(description)[5]
        item_df = item_df.drop(columns=['description'])

        item_df.insert(0, "Date", event_date) 
        item_df.insert(1, "Event", event_name)
        
        table_rows = soup.find_all('table',{'cellpadding':None})

        lines = []
        for row in table_rows[0::2]:
            lines.append([t.text.strip() for t in row.find_all('td')])

        # FIGHTER	KD	SIG. STR.	SIG. STR. %	TOTAL STR.	TD	TD %	SUB. ATT	PASS	REV.
        df = pd.DataFrame(lines[0]).transpose()

        item_df['Fighter_1'] = df[0].str.split("\n",expand=True)[0]
        item_df['Fighter_2'] = df[0].str.split("\n",expand=True)[3]
        item_df['KD_1'] = df[1].str.split("\n",expand=True)[0]
        item_df['KD_2'] = df[1].str.split("\n",expand=True)[3]
        item_df['SIG_STR_1'] = df[2].str.split("\n",expand=True)[0]
        item_df['SIG_STR_2'] = df[2].str.split("\n",expand=True)[3]
        item_df['pSIG_STR_1'] = df[3].str.split("\n",expand=True)[0]
        item_df['pSIG_STR_2'] = df[3].str.split("\n",expand=True)[3]
        item_df['TOTAL_STR_1'] = df[4].str.split("\n",expand=True)[0]
        item_df['TOTAL_STR_2'] = df[4].str.split("\n",expand=True)[3]
        item_df['TD_1'] = df[5].str.split("\n",expand=True)[0]
        item_df['TD_2'] = df[5].str.split("\n",expand=True)[3]
        item_df['pTD_1'] = df[6].str.split("\n",expand=True)[0]
        item_df['pTD_2'] = df[6].str.split("\n",expand=True)[3]
        item_df['SUB_ATT_1'] = df[7].str.split("\n",expand=True)[0]
        item_df['SUB_ATT_2'] = df[7].str.split("\n",expand=True)[3]
        item_df['PASS_1'] = df[8].str.split("\n",expand=True)[0]
        item_df['PASS_2'] = df[8].str.split("\n",expand=True)[3]
        item_df['REV_1'] = df[9].str.split("\n",expand=True)[0]
        item_df['REV_2'] = df[9].str.split("\n",expand=True)[3]

        # FIGHTER	SIG. STR	SIG. STR. %	HEAD	BODY	LEG	DISTANCE	CLINCH	GROUND
        df = pd.DataFrame(lines[1]).transpose()

        item_df['HEAD_1'] = df[3].str.split("\n",expand=True)[0]
        item_df['HEAD_2'] = df[3].str.split("\n",expand=True)[3]
        item_df['BODY_1'] = df[4].str.split("\n",expand=True)[0]
        item_df['BODY_2'] = df[4].str.split("\n",expand=True)[3]
        item_df['LEG_1'] = df[5].str.split("\n",expand=True)[0]
        item_df['LEG_2'] = df[5].str.split("\n",expand=True)[3]
        item_df['DISTANCE_1'] = df[6].str.split("\n",expand=True)[0]
        item_df['DISTANCE_2'] = df[6].str.split("\n",expand=True)[3]
        item_df['CLINCH_1'] = df[7].str.split("\n",expand=True)[0]
        item_df['CLINCH_2'] = df[7].str.split("\n",expand=True)[3]
        item_df['GROUND_1'] = df[8].str.split("\n",expand=True)[0]
        item_df['GROUND_2'] = df[8].str.split("\n",expand=True)[3]
        
    except:
        print("Error")
        item_df['Fighter_1'] = match_url
    full_matches_df = full_matches_df.append(item_df, ignore_index=True)

0	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/e9eab0fa03eecd9b
1	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/a6743d41b0b95271
2	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/1e9a2b197ebff074
3	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/03e5127d4832750c
4	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/3640defb8294bd37
5	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/a41384da70013373
6	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/767819f321431bed
7	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/0fcf42b68f2f0fa3
8	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/0170db5fad04e4d0
9	May 30, 2020	UFC Fight Night: Woodley vs. Burns
http://ufcstats.com/fight-details/57dec2b

In [134]:
full_matches_df = full_matches_df.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
full_matches_df.sort_index(axis=1, inplace=True)

# ufc-stats-matches-detailed
full_matches_df.to_csv("data_ufcstats/ufc-stats-matches-detailed.csv", mode='a', index = False, header=False)

full_matches_df

Unnamed: 0,BODY_1,BODY_2,CLINCH_1,CLINCH_2,DISTANCE_1,DISTANCE_2,Date,Event,Fighter_1,Fighter_2,...,TD_1,TD_2,TIME,TIME_FORMAT,TOTAL_STR_1,TOTAL_STR_2,pSIG_STR_1,pSIG_STR_2,pTD_1,pTD_2
0,12 of 17,27 of 31,9 of 9,13 of 16,19 of 55,51 of 102,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Tyron Woodley,Gilbert Burns,...,0 of 2,2 of 8,5:00,5 Rnd (5-5-5-5-5),65 of 101,156 of 211,43%,60%,0%,25%
1,18 of 19,28 of 38,12 of 15,14 of 15,52 of 107,62 of 144,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Blagoy Ivanov,Augusto Sakai,...,1 of 3,0 of 0,5:00,3 Rnd (5-5-5),71 of 130,89 of 174,52%,48%,33%,0%
2,6 of 6,2 of 2,5 of 5,4 of 4,12 of 22,12 of 16,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Billy Quarantillo,Spike Carlyle,...,2 of 3,3 of 6,5:00,3 Rnd (5-5-5),110 of 135,64 of 102,74%,56%,66%,50%
3,7 of 10,9 of 11,2 of 2,4 of 7,25 of 46,10 of 41,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Roosevelt Roberts,Brok Weaver,...,1 of 3,0 of 0,3:26,3 Rnd (5-5-5),54 of 76,26 of 62,65%,29%,33%,0%
4,1 of 1,3 of 6,0 of 0,4 of 5,5 of 21,3 of 19,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Mackenzie Dern,Hannah Cifers,...,0 of 1,0 of 0,2:36,3 Rnd (5-5-5),5 of 21,11 of 31,23%,26%,0%,0%
5,12 of 18,6 of 9,4 of 4,4 of 5,22 of 49,21 of 59,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Katlyn Chookagian,Antonina Shevchenko,...,3 of 3,0 of 0,5:00,3 Rnd (5-5-5),200 of 240,37 of 76,67%,39%,100%,0%
6,27 of 33,51 of 58,4 of 5,2 of 4,169 of 337,125 of 269,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Daniel Rodriguez,Gabe Green,...,1 of 1,0 of 0,5:00,3 Rnd (5-5-5),175 of 345,127 of 273,50%,46%,100%,0%
7,4 of 4,1 of 1,2 of 2,1 of 1,9 of 18,4 of 6,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Jamahal Hill,Klidson Abreu,...,0 of 0,0 of 0,1:51,3 Rnd (5-5-5),14 of 24,5 of 7,58%,71%,0%,0%
8,3 of 5,4 of 4,3 of 5,1 of 1,11 of 24,12 of 29,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Tim Elliott,Brandon Royval,...,4 of 10,1 of 1,3:18,3 Rnd (5-5-5),40 of 55,22 of 41,53%,48%,40%,100%
9,14 of 27,5 of 6,2 of 2,1 of 1,16 of 41,31 of 51,"May 30, 2020",UFC Fight Night: Woodley vs. Burns,Louis Smolka,Casey Kenney,...,0 of 1,0 of 0,3:03,3 Rnd (5-5-5),18 of 43,32 of 52,41%,61%,0%,0%
