# Scrape UFC events data

1. Compile a list of the past events and write it to file
2. Visit the wiki page of each past event and gather match data
3. Compile a list of the scheduled events and write it to file
4. Collect the poster images of the past events

In [68]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import itertools
import re
# Never truncate long cell contents (URLs) when displaying frames.
# None is the supported "no limit" value; the old -1 sentinel has been
# deprecated since pandas 1.0 and is no longer accepted by newer releases.
pd.set_option('display.max_colwidth', None)

## Scrape list of past events

In [2]:
# Last visited 6/1/2020
# Download the Wikipedia page that lists all UFC events and parse it.
url = 'https://en.wikipedia.org/wiki/List_of_UFC_events'
# The timeout keeps the cell from hanging forever on a stalled connection;
# raise_for_status() stops us from silently parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
url_request = response.text
soup = BeautifulSoup(url_request, 'html.parser')

In [3]:
# Turn every row of the "Past_events" table (except the first <tr>, which is
# skipped) into a list holding the stripped text of its <td> cells.
table = soup.find('table', {'id': 'Past_events'})
table_rows = table.find_all('tr')
events_data = [
    [cell.text.strip() for cell in table_row.find_all('td')]
    for table_row in table_rows[1:]
]

past_event_columns = ['Index', 'Event', 'Date', 'Venue', 'Location', 'Attendance', 'Ref']
events_df = pd.DataFrame(events_data, columns=past_event_columns)

In [4]:
# Build one wiki link per table row, aligned with the rows of events_df.
#
# The link is read from the Event cell (second <td>, matching the 'Event'
# column of events_df) instead of from the first <a> found anywhere in the row.
# When an event has no article of its own, the first anchor in the row belongs
# to some other cell, so the old lookup stored an unrelated page as "wikipage"
# (or crashed on None when the row had no anchor at all).
# Rows without an article link get None so the list stays aligned with the table.
WIKI_BASE_URL = "https://en.wikipedia.org"
EVENT_CELL_POSITION = 1  # <td> order is Index, Event, Date, ...

links = []
for row in table_rows[1:]:
    cells = row.find_all('td')
    event_cell = cells[EVENT_CELL_POSITION] if len(cells) > EVENT_CELL_POSITION else None
    anchors = event_cell.find_all('a', href=True) if event_cell is not None else []
    # Keep only article links; this skips in-page anchors such as footnote markers.
    article_paths = [a['href'] for a in anchors if a['href'].startswith('/wiki/')]
    links.append((WIKI_BASE_URL + article_paths[0]) if article_paths else None)

In [5]:
# Put the wiki links right after the 'Event' column.
# DataFrame.insert raises "cannot insert wikipage, already exists" when the
# cell is run a second time, so overwrite the column if it is already there.
if "wikipage" in events_df.columns:
    events_df["wikipage"] = links
else:
    events_df.insert(2, "wikipage", links)

In [6]:
# Report how many events are listed and how many are left once the rows
# whose Attendance reads "Cancelled" are excluded.
print(f"Total events: {len(events_df)}")
not_cancelled = events_df['Attendance'] != "Cancelled"
events_occurred_df = events_df[not_cancelled]
print(f"Events (excluding cancelled): {len(events_occurred_df)}")

Total events: 526
Events (excluding cancelled): 517


In [7]:
# Persist the list of past UFC events as CSV; the row index is not written.
events_df.to_csv(path_or_buf="data/list_of_UFC_past_events.csv", index=False)

## Visit each past event's wiki page and gather match data

In [8]:
# Reload the saved list of past UFC events and keep only the rows whose
# Attendance is not "Cancelled".
events_df = pd.read_csv("data/list_of_UFC_past_events.csv")
events_occurred_df = events_df.loc[events_df['Attendance'].ne("Cancelled")]

In [9]:
# Renumber the remaining rows from 0 (the old index is discarded, not kept as
# a column) and display the resulting frame.
events_occurred_df = events_occurred_df.reset_index(drop=True)
events_occurred_df

Unnamed: 0,Index,Event,wikipage,Date,Venue,Location,Attendance,Ref
0,513,UFC Fight Night: Lee vs. Oliveira,https://en.wikipedia.org/wiki/UFC_Fight_Night:_Lee_vs._Oliveira,"Mar 14, 2020",Ginásio Nilson Nelson,"Brasília, Brazil",0,[15]
1,512,UFC 248: Adesanya vs. Romero,https://en.wikipedia.org/wiki/UFC_248,"Mar 7, 2020",T-Mobile Arena,"Las Vegas, Nevada, U.S.",15077,[16]
2,511,UFC Fight Night: Benavidez vs. Figueiredo,https://en.wikipedia.org/wiki/UFC_Fight_Night:_Benavidez_vs._Figueiredo,"Feb 29, 2020",Chartway Arena,"Norfolk, Virginia, U.S.",7098,[17]
3,510,UFC Fight Night: Felder vs. Hooker,https://en.wikipedia.org/wiki/UFC_Fight_Night:_Felder_vs._Hooker,"Feb 23, 2020",Spark Arena,"Auckland, New Zealand",10025,[18]
4,509,UFC Fight Night: Anderson vs. Błachowicz 2,https://en.wikipedia.org/wiki/UFC_Fight_Night:_Anderson_vs._B%C5%82achowicz_2,"Feb 15, 2020",Santa Ana Star Center,"Rio Rancho, New Mexico, U.S.",6449,[19]
...,...,...,...,...,...,...,...,...
508,005,UFC 5: The Return of the Beast,https://en.wikipedia.org/wiki/UFC_5,"Apr 7, 1995",Independence Arena,"Charlotte, North Carolina, U.S.",6000,[464]
509,004,UFC 4: Revenge of the Warriors,https://en.wikipedia.org/wiki/UFC_4,"Dec 16, 1994",Expo Square Pavilion,"Tulsa, Oklahoma, U.S.",5857,[465]
510,003,UFC 3: The American Dream,https://en.wikipedia.org/wiki/UFC_3,"Sep 9, 1994",Grady Cole Center,"Charlotte, North Carolina, U.S.",,
511,002,UFC 2: No Way Out,https://en.wikipedia.org/wiki/UFC_2,"Mar 11, 1994",Mammoth Gardens,"Denver, Colorado, U.S.",2000,[466]


In [None]:
# Get event details from each event wikipage
#
# For every past event: download its wiki page, take the first table with
# class "toccolours" and turn its rows into one DataFrame row per match.
MATCH_COLUMNS = ['Weight class', 'Fighter1', 'Result', 'Fighter2', 'Method', 'Round', 'Time', 'Note']

# Collect one frame per event and concatenate once at the end. DataFrame.append
# was removed in pandas 2.0, and appending inside the loop copies the
# accumulated data on every iteration.
event_frames = []
# To try the loop on a few events only, iterate over
# itertools.islice(events_occurred_df.iterrows(), 3) instead.
for index, event in events_occurred_df.iterrows():
    event_name = event['Event']
    event_url = event['wikipage']
    event_date = event['Date']
    print(str(index) + "\t" + event_date + "\t" + event_name)
    try:
        response = requests.get(event_url, timeout=30)
        response.raise_for_status()
        # Separate names so the page-level `soup`/`table` of other cells are not clobbered.
        event_soup = BeautifulSoup(response.text, 'html.parser')
        fight_table = event_soup.find('table', {'class': 'toccolours'})
        data = [
            [cell.text.strip() for cell in table_row.find_all('td')]
            for table_row in fight_table.find_all('tr')
        ]
        df = pd.DataFrame(data, columns=MATCH_COLUMNS)
        # Rows that contain no <td> cells yield no values; drop them.
        df = df[df['Weight class'].notna()]
        df.insert(0, "Date", event_date)
        df.insert(1, "Event", event_name)
    except Exception as exc:
        # Previously the error was swallowed and the frame of the *previous*
        # event was appended again (or a NameError was raised on the first
        # event). Report the cause and skip this event instead.
        print("Error: " + repr(exc))
        continue
#     df.to_csv("data/"+ event_name + ".csv", index=False)
    event_frames.append(df)

result_df = pd.concat(event_frames, ignore_index=True) if event_frames else pd.DataFrame()

In [None]:
# Initial save of incomplete data
# result_df.to_csv("data/ufc_matches.csv", index=False)
# result_df.to_json("data/ufc_matches.json",orient='records')

In [None]:
# Sanity check: number of match rows and number of distinct events collected.
print(result_df.shape[0])
print(result_df['Event'].nunique(dropna=False))

In [None]:
# Events whose pages need a closer look. Each entry is
# (date, event name, table_number); the cell below uses table_number as the
# position of the "toccolours" table to read on that event's page.
pages_to_inspect = [
    ('Apr 20, 2013', 'UFC on Fox: Henderson vs. Melendez', '2'),
    ('Jan 19, 2013', 'UFC on FX: Belfort vs. Bisping', '1'),
    ('Jul 21, 2012', 'UFC 149: Faber vs. Barão', '5'),
    ('Jun 8, 2012', 'UFC on FX: Johnson vs. McCall', '4'),
    ('May 15, 2012', 'UFC on Fuel TV: The Korean Zombie vs. Poirier', '3'),
    ('Feb 15, 2012', 'UFC on Fuel TV: Sanchez vs. Ellenberger', '2'),
    ('Jan 28, 2012', 'UFC on Fox: Evans vs. Davis', '1'),
    ('Apr 5, 2007', 'UFC Fight Night: Stevenson vs. Guillard', '1'),
    ('Aug 6, 2005', 'UFC Ultimate Fight Night', '-1'),  # no wiki page exists
]
inspect_columns = ['date', 'event_name', 'table_number']
inspect_df = pd.DataFrame(np.array(pages_to_inspect), columns=inspect_columns)

# Accumulator for the matches scraped from these pages.
supp_result_df = pd.DataFrame()

In [None]:
# Re-scrape the events listed in inspect_df, reading the fight card from the
# "toccolours" table at position `table_number` instead of the first one.
SUPP_MATCH_COLUMNS = ['Weight class', 'Fighter1', 'Result', 'Fighter2', 'Method', 'Round', 'Time', 'Note']

supp_frames = []
for _, entry in inspect_df.iterrows():
    table_number = int(entry['table_number'])
    # A negative table_number marks an event without a wiki page; it is added
    # by hand in a later cell. This replaces the hard-coded range(0, 8).
    if table_number < 0:
        continue
    event_date = entry['date']
    event_name = entry['event_name']
    try:
        event_url = events_occurred_df[events_occurred_df['Date']==event_date]['wikipage'].values[0]
        print(event_url)
        response = requests.get(event_url, timeout=30)
        response.raise_for_status()
        event_soup = BeautifulSoup(response.text, 'html.parser')
        fight_tables = event_soup.find_all('table', {'class': 'toccolours'})
        data = [
            [cell.text.strip() for cell in table_row.find_all('td')]
            for table_row in fight_tables[table_number].find_all('tr')
        ]
        df = pd.DataFrame(data, columns=SUPP_MATCH_COLUMNS)
        # Rows that contain no <td> cells yield no values; drop them.
        df = df[df['Weight class'].notna()]
        df.insert(0, "Date", event_date)
        df.insert(1, "Event", event_name)
        print(df.iloc[0,0:6])
    except Exception as exc:
        # Previously the error was swallowed and the frame of the previous
        # event was appended a second time. Report the cause and skip instead.
        print("Error: " + repr(exc))
        continue
    supp_frames.append(df)

# Built in one go (DataFrame.append was removed in pandas 2.0). Rebinding
# instead of extending also makes the cell safe to re-run.
supp_result_df = pd.concat(supp_frames, ignore_index=True) if supp_frames else pd.DataFrame()

In [None]:
# Sanity check: number of supplementary match rows and of distinct events.
print(supp_result_df.shape[0])
print(supp_result_df['Event'].nunique(dropna=False))

In [None]:
# Spot-check the rows collected for the event dated Feb 15, 2012.
supp_result_df.loc[supp_result_df['Date'].eq('Feb 15, 2012')]

In [None]:
# Find info on this event since no wiki page exists
# Manually add info for this event
event_date = 'Aug 6, 2005'
event_name = 'UFC Ultimate Fight Night'
# Sources:
# https://www.ufc.com/event/UFC-Fight-Night-1
# https://www.sherdog.com/events/UFC-Fight-Night-1-Marquardt-vs-Salaverry-3100
#
# One row per match, in the same column order as the scraped tables:
# weight class, fighter 1, result, fighter 2, method, round, time, note.
# Values carry no surrounding whitespace, like the scraped cells, which are
# stripped. The Quarry/Sell method used to start with a stray space.
data_array = np.array([
    ['Middleweight', 'Nate Marquardt', 'def.', 'Ivan Salaverry', 'Decision (unanimous) (30-27, 30-27, 29-28)', '3', '5:00', ''],
    ['Middleweight', 'Chris Leben', 'def.', 'Patrick Cote', 'Decision (split) (29-28, 30-27, 27-29)', '3', '5:00', ''],
    ['Light Heavyweight', 'Stephan Bonnar', 'def.', 'Sam Hoger', 'Decision (unanimous) (30-27, 30-27, 30-27)', '3', '5:00', ''],
    ['Middleweight', 'Nate Quarry', 'def.', 'Pete Sell', 'TKO (punch)', '1', '0:42', ''],
    ['Welterweight', 'Josh Koscheck', 'def.', 'Pete Spratt', 'Submission (rear-naked choke)', '1', '1:53', ''],
    ['Middleweight', 'Mike Swick', 'def.', 'Gideon Ray', 'TKO (punches)', '1', '0:22', ''],
    ['Welterweight', 'Kenny Florian', 'def.', 'Alex Karalexis', 'TKO (doctor stoppage)', '2', '2:52', ''],
    ['Welterweight', 'Drew Fickett', 'def.', 'Josh Neer', 'Submission (rear-naked choke)', '1', '1:35', '']])

In [None]:
# Wrap the hand-entered matches in a frame shaped like the scraped ones:
# Date and Event first, followed by the eight match columns.
manual_match_columns = ['Weight class', 'Fighter1', 'Result', 'Fighter2', 'Method', 'Round', 'Time', 'Note']
df = pd.DataFrame(data_array, columns=manual_match_columns)
df.insert(loc=0, column="Date", value=event_date)
df.insert(loc=1, column="Event", value=event_name)
df

In [None]:
# Add the hand-entered event to the supplementary matches.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# ignore_index=True gives the combined frame a clean 0..n-1 index.
supp_result_df = pd.concat([supp_result_df, df], ignore_index=True)
print(len(supp_result_df))
print(len(supp_result_df['Event'].unique()))

In [None]:
# Merge the supplementary matches into the main result.
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0;
# ignore_index=True avoids duplicated index labels in the combined frame.
result_df = pd.concat([result_df, supp_result_df], ignore_index=True)
print(len(result_df))
print(len(result_df['Event'].unique()))

In [None]:
# Keep only the first occurrence of each fully identical row.
is_repeated_row = result_df.duplicated()
result_df = result_df.loc[~is_repeated_row]

In [None]:
# Persist the matches twice: as CSV (without the row index) and as a JSON
# array with one object per match.
result_df.to_csv(path_or_buf="data/ufc_matches.csv", index=False)
result_df.to_json(path_or_buf="data/ufc_matches.json", orient='records')

## Scrape list of scheduled events

In [10]:
# Collect list of scheduled events
url = 'https://en.wikipedia.org/wiki/List_of_UFC_events'
# The timeout keeps the cell from hanging on a stalled connection;
# raise_for_status() stops us from silently parsing an HTTP error page.
response = requests.get(url, timeout=30)
response.raise_for_status()
url_request = response.text
soup = BeautifulSoup(url_request, 'html.parser')

scheduled_table = soup.find('table',{'id':'Scheduled_events'})
if scheduled_table is None:
    # Fail with a clear message instead of an AttributeError on None below.
    raise ValueError("No table with id 'Scheduled_events' found on " + url)
scheduled_table_rows = scheduled_table.find_all('tr')
# One list of stripped <td> texts per row; the first <tr> is skipped.
scheduled_events_data = [
    [cell.text.strip() for cell in table_row.find_all('td')]
    for table_row in scheduled_table_rows[1:]
]

scheduled_events_df = pd.DataFrame(scheduled_events_data, columns=['Event', 'Original Date', 'Venue', 'Location', 'Ref','Notes'])

In [11]:
# Display the scheduled-events frame built in the previous cell.
scheduled_events_df

Unnamed: 0,Event,Original Date,Venue,Location,Ref,Notes
0,UFC Fight Night,"Aug 15, 2020",3Arena,"Dublin, Ireland",[9],Postponed
1,UFC Fight Night 178,"July 18, 2020",TBD,TBD,[10],Planned
2,UFC 251,"July 11, 2020",TBD,TBD,[11],Planned
3,UFC on ESPN 10,"Jun 27, 2020",TBD,TBD,[12],Planned
4,UFC Fight Night 177,"Jun 20, 2020",TBD,TBD,[12],Planned
5,UFC Fight Night 176,"Jun 13, 2020",TBD,TBD,[13],Planned
6,UFC 250: Nunes vs. Spencer,"Jun 6, 2020",UFC APEX,"Las Vegas, Nevada, U.S.",[14],Planned


In [12]:
# Attach a wiki link to every scheduled event.
#
# The link is read from the Event cell (first <td>, matching the 'Event'
# column of scheduled_events_df) instead of from the first <a> found anywhere
# in the row. For an event without its own article the first anchor in the
# row belongs to some other cell and would be stored as a wrong "wikipage"
# (or crash on None when the row has no anchor at all).
# Rows without an article link get None so the list stays aligned with the table.
SCHEDULED_EVENT_CELL_POSITION = 0  # <td> order is Event, Original Date, Venue, ...

links = []
for row in scheduled_table_rows[1:]:
    cells = row.find_all('td')
    event_cell = cells[SCHEDULED_EVENT_CELL_POSITION] if len(cells) > SCHEDULED_EVENT_CELL_POSITION else None
    anchors = event_cell.find_all('a', href=True) if event_cell is not None else []
    # Keep only article links; this skips in-page anchors such as footnote markers.
    article_paths = [a['href'] for a in anchors if a['href'].startswith('/wiki/')]
    links.append(("https://en.wikipedia.org" + article_paths[0]) if article_paths else None)

# DataFrame.insert raises when the column already exists, so overwrite it on
# re-runs instead of failing.
if "wikipage" in scheduled_events_df.columns:
    scheduled_events_df["wikipage"] = links
else:
    scheduled_events_df.insert(2, "wikipage", links)

In [13]:
# Persist the scheduled events as CSV; the row index is not written.
scheduled_events_df.to_csv(path_or_buf="data/list_of_UFC_scheduled_events.csv", index=False)

## Get links to posters of past events

In [None]:
# Reload the saved list of past UFC events, keep the rows whose Attendance is
# not "Cancelled", renumber them from 0 and display the result.
events_df = pd.read_csv("data/list_of_UFC_past_events.csv")
events_occurred_df = (
    events_df
    .loc[events_df['Attendance'].ne("Cancelled")]
    .reset_index(drop=True)
)
events_occurred_df

In [None]:
# Get event posters
#
# For every past event, download its wiki page and record the URL of its
# poster image. Events whose page or image cannot be read get the placeholder
# "Not Found" so that every event still has exactly one row.
POSTER_COLUMNS = ['Date', 'Event', 'wikipage', 'poster_url']

# Collect plain records and build the frame once. DataFrame.append was removed
# in pandas 2.0, and appending inside the loop copies the data every iteration.
poster_records = []
for index, event in events_occurred_df.iterrows():
    event_name = event['Event']
    event_url = event['wikipage']
    event_date = event['Date']
    print(str(index) + "\t" + event_date + "\t" + event_name)
    try:
        response = requests.get(event_url, timeout=30)
        response.raise_for_status()
        event_soup = BeautifulSoup(response.text, 'html.parser')
        # The first <img> on a page is not always the poster (it can be a logo,
        # flag or icon), so look inside the infobox first and fall back to the
        # first image on the page only when the infobox has none.
        infobox = event_soup.find('table', {'class': 'infobox'})
        image = infobox.find('img') if infobox is not None else None
        if image is None:
            image = event_soup.find('img')
        # The scheme is prepended to the src value as before; a page without
        # any image raises here and is handled below.
        poster_url = "http:" + image['src']
    except Exception as exc:
        print("Error: " + repr(exc))
        poster_url = "Not Found"
    poster_records.append({
        'Date': event_date,
        'Event': event_name,
        'wikipage': event_url,
        'poster_url': poster_url,
    })

posters_df = pd.DataFrame(poster_records, columns=POSTER_COLUMNS)

In [None]:
# Parse the "Mon D, YYYY" date strings into datetimes, then order the posters
# chronologically and renumber the rows from 0.
parsed_dates = pd.to_datetime(posters_df['Date'], format='%b %d, %Y')
posters_df = (
    posters_df
    .assign(Date=parsed_dates)
    .sort_values('Date')
    .reset_index(drop=True)
)
# posters_df[posters_df['poster_url']=="Not Found"]
# posters_df

In [None]:
# posters_df.loc[posters_df['Event'] == "UFC on Fox: Henderson vs. Melendez", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/8/84/UFC_on_Fox_Henderson_vs._Melendez_Poster.gif/220px-UFC_on_Fox_Henderson_vs._Melendez_Poster.gif"
# posters_df.loc[posters_df['Event'] == "UFC on FX: Belfort vs. Bisping", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/f/f5/UFC_on_FX_Belfort_vs._Bisping_poster.jpg/220px-UFC_on_FX_Belfort_vs._Bisping_poster.jpg"
# posters_df.loc[posters_df['Event'] == "UFC 149: Faber vs. Barão", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/f/f4/UFC_149_Faber_vs._Bar%C3%A3o_poster.jpg/220px-UFC_149_Faber_vs._Bar%C3%A3o_poster.jpg"
# posters_df.loc[posters_df['Event'] == "UFC on FX: Johnson vs. McCall", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/e/ea/UFC_on_FX_Johnson_vs._McCall.jpg/220px-UFC_on_FX_Johnson_vs._McCall.jpg"
# posters_df.loc[posters_df['Event'] == "UFC on Fuel TV: The Korean Zombie vs. Poirier", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/4/4c/UFC_on_Fuel_TV_Korean_Zombie_vs._Poirier_poster.jpg/220px-UFC_on_Fuel_TV_Korean_Zombie_vs._Poirier_poster.jpg"
# posters_df.loc[posters_df['Event'] == "UFC on Fuel TV: Sanchez vs. Ellenberger", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/4/4b/UFC_on_Fuel_TV_Sanchez_vs._Ellenberger_poster.jpg/220px-UFC_on_Fuel_TV_Sanchez_vs._Ellenberger_poster.jpg"
# posters_df.loc[posters_df['Event'] == "UFC on Fox: Evans vs. Davis", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/b/b3/UFC_on_Fox_Evans_vs._Davis_poster.jpg/220px-UFC_on_Fox_Evans_vs._Davis_poster.jpg"
# posters_df.loc[posters_df['Event'] == "UFC Fight Night: Stevenson vs. Guillard", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/b/bb/UFCFIGHTNIGHT9.jpg/220px-UFCFIGHTNIGHT9.jpg"

# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: Team Couture vs. Team Liddell Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/8/83/TUF_1_Finale_Poster_-_Fitness_Philippines.jpg/220px-TUF_1_Finale_Poster_-_Fitness_Philippines.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: Team Hughes vs. Team Franklin Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/c/c9/Tuf2Finale.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: Team Ortiz vs. Team Shamrock Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/f/f0/Tuf-3-finale.jpg/220px-Tuf-3-finale.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: The Comeback Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/4/42/UltimateFighter4.jpg/220px-UltimateFighter4.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: Team Pulver vs. Team Penn Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/a/a0/TUF_5_Finale.jpg/220px-TUF_5_Finale.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: Team Hughes vs. Team Serra Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/5/51/Ultimatefighter6.jpg/220px-Ultimatefighter6.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: Team Rampage vs. Team Forrest Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/5/5d/Ultimatefighter7.jpg/220px-Ultimatefighter7.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: United States vs. United Kingdom Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/a/a2/UFCSanchezGuida.jpg"
# posters_df.loc[posters_df['Event'] == "UFC on FX: Sotiropoulos vs. Pearson", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/8/81/The_smashes_finale.png/220px-The_smashes_finale.png"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter Nations Finale: Bisping vs. Kennedy", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/1/1b/TUF_Nations_Finale_event_poster.jpg/220px-TUF_Nations_Finale_event_poster.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter: A Champion Will Be Crowned Finale", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/1/1a/TUF_20_finale_event_poster.jpg"
# posters_df.loc[posters_df['Event'] == "The Ultimate Fighter Latin America 3 Finale: dos Anjos vs. Ferguson", 'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/3/37/TUF_LA_3_Finale.jpg"

# Missing wiki page
# posters_df.loc[posters_df['Event'] == "UFC Ultimate Fight Night", 'poster_url'] = "https://m.media-amazon.com/images/M/MV5BMTBjMjJjMWEtYTc3Yi00YTIyLWEyMDMtMjg5NmIyYTFlMmJmXkEyXkFqcGdeQXVyNDczMDU5Nw@@._V1_.jpg"

In [None]:
# event_url = posters_df[posters_df['poster_url'].str.contains("Flag")]['wikipage'].iloc[6]
# url_request = requests.get(event_url).text
# soup = BeautifulSoup(url_request, 'html.parser')
# images = soup.find_all('img')
# for image in images:
#     image_link = "http:" + image['src']
#     print(image_link) 

In [None]:
# Write posters_df to file; index=True also writes the row index, as an
# unlabelled first column.
# NOTE(review): the following cells read and write "data-mapping/ufc_posters.csv"
# with the index labelled 'id' instead — confirm which file is the canonical one.
posters_df.to_csv("data/ufc_posters.csv", index=True)

In [112]:
# Load posters_df
# This is the path that the to_csv cell further down writes to, where the row
# index is stored in a column labelled 'id'.
posters_df = pd.read_csv("data-mapping/ufc_posters.csv")

In [113]:
# Drop the 'id' column that came from the saved row index.
# errors='ignore' keeps the cell re-runnable: without it a second run raises
# KeyError because 'id' is already gone.
posters_df = posters_df.drop(columns=['id'], errors='ignore')

# Insert UFC 250
# new_row = {'Date':'2020-06-06', 'Event':'UFC 250: Nunes vs. Spencer',
#            'poster_url':'https://upload.wikimedia.org/wikipedia/en/d/db/UFC_250_official_poster.jpg',
#           'wikipage':'https://en.wikipedia.org/wiki/UFC_250'}
# posters_df = posters_df.append(new_row, ignore_index=True)
posters_df

Unnamed: 0,Date,Event,poster_url,wikipage
0,1993-11-12,UFC 1: The Beginning,http://upload.wikimedia.org/wikipedia/en/thumb/8/83/UFC1vhs.jpg/220px-UFC1vhs.jpg,https://en.wikipedia.org/wiki/UFC_1
1,1994-03-11,UFC 2: No Way Out,http://upload.wikimedia.org/wikipedia/en/thumb/2/2e/UFC2poster.jpg/220px-UFC2poster.jpg,https://en.wikipedia.org/wiki/UFC_2
2,1994-09-09,UFC 3: The American Dream,http://upload.wikimedia.org/wikipedia/en/thumb/c/c0/UFC_3_promotional_poster.jpg/220px-UFC_3_promotional_poster.jpg,https://en.wikipedia.org/wiki/UFC_3
3,1994-12-16,UFC 4: Revenge of the Warriors,http://upload.wikimedia.org/wikipedia/en/thumb/6/61/Ufc4.gif/220px-Ufc4.gif,https://en.wikipedia.org/wiki/UFC_4
4,1995-04-07,UFC 5: The Return of the Beast,http://upload.wikimedia.org/wikipedia/en/thumb/8/84/Ufc_5.jpg/220px-Ufc_5.jpg,https://en.wikipedia.org/wiki/UFC_5
...,...,...,...,...
513,2020-05-09 00:00:00,UFC 249 Ferguson vs. Gaethje,http://upload.wikimedia.org/wikipedia/en/thumb/6/61/Verified_UFC_249_Poster.jpg/220px-Verified_UFC_249_Poster.jpg,https://en.wikipedia.org/wiki/UFC_249
514,2020-05-13 00:00:00,UFC Fight Night Smith vs. Teixeira,http://upload.wikimedia.org/wikipedia/en/thumb/d/de/UFC_Fight_Night-_Smith_v_Teixeira.jpeg/220px-UFC_Fight_Night-_Smith_v_Teixeira.jpeg,https://en.wikipedia.org/wiki/UFC_Fight_Night:_Smith_vs._Teixeira
515,2020-05-16 00:00:00,UFC on ESPN Overeem vs. Harris,http://upload.wikimedia.org/wikipedia/en/thumb/9/98/Overeem_vs_Harris_official_poster.jpg/220px-Overeem_vs_Harris_official_poster.jpg,https://en.wikipedia.org/wiki/UFC_on_ESPN:_Overeem_vs._Harris
516,2020-05-30 00:00:00,UFC_Fight_Night_176 Woodley vs. Burns,http://upload.wikimedia.org/wikipedia/en/thumb/a/a6/UFC_Fight_Night-_Woodley_vs_Burns.png/220px-UFC_Fight_Night-_Woodley_vs_Burns.png,https://en.wikipedia.org/wiki/UFC_Fight_Night_176


In [124]:
# Ad-hoc checks for suspicious poster URLs; uncomment one filter at a time.
# posters_df[posters_df['poster_url'].isna()]
# posters_df[posters_df['poster_url'].str.contains('Logo_of_the_Ultimate_Fighting_Championship')]
# posters_df[posters_df['poster_url'].str.contains('Symbol_support_vote')]
# posters_df[posters_df['poster_url'].str.contains('icon')]
# posters_df[posters_df['Event'].str.contains('UFC on Fuel TV: Sanchez vs. Ellenberger')]

# Poster URLs of every event whose name contains "Finale".
is_finale = posters_df['Event'].str.contains('Finale')
posters_df.loc[is_finale, 'poster_url']

56     http://upload.wikimedia.org/wikipedia/en/thumb/8/83/TUF_1_Finale_Poster_-_Fitness_Philippines.jpg/220px-TUF_1_Finale_Poster_-_Fitness_Philippines.jpg                                        
63     http://upload.wikimedia.org/wikipedia/en/c/c9/Tuf2Finale.jpg                                                                                                                                 
71     http://upload.wikimedia.org/wikipedia/en/thumb/f/f0/Tuf-3-finale.jpg/220px-Tuf-3-finale.jpg                                                                                                  
79     http://upload.wikimedia.org/wikipedia/en/thumb/4/42/UltimateFighter4.jpg/220px-UltimateFighter4.jpg                                                                                          
92     http://upload.wikimedia.org/wikipedia/en/thumb/a/a0/TUF_5_Finale.jpg/220px-TUF_5_Finale.jpg                                                                                                  
100    http://u

In [130]:
# posters_df.loc[30,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/4/4e/UFC_27_poster.jpg/220px-UFC_27_poster.jpg"
# posters_df.loc[59,'poster_url'] = "https://m.media-amazon.com/images/M/MV5BMTBjMjJjMWEtYTc3Yi00YTIyLWEyMDMtMjg5NmIyYTFlMmJmXkEyXkFqcGdeQXVyNDczMDU5Nw@@._V1_.jpg"
# posters_df.loc[84,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/8/81/UFC67.jpg/220px-UFC67.jpg"
# posters_df.loc[86,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/b/bb/UFCFIGHTNIGHT9.jpg/220px-UFCFIGHTNIGHT9.jpg"
# posters_df.loc[132,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/f/ff/UFC100lesnarmir.jpg"
# posters_df.loc[142,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/4/49/Ufc_108_poster.jpg/220px-Ufc_108_poster.jpg"
# posters_df.loc[169,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/2/29/UFC_127_poster.jpg/220px-UFC_127_poster.jpg"
# posters_df.loc[171,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/d/d4/UFC128ShogunvsJones.jpg/220px-UFC128ShogunvsJones.jpg"
# posters_df.loc[218,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/e/ec/UFConFUEL6.jpg/220px-UFConFUEL6.jpg"
# posters_df.loc[333,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/8/87/UFN_75_poster.jpg"
# posters_df.loc[123,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/c/c6/Ufc_94_poster.jpg"
# posters_df.loc[209,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/7/71/UFC_148_Event_Poster.jpg/220px-UFC_148_Event_Poster.jpg"
# posters_df.loc[195,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/b/b3/UFC_on_Fox_Evans_vs._Davis_poster.jpg/220px-UFC_on_Fox_Evans_vs._Davis_poster.jpg"
# posters_df.loc[203,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/4/4c/UFC_on_Fuel_TV_Korean_Zombie_vs._Poirier_poster.jpg/220px-UFC_on_Fuel_TV_Korean_Zombie_vs._Poirier_poster.jpg"
# posters_df.loc[206,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/e/ea/UFC_on_FX_Johnson_vs._McCall.jpg/220px-UFC_on_FX_Johnson_vs._McCall.jpg"
# posters_df.loc[211,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/f/f4/UFC_149_Faber_vs._Bar%C3%A3o_poster.jpg/220px-UFC_149_Faber_vs._Bar%C3%A3o_poster.jpg"
# posters_df.loc[224,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/f/f5/UFC_on_FX_Belfort_vs._Bisping_poster.jpg/220px-UFC_on_FX_Belfort_vs._Bisping_poster.jpg"
# posters_df.loc[233,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/8/84/UFC_on_Fox_Henderson_vs._Melendez_Poster.gif/220px-UFC_on_Fox_Henderson_vs._Melendez_Poster.gif"
# posters_df.loc[120,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/2/22/Tuf8-finale-poster.jpg/220px-Tuf8-finale-poster.jpg"
# posters_df.loc[499,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/8/84/UFC_244_Poster.jpg/220px-UFC_244_Poster.jpg"
# posters_df.loc[36,'poster_url'] = "https://images.tapology.com/poster_images/155/profile/UFC_33_Victory_In_Vegas_Poster.JPG?1300686162"

# posters_df.loc[514,'poster_url'] = "https://dtvimages.hs.llnwd.net/e1//db_photos/showcards/v5/AllPhotos/18256516/p18256516_b_v5_aa.jpg"
# posters_df.loc[164,'poster_url'] = "https://upload.wikimedia.org/wikipedia/en/thumb/f/f8/Tuf_12_poster.jpg/220px-Tuf_12_poster.jpg"
# posters_df.loc[197,'poster_url'] = "http://upload.wikimedia.org/wikipedia/en/thumb/4/4b/UFC_on_Fuel_TV_Sanchez_vs._Ellenberger_poster.jpg/220px-UFC_on_Fuel_TV_Sanchez_vs._Ellenberger_poster.jpg"

# posters_df.loc[100,'poster_url'] = "https://i.pinimg.com/474x/6e/fa/01/6efa01c878ff09456dfad296643704a8--ufc-combate-ufc-events.jpg"
# posters_df.loc[140,'poster_url'] = "https://upload.wikimedia.org/wikipedia/en/d/d5/Tuf_10_finale_newsletter_poster.jpg"
# posters_df.loc[153,'poster_url'] = "https://upload.wikimedia.org/wikipedia/en/1/16/TUF_11_Poster.jpg"
# posters_df.loc[175,'poster_url'] = "https://upload.wikimedia.org/wikipedia/en/9/91/TUF13Finale.jpg"


In [131]:
# Save the posters together with the row index, stored in a column named 'id'.
posters_df.to_csv("data-mapping/ufc_posters.csv", index=True, index_label='id')

In [132]:
# event_url = posters_df.loc[100]['wikipage']
# url_request = requests.get(event_url).text
# soup = BeautifulSoup(url_request, 'html.parser')
# images = soup.find_all('img')
# for image in images:
#     image_link = "http:" + image['src']
#     print(image_link) 
