 The purpose of this script is to scrape a bleacher report website and obtain a list of wrestling names 🤼

In [1]:
from bs4 import BeautifulSoup
import requests
import re

In [2]:
# Make request to URL using requests package
url = 'https://bleacherreport.com/articles/1273228-pro-wrestlings-50-worststupidest-ring-names-of-all-time'
r = requests.get(url)

In [3]:
# The HTML as a python string (first five hundred characters)
r.text[:500]

'<!DOCTYPE html><html class="no-js" lang="en" data-reactroot=""><head><meta charSet="utf-8"/><meta http-equiv="Accept-CH" content="DPR,Width,Viewport-Width"/><meta name="aol-te-auth" content="1c424580-0f86-4d9b-88b2-bc8c0d029d4c"/><meta name="blitz" content="mu-6e4ce5cd-57f20d11-7c0ecee9-d55c79e2"/><meta name="msvalidate.01" content="7A63840181953B2A5A1FEA25FB45A991"/><meta name="robots" content="NOODP,NOYDIR"/><meta name="verify-v1" content="+Ntj422Jc4V03qgBqLYbF3LMvrursV0X2btn2Zoqn9w="/><meta n'

In [4]:
# Make the HTML searchable with BeautifulSoup package
soup = BeautifulSoup(r.text)

In [5]:
# Find all h1 HTML elements
soup.findAll('h1')

[<h1>Pro Wrestling's 50 Worst/Stupidest Ring Names of All Time</h1>,
 <h1>Pro Wrestling's 50 Worst/Stupidest Ring Names of All Time</h1>,
 <h1>50. Pegasus Kid</h1>,
 <h1>49. Shelton Benjamin</h1>,
 <h1>48. Lash LeRoux  </h1>,
 <h1>47. Pez Whatley</h1>,
 <h1>46. Test</h1>,
 <h1>45. Mean Mark Callous </h1>,
 <h1>44. Scotty 2 Hotty </h1>,
 <h1>43. Dolph Ziggler</h1>,
 <h1>42. Most Modern Divas</h1>,
 <h1>41. Giant Baba</h1>,
 <h1>40. Damien Demento  </h1>,
 <h1>39. Festus</h1>,
 <h1>38. Brutus “the Barber” Beefcake</h1>,
 <h1>37. Diamond Studd  </h1>,
 <h1>   36. Kung Fu Naki</h1>,
 <h1>35. Dump Matsumoto</h1>,
 <h1>34. Disco Inferno</h1>,
 <h1>33. Boogeyman</h1>,
 <h1>32. The Yeti</h1>,
 <h1>31. Loch Ness</h1>,
 <h1>30. Firebreaker Chip</h1>,
 <h1>29. Jimmy Wang Yang</h1>,
 <h1>28. Thurman “Sparky” Plugg  </h1>,
 <h1>27. The Red Rooster</h1>,
 <h1>26. Basham Brothers</h1>,
 <h1> 25. Justin Credible</h1>,
 <h1>24. Duke "the Dumpster" Droese</h1>,
 <h1>23. The Battman</h1>,
 <h1>22. Arachn

In [6]:
# Find all h1 elements with one or two digits followed by a period
# Introduction to regular expressions: https://docs.python.org/3/howto/regex.html
h1_elements = soup.findAll('h1', text=re.compile('^\d{1,2}.'))
h1_elements

[<h1>50. Pegasus Kid</h1>,
 <h1>49. Shelton Benjamin</h1>,
 <h1>48. Lash LeRoux  </h1>,
 <h1>47. Pez Whatley</h1>,
 <h1>46. Test</h1>,
 <h1>45. Mean Mark Callous </h1>,
 <h1>44. Scotty 2 Hotty </h1>,
 <h1>43. Dolph Ziggler</h1>,
 <h1>42. Most Modern Divas</h1>,
 <h1>41. Giant Baba</h1>,
 <h1>40. Damien Demento  </h1>,
 <h1>39. Festus</h1>,
 <h1>38. Brutus “the Barber” Beefcake</h1>,
 <h1>37. Diamond Studd  </h1>,
 <h1>35. Dump Matsumoto</h1>,
 <h1>34. Disco Inferno</h1>,
 <h1>33. Boogeyman</h1>,
 <h1>32. The Yeti</h1>,
 <h1>31. Loch Ness</h1>,
 <h1>30. Firebreaker Chip</h1>,
 <h1>29. Jimmy Wang Yang</h1>,
 <h1>28. Thurman “Sparky” Plugg  </h1>,
 <h1>27. The Red Rooster</h1>,
 <h1>26. Basham Brothers</h1>,
 <h1>24. Duke "the Dumpster" Droese</h1>,
 <h1>23. The Battman</h1>,
 <h1>22. Arachnaman</h1>,
 <h1>21. Chilly McFreeze</h1>,
 <h1>20. Irwin R Schyster</h1>,
 <h1>19. Max Moon</h1>,
 <h1>18. Mr. Pogo</h1>,
 <h1>17. Mantaur</h1>,
 <h1>16. Terra Ryzin  </h1>,
 <h1>15. Puke</h1>,
 <h1>14

In [7]:
# Clean the names by 
# a) Removing the leading digits and period (re.sub)
# b) stripping the leading and ending whitespace (strip)
names = []
for elem in h1_elements:
    clean_name = re.sub(r'^\d{1,2}. ', '', elem.text).strip()
    names.append(clean_name)

In [8]:
names

['Pegasus Kid',
 'Shelton Benjamin',
 'Lash LeRoux',
 'Pez Whatley',
 'Test',
 'Mean Mark Callous',
 'Scotty 2 Hotty',
 'Dolph Ziggler',
 'Most Modern Divas',
 'Giant Baba',
 'Damien Demento',
 'Festus',
 'Brutus “the Barber” Beefcake',
 'Diamond Studd',
 'Dump Matsumoto',
 'Disco Inferno',
 'Boogeyman',
 'The Yeti',
 'Loch Ness',
 'Firebreaker Chip',
 'Jimmy Wang Yang',
 'Thurman “Sparky” Plugg',
 'The Red Rooster',
 'Basham Brothers',
 'Duke "the Dumpster" Droese',
 'The Battman',
 'Arachnaman',
 'Chilly McFreeze',
 'Irwin R Schyster',
 'Max Moon',
 'Mr. Pogo',
 'Mantaur',
 'Terra Ryzin',
 'Puke',
 'Bastion Booger',
 'Big Dick Johnson',
 'M.I. Smooth',
 'Tugboat',
 'Meat',
 'Shaggy 2 Dope',
 'Shark Boy',
 'Henry O. Godwinn and Phinias I. Godwinn',
 'Ze Gangsta',
 'Isaac Yankem D.D.S.',
 'Beaver Cleavage',
 'Curry Man',
 'The Blue Meanie',
 'Ding Dongs']