## Set up

In [1]:
# load the packages
import requests
from bs4 import BeautifulSoup

In [2]:
# Address of the Rotten Tomatoes editorial guide page to scrape
# (the trailing "/2/" presumably selects the second page of the list)
base_site = (
    "https://editorial.rottentomatoes.com"
    "/guide/140-essential-action-movies-to-watch-now/2/"
)

In [3]:
# Fetch the page.
# - timeout: requests applies no timeout by default, so without it this cell
#   could hang indefinitely on an unresponsive server.
# - raise_for_status(): stop here on a 4xx/5xx answer instead of letting the
#   following cells parse an error page and silently produce empty results.
response = requests.get(base_site, timeout=30)
response.raise_for_status()
# Displayed as the cell output so the reader can see the request succeeded
response.status_code

200

In [4]:
# Keep the raw response body (bytes, per the requests API) as the HTML
# document that is handed to the parser in the next cell
html = response.content

In [5]:
# Parse the downloaded HTML into a BeautifulSoup tree using the lxml parser
soup = BeautifulSoup(html, features="lxml")

In [6]:
# Collect every <div> whose class attribute is exactly the string below;
# these are the containers holding the information we want to scrape
divs = soup.find_all("div", class_="col-sm-18 col-full-xs countdown-item-content")

In [7]:
# Take the first <h2> inside each container div (None if a div has no <h2>)
headings = [container.find("h2") for container in divs]
headings

[<h2><a href="https://www.rottentomatoes.com/m/13_assassins_2011/">13 Assassins</a> <span class="subtle start-year">(2011)</span> <span class="icon tiny certified" title="Certified Fresh"></span> <span class="tMeterScore">95%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/full_contact/">Full Contact</a> <span class="subtle start-year">(1992)</span> <span class="icon tiny fresh" title="Fresh"></span> <span class="tMeterScore">88%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/indiana_jones_and_the_last_crusade/">Indiana Jones and the Last Crusade</a> <span class="subtle start-year">(1989)</span> <span class="icon tiny certified" title="Certified Fresh"></span> <span class="tMeterScore">88%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/kung_fu_hustle/">Kung Fu Hustle</a> <span class="subtle start-year">(2005)</span> <span class="icon tiny certified" title="Certified Fresh"></span> <span class="tMeterScore">90%</span></h2>,
 <h2><a href="https://www

## Extracting the scores

In [8]:
# Preview: for each heading, the first <span> whose class is 'tMeterScore'
[h2.find("span", attrs={"class": "tMeterScore"}) for h2 in headings]

[<span class="tMeterScore">95%</span>,
 <span class="tMeterScore">88%</span>,
 <span class="tMeterScore">88%</span>,
 <span class="tMeterScore">90%</span>,
 <span class="tMeterScore">93%</span>,
 <span class="tMeterScore">94%</span>,
 <span class="tMeterScore">90%</span>,
 <span class="tMeterScore">93%</span>,
 <span class="tMeterScore">97%</span>,
 <span class="tMeterScore">98%</span>,
 <span class="tMeterScore">93%</span>,
 <span class="tMeterScore">92%</span>,
 <span class="tMeterScore">90%</span>,
 <span class="tMeterScore">82%</span>,
 <span class="tMeterScore">98%</span>,
 <span class="tMeterScore">81%</span>,
 <span class="tMeterScore">88%</span>,
 <span class="tMeterScore">96%</span>,
 <span class="tMeterScore">91%</span>,
 <span class="tMeterScore">90%</span>,
 <span class="tMeterScore">85%</span>,
 <span class="tMeterScore">96%</span>,
 <span class="tMeterScore">97%</span>,
 <span class="tMeterScore">87%</span>,
 <span class="tMeterScore">77%</span>,
 <span class="tMeterScore

In [9]:
# Extract the score text (e.g. '95%') from each heading as a plain `str`.
# get_text() is used instead of `.string` because `.string` yields
# NavigableString objects, which keep a reference to the whole parse tree,
# and returns None if the span ever contains nested markup.
# strip=True also removes stray surrounding whitespace before later parsing.
# A heading without a tMeterScore span still raises AttributeError here on
# purpose: silently skipping it would misalign scores with their headings.
scores = [
    heading.find("span", class_='tMeterScore').get_text(strip=True)
    for heading in headings
]
scores

['95%',
 '88%',
 '88%',
 '90%',
 '93%',
 '94%',
 '90%',
 '93%',
 '97%',
 '98%',
 '93%',
 '92%',
 '90%',
 '82%',
 '98%',
 '81%',
 '88%',
 '96%',
 '91%',
 '90%',
 '85%',
 '96%',
 '97%',
 '87%',
 '77%',
 '90%',
 '94%',
 '79%',
 '83%',
 '85%',
 '92%',
 '91%',
 '94%',
 '93%',
 '77%',
 '82%',
 '66%',
 '89%',
 '89%',
 '95%',
 '93%',
 '100%',
 '98%',
 '80%',
 '94%',
 '70%',
 '87%',
 '93%',
 '100%',
 '76%',
 '85%',
 '73%',
 '94%',
 '83%',
 '86%',
 '97%',
 '81%',
 '92%',
 '82%',
 '95%',
 '86%',
 '86%',
 '97%',
 '95%',
 '99%',
 '94%',
 '88%',
 '93%',
 '93%',
 '97%']

In [10]:
# Drop the '%' character from both ends of every score string.
# NOTE: this rebinds `scores` in place, so the cell expects string elements;
# re-running it after the integer-conversion cell would fail.
scores = [score_text.strip('%') for score_text in scores]
scores

['95',
 '88',
 '88',
 '90',
 '93',
 '94',
 '90',
 '93',
 '97',
 '98',
 '93',
 '92',
 '90',
 '82',
 '98',
 '81',
 '88',
 '96',
 '91',
 '90',
 '85',
 '96',
 '97',
 '87',
 '77',
 '90',
 '94',
 '79',
 '83',
 '85',
 '92',
 '91',
 '94',
 '93',
 '77',
 '82',
 '66',
 '89',
 '89',
 '95',
 '93',
 '100',
 '98',
 '80',
 '94',
 '70',
 '87',
 '93',
 '100',
 '76',
 '85',
 '73',
 '94',
 '83',
 '86',
 '97',
 '81',
 '92',
 '82',
 '95',
 '86',
 '86',
 '97',
 '95',
 '99',
 '94',
 '88',
 '93',
 '93',
 '97']

In [11]:
# Turn the digit-only score strings into integers
scores = list(map(int, scores))
scores

[95,
 88,
 88,
 90,
 93,
 94,
 90,
 93,
 97,
 98,
 93,
 92,
 90,
 82,
 98,
 81,
 88,
 96,
 91,
 90,
 85,
 96,
 97,
 87,
 77,
 90,
 94,
 79,
 83,
 85,
 92,
 91,
 94,
 93,
 77,
 82,
 66,
 89,
 89,
 95,
 93,
 100,
 98,
 80,
 94,
 70,
 87,
 93,
 100,
 76,
 85,
 73,
 94,
 83,
 86,
 97,
 81,
 92,
 82,
 95,
 86,
 86,
 97,
 95,
 99,
 94,
 88,
 93,
 93,
 97]