## Set up

In [1]:
# load the packages
import requests
from bs4 import BeautifulSoup

In [2]:
# URL of the Rotten Tomatoes editorial guide page that this notebook scrapes
base_site = (
    "https://editorial.rottentomatoes.com"
    "/guide/140-essential-action-movies-to-watch-now/2/"
)

In [3]:
# Send a GET request for the page. requests applies no timeout by default,
# so pass one explicitly to keep the cell from hanging if the server stalls.
response = requests.get(base_site, timeout=30)
# Fail fast on a 4xx/5xx reply so later cells never parse an error page.
response.raise_for_status()
# Display the HTTP status code as the cell output
response.status_code

200

In [4]:
# Keep the body of the response; it is handed to BeautifulSoup for parsing in a later cell
html = response.content

In [12]:
# Parse the downloaded HTML into a BeautifulSoup object using the lxml parser
soup = BeautifulSoup(html, features='lxml')

# Write a pretty-printed copy of the parsed markup to disk.
# prettify() is asked for UTF-8 encoded output, so the file is opened in binary mode.
with open('Rotton Tomatoes with lxml.html', mode='wb') as out_file:
    out_file.write(soup.prettify(encoding='utf-8'))

In [6]:
# Collect every <div> whose class attribute matches the container used for the items we scrape
divs = soup.find_all("div", class_="col-sm-18 col-full-xs countdown-item-content")

In [7]:
# Take the first <h2> found inside each collected div (None when a div has no <h2>)
headings = [item_div.find("h2") for item_div in divs]
headings

[<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny fresh" title="Fresh"></span> <span class="tMeterScore">60%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/equilibrium/">Equilibrium</a> <span class="subtle start-year">(2002)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">40%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/hero/">Hero</a> <span class="subtle start-year">(2004)</span> <span class="icon tiny certified" title="Certified Fresh"></span> <span class="tMeterScore">95%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/1017666-road_house/">Road House</a> <span class="subtle start-year">(1989)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">39%</span></h2>,
 <h2><a href="https://www.rottentomatoes.com/m/unstoppable-2010/">Unstoppable</a> <span class="subtle st

## Extracting the scores

In [14]:
# From each heading keep only the <span class="tMeterScore"> tag.
# Written as a comprehension to match the neighbouring cells and to avoid
# leaving a stray loop variable behind in the kernel namespace.
score = [heading.find('span', class_='tMeterScore') for heading in headings]
score

[<span class="tMeterScore">60%</span>,
 <span class="tMeterScore">40%</span>,
 <span class="tMeterScore">95%</span>,
 <span class="tMeterScore">39%</span>,
 <span class="tMeterScore">86%</span>,
 <span class="tMeterScore">88%</span>,
 <span class="tMeterScore">84%</span>,
 <span class="tMeterScore">69%</span>,
 <span class="tMeterScore">68%</span>,
 <span class="tMeterScore">46%</span>,
 <span class="tMeterScore">53%</span>,
 <span class="tMeterScore">92%</span>,
 <span class="tMeterScore">93%</span>,
 <span class="tMeterScore">96%</span>,
 <span class="tMeterScore">54%</span>,
 <span class="tMeterScore">55%</span>,
 <span class="tMeterScore">68%</span>,
 <span class="tMeterScore">60%</span>,
 <span class="tMeterScore">60%</span>,
 <span class="tMeterScore">60%</span>,
 <span class="tMeterScore">59%</span>,
 <span class="tMeterScore">91%</span>,
 <span class="tMeterScore">76%</span>,
 <span class="tMeterScore">38%</span>,
 <span class="tMeterScore">55%</span>,
 <span class="tMeterScore

In [15]:
# Pull the text content out of each score span via its .string attribute
score_str = [span.string for span in score]
score_str

['60%',
 '40%',
 '95%',
 '39%',
 '86%',
 '88%',
 '84%',
 '69%',
 '68%',
 '46%',
 '53%',
 '92%',
 '93%',
 '96%',
 '54%',
 '55%',
 '68%',
 '60%',
 '60%',
 '60%',
 '59%',
 '91%',
 '76%',
 '38%',
 '55%',
 '43%',
 '52%',
 '63%',
 '63%',
 '67%',
 '61%',
 '72%',
 '92%',
 '72%',
 '79%',
 '65%',
 '97%',
 '71%',
 '94%',
 '68%',
 '86%',
 '68%',
 '92%',
 '91%',
 '89%',
 '63%',
 '93%',
 '69%',
 '69%',
 '91%',
 '58%',
 '60%',
 '70%',
 '62%',
 '51%',
 '93%',
 '73%',
 '74%',
 '71%',
 '77%',
 '79%',
 '80%',
 '80%',
 '82%',
 '85%',
 '86%',
 '91%',
 '86%',
 '87%',
 '93%',
 '95%',
 '88%',
 '88%',
 '90%',
 '93%',
 '94%',
 '90%',
 '93%',
 '98%',
 '98%',
 '93%',
 '92%',
 '90%',
 '82%',
 '98%',
 '81%',
 '88%',
 '96%',
 '89%',
 '90%',
 '85%',
 '96%',
 '97%',
 '87%',
 '77%',
 '90%',
 '94%',
 '79%',
 '83%',
 '85%',
 '92%',
 '91%',
 '94%',
 '93%',
 '77%',
 '82%',
 '66%',
 '89%',
 '89%',
 '95%',
 '93%',
 '100%',
 '98%',
 '80%',
 '94%',
 '71%',
 '87%',
 '93%',
 '100%',
 '76%',
 '85%',
 '73%',
 '94%',
 '83%',
 '86%'

In [17]:
# Drop any leading/trailing '%' characters so only the digits remain
score_str = [text.strip('%') for text in score_str]
score_str

['60',
 '40',
 '95',
 '39',
 '86',
 '88',
 '84',
 '69',
 '68',
 '46',
 '53',
 '92',
 '93',
 '96',
 '54',
 '55',
 '68',
 '60',
 '60',
 '60',
 '59',
 '91',
 '76',
 '38',
 '55',
 '43',
 '52',
 '63',
 '63',
 '67',
 '61',
 '72',
 '92',
 '72',
 '79',
 '65',
 '97',
 '71',
 '94',
 '68',
 '86',
 '68',
 '92',
 '91',
 '89',
 '63',
 '93',
 '69',
 '69',
 '91',
 '58',
 '60',
 '70',
 '62',
 '51',
 '93',
 '73',
 '74',
 '71',
 '77',
 '79',
 '80',
 '80',
 '82',
 '85',
 '86',
 '91',
 '86',
 '87',
 '93',
 '95',
 '88',
 '88',
 '90',
 '93',
 '94',
 '90',
 '93',
 '98',
 '98',
 '93',
 '92',
 '90',
 '82',
 '98',
 '81',
 '88',
 '96',
 '89',
 '90',
 '85',
 '96',
 '97',
 '87',
 '77',
 '90',
 '94',
 '79',
 '83',
 '85',
 '92',
 '91',
 '94',
 '93',
 '77',
 '82',
 '66',
 '89',
 '89',
 '95',
 '93',
 '100',
 '98',
 '80',
 '94',
 '71',
 '87',
 '93',
 '100',
 '76',
 '85',
 '73',
 '94',
 '83',
 '86',
 '97',
 '81',
 '92',
 '82',
 '95',
 '86',
 '86',
 '97',
 '95',
 '97',
 '94',
 '87',
 '93',
 '93',
 '97']

In [19]:
# Turn the cleaned score strings into integers
score_int = list(map(int, score_str))
score_int

[60,
 40,
 95,
 39,
 86,
 88,
 84,
 69,
 68,
 46,
 53,
 92,
 93,
 96,
 54,
 55,
 68,
 60,
 60,
 60,
 59,
 91,
 76,
 38,
 55,
 43,
 52,
 63,
 63,
 67,
 61,
 72,
 92,
 72,
 79,
 65,
 97,
 71,
 94,
 68,
 86,
 68,
 92,
 91,
 89,
 63,
 93,
 69,
 69,
 91,
 58,
 60,
 70,
 62,
 51,
 93,
 73,
 74,
 71,
 77,
 79,
 80,
 80,
 82,
 85,
 86,
 91,
 86,
 87,
 93,
 95,
 88,
 88,
 90,
 93,
 94,
 90,
 93,
 98,
 98,
 93,
 92,
 90,
 82,
 98,
 81,
 88,
 96,
 89,
 90,
 85,
 96,
 97,
 87,
 77,
 90,
 94,
 79,
 83,
 85,
 92,
 91,
 94,
 93,
 77,
 82,
 66,
 89,
 89,
 95,
 93,
 100,
 98,
 80,
 94,
 71,
 87,
 93,
 100,
 76,
 85,
 73,
 94,
 83,
 86,
 97,
 81,
 92,
 82,
 95,
 86,
 86,
 97,
 95,
 97,
 94,
 87,
 93,
 93,
 97]