# Step 1: Import Libraries

In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import lxml

# Step 2: Get Soup

In [2]:
# address of the Rotten Tomatoes editorial guide page that gets scraped below
base_url = "https://editorial.rottentomatoes.com/guide/140-essential-action-movies-to-watch-now/"

In [3]:
# Download the guide page.
# - timeout (seconds) keeps the cell from hanging indefinitely on a stalled connection.
# - raise_for_status() stops the notebook on an HTTP error (4xx/5xx) instead of
#   letting an error page be handed to the parser in the cells below.
response=requests.get(base_url, timeout=30)
response.raise_for_status()
response

<Response [200]>

In [4]:
# keep the body of the response (bytes) as the markup to parse
html = response.content

In [5]:
# parse the downloaded markup with the lxml parser
soup = BeautifulSoup(html, features='lxml')

In [6]:
# Optional debugging aid (disabled): write a pretty-printed copy of the parsed
# page to disk so its markup can be inspected offline. Uncomment to use.
#with open("Rotten tomatoes.html","wb") as file:
#    file.write(soup.prettify('utf-8'))

# Step 3: Get Info From Soup

In [7]:
# The class "row countdown-item" was picked out of the page source (via the
# browser's inspect element) as the marker of one movie entry; collect every
# <div> carrying it and check how many were found.
rows = soup.find_all(
    'div',
    class_='row countdown-item',
)
len(rows)

140

In [13]:
# display the markup of the first entry
rows[0]

<div class="row countdown-item" id="row-index-140" style="padding-left: 10px;">
<div class="countdown-index-resposive">#140</div>
<div class="col-sm-6 col-full-xs">
<a class="article_movie_poster" href="https://www.rottentomatoes.com/m/1018009-running_scared/">
<div><img alt="" class="article_poster" sborder="" src="https://resizing.flixster.com/CmoPUfXGhLtGpC1YxPG62_ibzQE=/180x240/v1.bTsxMTYxNDg5MjtqOzE4Nzk2OzIwNDg7NjE4OzgyNA" style="border-color: #EEEEEE; border-style: solid; border-width: 1px;"/></div>
</a>
</div>
<div class="col-sm-18 col-full-xs countdown-item-content">
<div class="row countdown-item-title-bar">
<div class="col-sm-20 col-full-xs" style="height: 100%;">
<div class="article_movie_title" style="float: left;">
<div><h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">57%</span></h2></div>
</div>
</div>
<div 

In [8]:
# Take the <h2> heading out of every entry; the later cells read the title,
# year and rating from these headings.
headings = [
    entry.find('h2')
    for entry in rows
]
headings[0]

<h2><a href="https://www.rottentomatoes.com/m/1018009-running_scared/">Running Scared</a> <span class="subtle start-year">(1986)</span> <span class="icon tiny rotten" title="Rotten"></span> <span class="tMeterScore">57%</span></h2>

### Titles

In [9]:
# Extract the title from the <a> inside each heading.
# get_text() returns a plain str. The previous .string returned a NavigableString,
# which keeps a reference to the whole parse tree for as long as the value is
# stored (here: in the DataFrame built later), and is None when the tag holds
# more than a single text node.
titles=[heading.find("a").get_text() for heading in headings]
len(titles)

140

### Year

In [10]:
# Extract the release year, e.g. '(1986)', from each heading.
# The <span> is selected by its "start-year" class rather than by being the first
# <span> in the heading, so the lookup does not depend on element order (the
# rating cell selects its span by class as well).
# get_text() returns a plain str instead of a parse-tree-bound NavigableString.
years=[heading.find('span',class_='start-year').get_text() for heading in headings]
years[139]

'(2015)'

In [11]:
# strip the surrounding parentheses, e.g. '(1986)' -> '1986'
years = [
    yr.strip('()')
    for yr in years
]
years[5]

'1971'

### Rating

In [14]:
# Extract the score (e.g. '57%') from the <span class="tMeterScore"> of each
# heading. The span is located by class, not by position.
# get_text() returns a plain str; .string would return a NavigableString that
# keeps the whole parse tree referenced while the value is stored.
ratings=[heading.find('span',class_="tMeterScore").get_text() for heading in headings]
ratings[139]

'97%'

### Critic Consensus

In [33]:
# read the text of the critics-consensus <div> of every entry
critic_consensus = [
    entry.find('div', class_="info critics-consensus").get_text()
    for entry in rows
]
critic_consensus[0]

'Critics Consensus: Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.'

In [35]:
# label found at the start of the scraped consensus text, and its length;
# both are used by the next cell to cut the label off
common_phrase = 'Critics Consensus: '
commonlen = len(common_phrase)

In [41]:
# Cut the leading label off each consensus string; strings without it are kept as-is.
# The prefix test uses common_phrase (as the cast/director cells do) instead of a
# second hard-coded copy of the literal, so the text that is checked and the
# number of characters removed (commonlen) always come from the same cell.
critic_consensus=[item[commonlen:] if item.startswith(common_phrase) else item for item in critic_consensus]

In [44]:
# preview the first five cleaned consensus strings
critic_consensus[:5]

['Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.',
 'Equilibrium is a reheated mishmash of other sci-fi movies.',
 'With death-defying action sequences and epic historic sweep, Hero offers everything a martial arts fan could ask for.',
 "Whether Road House is simply bad or so bad it's good depends largely on the audience's fondness for Swayze -- and tolerance for violently cheesy action.",
 "As fast, loud, and relentless as the train at the center of the story, Unstoppable is perfect popcorn entertainment -- and director Tony Scott's best movie in years."]

### Synopsis

In [17]:
# read the text of the synopsis <div> of every entry
synopsis = [
    entry.find('div', class_="info synopsis").get_text()
    for entry in rows
]
synopsis[0]

'Synopsis: Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this... [More]'

In [46]:
# label found at the start of the scraped synopsis text, and its length;
# both are used by the next cell to cut the label off
common_phrase = 'Synopsis: '
commonlen = len(common_phrase)

In [47]:
# Cut the leading label off each synopsis string; strings without it are kept as-is.
# The prefix test uses common_phrase (as the cast/director cells do) instead of a
# second hard-coded copy of the literal, so the text that is checked and the
# number of characters removed (commonlen) always come from the same cell.
synopsis=[item[commonlen:] if item.startswith(common_phrase) else item for item in synopsis]

In [50]:
# preview the first two cleaned synopsis strings
synopsis[:2]

['Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this... [More]',
 'In the nation of Libria, there is always peace among men. The rules of the Librian system are simple. If... [More]']

### Cast

In [18]:
# read the text of the cast <div> of every entry
cast = [
    entry.find('div', class_="info cast").get_text()
    for entry in rows
]
cast[0]

'\nStarring: Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer'

In [51]:
# label (including its leading newline) found at the start of the scraped cast
# text, and its length; both are used by the next cell to cut the label off
common_phrase = '\nStarring: '
commonlen = len(common_phrase)

In [52]:
# cut the label off the front of each cast string; strings without it are kept as-is
cast = [
    entry[commonlen:] if entry.startswith(common_phrase) else entry
    for entry in cast
]

In [54]:
# preview the first two cleaned cast strings
cast[:2]

['Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer',
 'Christian Bale, Emily Watson, Taye Diggs, Angus Macfadyen']

### Director

In [19]:
# read the text of the director <div> of every entry
director = [
    entry.find('div', class_="info director").get_text()
    for entry in rows
]
director[0]

'\nDirected By: Peter Hyams'

In [55]:
# label (including its leading newline) found at the start of the scraped
# director text, and its length; both are used by the next cell to cut the label off
common_phrase = '\nDirected By: '
commonlen = len(common_phrase)

In [56]:
# cut the label off the front of each director string; strings without it are kept as-is
director = [
    entry[commonlen:] if entry.startswith(common_phrase) else entry
    for entry in director
]

In [57]:
# preview the first two cleaned director strings
director[:2]

['Peter Hyams', 'Kurt Wimmer']

# Step 4: Construct Data Frame

In [66]:
# Zip the per-movie lists into one tuple per movie and build the table from
# those tuples, naming the columns in the same order as the zipped lists.
df = pd.DataFrame(
    data=list(zip(titles, years, ratings, critic_consensus, synopsis, cast, director)),
    columns=['Title', 'Year', 'Rating', 'Critic_Consensus', 'Synopsis', 'Cast', 'Director'],
)

In [67]:
# first three movies of the table
df.head(n=3)

Unnamed: 0,Title,Year,Rating,Critic_Consensus,Synopsis,Cast,Director
0,Running Scared,1986,57%,Running Scared struggles to strike a consisten...,"Distinguished by a sharp, witty dialogue betwe...","Gregory Hines, Billy Crystal, Jimmy Smits, Ste...",Peter Hyams
1,Equilibrium,2002,41%,Equilibrium is a reheated mishmash of other sc...,"In the nation of Libria, there is always peace...","Christian Bale, Emily Watson, Taye Diggs, Angu...",Kurt Wimmer
2,Hero,2004,95%,With death-defying action sequences and epic h...,Hero is two-time Academy Award nominee Zhang Y...,"Jet Li, Tony Leung Chiu Wai, Maggie Cheung, Da...",Zhang Yimou


In [78]:
# remove the column-width limit so long text columns are displayed in full
pd.options.display.max_colwidth = None

In [79]:
# first three movies again, now with untruncated text columns
df.head(n=3)

Unnamed: 0,Title,Year,Rating,Critic_Consensus,Synopsis,Cast,Director
0,Running Scared,1986,57%,"Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.","Distinguished by a sharp, witty dialogue between its two cop protagonists, Ray and Danny (Gregory Hines and Billy Crystal), this... [More]","Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer",Peter Hyams
1,Equilibrium,2002,41%,Equilibrium is a reheated mishmash of other sci-fi movies.,"In the nation of Libria, there is always peace among men. The rules of the Librian system are simple. If... [More]","Christian Bale, Emily Watson, Taye Diggs, Angus Macfadyen",Kurt Wimmer
2,Hero,2004,95%,"With death-defying action sequences and epic historic sweep, Hero offers everything a martial arts fan could ask for.",Hero is two-time Academy Award nominee Zhang Yimou's directorial attempt at exploring the concept of a Chinese hero. During the... [More],"Jet Li, Tony Leung Chiu Wai, Maggie Cheung, Daoming Chen",Zhang Yimou


In [68]:
# look up a single movie by its title
# NOTE(review): in the output below the Director value is empty for this row --
# worth checking that entry's markup on the page.
df.loc[df['Title'].eq('Iron Man')]

Unnamed: 0,Title,Year,Rating,Critic_Consensus,Synopsis,Cast,Director
75,Iron Man,2008,94%,"Powered by Robert Downey Jr.'s vibrant charm, ...",Billionaire industrialist and genius inventor ...,"Robert Downey Jr., Terrence Howard, Jeff Bridg...",


# Step 5: Export File

In [80]:
# write the table to a CSV file with a header row and without the index column
df.to_csv(
    'Movies Info.csv',
    header=True,
    index=False,
)