### Importing packages

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

### Making a GET request

In [2]:
# Rotten Tomatoes editorial guide page that every later cell scrapes.
base_site = (
    "https://editorial.rottentomatoes.com"
    "/guide/140-essential-action-movies-to-watch-now/"
)

In [3]:
# Fetch the guide page.
# - timeout: without one, requests can wait indefinitely on a stalled connection.
# - raise_for_status(): fail here on a 4xx/5xx answer instead of letting the
#   following cells parse an error page and break with a confusing IndexError.
response = requests.get(base_site, timeout=30)
response.raise_for_status()
response

<Response [200]>

In [4]:
# Raw response body as bytes; the slice previews the first 100 of them.
html = response.content
html[:100]

b'<!DOCTYPE html>\n<html lang="en-US">\n    <head prefix="og: http://ogp.me/ns# flixstertomatoes: http:/'

### Making the soup

In [5]:
# Parse the page with the "lxml" parser.
# NOTE(review): the next cell rebinds `soup` using "html.parser", so on a
# top-to-bottom run this result is discarded before anything reads it.
soup = BeautifulSoup(html,"lxml")

In [6]:
# Parse the page with Python's built-in "html.parser"; this is the `soup`
# that all later cells search.
soup = BeautifulSoup(html, features="html.parser")

### Exporting HTML to a file

In [7]:
# Save a pretty-printed, UTF-8 encoded copy of the parsed page to disk.
# prettify() returns bytes when given an encoding, hence the binary mode.
with open("rottentomatoes_response.html", mode='wb') as out_file:
    out_file.write(soup.prettify(encoding='utf-8'))

### Searching and navigating HTML tree

#### Searching: find() and find_all()

#### Extracting the movie divs that contain the required info

In [57]:
# One div per movie entry, selected by its full class string.
# Every column extracted below is looked up inside these divs.
divs = soup.find_all(
    'div',
    class_='col-sm-18 col-full-xs countdown-item-content',
)

In [9]:
# Peek at the first entry: the text of its first <a> tag is the movie title.
divs[0].find('a').string

'Running Scared'

In [10]:
# The release year sits in span.start-year and is wrapped in parentheses.
divs[0].find('span',{"class":'start-year'}).string

'(1986)'

In [11]:
# Score text in span.tMeterScore -- note this peeks at the SECOND entry (index 1).
divs[1].find('span',{"class":'tMeterScore'}).string

'41%'

### Getting years

In [56]:
# Raw year text for every movie, still wrapped in parentheses, e.g. '(1986)'.
years = [
    movie.find('span', class_='start-year').string
    for movie in divs
]

In [55]:
# Drop the surrounding parentheses: '(1986)' -> '1986'.
# Re-running this cell is harmless because already-clean values are unchanged.
years = [raw_year.strip('()') for raw_year in years]

In [14]:
# Sanity check: one year per movie div is expected.
len(years)

140

### Getting movie ratings

In [54]:
# Score text for every movie, kept as strings with the percent sign, e.g. '41%'.
rates = [
    movie.find('span', class_='tMeterScore').string
    for movie in divs
]

In [16]:
# Sanity check: one rating per movie div is expected.
len(rates)

140

### Getting titles

In [17]:
# The first <a> tag inside each movie div carries the title text.
titles = [movie.find('a').string for movie in divs]

In [53]:
# titles

In [19]:
# Sanity check: one title per movie div is expected.
len(titles)

140

### Getting cast names

In [52]:
# The div.cast block of every movie; each one holds the actor links.
cast_info = [
    movie.find('div', class_='cast')
    for movie in divs
]

In [21]:
# All actor links of the first movie, used to prototype the extraction.
first_cast_block = cast_info[0]
cast_links = first_cast_block.find_all('a')

In [22]:
# Display the <a> tags found for the first movie's cast.
cast_links

[<a class="" href="//www.rottentomatoes.com/celebrity/gregory_hines">Gregory Hines</a>,
 <a class="" href="//www.rottentomatoes.com/celebrity/billy_crystal">Billy Crystal</a>,
 <a class="" href="//www.rottentomatoes.com/celebrity/jimmy_smits">Jimmy Smits</a>,
 <a class="" href="//www.rottentomatoes.com/celebrity/steven_bauer">Steven Bauer</a>]

In [23]:
# Text of each actor link for the first movie.
cast_names = [anchor.string for anchor in cast_links]

In [24]:
# Display the actor names extracted for the first movie.
cast_names

['Gregory Hines', 'Billy Crystal', 'Jimmy Smits', 'Steven Bauer']

In [25]:
# Collapse the names into one comma-separated string (one table cell per movie).
', '.join(cast_names)

'Gregory Hines, Billy Crystal, Jimmy Smits, Steven Bauer'

##### Getting the cast in one line using a nested list comprehension

In [26]:
# One comma-separated cast string per movie: the inner expression collects the
# link texts of a single cast block, the outer comprehension walks all blocks.
# This rebinds `cast_names`, which held only the first movie's names before.
cast_names = [
    ', '.join(anchor.string for anchor in cast_block.find_all('a'))
    for cast_block in cast_info
]

In [51]:
# cast_names

In [28]:
# Sanity check: one cast string per movie div is expected.
len(cast_names)

140

### Getting the critics consensus

In [50]:
# The div.critics-consensus block of every movie (label plus consensus text).
critics = [
    movie.find('div', class_='critics-consensus')
    for movie in divs
]

In [30]:
# Remove the leading label by slicing it off. str.strip('Critics Consensus:')
# treats its argument as a SET of characters, so it would also eat matching
# characters from the consensus itself (a leading "Constantine" would become
# "antine"; a trailing "...action" would become "...a").
label = 'Critics Consensus:'
first_consensus = critics[0].text
(first_consensus[len(label):] if first_consensus.startswith(label) else first_consensus).strip()

'Running Scared struggles to strike a consistent balance between violent action and humor, but the chemistry between its well-matched leads keeps things entertaining.'

In [31]:
CONSENSUS_PREFIX = 'Critics Consensus:'


def _strip_consensus_label(text):
    """Return `text` without a leading 'Critics Consensus:' label.

    The label is removed by slicing, not with str.strip(CONSENSUS_PREFIX):
    strip() interprets its argument as a set of characters and would also
    remove any of those characters from the start or end of the consensus
    itself. Whitespace left around the consensus is trimmed. Text that does
    not start with the label is returned unchanged.
    """
    if text.startswith(CONSENSUS_PREFIX):
        return text[len(CONSENSUS_PREFIX):].strip()
    return text


# Look the consensus up inside each movie div, like every other column, so the
# list is guaranteed to line up with `divs` row for row.
critics_consensus = [
    _strip_consensus_label(movie.find('div', class_='critics-consensus').text)
    for movie in divs
]

In [49]:
# critics_consensus

In [33]:
# Sanity check: one consensus per movie div is expected.
len(critics_consensus)

140

### Getting directors

In [48]:
# The div.director block of every movie; the director's name is the link inside it.
director = [
    movie.find('div', class_='director')
    for movie in divs
]

In [35]:
# Peek at the first movie: the director's name is the text of the first link.
director[0].find('a').string

'Peter Hyams'

In [36]:
# Director name per movie: the text of the first link inside each div.director.
directors = [
    movie.find('div', class_='director').find('a').string
    for movie in divs
]

In [47]:
# directors

In [38]:
# Sanity check: one director per movie div is expected.
len(directors)

140

### Getting synopsis

In [46]:
# The div.synopsis block of every movie.
syn = [
    movie.find('div', class_='synopsis')
    for movie in divs
]

In [40]:
# contents[1] is the second child node of the synopsis div; for this entry it
# is the synopsis text itself (presumably the first child is the label -- verify).
syn[2].contents[1]

' In this visually arresting martial arts epic set in ancient China, an unnamed fighter (Jet Li) is being honored for...'

In [41]:
# Synopsis per movie: the second child node of each div.synopsis.
syns = [
    movie.find('div', class_='synopsis').contents[1]
    for movie in divs
]

In [45]:
# syns

In [43]:
# Sanity check: one synopsis per movie div is expected.
len(syns)

140

### Creating dataframe

In [44]:
def _plain_text(values):
    """Return `values` as a list of built-in str, passing None through unchanged.

    Items taken from `.string` / `.contents` are BeautifulSoup NavigableString
    objects, which keep a reference to the whole parse tree. Converting them to
    plain str keeps the frame lightweight and safe to pickle or store.
    """
    return [None if value is None else str(value) for value in values]


# Build the frame in a single constructor call (same columns, same order)
# instead of growing an empty DataFrame one column at a time.
data = pd.DataFrame({
    'titles': _plain_text(titles),
    'years': _plain_text(years),
    'cast_names': _plain_text(cast_names),
    'syns': _plain_text(syns),
    'directors': _plain_text(directors),
    'critics_consensus': _plain_text(critics_consensus),
    'rates': _plain_text(rates),
})

data.head()

Unnamed: 0,titles,years,cast_names,syns,directors,critics_consensus,rates
0,Running Scared,1986,"Gregory Hines, Billy Crystal, Jimmy Smits, Ste...","Ray and Danny (Gregory Hines, Billy Crystal) ...",Peter Hyams,Running Scared struggles to strike a consisten...,57%
1,Equilibrium,2002,"Christian Bale, Emily Watson, Taye Diggs, Angu...","In a futuristic world, a regime has eliminate...",Kurt Wimmer,No consensus yet.,41%
2,Hero,2002,"Jet Li, Tony Leung Chiu Wai, Maggie Cheung Man...",In this visually arresting martial arts epic ...,Zhang Yimou,With death-defying action sequences and epic h...,94%
3,Road House,1989,"Patrick Swayze, Kelly Lynch, Sam Elliott, Ben ...","The Double Deuce is the meanest, loudest and ...",Rowdy Herrington,No consensus yet.,40%
4,Unstoppable,2010,"Denzel Washington, Chris Pine, Rosario Dawson,...","When a massive, unmanned locomotive roars out...",Tony Scott,"As fast, loud, and relentless as the train at ...",87%
