# Data Scraping using Beautiful Soup and API

In [1]:
from bs4 import BeautifulSoup
import requests
import random
import pandas as pd
import json 

###  To get summary and release year of some of my favourite movies

In [2]:
url='http://www.imdb.com/chart/top'
fav_movies=['Soul','Inside Out','Drishyam',
            'Jurassic Park','The Lion King',
            'The Prestige','Inception',
            'Interstellar','Shutter Island','Coco']

In [3]:
def get_imd_movies(url):
    '''input: imdb url to get names and details of the listed movies
    output: list with details of the movie like movie name, release year'''
    
    page = requests.get(url)
    soup = BeautifulSoup(page.text,
                         'html.parser')
    movies = soup.find_all("td", 
                           class_="titleColumn")
    print(type(movies))
    return movies


In [59]:
def get_imd_movie_info(movie):
    '''input: movie name 
    output: title,release year and url of the particular movie'''
    
    movie_title = movie.a.contents[0]
    movie_year = movie.span.contents[0]
    movie_url = 'http://www.imdb.com' + movie.a['href']
    return movie_title, movie_year, movie_url

In [60]:
def get_imd_summary(url):
    '''input: url of the movie
    output: short summary of the movie from the movie page'''
    
    movie_page = requests.get(url)
    soup = BeautifulSoup(movie_page.text,
                         'html.parser')
    return soup.find("div", 
                     class_="summary_text").contents[0].strip()


In [61]:
title=[]
date=[]
synopsis=[]

for movie in get_imd_movies(url):
    movie_title, movie_year, movie_url=get_imd_movie_info(movie) # get movie title, release year and url of movie page
    
    if movie_title in fav_movies:
        
        summary=get_imd_summary(movie_url)
        title.append(movie_title)
        date.append(movie_year)
        synopsis.append(summary)
        
        print(movie_title,movie_year,'\nSummary:\n',summary,'\n\n' )
   

Inception (2010) 
Summary:
 A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a C.E.O. 


Interstellar (2014) 
Summary:
 A team of explorers travel through a wormhole in space in an attempt to ensure humanity's survival. 


The Lion King (1994) 
Summary:
 Lion prince Simba and his father are targeted by his bitter uncle, who wants to ascend the throne himself. 


The Prestige (2006) 
Summary:
 After a tragic accident, two stage magicians engage in a battle to create the ultimate illusion while sacrificing everything they have to outwit each other. 


Coco (2017) 
Summary:
 Aspiring musician Miguel, confronted with his family's ancestral ban on music, enters the Land of the Dead to find his great-great-grandfather, a legendary singer. 


Shutter Island (2010) 
Summary:
 In 1954, a U.S. Marshal investigates the disappearance of a murderer who escaped from a hospital for the criminally insane. 



In [29]:
'''Generate a dataframe from the sccraped data'''

data={'Movie':title,'Release_year':date,'Synopsis':synopsis}
df = pd.DataFrame(data, columns = ['Movie','Release_year','Synopsis'])
df.head()


Unnamed: 0,Movie,Release_year,Synopsis
0,Inception,(2010),A thief who steals corporate secrets through t...
1,Interstellar,(2014),A team of explorers travel through a wormhole ...
2,The Lion King,(1994),Lion prince Simba and his father are targeted ...
3,The Prestige,(2006),"After a tragic accident, two stage magicians e..."
4,Coco,(2017),"Aspiring musician Miguel, confronted with his ..."


### Get live weather of three towns: Tokyo, Osaka, Sapporo

In [31]:
api_key='9c70a373bfbe5a47c4f2789493bd5416'

In [44]:
base_url = "http://api.openweathermap.org/data/2.5/weather?"
cities=['Tokyo','Osaka','Sapporo'] 

for city in cities: 
    url = base_url + "appid=" + api_key + "&q=" + city 
    response = requests.get(url) 
    x = response.json() 
    
    if x["cod"] != "404": 
        y = x["main"] 
        current_temperature = y["temp"] 
        z = x["weather"] 
        weather= z[0]["description"]  
        
        print("\nCity:",city,"\nTemperature(Kelvin):",str(current_temperature),
              "\nWeather:",str(weather))
    else:
        print(" \nCity Not Found ") 


City: Tokyo 
Temperature(Kelvin): 278.65 
Weather: few clouds

City: Osaka 
Temperature(Kelvin): 275.26 
Weather: clear sky

City: Sapporo 
Temperature(Kelvin): 266.48 
Weather: broken clouds


### To get a list of all the places that had an earthquake with a magnitude greater than 5

In [54]:
page = requests.get('https://earthquakes.bgs.ac.uk/earthquakes/recent_world_events.html') 
# the page had all the cities that had a greater magnitude earthquake within 200 days listed

soup = BeautifulSoup(page.text, 'html.parser')
quakes_5= soup.find_all("tr")[1:]  

print('Number of earthquakes which had a magnitude greater than 5 in past 200 days:\n',len(quakes_5))

Number of earthquakes which had a magnitude greater than 5 in past 200 days:
 23


In [53]:
for tr in soup.find_all('tr')[1:]:
    tds = tr.find_all('td')
    print ("Area: %s, \n Magnitude: %s" % \
          (tds[7].text, tds[5].text))

Area:  SAN JUAN,ARGENTINA , 
 Magnitude:  6.4 
Area:  SULAWESI,INDONSIA , 
 Magnitude:  6.2 
Area:  NORTHERN MONGOLIA , 
 Magnitude:  6.7 
Area:  CROATIA , 
 Magnitude:  6.4 
Area:  LOS RIOS,CHILE , 
 Magnitude:  6.7 
Area:  EASTERN AEGEAN SEA , 
 Magnitude:  7.0 
Area:  SWITZERLAND , 
 Magnitude:  4.6 
Area:  ALASKA PENINSULA , 
 Magnitude:  7.6 
Area:  TONGA ISLANDS REGION , 
 Magnitude:  6.5 
Area:  MID-ATLANTIC RIDGE , 
 Magnitude:  6.9 
Area:  MID-ATLANTIC RIDGE , 
 Magnitude:  6.6 
Area:  ATACAMA,CHILE , 
 Magnitude:  6.5 
Area:  ATACAMA,CHILE , 
 Magnitude:  6.3 
Area:  ATACAMA,CHILE , 
 Magnitude:  6.8 
Area:  MID-ATLANTIC RIDGE , 
 Magnitude:  6.5 
Area:  BANDA SEA , 
 Magnitude:  6.9 
Area:  SUMATRA,INDONESIA , 
 Magnitude:  6.9 
Area:  SUMATRA,INDONESIA , 
 Magnitude:  6.8 
Area:  SAMAR,PHILIPPINES , 
 Magnitude:  6.6 
Area:  VANUATU , 
 Magnitude:  6.5 
Area:  ALASKA PENINSULA , 
 Magnitude:  7.8 
Area:  PAPUA NEW GUINEA , 
 Magnitude:  7.0 
Area:  JAVA SEA , 
 Magnitude:  