# 01. OpenWeatherMap API

### Data Acquisition - Web APIs
- Application Programming Interface - a set of routines exposed by a web server.
- Connects Application & Database
- Specify Data with URL
- Retrieve Data & Parse it
- API may return data in JSON, XML data formats

### JSON
- JavaScript Object Notation
- Dictionary like data structure : key value pairs
- Keys are strings, values can be anything
- Easy to read and write
 
### XML
- eXtensible Markup Language
- Structure similar to HTML
- Custom Tags
- No Style

### Type of Requests
- GET
- POST
- DELETE
- PUT
- PATCH

### Two Libraries:
- Urllib
- Requests

In [3]:
from urllib.request import urlopen

# Sample endpoint on samples.openweathermap.org; the q and appid values are
# passed as query-string parameters.
api_url = "https://samples.openweathermap.org/data/2.5/weather?q=London,uk&appid=b6907d289e10d714a6e88b30761fae22"

In [5]:
# Open the endpoint and read the raw response body. The context manager
# closes the underlying HTTP connection as soon as the body has been read,
# instead of leaving it open for the lifetime of the kernel.
with urlopen(api_url) as url_result:
    data = url_result.read()
print(data)
print(type(data))

b'{"coord":{"lon":-0.13,"lat":51.51},"weather":[{"id":300,"main":"Drizzle","description":"light intensity drizzle","icon":"09d"}],"base":"stations","main":{"temp":280.32,"pressure":1012,"humidity":81,"temp_min":279.15,"temp_max":281.15},"visibility":10000,"wind":{"speed":4.1,"deg":80},"clouds":{"all":90},"dt":1485789600,"sys":{"type":1,"id":5091,"message":0.0103,"country":"GB","sunrise":1485762037,"sunset":1485794875},"id":2643743,"name":"London","cod":200}'
<class 'bytes'>


In [6]:
import json
# Parse the raw bytes returned by read() into Python objects.
json_data = json.loads(data)

In [7]:
print(json_data)

{'coord': {'lon': -0.13, 'lat': 51.51}, 'weather': [{'id': 300, 'main': 'Drizzle', 'description': 'light intensity drizzle', 'icon': '09d'}], 'base': 'stations', 'main': {'temp': 280.32, 'pressure': 1012, 'humidity': 81, 'temp_min': 279.15, 'temp_max': 281.15}, 'visibility': 10000, 'wind': {'speed': 4.1, 'deg': 80}, 'clouds': {'all': 90}, 'dt': 1485789600, 'sys': {'type': 1, 'id': 5091, 'message': 0.0103, 'country': 'GB', 'sunrise': 1485762037, 'sunset': 1485794875}, 'id': 2643743, 'name': 'London', 'cod': 200}


In [8]:
print(type(json_data))

<class 'dict'>


In [9]:
json_data['coord']

{'lon': -0.13, 'lat': 51.51}

In [10]:
json_data['name']

'London'

In [11]:
# Serialize the parsed object back into a JSON-formatted string and show
# both the text and its Python type.
json_string = json.dumps(json_data)
print(json_string)
print(type(json_string))

{"coord": {"lon": -0.13, "lat": 51.51}, "weather": [{"id": 300, "main": "Drizzle", "description": "light intensity drizzle", "icon": "09d"}], "base": "stations", "main": {"temp": 280.32, "pressure": 1012, "humidity": 81, "temp_min": 279.15, "temp_max": 281.15}, "visibility": 10000, "wind": {"speed": 4.1, "deg": 80}, "clouds": {"all": 90}, "dt": 1485789600, "sys": {"type": 1, "id": 5091, "message": 0.0103, "country": "GB", "sunrise": 1485762037, "sunset": 1485794875}, "id": 2643743, "name": "London", "cod": 200}
<class 'str'>


### Requests Library
- One of the most popular Python HTTP libraries
- Built on top of urllib3
- Works well with Python 3

### Installation
- pip install requests

# 02. Google API : Authentication

In [15]:
import requests

In [16]:
url = "https://maps.googleapis.com/maps/api/geocode/json?"

In [21]:
parameters = {
    "address" : "coding blocks pitampura"
}

In [22]:
# Without a timeout, requests waits indefinitely if the server never answers.
r = requests.get(url, params=parameters, timeout=10)

In [23]:
r.url

'https://maps.googleapis.com/maps/api/geocode/json?address=coding+blocks+pitampura'

In [24]:
r.content

b'{\n   "error_message" : "You must use an API key to authenticate each request to Google Maps Platform APIs. For additional information, please refer to http://g.co/dev/maps-no-account",\n   "results" : [],\n   "status" : "REQUEST_DENIED"\n}\n'

In [26]:
import os

# Read the Google Maps API key from the GOOGLE_MAPS_API_KEY environment
# variable instead of embedding it in the notebook; real keys must never be
# committed. requests leaves out parameters whose value is None, so when the
# variable is unset the request is simply sent without a key.
parameters = {
    "address" : "coding blocks pitampura",
    "key" : os.environ.get("GOOGLE_MAPS_API_KEY")
}

In [27]:
# Without a timeout, requests waits indefinitely if the server never answers.
r = requests.get(url, params=parameters, timeout=10)

In [28]:
r.url

'https://maps.googleapis.com/maps/api/geocode/json?address=coding+blocks+pitampura&key=<REDACTED_API_KEY>'

In [29]:
r.content

b'{\n   "results" : [\n      {\n         "address_components" : [\n            {\n               "long_name" : "Sector 2",\n               "short_name" : "Sector 2",\n               "types" : [ "political", "sublocality", "sublocality_level_1" ]\n            },\n            {\n               "long_name" : "Noida",\n               "short_name" : "Noida",\n               "types" : [ "locality", "political" ]\n            },\n            {\n               "long_name" : "Uttar Pradesh",\n               "short_name" : "UP",\n               "types" : [ "administrative_area_level_1", "political" ]\n            },\n            {\n               "long_name" : "India",\n               "short_name" : "IN",\n               "types" : [ "country", "political" ]\n            },\n            {\n               "long_name" : "201301",\n               "short_name" : "201301",\n               "types" : [ "postal_code" ]\n            }\n         ],\n         "formatted_address" : "A-73 Near Sector 15 Metro

In [30]:
print(r.content.decode('UTF-8'))

{
   "results" : [
      {
         "address_components" : [
            {
               "long_name" : "Sector 2",
               "short_name" : "Sector 2",
               "types" : [ "political", "sublocality", "sublocality_level_1" ]
            },
            {
               "long_name" : "Noida",
               "short_name" : "Noida",
               "types" : [ "locality", "political" ]
            },
            {
               "long_name" : "Uttar Pradesh",
               "short_name" : "UP",
               "types" : [ "administrative_area_level_1", "political" ]
            },
            {
               "long_name" : "India",
               "short_name" : "IN",
               "types" : [ "country", "political" ]
            },
            {
               "long_name" : "201301",
               "short_name" : "201301",
               "types" : [ "postal_code" ]
            }
         ],
         "formatted_address" : "A-73 Near Sector 15 Metro Station Noida, Sector 2, Noida,

### Try making a POST Request on pastebin.com
### What is Pastebin.com all about?
Pastebin is a website where you can store any text online for easy sharing. The website is mainly used by programmers to store pieces of source code or configuration information, but anyone is more than welcome to paste any type of text. The idea behind the site is to make it more convenient for people to share large amounts of text online.

[Post request](https://pastebin.com/api)

# 03. Using Facebook API

In [32]:
import requests

In [37]:
# Use HTTPS so the request is not sent over a plaintext connection.
url = "https://graph.facebook.com/4/picture?type=large"

In [38]:
# Without a timeout, requests waits indefinitely if the server never answers.
r = requests.get(url, timeout=10)

In [39]:
print(r)

<Response [200]>


In [40]:
print(r.content)

b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x02\x00\x00\x01\x00\x01\x00\x00\xff\xed\x00\x9cPhotoshop 3.0\x008BIM\x04\x04\x00\x00\x00\x00\x00\x80\x1c\x02g\x00\x14EijAN2tIMJ-3dRhxQN6D\x1c\x02(\x00bFBMD01000ac0030000dc060000890b0000fe0c0000600e00001e1100005f1600000f17000066180000a619000065220000\xff\xe2\x02\x1cICC_PROFILE\x00\x01\x01\x00\x00\x02\x0clcms\x02\x10\x00\x00mntrRGB XYZ \x07\xdc\x00\x01\x00\x19\x00\x03\x00)\x009acspAPPL\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf6\xd6\x00\x01\x00\x00\x00\x00\xd3-lcms\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\ndesc\x00\x00\x00\xfc\x00\x00\x00^cprt\x00\x00\x01\\\x00\x00\x00\x0bwtpt\x00\x00\x01h\x00\x00\x00\x14bkpt\x00\x00\x01|\x00\x00\x00\x14rXYZ\x00\x00\x01\x90\x00\x00\x00\x14gXYZ\x00\x00\x01\xa4\x00\x00\x00\x14bXYZ\x00\x00\x01\xb8\x00\x00\x00\x1

In [41]:
# Save the response body to disk. The file is opened in "wb" mode because
# r.content is raw bytes (the JPEG data printed above), not text.
with open("../Images/sample_pic.jpg","wb") as f:
    f.write(r.content)

# 04. Image Scraping

### Beautiful Soup
Beautiful Soup is a python library for pulling data out of HTML and XML files. It works with your favourite parser to provide idiomatic ways of navigating, searching and modifying the parse tree. It commonly saves programmers hours or days of work.

### Installation:
- pip install beautifulsoup4
- pip install html5lib

[Inspirational Quotes](https://www.values.com/inspirational-quotes)

In [145]:
import bs4
import requests

In [146]:
url = "https://www.passiton.com/inspirational-quotes?page=2"

In [147]:
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(url, timeout=10)

In [148]:
response.content

b'<!DOCTYPE html>\n<html class="no-js" dir="ltr" lang="en-US">\n    <head>\n        <title>Inspirational Quotes - Motivational Quotes - Leadership Quotes | Page 2 | PassItOn.com</title>\n        <meta charset="utf-8">\n        <meta http-equiv="content-type" content="text/html; charset=utf-8" />\n        <meta http-equiv="X-UA-Compatible" content="IE=edge" />\n        <meta name="viewport" content="width=device-width,initial-scale=1.0,maximum-scale=1" />\n        <meta name="description" content="The Foundation for a Better Life | Pass It On.com">\n        <link rel="apple-touch-icon" sizes="180x180" href="/apple-touch-icon.png">\n        <link rel="icon" type="image/png" sizes="32x32" href="/favicon-32x32.png">\n        <link rel="icon" type="image/png" sizes="16x16" href="/favicon-16x16.png">\n        <link rel="manifest" href="/site.webmanifest">\n        <link rel="mask-icon" href="/safari-pinned-tab.svg" color="#c8102e">\n        <meta name="msapplication-TileColor" content="#c810

In [149]:
# HTML parser
# Name the parser explicitly: without it Beautiful Soup picks whichever parser
# library happens to be installed (and warns), so the parse tree could differ
# between machines. html5lib is the parser this notebook asks you to install.
soup = bs4.BeautifulSoup(response.content, "html5lib")

In [150]:
# Collect every <img> tag on the page. find_all is the current name of the
# method; findAll is only kept as a deprecated alias.
image_element = soup.find_all('img')

In [151]:
image_element[0]

<img alt="Pass It On" class="logo-dark" data-rjs="/assets/pofo/logo@2x-77b9f4fa1e4bafa6ff119f8f162b7bd9.png" src="/assets/pofo/logo-6d680decaadef58e4fbb586e147bc135.png"/>

In [152]:
print(len(image_element))

38


In [153]:
# Print the src attribute of every <img> tag found on the page.
for img in image_element:
    print(img.attrs['src'])

/assets/pofo/logo-6d680decaadef58e4fbb586e147bc135.png
/assets/pofo/logo-white-2d99f390ad2f6d3bbf59a52aadb55d60.png
/assets/pofo/instagram_glyph-3a42844bdfa0b6e1ff03e4baf2b978d1.png
/assets/pofo/facebook_glyph-2aa185e76f313012536c98ffd6ae7bcf.png
/assets/pofo/twitter_glyph-79c7ef76202036934bd904f83a3b4ef5.png
https://assets.passiton.com/quotes/quote_artwork/7973/medium/20190828_wednesday_quote.jpg?1566582774
https://assets.passiton.com/quotes/quote_artwork/5224/medium/20190827_tuesday_quote.jpg?1566582744
https://assets.passiton.com/quotes/quote_artwork/7561/medium/20190826_monday_quote.jpg?1566582711
https://assets.passiton.com/quotes/quote_artwork/6469/medium/20190823_friday_quote.jpg?1565804805
https://assets.passiton.com/quotes/quote_artwork/7972/medium/20190822_thursday_quote.jpg?1565803891
https://assets.passiton.com/quotes/quote_artwork/6231/medium/20190821_wednesday_quote.jpg?1565803837
https://assets.passiton.com/quotes/quote_artwork/7971/medium/20190820_tuesday_quote_alternat

In [154]:
# Keep only the elements from index 5 up to (but excluding) the last one.
# NOTE(review): the src listing printed above shows the first five entries are
# site logo / social glyph images; presumably the last entry is also not a
# quote image -- confirm. This reassigns image_element from itself, so
# re-running the cell trims the list again.
image_element = image_element[5:-1]

In [155]:
image_element[0].attrs

{'alt': 'The best way to not feel hopeless is to get up and do something. Don’t wait for good things to happen to you. If you go out and make some good things happen, you will fill the world with hope, you will fill yourself with hope. #<Author:0x00007f873e9d9af0>',
 'class': ['margin-10px-bottom', 'shadow'],
 'src': 'https://assets.passiton.com/quotes/quote_artwork/7973/medium/20190828_wednesday_quote.jpg?1566582774',
 'width': '310',
 'height': '310'}

In [156]:
image_element[0].attrs['src']

'https://assets.passiton.com/quotes/quote_artwork/7973/medium/20190828_wednesday_quote.jpg?1566582774'

In [157]:
# Download the first quote image and save it as ../Images/inspiration.jpg.
# The request is made before the file is opened so a failed download does not
# leave an empty inspiration.jpg behind.
img_url = image_element[0].attrs['src']
print(img_url)
response = requests.get(img_url, timeout=10)
response.raise_for_status()  # do not save an HTTP error page as a .jpg
with open('../Images/inspiration.jpg','wb') as file:
    file.write(response.content)

https://assets.passiton.com/quotes/quote_artwork/7973/medium/20190828_wednesday_quote.jpg?1566582774


In [159]:
import os

# Save every quote image as ../Images/Inspiration/inspiration<index>.jpg.
output_dir = '../Images/Inspiration'
os.makedirs(output_dir, exist_ok=True)  # open() fails if the folder is missing

for index,image in enumerate(image_element):
    img_url = image.attrs['src']
    # Fetch first, so a failed download never leaves an empty file behind.
    response = requests.get(img_url, timeout=10)
    if not response.ok:
        # Skip this image instead of writing an error page to disk.
        print('Skipping {} (HTTP {})'.format(img_url, response.status_code))
        continue
    with open('{}/inspiration{}.jpg'.format(output_dir, index),'wb') as file:
        file.write(response.content)

### Snapdeal Image Scraping

In [160]:
import bs4
import requests

In [161]:
keyword = input("Search Products & Brands")

Search Products & Brandsbottle


In [162]:
from urllib.parse import quote_plus

# URL-encode the search term so that spaces, '&', '#' and other reserved
# characters typed by the user cannot break or alter the query string.
url = "https://www.snapdeal.com/search?keyword=" + quote_plus(keyword)
print(url)

https://www.snapdeal.com/search?keyword=bottle


In [163]:
# Without a timeout, requests waits indefinitely if the server never answers.
response = requests.get(url, timeout=10)

In [164]:
response.content

b'<!DOCTYPE html>\n\t<!--[if IE 8]><html lang="en" class="ie ie8 lt-ie9 lt-ie10"> <![endif]-->\n<!--[if IE 9]><html lang="en" class="ie ie9 lt-ie10"> <![endif]-->\n<!--[if IE]><html lang="en" class="ie"><![endif]-->\n<!--[if gt IE 9]><!--><html lang="en"><!--<![endif]-->\n\t<head prefix="og: https://ogp.me/ns# fb: https://ogp.me/ns/fb# snapdeallog: https://ogp.me/ns/fb/snapdeallog#">\n\t\t<link rel="dns-prefetch" href="https://i1.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://i2.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://i3.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://i4.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://n1.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://n2.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://n3.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://n4.sdlcdn.com" />\r\n\t<link rel="dns-prefetch" href="https://sa.snapdeal.com" />\r\n\t<link rel="dns-prefetch" href="https://search-su

In [165]:
# Parse the search results page with an explicitly named parser (otherwise
# Beautiful Soup guesses from whatever is installed and warns), then collect
# every <picture> element. find_all replaces the deprecated findAll alias.
soup = bs4.BeautifulSoup(response.content, "html5lib")
pictures = soup.find_all('picture')
print(len(pictures))

20


In [166]:
# Print the srcset of the <source> child of every <picture> element.
for picture in pictures:
    print(picture.source.attrs['srcset'])

https://n1.sdlcdn.com/imgs/i/z/e/large/KC-Stainless-Steel-Fridge-Water-SDL336476550-1-bc49e.jpg
https://n4.sdlcdn.com/imgs/i/w/m/large/MiltonHot_ColdImage-47ab1.jpg
https://n1.sdlcdn.com/imgs/i/t/7/large/KESHAV-prestige-type-copper-bottle-SDL459398759-1-a65e0.jpeg
https://n2.sdlcdn.com/imgs/i/z/e/large/Cello-Infuse-Bottle-Yellow-800-SDL265869829-1-7ef20.jpg
https://n4.sdlcdn.com/imgs/i/z/i/large/Copper-Fort-Multicolour-1000-ml-SDL818753797-1-65443.jpg
https://n1.sdlcdn.com/imgs/i/p/p/large/Aaban-Flat-Plastic-A5-Memo-SDL529587764-1-0e601.jpeg
https://n2.sdlcdn.com/imgs/i/z/i/large/KC-Brown-1000-ml-Copper-SDL170317004-1-37835.jpg
https://n3.sdlcdn.com/imgs/h/o/w/large/Prestige-Steel-Flask-500-ml-SDL904377263-1-94b03.jpeg
https://n3.sdlcdn.com/imgs/c/o/x/large/Milton-Flip-Lid-Silver-1000-SDL780488087-1-d43e0.jpg
https://n1.sdlcdn.com/imgs/i/b/t/large/Cromoxome-SPRAY-Blue-600-ml-SDL832352140-1-c3a3c.png
https://n1.sdlcdn.com/imgs/i/z/i/large/Copper-Brown-1000-ml-Copper-SDL408386705-1-1c118

In [167]:
import os

In [168]:
# Download every product image to ../Images/<keyword>/<keyword><index>.jpg.
target_dir = '../Images/'+keyword
# Create the folder once, before the loop; makedirs also creates ../Images
# itself if it is missing and does not fail when the folder already exists.
os.makedirs(target_dir, exist_ok=True)
s = target_dir+'/'+keyword

for index,image in enumerate(pictures):
    img_url = image.source.attrs['srcset']
    # Fetch first, so a failed download never leaves an empty file behind.
    response = requests.get(img_url, timeout=10)
    if not response.ok:
        # Skip this image instead of writing an error page to disk.
        print('Skipping {} (HTTP {})'.format(img_url, response.status_code))
        continue
    # NOTE: files are always named .jpg, although the srcset listing printed
    # above also contains .jpeg and .png sources.
    with open('{}{}.jpg'.format(s,index),'wb') as file:
        file.write(response.content)