In [1]:
import requests

## Getting response from website

In [None]:
response = requests.get('https://en.wikipedia.org/robots.txt')

In [None]:
# Decoded body of the response (requests picks the text encoding for us).
text = response.text
print(text)
# if we want to change the encoding ...

In [None]:
response.headers

In [None]:
response.request.headers

## Status Codes

In [None]:
status = response.status_code
status

In [None]:
status == requests.codes.ALL_OK

If we made a bad request (a 4XX client error or 5XX server error response), we can raise an exception for it with `Response.raise_for_status()`:

In [None]:
bad_r = requests.get('http://httpbin.org/status/404')
bad_r.status_code

In [None]:
# raise_for_status() raises requests.HTTPError for a 4XX/5XX response.
# Catch and display it so the demonstration does not abort "Run All".
try:
    bad_r.raise_for_status()
except requests.HTTPError as err:
    print("HTTPError:", err)

In [None]:
response.raise_for_status()

## Timeouts

You can tell Requests to stop waiting for a response after a given number of seconds with the timeout parameter.  
Nearly all production code should use this parameter in nearly all requests. Failure to do so can cause your program to hang indefinitely.  

---
**Note**  
timeout is not a time limit on the entire response download; rather, an exception is raised if the server has not issued a response for timeout seconds (more precisely, if no bytes have been received on the underlying socket for timeout seconds). If no timeout is specified explicitly, requests do not time out.

http://docs.python-requests.org/en/master/user/quickstart/#timeouts

In [None]:
# A 1 ms timeout is deliberately too short: the request is expected to fail.
# Timeout is the common base class of ConnectTimeout and ReadTimeout; catch it
# and show the error so the rest of the notebook can still run.
try:
    requests.get('http://github.com', timeout=0.001)
except requests.exceptions.Timeout as err:
    print("Request timed out:", err)

## Headers 
Custom headers, e.g. for authentication (providing a token)

In [None]:
header = {'Authorization': 'Token 1234567890'}

In [None]:
response = requests.get('http://github.com', headers=header)

In [None]:
response.request.headers

## JSON

In [None]:
# NOTE(review): this is a plain-HTTP, key-less endpoint. The Geocoding API
# presumably requires HTTPS and an API key nowadays — confirm before relying
# on the 'results' lookups in the cells below.
base_url = 'http://maps.googleapis.com/maps/api/geocode/json'

In [None]:
my_params = {'address': '100 Broadway, New York, NY, U.S.A', 
             'language': 'en'}

In [None]:
response = requests.get(base_url, params = my_params)
response.status_code

In [None]:
response.headers

Parse response as JSON

In [None]:
response.json()

In [None]:
results = response.json()['results']

In [None]:
len(results)

In [None]:
results[0]

Getting location

In [None]:
results[0]['geometry'] # ['location']

In [None]:
results[0]['geometry']['location']

In [None]:
x_geo = results[0]['geometry']['location']
print(x_geo['lng'], x_geo['lat'])

## REST API methods

http://jsonplaceholder.typicode.com/

In [None]:
from urllib import parse
# for building compound URLs

In [None]:
base_url = 'http://jsonplaceholder.typicode.com'

In [None]:
parse.urljoin(base_url, 'customers')

### Get

In [None]:
response = requests.api.get(parse.urljoin(base_url, 'posts'))

posts = response.json()

In [None]:
posts

In [None]:
len(posts)

In [None]:
posts[0]

In [None]:
print(posts[0]['body'])

In [None]:
print(' '.join([p['body'] for p in posts]))

In [None]:
response = requests.api.get(parse.urljoin(base_url, 'posts'), params={'id': 1})

In [None]:
response.url

In [None]:
posts[0]

In [None]:
response.json()

### Post

In [None]:
response = requests.api.post(parse.urljoin(base_url, 'posts'), data={'body':'new post body', 'title': 'new post', 'userId': 1})

In [None]:
print(response.text)
print("Status code:", response.status_code)

In [None]:
requests.status_codes._codes[response.status_code]

In [None]:
requests.status_codes._codes[200]

In [None]:
requests.status_codes._codes[200][0]

### Put

In [None]:
# Send a PUT to posts/1 with a form-encoded body.
response = requests.api.put(parse.urljoin(base_url, 'posts/1'), 
                            data={'body':'new post body', 'userId': 1})
# by contrast, PATCH updates just what we send

In [None]:
response.url

In [None]:
print(response.text)
print("Status code:", response.status_code)

### Patch

In [None]:
response = requests.api.patch(parse.urljoin(base_url, 'posts/1'), data={'body':'new post body'})

In [None]:
response.url

In [None]:
print(response.text)
print("Status code:", response.status_code)

### Delete

In [None]:
response = requests.api.delete(parse.urljoin(base_url, 'posts/1'))

In [None]:
response.url

In [None]:
print(response.text)
print("Status code:", response.status_code)

In [None]:
response = requests.api.delete(parse.urljoin(base_url, 'posts'), data={'id': 1})
print(response.text)
print("Status code:", response.status_code)

## Parsing HTML

lxml - http://lxml.de/  
BeautifulSoup4 - https://www.crummy.com/software/BeautifulSoup/

In [None]:
import bs4

In [None]:
page = requests.get("http://www.example.com").text

In [None]:
# Download the page and report any requests-level failure instead of crashing.
try:
    response = requests.get("http://www.example.com", timeout=5)
    response.raise_for_status()
except requests.RequestException as e:
    # RequestException is the base class of HTTPError, ConnectionError and both
    # timeout flavours (ConnectTimeout, ReadTimeout). The original tuple let
    # read timeouts and connection errors escape.
    print("Failed to load web resource, error:", e)
else:
    # Only reached when the request succeeded with a non-error status.
    page = response.text

In [None]:
page

In [None]:
doc = bs4.BeautifulSoup(page, 'lxml')

In [None]:
doc

### Getting all links

In [None]:
link = doc.find_all("a")[0]

In [None]:
link = doc.find_all("a")[0]
print(link.text)

In [None]:
print(link.attrs['href'])

In [None]:
for link in doc.find_all("a"):
    print(link.attrs['href'])

In [None]:
link.attrs.get('stuff')

In [None]:
link.attrs.get('stuff', 'http://default.com')

In [None]:
# `attrs` is a plain dict, so calling it raises TypeError.
# Tag.get(key, default) is the shortcut for Tag.attrs.get(key, default).
link.get('stuff', "http://default.com")

## XML

https://docs.python.org/3/library/xml.etree.elementtree.html

In [None]:
import xml.etree.ElementTree

In [None]:
response = requests.get("http://www.thomas-bayer.com/sqlrest/CUSTOMER/")

In [None]:
print(response.text)

In [None]:
customers_xml = xml.etree.ElementTree.fromstring(response.text)

In [None]:
# Element.getchildren() was removed in Python 3.9; an Element is iterable over
# its direct children, so list() gives the same result.
list(customers_xml)  # ALL CHILDREN ELEMENTS

In [None]:
# Elements support indexing over their direct children; this replaces
# getchildren()[0], which no longer exists since Python 3.9.
customer0 = customers_xml[0]
customer0

In [None]:
customer0.text  # VALUE OF ELEMENT - TEXT CONTENT

In [None]:
customer0.items()  # ITEMS (ATTRIBUTES) OF AN ELEMENT, (KEY, VALUE)

In [None]:
customer0.keys()  # KEYS - ATTRIBUTES NAMES

In [None]:
customer0.get('{http://www.w3.org/1999/xlink}href')  # GET ATTRIBUTE BY KEY

In [None]:
customers_xml.find('CUSTOMER')  # GET CHILD ELEMENT FOR NAME (RETURNS ONLY FIRST FOUND ELEMENT)

In [None]:
customers_xml.find('CUSTOMER')  # GET 1st CHILD ELEMENT FOR NAME

In [None]:
customers_xml.findall('CUSTOMER')  # GET ALL CHILD ELEMENT FOR NAME

# Exercise

Our final output is a list of Customers containing their data and their coordinates. It can be represented as a list of dicts.  
We will get our Customers from the XML exercise and their coordinates from `googleapis` (the JSON exercise).

---
1. Fetch customers from **http://www.thomas-bayer.com/sqlrest/CUSTOMER/**. Create list named **customers** containing dicts with Customers `id` and `link`.  
    Output should look like this: 
           [{'id': '2', 'link': 'http://www.thomas-bayer.com/sqlrest/CUSTOMER/2/'},
            {'id': '3', 'link': 'http://www.thomas-bayer.com/sqlrest/CUSTOMER/3/'},
            ... 
           ]
           
2. For each Customer fetch their details using their links. Create list named **customers_data** containing dicts with Customers `id`, `link`, `firstname`, `lastname`, `city` and `street`.  
    Output should look like this:
        [{'city': 'Lyon',
          'firstname': 'Anne',
          'id': '2',
          'lastname': 'Miller',
          'link': 'http://www.thomas-bayer.com/sqlrest/CUSTOMER/2/',
          'street': '20 Upland Pl.'},
         {'city': 'San Francisco',
          'firstname': 'Michael',
          'id': '3',
          'lastname': 'Clancy',
          'link': 'http://www.thomas-bayer.com/sqlrest/CUSTOMER/3/',
          'street': '542 Upland Pl.'},
          ...
         ]
         
3. For each Customer fetch their coordinates using **http://maps.googleapis.com/maps/api/geocode/json** and Customers data (`"{street}, {city}"`).  
    Create list named **customers_data_geo** containing dicts with Customers `id`, `link`, `firstname`, `lastname`, `city`, `street`, `lat` and `lng`.  
    If a Customer's location is not found, fill `lat` and `lng` with `None`; otherwise use the first location found.  
    Output should look like this:
        [{'city': 'Lyon',
          'firstname': 'Anne',
          'id': '2',
          'lastname': 'Miller',
          'lat': None,
          'link': 'http://www.thomas-bayer.com/sqlrest/CUSTOMER/2/',
          'lng': None,
          'street': '20 Upland Pl.'},
         {'city': 'San Francisco',
          'firstname': 'Michael',
          'id': '3',
          'lastname': 'Clancy',
          'lat': 37.7302685,
          'link': 'http://www.thomas-bayer.com/sqlrest/CUSTOMER/3/',
          'lng': -122.4659227,
          'street': '542 Upland Pl.'},
          ...
         ]

In [None]:
# Step 1 of the exercise: fetch the customer index and keep id + link for each.
response = requests.get('http://www.thomas-bayer.com/sqlrest/CUSTOMER/')
customers_xml = xml.etree.ElementTree.fromstring(response.text)

# Element.getchildren() was removed in Python 3.9; list(element) returns the
# same direct children.
customers = list(customers_xml)

# The element text is the customer id; the xlink:href attribute (namespace
# spelled out in Clark notation) is the URL of that customer's detail document.
customers_temp = [
    {'id': customer.text,
     'link': customer.get('{http://www.w3.org/1999/xlink}href')}
    for customer in customers
]

In [None]:
customers = customers_temp
customers

In [None]:
customer = customers[0]

In [None]:
customer

In [None]:
response = requests.get(customer['link'])
customer_xml = xml.etree.ElementTree.fromstring(response.text)

In [None]:
# Pull the four detail fields out of the customer document in one pass.
firstname, lastname, street, city = (
    customer_xml.find(tag).text
    for tag in ('FIRSTNAME', 'LASTNAME', 'STREET', 'CITY')
)

# Combine the index data (id, link) with the fetched details.
d = dict(
    id=customer['id'],
    link=customer['link'],
    firstname=firstname,
    lastname=lastname,
    street=street,
    city=city,
)

In [None]:
d

In [None]:
# Step 2 of the exercise: fetch every customer's detail document and collect
# id, link, firstname, lastname, street and city into customers_data.
customers_data = []

for customer in customers:
    # Bound the wait, as the Timeouts section of this notebook advises, and fail
    # loudly on a 4XX/5XX answer instead of passing an error page to the parser.
    response = requests.get(customer['link'], timeout=5)
    response.raise_for_status()
    customer_xml = xml.etree.ElementTree.fromstring(response.text)
    firstname = customer_xml.find('FIRSTNAME').text
    lastname = customer_xml.find('LASTNAME').text
    street = customer_xml.find('STREET').text
    city = customer_xml.find('CITY').text
    d = {'id': customer['id'],
        'link': customer['link'],
        'firstname': firstname,
        'lastname': lastname,
        'street': street,
        'city': city}
    customers_data.append(d)

customers_data
    