# HTTP GET Request to get HTML content from a website 

<h3>Step 1: Import the requests library</h3>

In [None]:
import requests

<h3>Step 2: Send an HTTP request, get the response, and save in a variable</h3>

In [None]:
# Send the GET request; the returned Response object is kept in `response` for the cells below.
response = requests.get("http://www.ynet.co.il")

<h3>Step 3: Check the response status code to see if everything went as planned</h3>
<li>status code 200: the request response cycle was successful
<li>any other status code: it didn't work (e.g., 404 = page not found)

In [None]:
# Print the numeric HTTP status code of the response.
print(response.status_code)

<h3>Step 4: Get the content of the response</h3>
<li>Convert to utf-8 if necessary

In [None]:
# Decode the raw response bytes as UTF-8; being the last expression of the cell, the text is displayed.
response.content.decode('utf-8')

<h4>Problem: Get the contents of Wikipedia's main page and look for the string "Did you know" in it</h4>
<li>Open https://en.wikipedia.org/wiki/main_page 
<li>Check the status you get
<li> Get the content and decode it
<li> Search the string "Did you know" using the str find function
<li> if you get a positive number --> you found the string.
<li> if you get -1 there is a problem...


In [None]:
url = "https://en.wikipedia.org/wiki/main_page"
# The rest of your code should go below this line

# Fetch the page and print the HTTP status code.
response = requests.get(url)
print(response.status_code)
# Uncomment to print the whole decoded page:
#print(response.content.decode('utf-8'))
res = response.content.decode('utf-8')
# str.find returns the index of the first match, or -1 when the text is absent.
res.find("Did you know")


<h1>JSON</h1>
<li>The python library - json - deals with converting text to and from JSON


In [None]:
import json
data = '''{
  "name" : "Chuck",
  "phone" : {
    "type" : "intl",
    "number" : "+1 734 303 4456"
   },
   "email" : {
     "hide" : "yes",
     "type" : "organizational"
   }
}'''

# Parse the JSON text in `data` into Python objects and print its type.
info = json.loads(data)
print(type(info))
# Top-level and nested values are reached with ordinary key lookups.
print('Name:',info["name"])
print('Hide:',info["email"]["hide"])


In [None]:
# Nested lookup: the "hide" entry inside the "email" entry of `info`.
info["email"]['hide']

In [None]:
import json
# JSON text holding an array of two user objects.
# Named `users_json` rather than `input` so that the builtin input() is not
# shadowed for the rest of the kernel session.
users_json = '''[
  { "id" : "001",
    "x" : "2",
    "name" : "Chuck"
  } ,
  { "id" : "009",
    "x" : "7",
    "name" : "Chuck"
  }
]'''

# A top-level JSON array is parsed into a Python list (of dicts here).
info = json.loads(users_json)
print(type(info))
print('User count:', len(info))
for item in info:
    print('Name', item['name'])
    print('Id', item['id'])
    print('Attribute', item['x'])


In [None]:
# JSON text: an array containing one object with a list, a float and a string value.
data_string = '[{"b": [2, 4], "c": 3.0, "a": "A"}]'
# Decode the JSON text into the equivalent Python objects and print them.
python_data = json.loads(data_string)
print(python_data)

<h3>json.loads recursively decodes a string in JSON format into equivalent python objects</h3>
<li>data_string's outermost element is converted into a python list
<li>the first element of that list is converted into a dictionary
<li>each key of that dictionary is converted into a string
<li>each value is converted into the matching python type: "b" becomes a list of two integers, "c" a float, and "a" a string

In [None]:
# Compare the type of the JSON text with the type of the decoded object.
print(type(data_string),type(python_data))
# First element of the decoded list, with its type.
print(type(python_data[0]),python_data[0])
# Value stored under key 'b' of that element, with its type.
print(type(python_data[0]['b']),python_data[0]['b'])

<h3>json.loads will throw an exception if the format is incorrect</h3>

In [None]:
# Wrong: a bare word is not valid JSON (uncomment to see the exception)
#json.loads("Hello")
# Correct: a JSON string must itself be wrapped in double quotes
json.loads('"Hello"')

In [None]:
import json
# Serialize the Python object back into JSON text, then print the type of the result and the text itself.
data_string = json.dumps(python_data)
print(type(data_string))
print(data_string)


<h1>requests library and JSON</h1>

In [None]:
import os
from urllib.parse import urlencode

# The key is read from the environment so that it is never saved inside the
# notebook: set GOOGLE_MAPS_API_KEY before starting the kernel. Defining
# `api_key` here also keeps the later cells that use it from raising NameError.
api_key = os.environ.get("GOOGLE_MAPS_API_KEY", "")
address = "Ariel University, Ariel, Israel"
# urlencode escapes the spaces/commas (and any '&' or non-ASCII text) in the address.
url = "https://maps.googleapis.com/maps/api/geocode/json?" + urlencode(
    {"address": address, "key": api_key}
)
response = requests.get(url).json()  # if the response is of json format the .json() will load the json into a python object
print(type(response))

In [None]:
# Display the current value of `response`.
response

In [None]:
# Display the request URL that was built.
url

## Please note: a sample Google Maps response has been stored in the variable `response` (as a string). The try-except code below has been adapted to work with this string

In [None]:
response = """
{
  "results": [
    {
      "access_points": [],
      "address_components": [
        {
          "long_name": "65",
          "short_name": "65",
          "types": [
            "street_number"
          ]
        },
        {
          "long_name": "Ramat HaGolan Street",
          "short_name": "Ramat HaGolan St",
          "types": [
            "route"
          ]
        },
        {
          "long_name": "Ari'el",
          "short_name": "Ari'el",
          "types": [
            "locality",
            "political"
          ]
        },
        {
          "long_name": "4070000",
          "short_name": "4070000",
          "types": [
            "postal_code"
          ]
        }
      ],
      "formatted_address": "Ramat HaGolan St 65, Ari'el",
      "geometry": {
        "location": {
          "lat": 32.103188,
          "lng": 35.207718
        },
        "location_type": "ROOFTOP",
        "viewport": {
          "northeast": {
            "lat": 32.1045369802915,
            "lng": 35.2090669802915
          },
          "southwest": {
            "lat": 32.1018390197085,
            "lng": 35.2063690197085
          }
        }
      },
      "place_id": "ChIJ96oNT6onHRURAs0Qa9GVqSw",
      "plus_code": {
        "compound_code": "4635+73 Ari'el",
        "global_code": "8G4Q4635+73"
      },
      "types": [
        "establishment",
        "point_of_interest",
        "university"
      ]
    }
  ],
  "status": "OK"
}"""
import json
# Parse the sample response text into Python objects; `data` is read by the status check in the next code cell.
data = json.loads(response)

<h3>Exception checking!</h3>

In [None]:
address="Ariel University, New York, NY"
url="https://maps.googleapis.com/maps/api/geocode/json?address=%s" % (address)

# Start from None so the prints at the end work even when a check fails,
# instead of raising NameError or silently showing a stale value.
response_data = None
try:
    # `data` is the already-parsed sample response; a missing "status" key
    # (or a non-dict `data`) is reported rather than swallowed by a bare except.
    status_ok = data["status"] == "OK"
except (KeyError, TypeError):
    print("Something went wrong with requests.get")
else:
    if not status_ok:
        print("HTTP error")
    else:
        try:
            response_data = json.loads(response)
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError; TypeError covers a non-text `response`.
            print("Response not in valid JSON format")
print(type(response_data))
print(response_data)

### If we want to pretty print the data: 

In [None]:
# indent=4 makes json.dumps emit one item per line with 4-space nesting.
print(json.dumps(response_data, indent=4))

In [None]:
# address="Ariel University, Ariel, Israel"
address="אוניברסיטת אריאל , אריאל, ישראל"

url="https://maps.googleapis.com/maps/api/geocode/json?address=%s&key=%s" % (address,api_key)

# Start from None so the final print never hits an undefined or stale value.
response_data = None
try:
    response = requests.get(url)
except requests.exceptions.RequestException as err:
    # Only genuine request failures (connection, DNS, timeout, ...) land here.
    print("Something went wrong with requests.get", err)
else:
    if response.status_code != 200:
        print("HTTP error",response.status_code)
    else:
        try:
            response_data = response.json()
        except ValueError:
            # The JSON decoding errors raised by .json() are ValueError subclasses.
            print("Response not in valid JSON format")
print(type(response_data))


In [None]:
# Display the "status" entry of `data`.
data["status"]

In [None]:
# Display the current value of `response_data`.
response_data

In [None]:
# Drill down to the latitude of the first result: results -> [0] -> geometry -> location -> lat.
response_data['results'][0]["geometry"]["location"]['lat']

In [None]:
# Pretty-print `response_data` as JSON text with 4-space indentation.
print(json.dumps(response_data, indent = 4))

<h2>Problem 1: Write a function that takes an address as an argument and returns a (latitude, longitude) tuple</h2>

In [None]:
def get_lat_lng(address_string):
    """Exercise stub: return a (latitude, longitude) tuple for ``address_string``.

    A ``def`` whose body is only a comment does not parse, so the stub raises
    until the exercise is filled in.
    """
    #python code goes here
    raise NotImplementedError("get_lat_lng: exercise not implemented yet")

In [None]:
def get_lat_lng(address_string):
    """Geocode ``address_string`` with the Google Maps Geocoding API.

    Reads the notebook-level ``api_key`` variable for the ``key`` parameter.

    Args:
        address_string: free-text address to look up.

    Returns:
        The ``location`` object of the first result (a dict with ``lat`` and
        ``lng`` entries in the sample response shown earlier in this notebook),
        or ``None`` after printing a message when the lookup fails.
    """
    import requests

    # NOTE(review): the exercise text asks for a (latitude, longitude) tuple;
    # the dict return is kept so existing callers keep working.
    try:
        # `params` lets requests percent-encode the address (spaces, '&',
        # non-ASCII text); the timeout keeps a stalled connection from hanging the cell.
        response = requests.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params={"address": address_string, "key": api_key},
            timeout=10,
        )
    except requests.exceptions.RequestException as err:
        print("Something went wrong with requests.get", err)
        return None

    if response.status_code != 200:
        print("HTTP error", response.status_code)
        return None

    try:
        response_data = response.json()
    except ValueError:
        # The JSON decoding errors raised by .json() are ValueError subclasses.
        print("Response not in valid JSON format")
        return None

    try:
        return response_data['results'][0]['geometry']['location']
    except (KeyError, IndexError, TypeError):
        # Valid JSON but no usable result (e.g. an empty "results" list);
        # previously this was misreported as invalid JSON.
        print("No geocoding result found for", address_string)
        return None

In [None]:
# Try the function with an address written in Hebrew.
get_lat_lng("אוניברסיטת אריאל, ישראל")

<h2>Problem 2: Extend the function so that it takes a possibly incomplete address as an argument and returns a list of tuples of the form (complete address, latitude, longitude)</h2>

In [None]:
def get_lat_lng(address_string):
    """Exercise stub: return a list of (complete address, latitude, longitude) tuples.

    ``address_string`` may be an incomplete address. A ``def`` with no body
    does not parse, so the stub raises until the exercise is filled in.
    """
    #python code goes here
    raise NotImplementedError("get_lat_lng: exercise not implemented yet")

<h1>XML</h1>
<li>The python library - lxml - deals with converting an xml string to python objects and vice versa</li>

In [None]:
data = '''<person>
  <name>Ron</name>
  <phone type="intl">
     +972 - 52 7223388
   </phone>
   <email hide="yes"/>
</person>'''


In [None]:
from lxml import etree
# Parse the XML text in `data`; the result is the root element of the document.
root = etree.XML(data)

In [None]:
# Show the Python type of the parsed root element.
type(root)

In [None]:
print("Name:", root.find('name').text) # .text returns the text between the tags
print("Attr:", root.find('email').get('hide')) # .get returns the value of an attribute

### what happens if there is an error in the xml?

In [None]:

# Deliberately malformed document: the final closing tag is missing its '>'.
data = '''<person>
  <name>Chuck</name>
  <phone type="intl">
     +1 734 303 4456
   </phone>
   <email hide="yes"/>
</person'''

from lxml import etree

# The parse error is the point of this cell, so catch and display it instead
# of letting the exception stop a "Run All".
try:
    root = etree.XML(data)
except etree.XMLSyntaxError as err:
    print("XML syntax error:", err)
else:
    print("Name:", root.find('name').text)
    print("Attr:", root.find('email').get('hide')) # .get returns the value of an attribute

In [None]:
# XML text with two <user> elements nested under <stuff>/<users>.
# Named `users_xml` rather than `input` so that the builtin input() is not
# shadowed for the rest of the kernel session.
users_xml = '''<stuff>
    <users>
        <user x="2">
            <id>001</id>
            <name>Chuck</name>
        </user>
        <user x="7">
            <id>009</id>
            <name>Brent</name>
        </user>
    </users>
</stuff>'''

stuff = etree.XML(users_xml)
# The path is relative to the root element: every <user> under <users>.
lst = stuff.findall('users/user')
print('User count:', len(lst))
for item in lst:
    print('Name', item.find('name').text)
    print('Id', item.find('id').text)
    print('Attribute', item.get("x"))


In [None]:
# Select the <name> elements directly; the count printed is the number of matches.
lst = stuff.findall('users/user/name')
print('User count:', len(lst))
for item in lst:
    print(item.text)

In [None]:
data_string = """
<Bookstore>
   <Book ISBN="ISBN-13:978-1599620787" Price="15.23" Weight="1.5">
      <Title>New York Deco</Title>
      <Authors>
         <Author Residence="New York City">
            <First_Name>Richard</First_Name>
            <Last_Name>Berenholtz</Last_Name>
         </Author>
      </Authors>
   </Book>
   <Book ISBN="ISBN-13:978-1579128562" Price="15.80">
      <Remark>
      Five Hundred Buildings of New York and over one million other books are available for Amazon Kindle.
      </Remark>
      <Title>Five Hundred Buildings of New York</Title>
      <Authors>
         <Author Residence="Beijing">
            <First_Name>Bill</First_Name>
            <Last_Name>Harris</Last_Name>
         </Author>
         <Author Residence="New York City">
            <First_Name>Jorg</First_Name>
            <Last_Name>Brockmann</Last_Name>
         </Author>
      </Authors>
   </Book>
</Bookstore>
"""

from lxml import etree
# Parse the bookstore document; `root` now refers to its root element.
root = etree.XML(data_string)

In [None]:
from lxml import etree
# Parses `data_string` again, then prints the root element's tag and the type of that tag.
root = etree.XML(data_string)
print(root.tag,type(root.tag))

In [None]:
# Serialize the tree with pretty_print=True and decode the result as UTF-8 text for printing.
print(etree.tostring(root, pretty_print=True).decode("utf-8"))

<h3>Iterating over an XML tree</h3>
<li>Use an iterator. 
<li>The iterator will generate every tree element for a given subtree

In [None]:
# iter() without a tag argument: print every element the iterator yields for this tree.
for element in root.iter():
    print(element)

<h4>Or just use the "for child in subtree" construction</h4>

In [None]:
# Looping over an element directly yields its child elements.
for child in root:
    print(child)

<h4>Accessing the tag</h4>


In [None]:
# Print the tag name of each child of the root.
for child in root:
    print(child.tag)

<h4>Using the iterator to get specific tags</h4>
<li>In the below example, only the author tags are accessed
<li>For each author tag, the .find function accesses the First_Name and Last_Name tags
<li>The .find function only looks at the children, not other descendants, so be careful!
<li>The .text attribute prints the text in a leaf node

In [None]:
# Walk every <Author> element and print the text of its First_Name and Last_Name children.
for author in root.iter("Author"):
    first_name_node = author.find('First_Name')
    last_name_node = author.find('Last_Name')
    print(first_name_node.text, last_name_node.text)

<h4>Problem: Find the last names of all authors in the tree “root” using xpath</h4>

In [None]:
# Collect the <Last_Name> elements with one path expression, then print each one's text.
last_name_nodes = root.findall('Book/Authors/Author/Last_Name')
for last_name_node in last_name_nodes:
    print(last_name_node.text)

<h4>Using values of attributes as filters</h4>
<li>Example: Find the first name of the author of a book that weighs 1.5 oz

In [None]:
# [@Weight="1.5"] keeps only <Book> elements whose Weight attribute equals "1.5".
root.find('Book[@Weight="1.5"]/Authors/Author/First_Name').text

<h4>Problem: Print first and last names of all authors who live in New York City</h4>

In [None]:
# Test each <Author> on its own: this prints every New York City author (not
# just the first one per book) and does not fail on a book without any.
for author in root.iter("Author"):
    if author.get("Residence") == "New York City":
        print(author.findtext('First_Name'), author.findtext('Last_Name'))


In [None]:
# Text of the first element matching Book/Remark.
root.find("Book/Remark").text

## Oauth

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install oauth2

In [None]:
##### save this code in hidden.py #####

def oauth():
    """Return the four OAuth credential strings as a dict.

    Each value is read from an environment variable (TWITTER_CONSUMER_KEY,
    TWITTER_CONSUMER_SECRET, TWITTER_TOKEN_KEY, TWITTER_TOKEN_SECRET) so that
    real secrets need not be written into this file; when a variable is not
    set, the original placeholder string is returned for that entry.

    Returns:
        dict with the keys "consumer_key", "consumer_secret", "token_key"
        and "token_secret".
    """
    import os

    return {"consumer_key": os.environ.get("TWITTER_CONSUMER_KEY", "H8z7......7MW6T"),
            "consumer_secret": os.environ.get("TWITTER_CONSUMER_SECRET", "dWa6F39......vi3RZy"),
            "token_key": os.environ.get("TWITTER_TOKEN_KEY", "1092......dBnDzavJ"),
            "token_secret": os.environ.get("TWITTER_TOKEN_SECRET", "whdB......2XjRcqx1")}

In [None]:
#### save this code in twurl.py #######

import urllib.request, urllib.parse, urllib.error
import oauth2 as oauth
import hidden

# https://apps.twitter.com/
# Create App and get the four strings, put them in hidden.py

def augment(url, parameters):
    """Return ``url`` as an OAuth-signed GET URL carrying ``parameters``.

    The credentials come from ``hidden.oauth()``; the request is signed with
    HMAC-SHA1 using the consumer and token built from them.
    """
    creds = hidden.oauth()
    app = oauth.Consumer(creds['consumer_key'], creds['consumer_secret'])
    access = oauth.Token(creds['token_key'], creds['token_secret'])

    signed = oauth.Request.from_consumer_and_token(
        app,
        token=access,
        http_method='GET',
        http_url=url,
        parameters=parameters,
    )
    signed.sign_request(oauth.SignatureMethod_HMAC_SHA1(), app, access)
    return signed.to_url()



In [None]:
import urllib.request, urllib.parse, urllib.error
import twurl

import json

TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

# The account is hard-coded (the interactive prompt is disabled), so the lookup
# runs exactly once. Wrapping it in `while True` never reached the `break` and
# repeated the request forever.
print('')
#acct = input('Enter Twitter Account:')
acct = "goldanat"
if len(acct) >= 1:
    url = twurl.augment(TWITTER_URL,
                        {'screen_name': acct, 'count': '5'})
    print('Retrieving', url)
    # The context manager closes the HTTP connection once the body and headers are read.
    with urllib.request.urlopen(url) as connection:
        data = connection.read().decode()
        headers = dict(connection.getheaders())
    # .get avoids a KeyError if the rate-limit header is absent.
    print('Remaining', headers.get('x-rate-limit-remaining'))
    js = json.loads(data)
    print(json.dumps(js, indent=4))

    for u in js['users']:
        print(u['screen_name'])
        # Guard against a user entry that has no 'status' object.
        if 'status' not in u:
            print('   * No status found')
            continue
        s = u['status']['text']
        print('  ', s[:50])

        
