# Send HTTP Requests in Python

## Use the socket library

In [1]:
import socket

In [2]:
# Create an IPv4 (AF_INET) stream (SOCK_STREAM, i.e. TCP) socket and connect
# it to the web server on port 80.
mysocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
mysocket.connect(('www.ufl.edu',80))

In [3]:
# Send a hand-written GET request for the home page. send() returns the number
# of bytes it wrote, which the notebook echoes as this cell's result.
# NOTE(review): the request line carries no HTTP version and is terminated by
# bare "\n\n" rather than CRLF -- confirm the server still accepts this form.
mysocket.send('GET http://www.ufl.edu/index.html \n\n')

36

In [4]:
while True:
    data = mysocket.recv(512)  #get 512 characters each time
    if (len(data)<1):
        break
    print data
mysocket.close()

<!DOCTYPE html>
<html lang="en" class="no-js">
<head>
	<meta charset="utf-8">
	<meta name="viewport" content="width=device-width,initial-scale=1">
	<meta http-equiv="X-UA-Compatible" content="IE=edge" />
	<title>University of Florida</title>
	<link rel="stylesheet" href="/media/templates/uf2015/css/style.css">
    

    
	<link rel="icon" href="/media/templates/uf2015/img/favicon.ico" >
	<link rel="apple-touch-icon" href="/media/templates/uf2015/img/favicon-180.png">
	<meta name="msapplication-
TileColor" content="#00529b">
	<meta name="msapplication-TileImage" content="/media/templates/uf2015/img/favicon-144.png">
	<script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)})(window,document,'script','//www.google-analytics.com/analytics.js','ga');ga('create', 'UA-3703196-1', 'auto');ga('sen

## Use the urllib library

In [5]:
import urllib

In [6]:
fhand = urllib.urlopen('http://www.ufl.edu/index.html')

for line in fhand:
    print line.strip()

<!DOCTYPE html>
<html lang="en" class="no-js">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width,initial-scale=1">
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<title>University of Florida</title>
<link rel="stylesheet" href="/media/templates/uf2015/css/style.css">



<link rel="icon" href="/media/templates/uf2015/img/favicon.ico" >
<link rel="apple-touch-icon" href="/media/templates/uf2015/img/favicon-180.png">
<meta name="msapplication-TileColor" content="#00529b">
<meta name="msapplication-TileImage" content="/media/templates/uf2015/img/favicon-144.png">
<script>(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)})(window,document,'script','//www.google-analytics.com/analytics.js','ga');ga('create', 'UA-3703196-1', 'auto');ga('send', 'pageview');</script>
</head>


# Web Scraping

## Get all links from a web page

In [5]:
import urllib
from bs4 import *

In [6]:
# Download the whole page body with a single read().
html = urllib.urlopen('http://www.ufl.edu/index.html').read()
# Parse the markup with the "html5lib" parser backend; `soup` is the parsed tree.
soup = BeautifulSoup(html,"html5lib") #parsed html

In [7]:
tags = soup('a') #retrieve a list of the anchor tags
for tag in tags:
    print tag.get('href',None) #get the value of attribute 'href'

#main
http://www.ufl.edu/
/about/
/about/administration/
/about/maps-directions/
/about/campus-tours/
/about/university-facts/
/about/history/
/about/offices-services/
/academics/
/academics/colleges/
/academics/programs/
/academics/courses/
/academics/online-learning/
https://catalog.ufl.edu/ugrad/current/Pages/certificates.aspx
http://pd.dce.ufl.edu/program-finder
/academics/libraries/
/academics/resources/
/admissions/
/admissions/undergraduate/
/admissions/graduate/
http://www.admissions.ufl.edu/prospectiveinternational.html
http://www.ufl.edu/academics/online-learning/
/admissions/professional/
http://www.sfa.ufl.edu/
/student-life/
/student-life/arts/
/student-life/recreation-fitness/
/student-life/cultural-opportunities/
/student-life/involvement/
/student-life/health-safety/
/student-life/career-exploration/
/student-life/housing/
/student-life/success-services/
/research/
/research/centers-institutes/
/research/undergraduate-research/
/research/graduate-research/
http://resear

# API

## Google Geocoding API

In [8]:
import urllib
import json

In [11]:
# Base endpoint of the geocoding service; the URL-encoded query string is
# appended directly after the trailing '?'.
# NOTE(review): this uses plain http and sends no API key -- confirm the
# service still accepts such requests.
serviceurl = 'http://maps.googleapis.com/maps/api/geocode/json?'
# Free-form place name to geocode.
address = 'Gainesville FL'

In [12]:
url = serviceurl + urllib.urlencode({'sensor':'false','address':address})
print 'Retrieving', url
uh = urllib.urlopen(url)
data = uh.read()
print 'Retrieved', len(data), 'characters'

Retrieving http://maps.googleapis.com/maps/api/geocode/json?sensor=false&address=Gainesville+FL
Retrieved 1756 characters


In [13]:
try: js = json.loads(str(data))
except: js = None
if 'status' not in js or js['status'] != 'OK':
    print 'Fail to retrieve'
    print data
print json.dumps(js, indent=4)

{
    "status": "OK", 
    "results": [
        {
            "geometry": {
                "location_type": "APPROXIMATE", 
                "bounds": {
                    "northeast": {
                        "lat": 29.770553, 
                        "lng": -82.218976
                    }, 
                    "southwest": {
                        "lat": 29.601988, 
                        "lng": -82.42051599999999
                    }
                }, 
                "viewport": {
                    "northeast": {
                        "lat": 29.770553, 
                        "lng": -82.218976
                    }, 
                    "southwest": {
                        "lat": 29.601988, 
                        "lng": -82.42051599999999
                    }
                }, 
                "location": {
                    "lat": 29.6516344, 
                    "lng": -82.32482619999999
                }
            }, 
            "address_components": [
   

In [14]:
lat = js['results'][0]['geometry']['location']['lat']
lng = js['results'][0]['geometry']['location']['lng']
print 'lat',lat,'lng',lng

lat 29.6516344 lng -82.3248262


In [15]:
# Formatted address string of the first geocoding result.
location = js['results'][0]['formatted_address']
print location

Gainesville, FL, USA


## Twitter API

In [19]:
import urllib
from oauth import oauth

In [20]:
import os

# OAuth credentials for the Twitter API. Real values are taken from the
# environment (TWITTER_CONSUMER_KEY, TWITTER_CONSUMER_SECRET,
# TWITTER_TOKEN_KEY, TWITTER_TOKEN_SECRET) so they never have to be saved in
# the notebook. The redacted strings are only fallbacks that keep the cell
# runnable; they will not authenticate.
secrets = {
    "consumer_key" : os.environ.get("TWITTER_CONSUMER_KEY", "j...u"),
    "consumer_secret" : os.environ.get("TWITTER_CONSUMER_SECRET", "u...d"),
    "token_key" : os.environ.get("TWITTER_TOKEN_KEY", "8...L"),
    "token_secret" : os.environ.get("TWITTER_TOKEN_SECRET", "f...V")
}
# Endpoint that returns a user's timeline as JSON.
apiurl = 'https://api.twitter.com/1.1/statuses/user_timeline.json'
# Query parameters: which account to read and how many statuses to request.
parameters = {'screen_name':'realDonaldTrump','count':'1'}

In [21]:
def augment(secrets, url, parameters):
    """Return `url` as a signed OAuth GET request URL.

    secrets    -- dict providing 'consumer_key', 'consumer_secret',
                  'token_key' and 'token_secret'.
    url        -- endpoint the request is addressed to.
    parameters -- request parameters handed to the OAuth request builder.

    The request is signed with HMAC-SHA1 and serialized with to_url().
    """
    app = oauth.OAuthConsumer(secrets['consumer_key'], secrets['consumer_secret'])
    access = oauth.OAuthToken(secrets['token_key'], secrets['token_secret'])
    signer = oauth.OAuthSignatureMethod_HMAC_SHA1()

    request = oauth.OAuthRequest.from_consumer_and_token(
        app,
        token=access,
        http_method='GET',
        http_url=url,
        parameters=parameters
    )
    request.sign_request(signer, app, access)
    return request.to_url()

In [22]:
# Build the signed request URL for the timeline endpoint.
url = augment(secrets,apiurl,parameters)

In [23]:
# Show the signed URL.
# NOTE(review): its query string carries the OAuth token and consumer key --
# redact them before sharing this cell's output.
print url

https://api.twitter.com/1.1/statuses/user_timeline.json?count=1&oauth_version=1.0&oauth_token=8...L&screen_name=realDonaldTrump&oauth_nonce=20347394&oauth_timestamp=1488221452&oauth_signature=xsBDlcTMVlK0kfQieVftG7bR9GM%3D&oauth_consumer_key=j...u&oauth_signature_method=HMAC-SHA1


In [24]:
# Issue the request; the returned handle is used by the following cells for
# both the response body and the response headers.
connection = urllib.urlopen(url)

In [25]:
# Read the complete response body as text.
data = connection.read()

In [26]:
# Dump the raw, unparsed response text.
print data

[{"created_at":"Mon Feb 27 17:06:23 +0000 2017","id":836261209540288513,"id_str":"836261209540288513","text":"Great meeting with CEOs of leading U.S. health insurance companies who provide great healthcare to the American peo\u2026 https:\/\/t.co\/60jigi6ffS","truncated":true,"entities":{"hashtags":[],"symbols":[],"user_mentions":[],"urls":[{"url":"https:\/\/t.co\/60jigi6ffS","expanded_url":"https:\/\/twitter.com\/i\/web\/status\/836261209540288513","display_url":"twitter.com\/i\/web\/status\/8\u2026","indices":[117,140]}]},"source":"\u003ca href=\"http:\/\/twitter.com\/download\/iphone\" rel=\"nofollow\"\u003eTwitter for iPhone\u003c\/a\u003e","in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":{"id":25073877,"id_str":"25073877","name":"Donald J. Trump","screen_name":"realDonaldTrump","location":"Washington, DC","description":"45th President of the United States of America","url":

In [27]:
# Response headers as a plain dict; the keys appear as lower-cased header
# names in the output below.
headers = connection.info().dict
print headers

{'content-length': '2593', 'x-rate-limit-reset': '1488222396', 'x-rate-limit-remaining': '899', 'x-xss-protection': '1; mode=block', 'x-content-type-options': 'nosniff', 'x-connection-hash': '0ab4eb470af66f4345e022a2456f417c', 'x-twitter-response-tags': 'BouncerCompliant', 'cache-control': 'no-cache, no-store, must-revalidate, pre-check=0, post-check=0', 'status': '200 OK', 'content-disposition': 'attachment; filename=json.json', 'set-cookie': 'lang=en; Path=/, guest_id=v1%3A148822149657350382; Domain=.twitter.com; Path=/; Expires=Wed, 27-Feb-2019 18:51:36 UTC', 'expires': 'Tue, 31 Mar 1981 05:00:00 GMT', 'x-access-level': 'read-write', 'last-modified': 'Mon, 27 Feb 2017 18:51:36 GMT', 'pragma': 'no-cache', 'date': 'Mon, 27 Feb 2017 18:51:36 GMT', 'x-rate-limit-limit': '900', 'x-response-time': '67', 'x-transaction': '00c695ad00005f9b', 'strict-transport-security': 'max-age=631138519', 'server': 'tsa_b', 'x-frame-options': 'SAMEORIGIN', 'content-type': 'application/json;charset=utf-8'}