In [1]:
import urllib
import webbrowser

from pprint import pprint

In [2]:
bin_url = 'https://httpbin.org/'

In [3]:
webbrowser.open(bin_url)

True

In [4]:
resp = urllib.request.urlopen(bin_url)

resp

<http.client.HTTPResponse at 0x287e3ef5930>

In [5]:
resp.geturl()

'https://httpbin.org/'

In [6]:
resp.getcode()

200

In [7]:
print(resp.info())

Date: Wed, 30 Mar 2022 19:47:50 GMT
Content-Type: text/html; charset=utf-8
Content-Length: 9593
Connection: close
Server: gunicorn/19.9.0
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true




In [9]:
data = resp.read()

pprint(data.decode('UTF-8'))

('<!DOCTYPE html>\n'
 '<html lang="en">\n'
 '\n'
 '<head>\n'
 '    <meta charset="UTF-8">\n'
 '    <title>httpbin.org</title>\n'
 '    <link '
 'href="https://fonts.googleapis.com/css?family=Open+Sans:400,700|Source+Code+Pro:300,600|Titillium+Web:400,600,700"\n'
 '        rel="stylesheet">\n'
 '    <link rel="stylesheet" type="text/css" '
 'href="/flasgger_static/swagger-ui.css">\n'
 '    <link rel="icon" type="image/png" href="/static/favicon.ico" '
 'sizes="64x64 32x32 16x16" />\n'
 '    <style>\n'
 '        html {\n'
 '            box-sizing: border-box;\n'
 '            overflow: -moz-scrollbars-vertical;\n'
 '            overflow-y: scroll;\n'
 '        }\n'
 '\n'
 '        *,\n'
 '        *:before,\n'
 '        *:after {\n'
 '            box-sizing: inherit;\n'
 '        }\n'
 '\n'
 '        body {\n'
 '            margin: 0;\n'
 '            background: #fafafa;\n'
 '        }\n'
 '    </style>\n'
 '</head>\n'
 '\n'
 '<body>\n'
 '    <a href="https://github.com/requests/httpbin"

In [10]:
resp.status, resp.reason

(200, 'OK')

In [11]:
resp2_data = resp.read()
pprint(resp2_data.decode('UTF-8'))

''


In [12]:
req = urllib.request.Request(bin_url, method='OPTIONS')
resp = urllib.request.urlopen(req)

In [13]:
print(resp.info())

Date: Wed, 30 Mar 2022 19:50:08 GMT
Content-Type: text/html; charset=utf-8
Content-Length: 0
Connection: close
Server: gunicorn/19.9.0
Allow: OPTIONS, GET, HEAD
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true
Access-Control-Allow-Methods: GET, POST, PUT, DELETE, PATCH, OPTIONS
Access-Control-Max-Age: 3600




In [14]:
post_data = urllib.parse.urlencode({"name": "Alice", "college": "Harvard"}).encode('ascii')

req = urllib.request.Request('https://httpbin.org/post', 
                             method='POST',
                             data=post_data,
                             headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64)'})

resp = urllib.request.urlopen(req)

print(resp.info())

Date: Wed, 30 Mar 2022 19:52:28 GMT
Content-Type: application/json
Content-Length: 501
Connection: close
Server: gunicorn/19.9.0
Access-Control-Allow-Origin: *
Access-Control-Allow-Credentials: true




In [15]:
data = resp.read().decode('UTF-8')

pprint(data)

('{\n'
 '  "args": {}, \n'
 '  "data": "", \n'
 '  "files": {}, \n'
 '  "form": {\n'
 '    "college": "Harvard", \n'
 '    "name": "Alice"\n'
 '  }, \n'
 '  "headers": {\n'
 '    "Accept-Encoding": "identity", \n'
 '    "Content-Length": "26", \n'
 '    "Content-Type": "application/x-www-form-urlencoded", \n'
 '    "Host": "httpbin.org", \n'
 '    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)", \n'
 '    "X-Amzn-Trace-Id": "Root=1-6244b4fc-446fe1ef14efc9ec32e064ee"\n'
 '  }, \n'
 '  "json": null, \n'
 '  "origin": "191.185.159.149", \n'
 '  "url": "https://httpbin.org/post"\n'
 '}\n')


In [19]:
# BEST PRACTICE:
with urllib.request.urlopen('https://www.google.com/search?q=pluralsight') as resp:
    pprint(resp.data().decode('ISO-8859-1'))

HTTPError: HTTP Error 403: Forbidden

In [20]:
from urllib.error import URLError, HTTPError
try:
    with urllib.request.urlopen('https://www.google.com/search?q=pluralsight') as resp:
        pprint(resp.data().decode('ISO-8859-1'))
except HTTPError as e:
    print(e.reason, e.code)

Forbidden 403


In [24]:
try:
    with urllib.request.urlopen('https://www.loonyX-corn.com') as resp:
        pprint(resp.data().decode('UTF-8'))
except HTTPError as e:
    print(e.reason)

URLError: <urlopen error [Errno 11001] getaddrinfo failed>

In [25]:
parsed_data = urllib.parse.urlparse(\
    'http://www.loonycorn.in:80/languages/Python.html')

In [26]:
parsed_data

ParseResult(scheme='http', netloc='www.loonycorn.in:80', path='/languages/Python.html', params='', query='', fragment='')

In [27]:
parsed_data.scheme

'http'

In [28]:
parsed_data.netloc

'www.loonycorn.in:80'

In [29]:
parsed_data = urllib.parse.urlparse(\
    'http://www.loonycorn.in:80/languages/Python.html?greeting=hello&person=John')

In [30]:
parsed_data

ParseResult(scheme='http', netloc='www.loonycorn.in:80', path='/languages/Python.html', params='', query='greeting=hello&person=John', fragment='')

In [31]:
parsed_data = urllib.parse.urlparse(\
    'http://www.loonycorn.in:80/languages/Python.html#courses')

In [32]:
parsed_data

ParseResult(scheme='http', netloc='www.loonycorn.in:80', path='/languages/Python.html', params='', query='', fragment='courses')