# Python standard library
- huge collection of routines included with Python
- many interfaces to operating system functionality
    - networking IO
    - file IO
- data persistence
- common file formats like JSON, XML, HTML
- development tools
- [doc](https://docs.python.org/3/library/index.html)

# Networking
- sockets
    - 'socket' module has low level networking
    - unpleasant to use - you will probably never need it
    - [doc](https://docs.python.org/3/library/socket.html)
      
- HTTP
    - most network traffic runs over HTTP
    - 'urllib' will retrieve HTTP content, as a 'bytes' object
    - 'urllib.request.urlopen' will return an object that can:
        - iterate over the lines
        - grab entire page as one 'bytes' object
        - read one byte at a time
    - [doc](https://docs.python.org/3.7/library/urllib.request.html#module-urllib.request)

In [1]:
# import the submodule explicitly: a bare 'import urllib' does not
# guarantee that 'urllib.request' is loaded
import urllib.request

cu = 'http://columbia.edu'

# returns object that represents the network connection
cur = urllib.request.urlopen(cu)
cur

<http.client.HTTPResponse at 0x104315e10>

In [2]:
# headers sent back by the server, as (name, value) pairs

cur.headers.items()

[('Date', 'Tue, 03 Mar 2020 19:37:45 GMT'),
 ('Server', 'Apache'),
 ('Cache-Control', 'max-age=300, public'),
 ('Content-language', 'en'),
 ('X-XSS-Protection', '1; mode=block'),
 ('X-Frame-Options', 'SAMEORIGIN'),
 ('X-Content-Type-Options', 'nosniff'),
 ('Expires', 'Sun, 19 Nov 1978 05:00:00 GMT'),
 ('Last-Modified', 'Tue, 03 Mar 2020 19:37:45 GMT'),
 ('ETag', '"1583264265"'),
 ('Content-Type', 'text/html; charset=UTF-8'),
 ('Age', '136'),
 ('X-Varnish-Cache', 'HIT (134)'),
 ('Accept-Ranges', 'bytes'),
 ('Content-Length', '122245'),
 ('Connection', 'close')]

In [3]:
# headers supports dictionary-style lookup by header name

cur.headers['Server']

'Apache'

In [4]:
# the network connection is both an iterable and an iterator
# can only iterate over it once, then it is exhausted

cur is iter(cur)

True

In [5]:
# usual iteration protocol reads one line at a time
# note the lines coming back are 'bytes' objects (b'...'),
# not strings.

next(cur), next(cur)

(b'<!DOCTYPE html>\n',
 b'<html  lang="en" dir="ltr" prefix="content: http://purl.org/rss/1.0/modules/content/  dc: http://purl.org/dc/terms/  foaf: http://xmlns.com/foaf/0.1/  og: http://ogp.me/ns#  rdfs: http://www.w3.org/2000/01/rdf-schema#  schema: http://schema.org/  sioc: http://rdfs.org/sioc/ns#  sioct: http://rdfs.org/sioc/types#  skos: http://www.w3.org/2004/02/skos/core#  xsd: http://www.w3.org/2001/XMLSchema# " class="wf-loading" data-ng-app="app">\n')

In [6]:
# grab the rest of the lines with 'list'
# (the lines already read with 'next' are not included)
# note 'charset=utf-8' in the output

lines = list(cur)
lines[:7]

[b'  <head>\n',
 b'    <meta charset="utf-8" />\n',
 b'<script>(function(i,s,o,g,r,a,m){i["GoogleAnalyticsObject"]=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)})(window,document,"script","https://www.google-analytics.com/analytics.js","ga");ga("create", "UA-18290390-1", {"cookieDomain":"auto"});ga("set", "anonymizeIp", true);ga("send", "pageview");</script>\n',
 b'<meta name="title" content="Homepage | Columbia University in the City of New York" />\n',
 b'<link rel="shortlink" href="https://www.columbia.edu/content/" />\n',
 b'<link rel="canonical" href="https://www.columbia.edu/content/" />\n',
 b'<meta name="Generator" content="Drupal 8 (https://www.drupal.org)" />\n']

In [7]:
# or read lines with a for loop
# 'with' closes the connection even though we stop reading early

with urllib.request.urlopen(cu) as cur:
    for j, line in enumerate(cur):
        # line is a 'bytes' object, not a 'str' object
        print(j, line)
        if j == 20:
            break


0 b'<!DOCTYPE html>\n'
1 b'<html  lang="en" dir="ltr" prefix="content: http://purl.org/rss/1.0/modules/content/  dc: http://purl.org/dc/terms/  foaf: http://xmlns.com/foaf/0.1/  og: http://ogp.me/ns#  rdfs: http://www.w3.org/2000/01/rdf-schema#  schema: http://schema.org/  sioc: http://rdfs.org/sioc/ns#  sioct: http://rdfs.org/sioc/types#  skos: http://www.w3.org/2004/02/skos/core#  xsd: http://www.w3.org/2001/XMLSchema# " class="wf-loading" data-ng-app="app">\n'
2 b'  <head>\n'
3 b'    <meta charset="utf-8" />\n'
4 b'<script>(function(i,s,o,g,r,a,m){i["GoogleAnalyticsObject"]=r;i[r]=i[r]||function(){(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)})(window,document,"script","https://www.google-analytics.com/analytics.js","ga");ga("create", "UA-18290390-1", {"cookieDomain":"auto"});ga("set", "anonymizeIp", true);ga("send", "pageview");</script>\n'
5 b'<meta name="title" content=

# A simple web server

In [None]:
# will serve files in the current directory
# at localhost:port

import http.server
import socketserver

port = 8002

# url = http://localhost:8002

Handler = http.server.SimpleHTTPRequestHandler

# 'with' closes the listening socket when serve_forever() exits
# (e.g. when the cell is interrupted), instead of leaking it
with socketserver.TCPServer(("", port), Handler) as httpd:
    print("serving at port", port)
    httpd.serve_forever()

serving at port 8002


127.0.0.1 - - [03/Mar/2020 14:47:29] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [03/Mar/2020 14:47:30] code 404, message File not found
127.0.0.1 - - [03/Mar/2020 14:47:30] "GET /favicon.ico HTTP/1.1" 404 -


# Hit it 
[simple](http://localhost:8002)

# 'Real' python web servers
- two main ones are Django and Flask
- Django [doc](https://www.djangoproject.com)
- Flask [doc](http://flask.pocoo.org)