In [None]:
# Move to the temporary working directory
from os import chdir
chdir("/var/folders/qg/pwxx_zfd07x_rpw143q33c840000gn/T/python3io/")

# 

<a href="timethis.py">timethis.py</a>.  A utility function for
   making performance measurements.  Used in many of the code samples
   that follow.

In [None]:
"""
timethis.py

Author : David Beazley
         http://www.dabeaz.com
         Copyright (C) 2010

timethis is a utility library for making simple timing benchmarks.  A
single function timethis() is provided.   The function operates as
either a context manager or a decorator.  Here are some examples.

If you want to time a block of code, do this:

with timethis("Counting to a million"):
     n = 0
     while n < 1000000:
         n += 1

The string in quotes is a description that describes the code block
in question.   It will be printed in the output.

If you want to time a function, you can use a decorator:

@timethis
def count_to_a_million():
    n = 0
    while n < 1000000:
        n += 1

count_to_a_million()

All timing output is collected and not printed until a program
exits.  If any code block or function marked with timethis() is
executed more than once, timing measurements are collected 
and used to calculate a mean and standard deviation.
"""

import atexit
import functools
import math
import time
from collections import defaultdict
from contextlib import contextmanager

# Dictionary holding timing measurements
_stats = defaultdict(list)

# Exit processing to print performance results
def _printstats():
    """Print one summary line per timed label.

    For every key in ``_stats`` this prints the mean duration, the number of
    samples, and the population standard deviation, ordered by the key's
    string form.  Nothing is printed when no measurements were collected.

    Keys are description strings when timethis() is used as a context
    manager, but the decorated function object itself when it is used as a
    decorator.  Each key is therefore converted with ``str()`` before it is
    formatted: on Python 3.4+ a function object rejects the ``s`` format
    spec with a TypeError, which would abort the whole report.
    """
    if not _stats:
        return
    # Convert every key to its printable label exactly once.
    rows = sorted(((str(key), times) for key, times in _stats.items()),
                  key=lambda row: row[0])
    maxwidth = max(len(label) for label, _ in rows)
    for label, times in rows:
        if not times:
            # A label without samples has no mean; skip it instead of
            # dividing by zero.
            continue
        # Compute average and (population) standard deviation
        count = len(times)
        mean = sum(times) / float(count)
        stddev = math.sqrt(sum((x - mean) ** 2 for x in times) / count)
        print("{0:<{maxwidth}s} : {1:0.5f}s : N={2:5d} : stddev={3:0.5f}".format(
            label, mean, count, stddev, maxwidth=maxwidth))

atexit.register(_printstats)

# This utility function is used to perform timing benchmarks
def timethis(what):
    """Time a block of code or a function and record the elapsed time.

    Args:
        what: Either a description (typically a string) naming a code block,
            or a callable to be timed on every call.

    Returns:
        A context manager when ``what`` is not callable; otherwise a wrapper
        function that forwards its arguments to ``what`` and returns its
        result.

    Each measurement is appended to ``_stats[what]``.  The sample is recorded
    even when the timed code raises, and the exception still propagates.
    """
    @contextmanager
    def benchmark():
        start = time.time()
        try:
            yield
        finally:
            # Record the sample on both normal exit and exceptions; without
            # the finally clause a raising block would leave no measurement.
            _stats[what].append(time.time() - start)

    if callable(what):
        # Preserve the wrapped function's name and docstring.
        @functools.wraps(what)
        def timed(*args, **kwargs):
            with benchmark():
                return what(*args, **kwargs)
        return timed
    return benchmark()

# Example
if __name__ == '__main__':
    # Demonstration, run only when this file is executed as a script.
    # Nothing is printed here directly: each measurement is stored in
    # _stats and reported by the _printstats hook registered with atexit.

    # A single measurement: one sample under the label "count to ten million"
    with timethis("count to ten million"):
        n = 0
        while n < 10000000:
            n += 1

    # Repeated measurements: the same label is timed ten times, so the
    # report shows N=10 with a mean and standard deviation.
    for i in range(10):
        with timethis("count to one million"):
             n = 0
             while n < 1000000:
                 n += 1

    # A function call: used as a decorator, timethis records one sample per
    # call, keyed by the function object itself.
    @timethis
    def count_to_a_million():
        n = 0
        while n < 1000000:
            n += 1

    # Three calls -> three samples for count_to_a_million
    count_to_a_million()
    count_to_a_million()
    count_to_a_million()

# Introducing Python 3

<a href="printlinks.py">printlinks.py</a>.  A Python 2 program
   that simply prints all of the links on a specified HTML page fetched
   with urlopen().  Try converting this program to Python 3 using
   2to3.

In [None]:
# printlinks.py
#
# A Python 2 program that prints links on a web page. 
# Run as a command line tool
#
#    bash % python printlinks.py http://www.python.org
#
# Try running 2to3 on this program and converting it to Python 3.

import urllib.request, urllib.parse, urllib.error
import sys
from html.parser import HTMLParser

class LinkPrinter(HTMLParser):
    """HTML parser that prints the href value of every <a> start tag."""

    def handle_starttag(self, tag, attrs):
        """Print each 'href' attribute value when *tag* is an anchor.

        *attrs* is the list of (name, value) pairs supplied by HTMLParser.
        """
        if tag != 'a':
            return
        targets = [value for name, value in attrs if name == 'href']
        for target in targets:
            print(target)

# On Python 3 urlopen() returns bytes, but HTMLParser.feed() only accepts
# str.  Decode the page using the charset the server declared, falling back
# to UTF-8, and replace undecodable bytes rather than aborting.  The with
# block also closes the connection once the page has been read.
with urllib.request.urlopen(sys.argv[1]) as response:
    charset = response.headers.get_content_charset() or "utf-8"
    data = response.read().decode(charset, errors="replace")
LinkPrinter().feed(data)

# Working with Text

<a href="textop.py">textop.py</a>.  Performance timings of various
   text operations.  Try it with different versions of Python.

In [None]:
# textop.py
#
# A program that reads a large collection of text into memory 
# and performs various operations on it.   This should work in both
# Python 2 and Python 3.  Use it for a performance comparison.

NSAMPLES = 10
from timethis import timethis
import sys

# For Python 2, map range to xrange
try:
    range = xrange
except NameError:
    pass

# Read an Apache log file into memory and replicate it to make a large sample.
# The result should be a string with about 6 million characters in it.
# The with block closes the file as soon as its contents have been read.
with open("access-log", "rt") as logfile:
    logdata = logfile.read() * 10

# Test 1: Memory use
print("Size %d bytes" % sys.getsizeof(logdata))

# Test 2: Finding all lines using find() and slicing
with timethis("find lines"):
    index = 0
    while index < len(logdata):
        nextindex = logdata.find("\n", index)
        if nextindex < 0:
            # The last line has no trailing newline.  find() returned -1,
            # which would reset index to 0 and loop forever, so treat the
            # end of the data as the end of the line instead.
            nextindex = len(logdata)
        line = logdata[index:nextindex]
        index = nextindex + 1

# Test 3 : Split into lines
with timethis("line splitting"):
    lines = logdata.splitlines()

# Test 4 : Splitting on whitespace
with timethis("whitespace splitting"):
    fields = logdata.split()

# Test 5 : Regex pattern matching.
# The pattern is compiled outside the timed block, so only matching is timed.
import re
ip_pattern = re.compile(r"\d+\.\d+\.\d+\.\d+")
with timethis("regex pattern matching"):
    unique_ips = set()
    for m in ip_pattern.finditer(logdata):
        unique_ips.add(m.group())

# Test 6 : Iterate by characters
with timethis("iterate by character"):
    for c in logdata:
        pass

# Test 7 : Replace text
with timethis("Replace characters"):
    s = logdata.replace(" ", ":")

# Printing and Formatting

<a href="textformat.py">textformat.py</a>.  Examples of new-style
  formatting applied to a list of tuples in order to make a formatted table.

In [None]:
# textformat.py
#
# Different examples of text formatting illustrated by the output of
# table in different formats.

# The file stocks.csv has some CSV formatted stock market data
# "symbol",price,change,volume.   Read it into a list of tuples

# Parse stocks.csv into a list of (symbol, price, change, volume) tuples.
# The with block closes the file; blank lines (such as a trailing empty line)
# are skipped instead of failing with an IndexError.
stockdata = []
with open("stocks.csv") as csvfile:
    for line in csvfile:
        if not line.strip():
            continue
        fields = line.split(",")
        record = (fields[0].strip('"'), float(fields[1]), float(fields[2]), int(fields[3]))
        stockdata.append(record)

# Traditional string formatting

print("Traditional string formatting:")
for s in stockdata:
    print("%10s %10.2f %10.2f %10d" % s)

# Some new-style formatting examples
print("\nNew-style formatting:")
for s in stockdata:
    print("{0:10s} {1:10.2f} {2:10.2f} {3:10d}".format(*s))

# Field numbers may be omitted; fields are then filled in order
print("\nNew-style formatting with omitted fields")
for s in stockdata:
    print("{:10s} {:10.2f} {:10.2f} {:10d}".format(*s))

# '>' right-aligns the symbol (strings are left-aligned by default)
print("\nNew-style formatting with alignment:")
for s in stockdata:
    print("{0:>10s} {1:10.2f} {2:10.2f} {3:10d}".format(*s))

# The whole tuple is passed as argument 0 and indexed inside the template
print("\nNew-style formatting with indexing")
for s in stockdata:
    print("{0[0]:>10s} {0[1]:10.2f} {0[2]:10.2f} {0[3]:10d}".format(s))

# The column width is itself a replacement field supplied by keyword
WIDTH = 18
print("\nNew-style formatting with customizable width")
for s in stockdata:
    print("{0:{width}s} {1:{width}.2f} {2:{width}.2f} {3:{width}d}".format(*s, width=WIDTH))

<a href="textformat2.py">textformat2.py</a>.  Examples of new-style
  formatting applied to a list of dictionaries in order to make a formatted table.

In [None]:
# textformat2.py
#
# Different examples of text formatting.

# The file stocks.csv has some CSV formatted stock market data
# "symbol",price,change,volume.   Read it into a list of dictionaries

# Parse stocks.csv into a list of dictionaries keyed by field name.
# The with block closes the file; blank lines (such as a trailing empty line)
# are skipped instead of failing with an IndexError.
stockdata = []
with open("stocks.csv") as csvfile:
    for line in csvfile:
        if not line.strip():
            continue
        fields = line.split(",")
        record = {
            'name': fields[0].strip('"'),
            'price': float(fields[1]),
            'change': float(fields[2]),
            'volume': int(fields[3]),
        }
        stockdata.append(record)

# Traditional string formatting

print("Traditional string formatting:")
for s in stockdata:
    print("%(name)10s %(price)10.2f %(change)10.2f %(volume)10d" % s)

# Some new-style formatting examples
# The dictionary is expanded into keyword arguments
print("\nNew-style formatting:")
for s in stockdata:
    print("{name:>10s} {price:10.2f} {change:10.2f} {volume:10d}".format(**s))

# The dictionary is passed whole and looked up inside the template
print("\nNew-style formatting with dictionary lookups:")
for s in stockdata:
    print("{s[name]:>10s} {s[price]:10.2f} {s[change]:10.2f} {s[volume]:10d}".format(s=s))

<a href="textformat3.py">textformat3.py</a>.  Examples of new-style
  formatting applied to a list of instances in order to make a formatted table.

In [None]:
# textformat3.py
#
# Different examples of text formatting.

# The file stocks.csv has some CSV formatted stock market data
# "symbol",price,change,volume.   Read it into a list of named tuples

from collections import namedtuple
StockData = namedtuple("StockData", ["name", "price", "change", "volume"])

# Parse stocks.csv into a list of StockData instances.
# The with block closes the file; blank lines (such as a trailing empty line)
# are skipped instead of failing with an IndexError.
stockdata = []
with open("stocks.csv") as csvfile:
    for line in csvfile:
        if not line.strip():
            continue
        fields = line.split(",")
        record = StockData(fields[0].strip('"'), float(fields[1]), float(fields[2]), int(fields[3]))
        stockdata.append(record)

# Traditional string formatting

print("Traditional string formatting:")
for s in stockdata:
    print("%10s %10.2f %10.2f %10d" % (s.name, s.price, s.change, s.volume))

# Some new-style formatting examples
# Attributes of the instance are looked up inside the template
print("\nNew-style formatting:")
for s in stockdata:
    print("{s.name:>10s} {s.price:10.2f} {s.change:10.2f} {s.volume:10d}".format(s=s))

# Binary Data Handling

<a href="msgfrag.py">msgfrag.py</a>.  A comparison of joining byte
   fragments together using concatenation, join, and bytearray
   extension.

In [None]:
# msgfrag.py
#
# Three different techniques of forming a large message from fragments of bytes.

from timethis import timethis

# Workload shared by all three tests below
FRAGMENT_SIZE = 256      # length in bytes of each fragment
NUMBER_FRAGS  = 10000    # number of fragments assembled into one message

# A generator that creates byte fragments for us
def make_fragments(size,count):
    """Yield the same bytes fragment of b"x" repeated *size* times, *count* times.

    Nothing is yielded when *count* is zero or negative.
    """
    payload = b"x"*size
    remaining = count
    while remaining > 0:
        remaining -= 1
        yield payload

# Try byte concatenation
# Technique 1: grow an immutable bytes object with += for every fragment.
with timethis("Byte concatenation +="):
    msg = b""
    for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
        msg += chunk

# Try .join()
# Technique 2: collect the fragments in a list, then join them in one call.
with timethis("Joining a list of fragments"):
    msgparts = []
    for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
        msgparts.append(chunk)
    msg = b"".join(msgparts)

# Try bytearray.extend
# Technique 3: append each fragment in place to a mutable bytearray.
# Note that msg ends up as a bytearray here, not bytes.
with timethis("Extending a bytearray"):
    msg = bytearray()
    for chunk in make_fragments(FRAGMENT_SIZE, NUMBER_FRAGS):
        msg.extend(chunk)

<a href="structwrite.py">structwrite.py</a>.  Two techniques of
   writing binary data structures are compared.

In [None]:
# structwrite.py
#
# An example of writing a binary-packed data structure

from timethis import timethis
import struct
from random import random

# Create a million random (x,y) points
points = [(random(), random()) for _ in range(1000000)]

# Both tests write the same layout: an unsigned int holding the point count,
# followed by one pair of floats per point.  Each file is opened in a with
# block so it is closed even if a write fails; the close still happens
# inside the timed region, as before.

# Write out to a file using write() and struct.pack()
with timethis("Writing many small structs"):
    with open("pts1.bin", "wb") as f:
        f.write(struct.pack("I", len(points)))
        for p in points:
            f.write(struct.pack("ff", *p))

# Pack a bytearray and write it all at once
with timethis("Packing a bytearray and writing"):
    out = bytearray()
    out.extend(struct.pack("I", len(points)))
    for p in points:
        out.extend(struct.pack("ff", *p))
    with open("pts2.bin", "wb") as f:
        f.write(out)

# The io module

# System Interfaces

# Library Design Issues

# Feedback