## WHITESPACE FORMATTING

In [3]:
for i in [1, 2, 3, 4, 5]:
    print(i)
    for j in [1, 2, 3, 4, 5]:
        print(j)
        print(i + j)
    print(i)
print("done looping")

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
done looping


In [None]:
# Whitespace is ignored inside parentheses and brackets, which can be helpful for longwinded computations

In [4]:
long_winded_computation = (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 +
                           13 + 14 + 15 + 16 + 17 + 18 + 19 + 20)

In [None]:
# for making code easier to read

In [None]:
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [None]:
easier_to_read_list_of_lists = [[1, 2, 3],
                                [4, 5, 6],
                                [7, 8, 9]]

In [None]:
# use a backslash to indicate that a statement continues onto the next line, although we’ll rarely do this

## MODULES

In [5]:
import re
my_regex = re.compile("[0-9]+", re.I)

In [None]:
# If you already had a different re in your code, you could use an alias

In [6]:
import re as regex
my_regex = regex.compile("[0-9]+", regex.I)

In [None]:
# You might also do this if your module has an unwieldy name or if you’re going to be typing it a lot. For example, a standard convention when visualizing data with matplotlib is

In [None]:
import matplotlib.pyplot as plt
plt.plot(...)

In [None]:
# If you need a few specific values from a module, you can import them explicitly and use them without qualification

In [None]:
from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

## FUNCTIONS

In [1]:
def double(x):
    """Return x multiplied by two."""
    doubled = x * 2
    return doubled

In [None]:
# Python functions are first-class, which means that we can assign them to variables and pass them into functions just like any other arguments

In [2]:
def apply_to_one(f):
    """Call f with the single argument 1 and return whatever f returns."""
    argument = 1
    outcome = f(argument)
    return outcome

my_double = double
x = apply_to_one(my_double)

In [None]:
# It is also easy to create short anonymous functions, or lambdas

In [None]:
y = apply_to_one(lambda x: x + 4)

In [None]:
# You can assign lambdas to variables, although most people will tell you that you should just use def instead

In [None]:
another_double = lambda x: 2 * x       # don't do this
def another_double(x):
    """Do this instead"""
    return 2 * x

In [None]:
# Function parameters can also be given default arguments, which only need to be specified when you want a value other than the default

In [3]:
def my_print(message="my default message"):
    """Print message; when called with no argument, print the default text."""
    text_to_show = message
    print(text_to_show)
my_print("hello")   # prints 'hello'
my_print()          # prints 'my default message'

hello
my default message


In [None]:
# It is sometimes useful to specify arguments by name

In [4]:
def full_name(first="What's-his-name", last="Something"):
    """Return first and last joined by a single space."""
    name_parts = (first, last)
    return " ".join(name_parts)
full_name("Joel", "Grus")     # "Joel Grus"
full_name("Joel")             # "Joel Something"
full_name(last="Grus")        # "What's-his-name Grus"

"What's-his-name Grus"

# STRING

In [5]:
single_quoted_string = 'data science'
double_quoted_string = "data science"

In [None]:
# Python uses backslashes to encode special characters. For example

In [None]:
tab_string = "\t"       # represents the tab character
len(tab_string)         # is 1

In [None]:
# If you want backslashes as backslashes (which you might in Windows directory names or in regular expressions), you can create raw strings using r""

In [None]:
not_tab_string = r"\t"  # represents the characters '\' and 't'
len(not_tab_string)     # is 2

In [None]:
# You can create multiline strings using three double quotes

In [None]:
multi_line_string = """This is the first line.
and this is the second line
and this is the third line"""

# EXCEPTIONS

In [6]:
try:
    print(0 / 0)
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


# LIST

In [7]:
integer_list = [1, 2, 3]
heterogeneous_list = ["string", 0.1, True]
list_of_lists = [integer_list, heterogeneous_list, []]
list_length = len(integer_list)     # equals 3
list_sum    = sum(integer_list)     # equals 6

In [None]:
# You can get or set the nth element of a list with square brackets

In [8]:
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
zero = x[0]          # equals 0, lists are 0-indexed
one = x[1]           # equals 1
nine = x[-1]         # equals 9, 'Pythonic' for last element
eight = x[-2]        # equals 8, 'Pythonic' for next-to-last element
x[0] = -1            # now x is [-1, 1, 2, 3, ..., 9]

In [None]:
# You can also use square brackets to slice lists. The slice i:j means all elements from i
# (inclusive) to j (not inclusive). If you leave off the start of the slice, you’ll slice from
# the beginning of the list, and if you leave off the end of the slice, you’ll slice until the
# end of the list

In [9]:
first_three = x[:3]                 # [-1, 1, 2]
three_to_end = x[3:]                # [3, 4, ..., 9]
one_to_four = x[1:5]                # [1, 2, 3, 4]
last_three = x[-3:]                 # [7, 8, 9]
without_first_and_last = x[1:-1]    # [1, 2, ..., 8]
copy_of_x = x[:]                    # [-1, 1, 2, ..., 9]

In [None]:
# You can similarly slice strings and other “sequential” types.
# A slice can take a third argument to indicate its stride, which can be negative

In [10]:
every_third = x[::3]                 # [-1, 3, 6, 9]
five_to_three = x[5:2:-1]            # [5, 4, 3]

In [None]:
# Python has an in operator to check for list membership

In [None]:
1 in [1, 2, 3]    # True
0 in [1, 2, 3]    # False

In [None]:
# It is easy to concatenate lists together. If you want to modify a list in place, you can use extend to add items from another collection

In [11]:
x = [1, 2, 3]
x.extend([4, 5, 6])     # x is now [1, 2, 3, 4, 5, 6]

In [None]:
# If you don’t want to modify x, you can use list addition

In [12]:
x = [1, 2, 3]
y = x + [4, 5, 6]

In [None]:
# More frequently we will append to lists one item at a time

In [None]:
x = [1, 2, 3]
x.append(0)      # x is now [1, 2, 3, 0]
y = x[-1]        # equals 0
z = len(x)       # equals 4

# TUPLES

In [13]:
my_list = [1, 2]
my_tuple = (1, 2)
other_tuple = 3, 4
my_list[1] = 3      # my_list is now [1, 3]
try:
    my_tuple[1] = 3
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [None]:
# Tuples are a convenient way to return multiple values from functions

In [14]:
def sum_and_product(x, y):
    """Return a 2-tuple holding x + y and x * y, in that order."""
    total = x + y
    product = x * y
    return total, product
sp = sum_and_product(2, 3)     # sp is (5, 6)
s, p = sum_and_product(5, 10)  # s is 15, p is 50

In [None]:
# Tuples (and lists) can also be used for multiple assignment

In [15]:
x, y = 1, 2     # now x is 1, y is 2
x, y = y, x     # Pythonic way to swap variables; now x is 2, y is 1

# DICTIONARIES

In [16]:
empty_dict = {}                     # Pythonic
empty_dict2 = dict()                # less Pythonic
grades = {"Joel": 80, "Tim": 95}    # dictionary literal

In [None]:
# You can look up the value for a key using square brackets

In [17]:
joels_grade = grades["Joel"]

In [None]:
# But you’ll get a KeyError if you ask for a key that’s not in the dictionary

In [18]:
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("no grade for Kate!")

no grade for Kate!


In [None]:
# You can check for the existence of a key using in

In [19]:
joel_has_grade = "Joel" in grades     # True
kate_has_grade = "Kate" in grades     # False

In [None]:
# Dictionaries have a get method that returns a default value (instead of raising an exception) when you look up a key that’s not in the dictionary

In [20]:
joels_grade = grades.get("Joel", 0)   # equals 80
kates_grade = grades.get("Kate", 0)   # equals 0
no_ones_grade = grades.get("No One")  # default is None

In [None]:
# You can assign key/value pairs using the same square brackets

In [21]:
grades["Tim"] = 99                    # replaces the old value
grades["Kate"] = 100                  # adds a third entry
num_students = len(grades)            # equals 3

In [None]:
# you can use dictionaries to represent structured data

In [22]:
tweet = {
    "user" : "joelgrus",
    "text" : "Data Science is Awesome",
    "retweet_count" : 100,
    "hashtags" : ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

In [23]:
# Besides looking for specific keys, we can look at all of them

In [24]:
tweet_keys   = tweet.keys()     # iterable for the keys
tweet_values = tweet.values()   # iterable for the values
tweet_items  = tweet.items()    # iterable for the (key, value) tuples

In [25]:
"user" in tweet_keys            # True, but not Pythonic
"user" in tweet                 # Pythonic way of checking for keys
"joelgrus" in tweet_values      # True (slow but the only way to check)

True

## DEFAULTDICT

In [None]:
# Imagine that you’re trying to count the words in a document. An obvious approach is
# to create a dictionary in which the keys are words and the values are counts. As you
# check each word, you can increment its count if it’s already in the dictionary and add
# it to the dictionary if it’s not

In [None]:
word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

In [None]:
# You could also use the “forgiveness is better than permission” approach and just handle the exception from trying to look up a missing key

In [None]:
word_counts = {}
for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1

In [None]:
# A third approach is to use get, which behaves gracefully for missing keys

In [None]:
word_counts = {}
for word in document:
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1

In [None]:
# Every one of these is slightly unwieldy, which is why defaultdict is useful. A
# defaultdict is like a regular dictionary, except that when you try to look up a key it
# doesn’t contain, it first adds a value for it using a zero-argument function you
# provided when you created it. In order to use defaultdicts, you have to import them
# from collections

In [None]:
from collections import defaultdict
word_counts = defaultdict(int)          # int() produces 0
for word in document:
    word_counts[word] += 1

In [None]:
# They can also be useful with list or dict, or even your own functions

In [None]:
# Each defaultdict below creates the value for a missing key by calling the
# zero-argument factory it was constructed with.
dd_list = defaultdict(list)             # list() produces an empty list
dd_list[2].append(1)                    # now dd_list contains {2: [1]}
dd_dict = defaultdict(dict)             # dict() produces an empty dict
dd_dict["Joel"]["City"] = "Seattle"     # {"Joel" : {"City": "Seattle"}}
dd_pair = defaultdict(lambda: [0, 0])   # the lambda produces a fresh [0, 0] pair
dd_pair[2][1] = 1                       # now dd_pair contains {2: [0, 1]}

# COUNTERS

In [None]:
from collections import Counter
c = Counter([0, 1, 2, 0])

In [None]:
# This gives us a very simple way to solve our word_counts problem

In [None]:
# recall, document is a list of words
word_counts = Counter(document)

In [None]:
# A Counter instance has a most_common method that is frequently useful

In [None]:
# print the 10 most common words and their counts
for word, count in word_counts.most_common(10):
    print(word, count)

# SETS

In [27]:
primes_below_10 = {2, 3, 5, 7}

In [None]:
# However, that doesn’t work for empty sets, as {} already means “empty dict.” In that case you’ll need to use set() itself

In [28]:
s = set()
s.add(1)       # s is now {1}
s.add(2)       # s is now {1, 2}
s.add(2)       # s is still {1, 2}
x = len(s)     # equals 2
y = 2 in s     # equals True
z = 3 in s     # equals False

In [29]:
# We’ll use sets for two main reasons. The first is that in is a very fast operation on sets.
# If we have a large collection of items that we want to use for a membership test, a set
# is more appropriate than a list

In [None]:
stopwords_list = ["a", "an", "at"] + hundreds_of_other_words + ["yet", "you"]
"zip" in stopwords_list     # False, but have to check every element
stopwords_set = set(stopwords_list)
"zip" in stopwords_set      # very fast to check

In [None]:
# The second reason is to find the distinct items in a collection

In [None]:
item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list)                # 6
item_set = set(item_list)                 # {1, 2, 3}
num_distinct_items = len(item_set)        # 3
distinct_item_list = list(item_set)       # [1, 2, 3]

# CONTROL FLOW

In [31]:
if 1 > 2:
    message = "if only 1 were greater than two..."
elif 1 > 3:
    message = "elif stands for 'else if'"
else:
    message = "when all else fails use else (if you want to)"

In [None]:
# You can also write a ternary if-then-else on one line, which we will do occasionally

In [None]:
parity = "even" if x % 2 == 0 else "odd"

In [None]:
# Python has a while loop

In [None]:
x = 0
while x < 10:
    print(f"{x} is less than 10")
    x += 1

In [None]:
# although more often we’ll use for and in

In [None]:
# range(10) is the numbers 0, 1, ..., 9
for x in range(10):
    print(f"{x} is less than 10")

In [None]:
# If you need more complex logic, you can use continue and break

In [None]:
# Walk through 0..9, skipping 3 and stopping as soon as 5 is reached,
# so the loop prints 0, 1, 2, and 4.
for x in range(10):
    if x == 3:
        continue  # go immediately to the next iteration
    if x == 5:
        break     # quit the loop entirely
    print(x)

# TRUTHINESS

In [None]:
one_is_less_than_two = 1 < 2          # equals True
true_equals_false = True == False     # equals False

In [None]:
# Python uses the value None to indicate a nonexistent value. It is similar to other languages’ null

In [None]:
# Both assertions pass; they contrast equality with identity when testing for None.
x = None
assert x == None, "this is not the Pythonic way to check for None"
assert x is None, "this is the Pythonic way to check for None"

In [None]:
# Python has an all function, which takes an iterable and returns True precisely when
# every element is truthy, and an any function, which returns True when at least one element is truthy

In [None]:
all([True, 1, {3}])   # True, all are truthy
all([True, 1, {}])    # False, {} is falsy
any([True, 1, {}])    # True, True is truthy
all([])               # True, no falsy elements in the list
any([])               # False, no truthy elements in the list

# SORTING

In [None]:
x = [4, 1, 2, 3]
y = sorted(x)     # y is [1, 2, 3, 4], x is unchanged
x.sort()          # now x is [1, 2, 3, 4]

In [None]:
# By default, sort (and sorted) sort a list from smallest to largest based on naively comparing the elements to one another

In [None]:
# If you want elements sorted from largest to smallest, you can specify a reverse=True
# parameter. And instead of comparing the elements themselves, you can compare the results of a function that you specify with key

In [None]:
# sort the list by absolute value from largest to smallest
x = sorted([-4, 1, -2, 3], key=abs, reverse=True)  # is [-4, 3, -2, 1]
# sort the words and counts from highest count to lowest
wc = sorted(word_counts.items(),
            key=lambda word_and_count: word_and_count[1],
            reverse=True)

# List Comprehensions

In [None]:
even_numbers = [x for x in range(5) if x % 2 == 0]  # [0, 2, 4]
squares      = [x * x for x in range(5)]            # [0, 1, 4, 9, 16]
even_squares = [x * x for x in even_numbers]        # [0, 4, 16]

In [None]:
# You can similarly turn lists into dictionaries or sets

In [None]:
square_dict = {x: x * x for x in range(5)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
square_set  = {x * x for x in [1, -1]}      # {1}

In [None]:
# If you don’t need the value from the list, it’s common to use an underscore as the variable

In [None]:
zeros = [0 for _ in even_numbers]

In [None]:
# A list comprehension can include multiple fors

In [None]:
pairs = [(x, y)
         for x in range(10)
         for y in range(10)]   # 100 pairs (0,0) (0,1) ... (9,8), (9,9)

In [None]:
# and later fors can use the results of earlier ones

In [None]:
increasing_pairs = [(x, y)                       # only pairs with x < y,
                    for x in range(10)           # range(lo, hi) equals
                    for y in range(x + 1, 10)]   # [lo, lo + 1, ..., hi - 1]

# Automated Testing and Assert

In [None]:
# As data scientists, we’ll be writing a lot of code. How can we be confident our code is correct?
# One way is with types (discussed shortly), but another way is with automated tests.

In [None]:
# There are elaborate frameworks for writing and running tests, but in this book we’ll restrict ourselves to using assert statements
# which will cause your code to raise an AssertionError if your specified condition is not truthy

In [None]:
assert 1 + 1 == 2
assert 1 + 1 == 2, "1 + 1 should equal 2 but didn't"

In [None]:
# It’s not particularly interesting to assert that 1 + 1 = 2. What’s more interesting is to
# assert that functions you write are doing what you expect them to

In [None]:
def smallest_item(xs):
    """Return the minimum element of xs."""
    smallest = min(xs)
    return smallest
assert smallest_item([10, 20, 5, 40]) == 5
assert smallest_item([1, 0, -1, 2]) == -1

In [None]:
# Another less common use is to assert things about inputs to functions

In [None]:
def smallest_item(xs):
    """Return the minimum element of xs; a falsy (e.g. empty) xs fails the assertion."""
    # Check the input first so an empty argument fails with a descriptive message.
    assert xs, "empty list has no smallest item"
    smallest = min(xs)
    return smallest

# Object-Oriented Programming

In [None]:
# To define a class, you use the class keyword and a PascalCase name

In [32]:
class CountingClicker:
    """A clicker that keeps a running count.

    (A class can/should have a docstring, just like a function.)
    """

    # A class contains zero or more member functions. By convention, each takes
    # a first parameter, self, that refers to the particular class instance.

    # Normally, a class has a constructor, named __init__. It takes whatever
    # parameters you need to construct an instance of your class and does
    # whatever setup you need
    def __init__(self, count=0):
        """Start the clicker at count (0 unless specified)."""
        self.count = count

    # Notice that the __init__ method name starts and ends with double
    # underscores. These “magic” methods are sometimes called “dunder” methods
    # (doubleUNDERscore, get it?) and represent “special” behaviors.

    # Another such method is __repr__, which produces the string
    # representation of a class instance
    def __repr__(self):
        """Return a string of the form CountingClicker(count=N)."""
        return f"CountingClicker(count={self.count})"

    # And finally we need to implement the public API of our class
    def click(self, num_times=1):
        """Click the clicker some number of times."""
        self.count += num_times

    def read(self):
        """Return the current count."""
        return self.count

    def reset(self):
        """Set the count back to 0."""
        self.count = 0


# Although the constructor has a funny name, we construct instances of the
# clicker using just the class name
clicker1 = CountingClicker()           # initialized to 0
clicker2 = CountingClicker(100)        # starts with count=100
clicker3 = CountingClicker(count=100)  # more explicit way of doing the same

In [None]:
# Having defined it, let’s use assert to write some test cases for our clicker

In [None]:
clicker = CountingClicker()
assert clicker.read() == 0, "clicker should start with count 0"
clicker.click()
clicker.click()
assert clicker.read() == 2, "after two clicks, clicker should have count 2"
clicker.reset()
assert clicker.read() == 0, "after reset, clicker should be back to 0"

In [None]:
# Writing tests like these helps us be confident that our code is working the way it’s designed to,
# and that it continues to do so whenever we make changes to it.

In [None]:
# We’ll also occasionally create subclasses that inherit some of their functionality from a
# parent class. For example, we could create a non-reset-able clicker by using
# CountingClicker as the base class and overriding the reset method to do nothing

In [None]:
# A subclass inherits all the behavior of its parent class.
class NoResetClicker(CountingClicker):
    """A CountingClicker whose reset method does nothing.

    Every other method is inherited unchanged from CountingClicker.
    """

    def reset(self):
        """Leave the count untouched."""
        return None
clicker2 = NoResetClicker()
assert clicker2.read() == 0
clicker2.click()
assert clicker2.read() == 1
clicker2.reset()
assert clicker2.read() == 1, "reset shouldn't do anything"

# Iterables and Generators

In [None]:
# One way to create generators is with functions and the yield operator

In [None]:
def generate_range(n):
    """Lazily yield 0, 1, 2, ... for as long as the value stays below n."""
    current = 0
    while True:
        if not (current < n):
            return  # exhausted: the next value would not be below n
        yield current  # hand one value to the consumer, then resume here
        current += 1

In [None]:
# The following loop will consume the yielded values one at a time until none are left

In [None]:
for i in generate_range(10):
    print(f"i: {i}")

In [None]:
# With a generator, you can even create an infinite sequence

In [None]:
def natural_numbers():
    """Yield 1, 2, 3, ... without ever finishing."""
    value = 0
    while True:
        value += 1
        yield value

In [None]:
# A second way to create generators is by using for comprehensions wrapped in parentheses

In [None]:
evens_below_20 = (i for i in generate_range(20) if i % 2 == 0)

In [None]:
# Such a “generator comprehension” doesn’t do any work until you iterate over it (using for or next).
# We can use this to build up elaborate data-processing pipelines

In [None]:
# None of these computations *does* anything until we iterate
data = natural_numbers()
evens = (x for x in data if x % 2 == 0)
even_squares = (x ** 2 for x in evens)
even_squares_ending_in_six = (x for x in even_squares if x % 10 == 6)
# and so on

In [None]:
# Not infrequently, when we’re iterating over a list or a generator we’ll want not just the values but also their indices.
# For this common case Python provides an enumerate function, which turns values into pairs (index, value).

In [None]:
names = ["Alice", "Bob", "Charlie", "Debbie"]
# not Pythonic
for i in range(len(names)):
    print(f"name {i} is {names[i]}")
# also not Pythonic
i = 0
for name in names:
    print(f"name {i} is {names[i]}")
    i += 1
# Pythonic
for i, name in enumerate(names):
    print(f"name {i} is {name}")

# Randomness

In [33]:
import random
random.seed(10)  # this ensures we get the same results every time
four_uniform_randoms = [random.random() for _ in range(4)]

In [None]:
# The random module actually produces pseudorandom (that is, deterministic) numbers based on an internal state that you can set with
# random.seed if you want to get reproducible results

In [34]:
random.seed(10)         # set the seed to 10
print(random.random())  # 0.57140259469
random.seed(10)         # reset the seed to 10
print(random.random())  # 0.57140259469 again

0.5714025946899135
0.5714025946899135


In [None]:
# We’ll sometimes use random.randrange, which takes either one or two arguments and returns an element chosen randomly from the corresponding range

In [35]:
random.randrange(10)    # choose randomly from range(10) = [0, 1, ..., 9]
random.randrange(3, 6)  # choose randomly from range(3, 6) = [3, 4, 5]

4

In [None]:
# There are a few more methods that we’ll sometimes find convenient. For example, random.shuffle randomly reorders the elements of a list

In [36]:
up_to_ten = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
random.shuffle(up_to_ten)
print(up_to_ten)
# [7, 2, 6, 8, 9, 4, 10, 1, 3, 5]   (your results will probably be different)

[5, 6, 9, 2, 3, 7, 8, 4, 1, 10]


In [None]:
# If you need to randomly pick one element from a list, you can use random.choice

In [37]:
my_best_friend = random.choice(["Alice", "Bob", "Charlie"])

In [None]:
# And if you need to randomly choose a sample of elements without replacement (i.e., with no duplicates), you can use random.sample

In [38]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers, 6)  # [16, 36, 10, 6, 25, 9]

In [None]:
# To choose a sample of elements with replacement (i.e., allowing duplicates), you can just make multiple calls to random.choice

In [40]:
four_with_replacement = [random.choice(range(10)) for _ in range(4)]
print(four_with_replacement)

[6, 4, 4, 7]


# Regular Expressions

In [41]:
import re
re_examples = [                        # All of these are True, because
    not re.match("a", "cat"),              #  'cat' doesn't start with 'a'
    re.search("a", "cat"),                 #  'cat' has an 'a' in it
    not re.search("c", "dog"),             #  'dog' doesn't have a 'c' in it.
    3 == len(re.split("[ab]", "carbs")),   #  Split on a or b to ['c','r','s'].
    "R-D-" == re.sub("[0-9]", "-", "R2D2") #  Replace digits with dashes.
    ]
assert all(re_examples), "all the regex examples should be True"

In [None]:
# One important thing to note is that re.match checks whether the beginning of a string matches a regular expression,
# while re.search checks whether any part of a string matches a regular expression. At some point you will mix these two up and it will cause you grief.

# zip and Argument Unpacking

In [None]:
# The zip function transforms multiple iterables into a single iterable of tuples of corresponding elements

In [42]:
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
# zip is lazy, so you have to do something like the following
[pair for pair in zip(list1, list2)]    # is [('a', 1), ('b', 2), ('c', 3)]

[('a', 1), ('b', 2), ('c', 3)]

In [None]:
# You can also “unzip” a list using a strange trick

In [43]:
pairs = [('a', 1), ('b', 2), ('c', 3)]
letters, numbers = zip(*pairs)

In [None]:
# You can use argument unpacking with any function

In [44]:
def add(a, b):
    """Return a + b."""
    result = a + b
    return result
add(1, 2)      # returns 3
try:
    add([1, 2])
except TypeError:
    print("add expects two inputs")
add(*[1, 2])   # returns 3

add expects two inputs


3

# args and kwargs

In [None]:
# Let’s say we want to create a higher-order function that takes as input some function f
# and returns a new function that for any input returns twice the value of f

In [45]:
def doubler(f):
    """Return a one-argument function whose result is twice f's result."""
    def g(x):
        # f is captured from the enclosing call to doubler
        inner_value = f(x)
        return 2 * inner_value
    # Hand the wrapper back to the caller
    return g

In [None]:
# This works in some cases

In [46]:
def f1(x):
    """Return x + 1."""
    incremented = x + 1
    return incremented
g = doubler(f1)
assert g(3) == 8,  "(3 + 1) * 2 should equal 8"
assert g(-1) == 0, "(-1 + 1) * 2 should equal 0"

In [None]:
# However, it doesn’t work with functions that take more than a single argument

In [47]:
def f2(x, y):
    """Return x + y."""
    combined = x + y
    return combined
g = doubler(f2)
try:
    g(1, 2)
except TypeError:
    print("as defined, g only takes one argument")

as defined, g only takes one argument


In [None]:
# You could do all sorts of strange tricks with this; we will only use it to produce higher-order functions whose inputs can accept arbitrary arguments

In [48]:
def doubler_correct(f):
    """Wrap f so the wrapper returns twice f's result for any call signature."""
    def g(*args, **kwargs):
        """Forward all positional and keyword arguments to f, then double the result."""
        inner_value = f(*args, **kwargs)
        return 2 * inner_value
    return g
g = doubler_correct(f2)
assert g(1, 2) == 6, "doubler should work now"

# Type Annotations

In [49]:
def add(a, b):
    """Return a + b for any pair of operands that support the + operator."""
    combined = a + b
    return combined
assert add(10, 5) == 15,                  "+ is valid for numbers"
assert add([1, 2], [3]) == [1, 2, 3],     "+ is valid for lists"
assert add("hi ", "there") == "hi there", "+ is valid for strings"
try:
    add(10, "five")
except TypeError:
    print("cannot add an int to a string")

cannot add an int to a string


In [None]:
# whereas in a statically typed language our functions and objects would have specific types

In [50]:
def add(a: int, b: int) -> int:
    """Return a + b; the int annotations are hints and are not enforced at runtime."""
    result = a + b
    return result
add(10, 5)           # you'd like this to be OK
add("hi ", "there")  # you'd like this to be not OK

'hi there'

### How to Write Type Annotations

In [None]:
# The typing module provides a number of parameterized types that we can use to do just this

In [51]:
from typing import List  # note capital L
def total(xs: List[float]) -> float:
    """Return the sum of the numbers in xs (0 for an empty list)."""
    # Sum the argument; the original passed the function object itself to sum().
    return sum(xs)

In [None]:
# In such cases we will supply inline type hints

In [None]:
from typing import Optional
values: List[int] = []
best_so_far: Optional[float] = None  # allowed to be either a float or None

In [None]:
# The typing module contains many other types, only a few of which we’ll ever use

In [None]:
# the type annotations in this snippet are all unnecessary
from typing import Dict, Iterable, Tuple
# keys are strings, values are ints
counts: Dict[str, int] = {'data': 1, 'science': 2}
# lists and generators are both iterable
lazy = False  # set to True to build evens lazily with a generator instead of a list
evens: Iterable[int]
if lazy:
    evens = (x for x in range(10) if x % 2 == 0)
else:
    evens = [0, 2, 4, 6, 8]
# tuples specify a type for each element
triple: Tuple[int, float, int] = (10, 2.3, 5)

In [None]:
# Finally, since Python has first-class functions, we need a type to represent those as well. Here’s a pretty contrived example

In [None]:
from typing import Callable
# The type hint says that repeater is a function that takes
# two arguments, a string and an int, and returns a string.
def twice(repeater: Callable[[str, int], str], s: str) -> str:
    """Call repeater on s with a repeat count of 2 and return its result."""
    repeat_count = 2
    repeated = repeater(s, repeat_count)
    return repeated
def comma_repeater(s: str, n: int) -> str:
    """Return n copies of s separated by ', ' (empty string when n <= 0)."""
    copies = [s] * n
    separator = ', '
    return separator.join(copies)
assert twice(comma_repeater, "type hints") == "type hints, type hints"

In [None]:
# As type annotations are just Python objects, we can assign them to variables to make them easier to refer to

In [None]:
# Type aliases: annotations are ordinary objects, so they can be bound to names.
Number = int
Numbers = List[Number]
def total(xs: Numbers) -> Number:
    """Return the sum of the numbers in xs."""
    running_sum = sum(xs)
    return running_sum