In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


In [2]:
import re
#compile the pattern once so it can be reused; re.I requests case-insensitive matching
my_regex = re.compile("[0-9]+", re.I)

In [3]:
from collections import defaultdict, Counter
lookup = defaultdict(int) #missing keys are created on first access using int()
my_counter = Counter() #starts out empty

In [4]:
#don't do this: 'import *' dumps every public name of the module into the
#current namespace and silently overwrites names you have already defined

match = 10
from re import * #rebinds 'match' to re.match
print(match)

<function match at 0x7f117f6e5488>


In [5]:
def double(x):
    """Return x multiplied by two."""
    doubled = x * 2
    return doubled

#functions are first-class objects in Python: they can be bound to
#variables and handed to other functions like any other value

def apply_to_one(f):
    """Call f with the single argument 1 and return whatever it returns."""
    result = f(1)
    return result

my_double = double #another name for the very same function object
x = apply_to_one(my_double) #apply_to_one invokes my_double(1)
print(x)

2


In [6]:
#lambdas are short anonymous functions, handy as one-off arguments

y = apply_to_one(lambda n: n + 4)
print(y)

5


In [7]:
#lambdas can be bound to names, but a 'def' is the better choice for that

another_double = lambda x: 2 * x #don't do this
def another_double(x): return 2 * x #do this instead (this definition replaces the lambda above)

In [8]:
#function parameters can have default arguments

def my_print(message="my default message"):
    """Print message, falling back to a stock message when none is given."""
    text = message
    print(text)

In [9]:
#can specify argument by name

def subtract(a=0, b=0):
    """Return a minus b; both operands default to 0."""
    difference = a - b
    return difference

subtract(0, 5) #positional arguments
subtract(b=5) #keyword argument; a keeps its default, so the result is the same

-5

In [10]:
#Python uses backslashes to encode special characters

tab_string = "\t" #a single tab character
len(tab_string)

1

In [11]:
#if you actually want the backslash itself, use a raw string: r"..."

not_tab_string = r"\t" #two characters: a backslash and the letter 't'
len(not_tab_string)

2

In [12]:
#make multi-line strings using triple double quotes;
#the line breaks inside the quotes become part of the string

multi_line_string = """This is the first line
and this is the second line
and this is the third line"""

In [13]:
#handle Python exceptions with 'try' and 'except'

try:
    print(0/0) #0/0 raises ZeroDivisionError before print is ever called
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


In [14]:
#materialise a range into a list

x = [*range(10)]
print(x)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [15]:
#negative indexes count from the end of the list
nine = x[-1] #last element
eight = x[-2] #next-to-last element

In [16]:
#square brackets with a colon slice lists
first_three = x[:3] #indexes 0, 1 and 2
three_to_end = x[3:] #index 3 through the last element
last_three = x[-3:] #the final three elements
copy_of_x = x[:] #a new list holding every element of x

In [17]:
#check list membership with the 'in' operator

1 in [1,2,3] #evaluates to True

True

In [18]:
#concatenate lists; extend modifies the list in place

x = [1,2,3]
x.extend([4,5,6]) #x is now [1,2,3,4,5,6]
x

[1, 2, 3, 4, 5, 6]

In [19]:
#if you don't want to modify the original list, use '+' to build a new one

x = [1,2,3]
y = x + [4,5,6] #x is still [1,2,3]
y

[1, 2, 3, 4, 5, 6]

In [20]:
#most frequently we just append to the list one item at a time
x = [1,2,3]
x.append(0) #adds a single element at the end
x

[1, 2, 3, 0]

In [21]:
#lists can be unpacked when you know how many elements they contain

x, y = [1,2] #x=1, y=2

#it is common to use an underscore for a value you're going to throw away
_, y = [1,2] #y=2; the first element is ignored

In [22]:
#tuples are the immutable cousins of lists

my_list = [1,2]
my_tuple = (1,2)
other_tuple = 3,4 #the parentheses are optional

try:
    my_tuple[1] = 3 #item assignment on a tuple raises TypeError
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [23]:
#tuples are a convenient way to return multiple values from a function

def sum_and_product(x, y):
    """Return the pair (x + y, x * y)."""
    total = x + y
    product = x * y
    return total, product

sp = sum_and_product(2,3) #the whole tuple: (5, 6)
s, p = sum_and_product(5, 10) #unpacked: s=15, p=50

In [24]:
#tuples (and lists) can be used for multiple assignment:

x, y = 1, 2 #x=1, y=2
x, y = y, x #Pythonic way to swap variables; now x=2, y=1

In [25]:
#dictionaries associate values with keys

empty_dict = {}
grades = {"Ben": 100, "Mark": 50} #dictionary literal

In [26]:
#look up the value for a key using square brackets

bens_grade = grades["Ben"]

#but you'll get a KeyError if you ask for a key that's not in the dictionary

try:
    peters_grade = grades["Peter"]
except KeyError:
    print("no grade for Peter!")

no grade for Peter!


In [27]:
#check for the existence of a key using 'in'

ben_has_grade = "Ben" in grades
print(ben_has_grade)

True


In [28]:
#dict.get returns a default value (instead of raising an exception)
#when you look up a key that's not in the dictionary

bens_grade = grades.get("Ben", 0) #the stored grade, because the key exists
peters_grade = grades.get("Peter", 0) #0, because the key is missing
no_ones_grade = grades.get("No One") #the default default is None

In [29]:
#assign key-value pairs using the same square brackets

grades["Ben"] = 80 #replaces the existing entry
grades["Peter"] = 100 #adds a new entry
num_students = len(grades) #equals 3 (Ben, Mark and Peter)

In [30]:
#use dictionaries to represent structured data

tweet = {
 "user" : "joelgrus",
 "text" : "Data Science is Awesome",
 "retweet_count" : 100,
 "hashtags" : ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

#in Python 3 these three calls return views of the dict, not lists
tweet_keys = tweet.keys() #view of the keys
tweet_values = tweet.values() #view of the values
tweet_items = tweet.items() #view of the (key, value) tuples

"user" in tweet_keys #True
"user" in tweet #True, and the more Pythonic spelling
"joelgrus" in tweet_values #True, but has to scan the values

True

In [31]:
#imagine you're trying to count the words in a document
#create a dictionary in which the keys are words and the values are counts
#increment the count if the word is already in the dictionary, otherwise add it

document = [] #placeholder: an empty list of words

word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

#could also just increment and handle the exception raised by a missing key
word_counts = {}
for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1

#or use get, which supplies 0 for missing keys
word_counts = {}
for word in document:
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1

In [32]:
#all the above are pretty ugly, which is why we use defaultdict
#it is like a normal dict but has a default value built in

from collections import defaultdict

word_counts = defaultdict(int) # int() produces 0
for word in document:
    word_counts[word] += 1
    
#other common defaults are list (produces empty list) or dict (produces empty dict)
#you can also supply your own zero-argument function, such as:

dd_pair = defaultdict(lambda: [0,0])
dd_pair[2][1] = 1 #key 2 is missing, so it is first created as [0,0]
print(dd_pair.items())

dict_items([(2, [0, 1])])


In [33]:
#a Counter turns a sequence of values into a defaultdict(int)-like object
#that maps keys to counts; it is mainly used to make histograms

from collections import Counter
c = Counter([0,1,2,0])
c

Counter({0: 2, 1: 1, 2: 1})

In [34]:
#can count the words in a document with:

word_counts = Counter(document) #document must already be a list of words, not one long string

#Counter has a 'most_common' method that is frequently useful:
for word, count in word_counts.most_common(10): #the 10 highest counts
    print(word,count)

In [35]:
#a set is a data structure that holds a collection of distinct elements

s = set()
s.add(1) #s = {1}
s.add(1) #s = {1} still; adding a duplicate changes nothing

In [36]:
#why sets?
#first, the 'in' operation is very fast on sets
#second, they make it easy to find the distinct items in a collection
stopwords = ["a", "an", "at", "etc"]

"zip" in stopwords #False, but has to check every element

stopwords_set = set(stopwords)

"zip" in stopwords_set #False, and very fast

False

In [37]:
#perform actions conditionally using if / elif / else
if 1 > 2:
    message = "1 is bigger than 2?"
elif 1 > 3:
    message = "elif is short for else if"
else: #both conditions above are false, so this branch runs
    message = "when all else fails, use else"

In [38]:
#a one-line ternary if-then-else is also available
x = 5
parity = "even" if x % 2 == 0 else "odd" #"odd", because 5 % 2 is 1

In [39]:
#Python has a while loop
x = 0
while x < 10:
    x += 1 #the loop ends once x reaches 10

In [40]:
#but most of the time we'll use 'for' and 'in'
for x in range(1): #range(1) yields only 0, so the body runs once
    x = 1

In [41]:
#use continue and break for more complex logic
for x in range(10):
    if x == 3:
        continue #go immediately to the next iteration
    if x == 5:
        break #quit the loop entirely
    print(x, end=' ') #prints 0, 1, 2 and 4 on one line

0 1 2 4 

In [42]:
#booleans in Python are capitalised: True and False
one_is_less_than_two = 1 < 2 #True
true_equals_false = True == False #False

In [43]:
#Python uses None to indicate a nonexistent value, similar to null in other languages
x = None
print(x == None) #True, but not Pythonic
print(x is None) #True, and the Pythonic way to test for None

True
True


In [44]:
#the following values are all "falsy"
False
None
[] #empty list
{} #empty dict
set() #empty set
"" #empty string
0
0.0 #only this last expression is echoed as the cell's output

0.0

In [45]:
def some_function_that_returns_a_string():
    """Stand-in for any call that yields a (possibly empty) string."""
    return "string"

s = some_function_that_returns_a_string()

#guard against indexing into an empty string
first_char = s[0] if s else ""

In [46]:
#a simpler way of doing the same is
s = ""
first_char = s and s[0]
#'and' returns its second value when the first is "truthy",
#and the first value when it's not
print(first_char) #prints an empty line because s is ""




In [47]:
#similarly, if x is either a number or possibly None
safe_x = x or 0 #always a number: 0 stands in when x is None (or 0)

In [48]:
#Python also has an 'all' function that returns True only when
#every element is truthy
all([True, 1, {3}]) #True: each of the three elements is truthy

True

In [49]:
#and an 'any' function that returns True when at least one element is truthy
any(["", {}, False]) #False: every element is falsy

False

In [50]:
#Python can sort a list in place or return a new sorted list
x = [4,1,2,3]
y = sorted(x) #y = [1,2,3,4]; x is unchanged, still [4,1,2,3]
x.sort() #now x = [1,2,3,4]

In [51]:
#to sort from largest to smallest, pass 'reverse=True'
#to compare the results of a function instead of the elements themselves,
#pass that function as 'key'

#sort the list by absolute value from largest to smallest
x = sorted([-4,1,-2,3], reverse=True, key=abs) #x = [-4,3,-2,1]

#sort words and counts from highest count to lowest
word_counts = {"abacus":1, "banana":2, "crab":0}
wc = sorted(word_counts.items(), reverse=True, key=lambda item: item[1])
print(wc)

[('banana', 2), ('abacus', 1), ('crab', 0)]


In [52]:
#for list -> list transformations use list comprehensions
even_numbers = [x for x in range(5) if x % 2 == 0] #[0, 2, 4]
squares = [x*x for x in range(5)] #[0, 1, 4, 9, 16]
even_squares = [x*x for x in even_numbers] #[0, 4, 16]

In [53]:
#comprehensions can build dictionaries and sets too
square_dict = {x : x*x for x in range(5)} #{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
square_set = {x*x for x in [-1, 1]} #{1}

In [54]:
#if you don't use the value from the list, the convention is to use an underscore
zeroes = [0 for _ in even_numbers] #same length as even_numbers

In [55]:
#list comprehensions can have multiple fors:
pairs = [(x, y)
         for x in range(10)
         for y in range(10)] #100 pairs: (0,0), (0,1), ... (9,8), (9,9)
#later 'for's can use the results of earlier ones

In [56]:
increasing_pairs = [(x, y)
                    for x in range(10)
                    for y in range(x+1, 10)] #y starts just above x
#only pairs with x < y
print(increasing_pairs[:5])

[(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)]


In [57]:
#generators and iterators

#the problem with lists is that they can grow very big
#e.g. list(range(1000000)) builds an actual list of 1 million elements
#if you only need to deal with them one at a time this is inefficient

#a generator is something you can iterate over (using 'for') 
#but the values are produced only as needed (lazily)

def lazy_range(n):
    """Yield 0, 1, ..., n-1 one value at a time instead of building a list."""
    current = 0
    while True:
        if not current < n:
            return
        yield current
        current += 1

In [58]:
for i in lazy_range(10): #each value is produced only when the loop asks for it
    print(i, end=' ')
    
#Python 3's built-in 'range' is lazy in the same way

0 1 2 3 4 5 6 7 8 9 

In [59]:
#a second way to create generators is by using comprehensions
#that are wrapped in parentheses
lazy_evens_below_20 = (i for i in lazy_range(20) if i % 2 == 0)

In [60]:
#randomness
import random

four_uniform_randoms = [random.random() for _ in range(4)]

#random.random() produces numbers uniformly in the interval [0, 1)

In [61]:
#the random module actually produces pseudorandom numbers:
#they are determined by an internal state that random.seed can set
random.seed(1)
print(random.random())
print(random.random())
random.seed(1) #reset the seed ...
print(random.random()) #... and the same two numbers come out again
print(random.random())

0.13436424411240122
0.8474337369372327
0.13436424411240122
0.8474337369372327


In [62]:
#random.randrange takes 1 or 2 arguments and returns an element
#chosen randomly from the corresponding range()
random.randrange(10) #chosen from [0, ..., 9]
random.randrange(3,6) #chosen from [3, 4, 5]

4

In [63]:
#random.shuffle randomly reorders the elements of a list in place
up_to_ten = list(range(10))
print(up_to_ten)
random.shuffle(up_to_ten) #returns None; up_to_ten itself is rearranged
print(up_to_ten)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[2, 4, 0, 9, 6, 5, 3, 8, 7, 1]


In [64]:
#use random.choice to pick one element at random from a list
my_best_friend = random.choice(["A", "B", "C"])

In [65]:
#use random.sample to pick elements without replacement (i.e. no duplicates)
lottery_numbers = list(range(60)) #0 through 59
winning_numbers = random.sample(lottery_numbers, 6) #six distinct numbers
print(winning_numbers)

[57, 53, 24, 27, 38, 48]


In [66]:
#to sample with replacement (i.e. duplicates allowed)
#just call random.choice multiple times
#a range is already a sequence, so it can be handed to random.choice directly
#instead of being copied into a new list on every iteration
four_with_replacement = [random.choice(range(10)) for _ in range(4)]
print(four_with_replacement)

[0, 7, 4, 3]


In [67]:
#regular expressions are a way of searching text
import re
print(all([ #prints True because every check below is truthy:
    not re.match("a", "cat"), #re.match only looks at the start, and "cat" doesn't start with "a"
    re.search("a", "cat"),    #"cat" does have an "a" in it
    not re.search("c", "dog"), #"dog" doesn't have a "c" in it
    3 == len(re.split("[ab]", "carbs")), #splitting on a or b gives ['c', 'r', 's']
    "R-D-" == re.sub("[0-9]", "-", "R2D2") #replace digits with dashes
]))

True


In [68]:
#Python has classes that encapsulate data and the functions that operate on it
#as an example, create our own 'Set' class
#it needs to be able to add items,
#remove items,
#and check whether it already contains a value
#all of these will be "member" functions, called with a dot after the Set object

class Set:
    """A minimal set built on a dict: the dict's keys are the members."""

    def __init__(self, values=None):
        """
        Build a new Set, optionally seeded from an iterable.
        Use like: s1 = Set() for an empty set or s2 = Set([1,2,2,3]) for values
        """
        self.dict = {}
        if values is None:
            return
        for item in values:
            self.add(item)

    def __repr__(self):
        """String representation: the text 'Set: ' followed by the dict's keys view."""
        members = self.dict.keys()
        return "Set: {}".format(members)

    def add(self, value):
        """Record value as a member by storing it as a key (the stored value is always True)."""
        self.dict[value] = True

    def contains(self, value):
        """Return True when value is one of the dict's keys."""
        return value in self.dict

    def remove(self, value):
        """Delete value from the set; raises KeyError when it is not a member."""
        del self.dict[value]

In [69]:
#the object can then be used like this
s = Set([1,2,3])
s.add(4)
print(s.contains(4)) #True
s.remove(3)
print(s.contains(3)) #False
print(s) #printing goes through Set.__repr__

True
False
Set: dict_keys([1, 2, 4])


In [70]:
#sometimes we want to partially apply a function, i.e. fix some of its
#arguments, to create a new function
#consider a function of two variables

def exp(base, power):
    """Return base raised to power."""
    result = base ** power
    return result

#we want to use it to create a function of one variable, 'two_to_the',
#whose input is a power and whose output is exp(2, power)
#the clumsy way to do it:

def two_to_the(power):
    """Return 2 raised to power."""
    return exp(base=2, power=power)

#a better way is to use functools.partial:

import functools
two_to_the = functools.partial(exp, 2) #now a function of one variable
print(two_to_the(3))

8


In [71]:
#partial can also fill in later arguments if you specify them by name
square_of = functools.partial(exp, power=2) #base is still supplied by the caller
print(square_of(3))

9


In [72]:
#can use map, reduce and filter to provide functional alternatives
#to list comprehensions

#In Python 3+, many functions that iterate over iterables return
#iterators themselves. In most cases this ends up saving memory
#and should make things go faster.
#HOWEVER, it means we need to convert the results into lists

def double(x):
    """Return x multiplied by two."""
    return x * 2

xs = [1,2,3,4]
twice_xs = [double(x) for x in xs] #list comprehension
print(twice_xs)
twice_xs = list(map(double, xs)) #same as above
print(twice_xs)
list_doubler = functools.partial(map, double) #function that doubles a list
twice_xs = list(list_doubler(xs)) #again, same as above
print(twice_xs)

[2, 4, 6, 8]
[2, 4, 6, 8]
[2, 4, 6, 8]


In [73]:
#map accepts several iterables and passes their corresponding elements
#to the function as separate arguments

def multiply(x, y):
    """Return the product of x and y."""
    return x * y

products = [*map(multiply, [1,2], [4,5])] #[1*4, 2*5] = [4,10]

print(products)

[4, 10]


In [74]:
#similarly, filter does the work of a list-comprehension 'if'

def is_even(x):
    """Report whether x leaves no remainder when divided by 2."""
    remainder = x % 2
    return remainder == 0

x_evens = [x for x in xs if is_even(x)] #list comprehension with an 'if'
print(x_evens)
x_evens = list(filter(is_even, xs)) #same as above
print(x_evens)
list_evener = functools.partial(filter, is_even) #function that filters a list
x_evens = list(list_evener(xs)) #again, same as above
print(x_evens)

[2, 4]
[2, 4]
[2, 4]


In [75]:
#reduce combines the first two elements of a list, then that result
#with the third, then that result with the fourth, etc.

x_product = functools.reduce(multiply, xs)
print(x_product) #[1,2,3,4] -> 1*2*3*4 = 24
list_product = functools.partial(functools.reduce, multiply) #function that multiplies out a list
x_product = list_product(xs) #same as above
print(x_product)

24
24


In [76]:
#sometimes you want to iterate over a list and use both its elements
#and their indexes

documents = ["hello", "world", "how", "are", "you"]

def do_something(i=None, s="default"):
    """Placeholder worker: returns s upper-cased; the index i is accepted but unused."""
    return s.upper()

#not Pythonic
for i in range(len(documents)):
    document = documents[i]
    do_something(i, document)
    
#also not Pythonic
i = 0
for document in documents:
    do_something(i, document)
    i += 1
    
#the Pythonic solution is enumerate, which produces (index, element) tuples
for i, document in enumerate(documents):
    do_something(i, document)
    
#if you just want the indexes:
for i in range(len(documents)): do_something(i) #not Pythonic

for i, _ in enumerate(documents): do_something(i) #Pythonic

In [77]:
#you can zip two or more lists together
#zip transforms multiple lists into a single sequence of tuples of
#corresponding elements

list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
list(zip(list1, list2)) #zip returns an iterator in Python 3, so wrap it in list() to see the pairs

[('a', 1), ('b', 2), ('c', 3)]

In [78]:
#can also "unzip" a list with a small trick
#the asterisk performs argument unpacking: the elements of 'pairs' are passed
#to zip as individual arguments, i.e. zip(('a', 1), ('b', 2), ('c', 3))

pairs = [('a', 1), ('b', 2), ('c', 3)]
letters, numbers = list(zip(*pairs))
print(letters)
print(numbers)

('a', 'b', 'c')
(1, 2, 3)


In [79]:
#argument unpacking works with any function

def add(a, b):
    """Return the sum of a and b."""
    return a + b

add(1,2) #returns 3
add(*[1,2]) #the list is unpacked into a and b, so this also returns 3

3

In [80]:
#args and kwargs
#a higher-order function that takes a function f and returns a new function
#whose result is twice whatever f returns

def doubler(f):
    """Wrap a one-argument function f so that its return value is doubled."""
    def g(x):
        inner = f(x)
        return 2 * inner
    return g

def f1(x):
    """Return x plus one."""
    return x + 1

g = doubler(f1)
print(g(3))
print(g(-1))

8
0


In [81]:
#the doubler above, however, breaks when f takes more than one argument
#what we need is a way to write a function that accepts arbitrary arguments

def magic(*args, **kwargs):
    """Print the positional arguments (a tuple) and the keyword arguments (a dict)."""
    for label, received in (("unnamed args", args), ("keyword args", kwargs)):
        print(label, received)

magic(1, 2, key1='word1', key2='word2')

unnamed args (1, 2)
keyword args {'key1': 'word1', 'key2': 'word2'}


In [82]:
#argument unpacking also works the other way around, when calling a function

x_y_list = [1,2] #supplies the positional arguments x and y
z_dict = {'z':3} #supplies 3 for the keyword argument 'z' of 'alt_magic'

def alt_magic(x, y, z):
    """Print each argument with its name, then return their sum."""
    for name, value in (("x", x), ("y", y), ("z", z)):
        print(name, value)
    return x + y + z

print(alt_magic(*x_y_list, **z_dict))

x 1
y 2
z 3
6


In [83]:
def doubler_correct(f):
    """
    Return a wrapper that calls f and doubles its result.

    Works no matter what kind of inputs f expects: the wrapper accepts any
    positional and keyword arguments and forwards them to f unchanged.
    """
    @functools.wraps(f) #keep f's __name__ and docstring on the wrapper
    def h(*args, **kwargs):
        #whatever arguments h is supplied, pass them straight through to f
        return 2 * f(*args, **kwargs)
    return h

def f2(x, y):
    """Return the sum of x and y."""
    total = x + y
    return total

g = doubler_correct(f2) #rebinds g to the two-argument wrapper
print(g(1,2)) # 2 * (1 + 2) = 6

6
