In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


# Paragraph ONE.ONE: Whitespace Formatting

In [1]:
# Indentation, not braces, delimits blocks: each nested level is one indent deeper.
for i in range(1, 6):
    print(i)                     # opens the body of the outer ("for i") loop
    for j in range(1, 6):
        print(j)                 # opens the body of the inner ("for j") loop
        print(i + j)             # closes the body of the inner loop
    print(i)                     # closes the body of the outer loop
print("done looping")            # unindented, so it runs once after both loops

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
done looping


In [3]:
# Whitespace is ignored inside parentheses, so a long expression can be
# wrapped across lines without any continuation character.
long_winded_computation = (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 +
                           13 + 14 + 15 + 16 + 17 + 18 + 19 + 20)

In [4]:
# Line breaks are also free inside brackets: both assignments build equal lists.
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# one inner list per line, purely for readability
easier_to_read_list_of_lists = [ [1, 2, 3],
                                 [4, 5, 6],
                                 [7, 8, 9] ]

In [5]:
# Outside brackets, a trailing backslash continues the statement on the next line.
two_plus_three = 2 + \
                 3

# Paragraph ONE.TWO: Modules

In [11]:
import re

# Pre-compile the digit-run pattern; IGNORECASE is the long name of the I flag.
my_regex = re.compile("[0-9]+", flags=re.IGNORECASE)

In [12]:
import re as regex

# The module is now reached through the shorter-to-remember alias "regex".
my_regex = regex.compile("[0-9]+", flags=regex.IGNORECASE)

In [13]:
import matplotlib.pyplot as plt

In [14]:
# Import specific names so they can be used without the module prefix.
from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

In [15]:
# Why "import *" is risky: it silently overwrites names you have already defined.
match = 10
from re import *    # uh oh, re exports a match function, which replaces our match
print(match)         # prints the re.match function object, not 10

<function match at 0x10ee34730>


# Paragraph ONE.THREE: Functions

In [17]:
def double(x):
    """Return the input multiplied by 2.

    A docstring like this one is optional; it is the place to explain
    what the function does.
    """
    doubled = x * 2
    return doubled

In [18]:
def apply_to_one(f):
    """Call ``f`` with the single argument 1 and return whatever it returns."""
    result = f(1)
    return result

my_double = double             # functions are ordinary objects: my_double is another name for double
x = apply_to_one(my_double)    # equals 2

In [19]:
y = apply_to_one(lambda n: n + 4)      # the anonymous function adds 4 to 1, so y equals 5

In [20]:
another_double = lambda x: 2 * x       # don't do this: a lambda bound to a name is better written with def

In [21]:
def another_double(x):                 # do this instead
    """Return 2 times x."""
    return 2 * x

In [23]:
def my_print(message="my default message"):
    """Print ``message``; when called without an argument, print the default text."""
    print(message)

my_print("hello")   # explicit argument -> prints 'hello'
my_print()          # no argument -> prints 'my default message'

hello
my default message


In [24]:
def subtract(a=0, b=0):
    """Return ``a`` minus ``b``; each operand defaults to 0 when omitted."""
    difference = a - b
    return difference

subtract(10, 5)  # positional arguments: returns 5
subtract(0, 5)   # returns -5
subtract(b=5)    # keyword argument, a keeps its default 0: same as previous


-5

# Paragraph ONE.FOUR: Strings

In [25]:
# Single and double quotes are interchangeable as string delimiters.
single_quoted_string = 'data science'
double_quoted_string = "data science"

In [26]:
# A backslash inside a normal string literal starts an escape sequence.
tab_string = "\t"       # represents the tab character
len(tab_string)         # is 1

1

In [27]:
# The r prefix makes a raw string, in which backslashes are kept literally.
not_tab_string = r"\t"  # represents the characters '\' and 't'
len(not_tab_string)     # is 2

2

In [28]:
# Triple quotes let one string literal span several lines; the line breaks
# become part of the string.
multi_line_string = """This is the first line.
and this is the second line
and this is the third line"""

# Paragraph ONE.FIVE: Exceptions

In [34]:
# Handle the error instead of letting it stop the program.
try:
    quotient = 0 / 0          # raises ZeroDivisionError, so the print below is skipped
    print(quotient)
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


# Paragraph ONE.SIX: Lists

In [37]:
# Lists are ordered collections and may mix element types.
integer_list = [1, 2, 3]
heterogeneous_list = ["string", 0.1, True]
list_of_lists = [integer_list, heterogeneous_list, []]   # lists can nest, and can be empty

list_length, list_sum = len(integer_list), sum(integer_list)   # 3 and 6

In [39]:
x = list(range(10))           # the list [0, 1, ..., 9]
zero, one = x[0], x[1]        # 0 and 1: indexing starts at 0
nine, eight = x[-1], x[-2]    # 9 and 8: negative indexes count back from the end
x[0] = -1                     # lists are mutable: x is now [-1, 1, 2, 3, ..., 9]

In [40]:
# Slicing with [start:end]: start is included, end is excluded, and either may
# be omitted. The expected values assume x is [-1, 1, 2, ..., 9].
first_three   = x[:3]               # [-1, 1, 2]
three_to_end = x[3:]                # [3, 4, ..., 9]
one_to_four = x[1:5]                # [1, 2, 3, 4]
last_three = x[-3:]                 # [7, 8, 9]
without_first_and_last = x[1:-1]    # [1, 2, ..., 8]
copy_of_x = x[:]                    # [-1, 1, 2, ..., 9], a new list with the same elements

In [41]:
# "in" tests list membership by examining the elements one at a time.
1 in [1, 2, 3]    # True
0 in [1, 2, 3]    # False (only this last expression is displayed by the cell)

False

# Paragraph ONE.SEVEN: Tuples

In [42]:
# Tuples are written like lists but cannot be modified after creation.
my_list = [1, 2]
my_tuple = (1, 2)
other_tuple = 3, 4  # the parentheses are optional
my_list[1] = 3      # my_list is now [1, 3]

try:
    my_tuple[1] = 3     # item assignment on a tuple raises TypeError
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [43]:
def sum_and_product(x, y):
    """Return the pair ``(x + y, x * y)`` as a tuple."""
    total = x + y
    product = x * y
    return total, product

sp = sum_and_product(2, 3)     # the whole tuple: equals (5, 6)
s, p = sum_and_product(5, 10)  # unpacked on assignment: s is 15, p is 50

In [44]:
# Multiple assignment: the right-hand side is evaluated first, then unpacked.
x, y = 1, 2     # now x is 1, y is 2
x, y = y, x     # Pythonic way to swap variables; now x is 2, y is 1

# Paragraph ONE.EIGHT: Dictionaries

In [45]:
# Dictionaries associate keys with values.
empty_dict = {}                         # Pythonic
empty_dict2 = dict()                    # less Pythonic
grades = { "Joel" : 80, "Tim" : 95 }    # dictionary literal
joels_grade = grades["Joel"]            # look up a value by key; equals 80

In [46]:
# Looking up a key that is not in the dict raises KeyError.
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("no grade for Kate!")

no grade for Kate!


In [47]:
# "in" checks whether a key is present without raising an exception.
joel_has_grade = "Joel" in grades     # True
kate_has_grade = "Kate" in grades     # False

In [49]:
# A dictionary is a simple way to represent a structured record.
tweet = {
    "user": "joelgrus",
    "text": "Data Science is Awesome",
    "retweet_count": 100,
    "hashtags": [
        "#data",
        "#science",
        "#datascience",
        "#awesome",
        "#yolo",
    ],
}

In [50]:
# NOTE: in Python 3 these methods return view objects rather than lists;
# wrap a view in list(...) when an actual list is needed.
tweet_keys   = tweet.keys()     # view of the keys
tweet_values = tweet.values()   # view of the values
tweet_items  = tweet.items()    # view of the (key, value) tuples

In [51]:
"user" in tweet_keys            # True; in Python 3 the keys view does a hash lookup, not a list scan
"user" in tweet                 # more Pythonic, tests the dict's keys directly
"joelgrus" in tweet_values      # True, but has to scan the values one by one

True

# Paragraph ONE.EIGHT.ONE: Defaultdict

In [17]:
import re
from pathlib import Path

# Split the text of test.txt into word tokens on runs of non-word characters.
# The pattern must be a raw string: in a plain literal '\W' is an invalid
# escape sequence, which Python reports with a warning.
# re.split yields an empty string when the text starts or ends with a
# separator; those are not words, so they are dropped.
document = [token
            for token in re.split(r'\W+', Path('test.txt').read_text())
            if token]

In [4]:
# Count words by checking for the key before updating it.
word_counts = {}
for word in document:
    if word not in word_counts:
        word_counts[word] = 1      # first occurrence of this word
    else:
        word_counts[word] += 1

In [5]:
# Count words by attempting the update and handling the missing-key error.
word_counts = {}
for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        # first occurrence: the key did not exist yet
        word_counts[word] = 1

In [6]:
# Count words with dict.get, which returns the given default (0) for a missing key.
word_counts = {}
for word in document:
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1

In [8]:
from collections import defaultdict

# A defaultdict calls its factory to create the value the first time a missing
# key is accessed, so no explicit existence check is needed.
word_counts = defaultdict(int)          # int() produces 0
for word in document:
    word_counts[word] += 1

In [9]:
dd_list = defaultdict(list)             # list() produces an empty list
dd_list[2].append(1)                    # key 2 is created on access; now dd_list contains {2: [1]}

In [10]:
dd_dict = defaultdict(dict)             # dict() produces an empty dict
dd_dict["Joel"]["City"] = "Seattle"     # { "Joel" : { "City" : "Seattle" } }

In [12]:
dd_pair = defaultdict(lambda: [0, 0])   # the factory can be any zero-argument callable
dd_pair[2][1] = 1                       # now dd_pair contains {2: [0,1]}

# Paragraph ONE.EIGHT.TWO: Counter

In [13]:
from collections import Counter
# A Counter turns a sequence of values into a dict-like mapping of value -> count.
c = Counter([0, 1, 2, 0])          # c is (basically) { 0 : 2, 1 : 1, 2 : 1 }

In [18]:
word_counts = Counter(document)         # counts every word of document in a single call

In [19]:
# most_common(10) yields (word, count) pairs for the ten most frequent words
for word, count in word_counts.most_common(10):
    print(word, count)

di 29
in 16
e 14
i 12
per 11
del 10
Italia 10
asilo 8
a 8
2 7


# Paragraph ONE.NINE: Sets

In [20]:
# A set is a collection of distinct elements.
s = set()
s.add(1)       # s is now { 1 }
s.add(2)       # s is now { 1, 2 }
s.add(2)       # s is still { 1, 2 }: adding an existing element changes nothing
x = len(s)     # equals 2

In [21]:
# Membership tests work on sets just as they do on lists.
y = 2 in s     # equals True
z = 3 in s     # equals False

In [23]:
# Build a large list of words, then compare membership tests on a list and a set.
stopwords_list = ["a","an","at"] + document + ["yet", "you"]

"zip" in stopwords_list     # False, but have to check every element

stopwords_set = set(stopwords_list)
"zip" in stopwords_set      # very fast to check: a set lookup is hash-based

False

In [24]:
# Converting to a set is a quick way to find the distinct items of a collection.
item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list)                # 6
item_set = set(item_list)                 # {1, 2, 3}
num_distinct_items = len(item_set)        # 3
distinct_item_list = list(item_set)       # [1, 2, 3] (sets are unordered, so do not rely on this order)

# Paragraph ONE.TEN: Control Flow
## if

In [25]:
# Conditions are tested top to bottom; the first true branch runs, otherwise else.
if 1 > 2:
    message = "if only 1 were greater than two..."
elif 1 > 3:
    message = "elif stands for 'else if'"
else:
    # both conditions above are false, so this branch is the one that executes
    message = "when all else fails use else (if you want to)"

In [26]:
parity = "even" if x % 2 == 0 else "odd"    # if-then-else written as a single expression

## while, for

In [32]:
# A while loop repeats its body until the condition becomes false.
x = 0
while x < 10:
    print(x, "is less than 10")
    x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [33]:
# The same output with for/in, which is the more common way to loop.
for x in range(10):
    print(x, "is less than 10")

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [34]:
# continue and break give finer control over a loop: this prints 0, 1, 2 and 4.
for x in range(10):
    if x == 3:
        continue  # skip the rest of this iteration and move on to the next one
    if x == 5:
        break     # leave the loop altogether
    print(x)


0
1
2
4


# Paragraph ONE.ELEVEN: True and False

In [35]:
# Booleans are written True and False, with a capital first letter.
one_is_less_than_two = 1 < 2          # equals True
true_equals_false = True == False     # equals False

In [37]:
# None represents a missing value; compare against it with "is", not "==".
x = None
print ( x == None )   # prints True, but is not Pythonic
print ( x is None )   # prints True, and is Pythonic

True
True


In [39]:
# Each of the following values is falsy, i.e. treated as False in a boolean context.
# (As bare expressions they do nothing; the cell only displays the last one.)
False
None
[]
{}
""
set()
0
0.0


0.0

In [43]:
def some_function_that_returns_a_string():
    """Return the fixed string "minnie"."""
    return "minnie"

In [44]:
s = some_function_that_returns_a_string()
# Index the string only when it is non-empty; an empty string is falsy.
first_char = s[0] if s else ""

In [45]:
# "and" returns its first operand when that is falsy, otherwise its second.
first_char = s and s[0]
# "or" returns its first operand when that is truthy, otherwise its second.
safe_x = x or 0

In [46]:
# all() is True when every element is truthy; any() when at least one is.
all([True, 1, { 3 }])   # True
all([True, 1, {}])      # False, {} is falsy
any([True, 1, {}])      # True, True is truthy
all([])                 # True, no falsy elements in the list
any([])                 # False, no truthy elements in the list

False