In [1]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


In [2]:
import re
#compile the pattern once so it can be reused; "[0-9]+" matches a run of one or more digits
#NOTE(review): re.I (ignore case) has no effect on a digits-only pattern -- presumably it is here only to show how flags are passed
my_regex = re.compile("[0-9]+", re.I)

In [3]:
from collections import defaultdict, Counter
lookup = defaultdict(int) #missing keys are filled in with int(), i.e. 0
my_counter = Counter() #a Counter that starts out empty

In [4]:
#don't do this: "from module import *" copies every public name of the module
#into the current namespace and silently overwrites any of your own variables
#that happen to share a name

match = 10
from re import * #rebinds 'match' to the function re.match
print(match) #prints the re.match function, not 10

<function match at 0x7f3774d9e2f0>


In [7]:
def double(x):
    """Return x multiplied by 2."""
    doubled = x * 2
    return doubled

#python functions are first-class so you can assign them to variables
#and pass into functions just like any other arguments

def apply_to_one(f):
    """Call the function f with 1 as its only argument and return the result."""
    result = f(1)
    return result

my_double = double #bind a second name to the same function object (nothing is called here)
x = apply_to_one(my_double) #apply_to_one calls my_double(1)
print(x) #prints 2

2


In [9]:
#lambdas are short anonymous functions; handy for passing a one-off function as an argument

y = apply_to_one(lambda x: x + 4) #the lambda is called with 1, so y = 1 + 4
print(y) #prints 5

5


In [10]:
#can assign lambdas to variables, but you should probably use 'def' instead

another_double = lambda x: 2 * x #don't do this
def another_double(x): #do this instead
    """Return twice x."""
    return 2 * x

In [11]:
#function parameters can have default arguments

def my_print(message="my default message"):
    """Print message, falling back to the default text when none is given."""
    print(message)

In [13]:
#can specify argument by name

def subtract(a=0, b=0):
    """Return a minus b; both arguments default to 0."""
    difference = a - b
    return difference

subtract(0, 5) #positional arguments: a=0, b=5 (not the cell's last expression, so its result is not displayed)
subtract(b=5) #keyword argument: a keeps its default of 0, so this is also 0 - 5

-5

In [14]:
#python uses backslashes to encode special characters

tab_string = "\t" #backslash-t is the escape sequence for a single tab character
len(tab_string) #1: the escape sequence is one character

1

In [15]:
#if you actually want the backslashes, use raw strings with r""

not_tab_string = r"\t" #raw string: the backslash and the 't' stay as two separate characters
len(not_tab_string) #2

2

In [17]:
#make multi-line strings using triple double quotes;
#the line breaks inside the quotes become newline characters in the string

multi_line_string = """This is the first line
and this is the second line
and this is the third line"""

In [19]:
#handle Python exceptions with 'try' and 'except'
#naming the specific exception type lets any other kind of error still surface

try:
    print(0/0) #0/0 raises ZeroDivisionError before print is ever called
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


In [21]:
#range(10) produces the numbers 0 through 9 lazily; list() turns them into an actual list

x = list(range(10))
print(x)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [22]:
#negative indexes count from the end of the list: -1 is the last element
#(the names below assume x is still list(range(10)) from the previous cell)
nine = x[-1] #last element
eight = x[-2] #next-to-last element

In [23]:
#square brackets with a colon slice lists; the start index is inclusive, the end exclusive
first_three = x[:3] #elements at indexes 0, 1 and 2
three_to_end = x[3:] #everything from index 3 onwards
last_three = x[-3:] #the final three elements
copy_of_x = x[:] #a new list holding the same elements (shallow copy)

In [24]:
#check list membership with the 'in' operator
#on a list this examines the elements one at a time, so it gets slower as the list grows

1 in [1,2,3]

True

In [26]:
#concatenate lists in place with extend

x = [1,2,3]
x.extend([4,5,6]) #modifies x itself and returns None
x

[1, 2, 3, 4, 5, 6]

In [27]:
#if you don't want to modify the original list, use + which builds a new list

x = [1,2,3]
y = x + [4,5,6] #x is still [1,2,3]
y

[1, 2, 3, 4, 5, 6]

In [29]:
#most frequently you just append to the list one item at a time
x = [1,2,3]
x.append(0) #adds a single element to the end, in place
x

[1, 2, 3, 0]

In [31]:
#can unpack lists if you know how many elements they contain
#(a ValueError is raised if the number of names and elements differ)

x, y = [1,2] #x=1, y=2

#common to use underscore for a value you're going to throw away
#note: in IPython/Jupyter '_' normally holds the last cell output; assigning to it shadows that
_, y = [1,2] #y=2; the first element is ignored

In [32]:
#tuples are like lists but immutable: they cannot be modified after creation

my_list = [1,2]
my_tuple = (1,2)
other_tuple = 3,4 #the parentheses are optional; the comma is what makes the tuple

try:
    my_tuple[1] = 3 #item assignment on a tuple raises TypeError
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [34]:
#tuples are a convenient way to return multiple values from functions

def sum_and_product(x, y):
    """Return the pair (x + y, x * y) as a tuple."""
    total = x + y
    product = x * y
    return total, product

sp = sum_and_product(2,3) #keeps the whole tuple: (5,6)
s, p = sum_and_product(5, 10) #unpacks the returned tuple: s=15, p=50

In [35]:
#tuples can be used for multiple assignment:

x, y = 1, 2 #x=1, y=2
x, y = y, x #pythonic way to swap variables: the right side is evaluated first, so now x=2, y=1

In [38]:
#dictionaries associate keys with values and let you look up the value for a given key

empty_dict = {} #an empty dictionary
grades = {"Ben": 100, "Mark": 50} #dictionary literal made of key: value pairs

In [43]:
#get values using a key in square brackets

bens_grade = grades["Ben"] #100 in the literal defined above

#but you'll get a KeyError if you ask for a key that's not in the dictionary

try:
    peters_grade = grades["Peter"] #"Peter" is not in the literal defined above, so this raises
except KeyError:
    print("no grade for Peter!")

no grade for Peter!


In [44]:
#can check the existence of a key using 'in' (it tests the keys, not the values)

ben_has_grade = "Ben" in grades
print(ben_has_grade)

True


In [46]:
#dict.get returns a default value (instead of raising an exception)
#when you look up a key that's not in the dictionary; it never modifies the dictionary

bens_grade = grades.get("Ben", 0) #100: the key exists, so the default is ignored
peters_grade = grades.get("Peter", 0) #0: the key is missing, so the default is returned
no_ones_grade = grades.get("No One") #None: the default used when no default is given

In [47]:
#assign key-value pairs using the same square brackets

grades["Ben"] = 80 #key already exists: replaces the old value
grades["Peter"] = 100 #key is new: adds a third entry
num_students = len(grades) #equals 3 (Ben, Mark, Peter)

In [48]:
#use dictionaries to represent structured data
#the values can be of different types: strings, a number and a list here

tweet = {
 "user" : "joelgrus",
 "text" : "Data Science is Awesome",
 "retweet_count" : 100,
 "hashtags" : ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

#in Python 3 these are view objects onto the dictionary, not lists (they were lists in Python 2)
tweet_keys = tweet.keys() #view of the keys
tweet_values = tweet.values() #view of the values
tweet_items = tweet.items() #view of (key, value) tuples

"user" in tweet_keys #True; a hash lookup in Python 3 (the slow list scan was Python 2 behaviour)
"user" in tweet #True; the more Pythonic spelling of the same test
"joelgrus" in tweet_values #True, but this one has to scan the values one by one

True

In [51]:
#imagine you're trying to count the words in a document
#create a dictionary in which the keys are words and the values are counts
#the three loops below are equivalent ways of handling a word seen for the first time

document = [] #placeholder: an empty list, so each loop below runs zero times

#approach 1: check membership first; increment if present, otherwise start at 1
word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

#approach 2: just try to increment and handle the KeyError raised for a missing key
word_counts = {}
for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1

#approach 3: use get so that a missing key is treated as a count of 0
word_counts = {}
for word in document:
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1

In [54]:
#all the above are pretty ugly, which is why we use defaultdict
#like a normal dict, but a missing key is filled in by calling the zero-argument
#function that was supplied when the defaultdict was created

from collections import defaultdict

word_counts = defaultdict(int) # int() produces 0
for word in document:
    word_counts[word] += 1 #a word seen for the first time starts from 0, so no membership check is needed
    
#other defaults are list (produces empty list) or dict (produces empty dict)
#can specify your own defaults by passing any zero-argument callable, such as:

dd_pair = defaultdict(lambda: [0,0])
dd_pair[2][1] = 1 #looking up the missing key 2 creates [0,0]; its second element is then set to 1
print(dd_pair.items())

dict_items([(2, [0, 1])])


In [55]:
#a Counter turns a sequence of values into a defaultdict(int)-like object
#that maps keys to counts, mainly used to make histograms

from collections import Counter
c = Counter([0,1,2,0]) #0 appears twice; 1 and 2 appear once each
c

Counter({0: 2, 1: 1, 2: 1})

In [56]:
#can count words in documents with:
#(document must be an iterable of words, e.g. the list returned by text.split();
#passing a plain string would count individual characters instead)

word_counts = Counter(document)

#counter has a 'most_common' method that is frequently useful:
for word, count in word_counts.most_common(10): #up to 10 (word, count) pairs, highest count first
    print(word,count)

In [57]:
#a set is a data structure that contains a collection of distinct elements

s = set() #an empty set ({} would create an empty dict instead)
s.add(1) #s = {1}
s.add(1) #s = {1} still: adding an element that is already present changes nothing

In [None]:
#why sets?
#first, the 'in' operation is very fast on sets
#second, they make it easy to find the distinct items in a collection