# Basics

## Whitespace Formatting

In [1]:
for i in [1, 2, 3, 4, 5]:
    print(i)
    for j in [1, 2, 3, 4, 5]:
        print(j)
        print(i + j)
    print(i)
print("done looping")

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
done looping


In [2]:
# Inside parentheses, whitespace (including newlines) is ignored, so a long
# expression can be spread over several lines.
long_winded_computation = (
    1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10
    + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20
)
long_winded_computation

210

In [3]:
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
list_of_lists

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [4]:
easier_to_read_list_of_lists = [ [1, 2, 3],
                                 [4, 5, 6],
                                 [7, 8, 9] ]
easier_to_read_list_of_lists

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [5]:
two_plus_three = 2 + \
                 3
two_plus_three

5

In [6]:
for i in [1, 2, 3, 4, 5]:
    # notice the blank line
    print(i)

1
2
3
4
5


## Modules

In [7]:
import re
my_regex = re.compile("[0-9]+", re.I)
my_regex

re.compile(r'[0-9]+', re.IGNORECASE|re.UNICODE)

In [8]:
import re as regex
my_regex = regex.compile("[0-9]+", regex.I)
my_regex

re.compile(r'[0-9]+', re.IGNORECASE|re.UNICODE)

In [9]:
import matplotlib.pyplot as plt

In [10]:
from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

lookup
my_counter

Counter()

In [11]:
match = 10
from re import *
print(match)

<function match at 0x7fa7918ff0d0>


## Arithmetic

In [12]:
from __future__ import division

## Functions

In [13]:
def double(x):
    """Return ``x`` multiplied by 2.

    A docstring such as this one is optional; it is the place to explain
    what the function does.
    """
    doubled = x * 2
    return doubled

double(400)

800

In [14]:
def apply_to_one(f):
    """Call ``f`` with the single argument 1 and return whatever it returns."""
    result = f(1)
    return result

my_double = double
x = apply_to_one(my_double)

x

2

In [15]:
y = apply_to_one(lambda x: x + 4)

y

5

In [16]:
# Discouraged: binding a lambda to a name.
another_double = lambda x: 2 * x


# Preferred: an ordinary ``def`` statement does the same job.
def another_double(x):
    """Return twice ``x``."""
    return 2 * x

In [17]:
def my_print(message="my default message"):
    """Print ``message``; with no argument, print the default text.

    Examples:
        my_print("hello")   # prints: hello
        my_print()          # prints: my default message
    """
    print(message)


my_print()

my default message


In [18]:
def subtract(a=0, b=0):
    """Return ``a - b``; both arguments default to 0 and may be passed by name."""
    difference = a - b
    return difference

print(subtract(10, 5)) # returns 5
print(subtract(0, 5)) # returns -5
print(subtract(b=5))

5
-5
-5


In [19]:
single_quoted_string = 'data science'
double_quoted_string = "data science"

print(single_quoted_string)
print(double_quoted_string)

data science
data science


In [20]:
tab_string = "\t"
len(tab_string)

1

In [21]:
not_tab_string = r"\t"
len(not_tab_string)

2

In [22]:
multi_line_string = """This is the first line.
and this is the second line
and this is the third line"""

print(multi_line_string)

This is the first line.
and this is the second line
and this is the third line


## Exceptions

In [23]:
try:
    print(0 / 0)
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


## Lists

In [24]:
integer_list = [1, 2, 3]
heterogeneous_list = ["string", 0.1, True]
list_of_lists = [ integer_list, heterogeneous_list, [] ]
list_length = len(integer_list)
list_sum = sum(integer_list)

print(integer_list)  
print(heterogeneous_list)
print(list_of_lists)  
print(list_length) 
print(list_sum) 

[1, 2, 3]
['string', 0.1, True]
[[1, 2, 3], ['string', 0.1, True], []]
3
6


In [25]:
x = range(10)
zero = x[0]
one = x[1]
nine = x[-1]
eight = x[-2]

print(x) 
print(zero) 
print(one) 
print(nine) 
print(eight) 

range(0, 10)
0
1
9
8


In [26]:
first_three = x[:3]
three_to_end = x[3:]
one_to_four = x[1:5]
last_three = x[-3:]
without_first_and_last = x[1:-1]
copy_of_x = x[:]

print(first_three) 
print(three_to_end)
print(one_to_four) 
print(last_three) 
print(without_first_and_last)
print(copy_of_x)

range(0, 3)
range(3, 10)
range(1, 5)
range(7, 10)
range(1, 9)
range(0, 10)


In [27]:
a = 1 in [1, 2, 3]
b = 0 in [1, 2, 3]

print(a)
print(b)

True
False


In [28]:
x = [1, 2, 3]
x.extend([4, 5, 6])
x

[1, 2, 3, 4, 5, 6]

In [29]:
x = [1, 2, 3]
y = x + [4, 5, 6]
y 

[1, 2, 3, 4, 5, 6]

In [30]:
x = [1, 2, 3]
x.append(0)
x

[1, 2, 3, 0]

In [31]:
y = x[-1]
z = len(x)

print(y)
print(z)

0
4


In [32]:
x, y = [1, 2]

print(x)
print(y)

1
2


In [33]:
_, y = [1, 2]
print(_)
print(y)

1
2


## Tuples

In [34]:
my_list = [1, 2]
my_tuple = (1, 2)
other_tuple = 3, 4
my_list[1] = 3

print(my_list)
print(my_tuple) 
print(other_tuple)
print(my_list[1])
print(other_tuple[0], other_tuple[1])

[1, 3]
(1, 2)
(3, 4)
3
3 4


In [35]:
try:
    my_tuple[1] = 3
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [36]:
def sum_and_product(x, y):
    """Return the tuple ``(x + y, x * y)``.

    Returning a tuple is a convenient way to hand back several values at once.
    """
    total = x + y
    product = x * y
    return total, product

sp = sum_and_product(2, 3)    # equals (5, 6)
s, p = sum_and_product(5, 10) # s is 15, p is 50

In [37]:
x, y = 1, 2
x, y = y, x

print(x, y)

2 1


## Dictionaries

In [38]:
# Two equivalent ways to build an empty dict, plus a dict literal.
empty_dict = {}
empty_dict2 = dict()
grades = { "Joel" : 80, "Ali" : 95 }

print(empty_dict)
print(empty_dict2)  # previously printed empty_dict a second time by mistake
print(grades)

{}
{}
{'Joel': 80, 'Ali': 95}


In [39]:
joels_grade = grades["Joel"]
print(joels_grade)

80


In [40]:
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("no grade for Kate!")

no grade for Kate!


In [41]:
joel_has_grade = "Joel" in grades
kate_has_grade = "Kate" in grades

print(joel_has_grade)
print(kate_has_grade)

True
False


In [42]:
joels_grade = grades.get("Joel", 0)
kates_grade = grades.get("Kate", 0)
no_ones_grade = grades.get("No One")

print(joels_grade)

80


In [43]:
grades["Tim"] = 99
grades["Kate"] = 100
num_students = len(grades)
num_students

4

In [44]:
tweet = {
    "user" : "joelgrus",
    "text" : "Data Science is Awesome",
    "retweet_count" : 100,
    "hashtags" : ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

tweet_keys = tweet.keys()
tweet_values = tweet.values()
tweet_items = tweet.items()

print("user" in tweet_keys)
print("user" in tweet)
print("joelgrus" in tweet_values)

True
True
True


### defaultdict

In [45]:
document = ["ali", "ali", "jim"]

In [46]:
word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1
        
word_counts

{'ali': 2, 'jim': 1}

In [47]:
word_counts = {}
for word in document:
    try:
        word_counts[word] += 1
    except KeyError:
        word_counts[word] = 1
word_counts

{'ali': 2, 'jim': 1}

In [48]:
word_counts = {}
for word in document:
    previous_count = word_counts.get(word, 0)
    word_counts[word] = previous_count + 1
word_counts

{'ali': 2, 'jim': 1}

In [49]:
from collections import defaultdict

word_counts = defaultdict(int)
for word in document:
    word_counts[word] += 1
    
word_counts

defaultdict(int, {'ali': 2, 'jim': 1})

In [50]:
# A defaultdict calls its factory to create the value for any missing key.
dd_list = defaultdict(list)            # list() produces an empty list
dd_list[2].append(1)                   # now dd_list contains {2: [1]}

dd_dict = defaultdict(dict)            # dict() produces an empty dict
dd_dict["Joel"]["City"] = "Seattle"    # { "Joel" : { "City" : "Seattle" }}

dd_pair = defaultdict(lambda: [0, 0])  # each missing key starts as a fresh [0, 0]
dd_pair[2][1] = 1                      # now dd_pair contains {2: [0,1]}

### Counter

In [51]:
from collections import Counter
c = Counter([0, 1, 2, 0])
c

Counter({0: 2, 1: 1, 2: 1})

In [52]:
word_counts = Counter(document)

## Sets

In [53]:
s = set()
s.add(1)
s.add(2)
s.add(2)
x = len(s)
y = 2 in s
z = 3 in s

print(s)
print(x)
print(z)

{1, 2}
2
False


In [54]:
hundreds_of_other_words = ["hundreds", "of", "other", "words"]
stopwords_list = ["a","an","at"] + hundreds_of_other_words + ["yet", "you"]
"of" in stopwords_list

True

In [55]:
stopwords_set = set(stopwords_list)
"zip" in stopwords_set

False

In [56]:
item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list)
item_set = set(item_list)
num_distinct_items = len(item_set)
distinct_item_list = list(item_set)

print(item_list)
print(num_items)
print(item_set)
print(num_distinct_items)
print(distinct_item_list)

[1, 2, 3, 1, 2, 3]
6
{1, 2, 3}
3
[1, 2, 3]


## Control Flow

In [57]:
if 1 > 2:
    message = "if only 1 were greater than two..."
elif 1 > 3:
    message = "elif stands for 'else if'"
else:
    message = "when all else fails use else (if you want to)"

message

'when all else fails use else (if you want to)'

In [58]:
parity = "even" if x % 2 == 0 else "odd"
parity

'even'

In [59]:
x = 0
while x < 10:
    print(x, "is less than 10")
    x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [60]:
for x in range(10):
    print (x, "is less than 10")

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [61]:
# Prints 0, 1, 2 and 4: 3 is skipped by `continue`, and the loop stops at 5.
for x in range(10):
    if x == 3:
        continue # go immediately to the next iteration
    if x == 5:
        break    # quit the loop entirely
    print(x)

0
1
2
4


## Truthiness

In [62]:
one_is_less_than_two = 1 < 2
true_equals_false = True == False

In [63]:
x = None
print (x == None)
print (x is None)

True
True


In [64]:
'''
Python lets you use any value where it expects a Boolean. The following are all “Falsy”:
• False
• None
• [] (an empty list )
• {} (an empty dict )
• ""
• set()
• 0
• 0.0
'''

'\nPython lets you use any value where it expects a Boolean. The following are all “Falsy”:\n• False\n• None\n• [] (an empty list )\n• {} (an empty dict )\n• ""\n• set()\n• 0\n• 0.0\n'

In [65]:
s = "string"
if s:
    first_char = s[0]
else:
    first_char = ""

# A simpler way of doing the same is:
first_char = s and s[0]

first_char

's'

In [66]:
safe_x = x or 0
safe_x

0

In [67]:
a = all([True, 1, { 3 }])
b = all([True, 1, {}])
c = any([True, 1, {}])
d = all([])
e = any([])

print(a)
print(b)
print(c)
print(d)
print(e)

True
False
True
True
False


# The Not-So-Basics

## Sorting

In [68]:
x = [4,1,2,3]
y = sorted(x)
x.sort()
x

[1, 2, 3, 4]

In [69]:
x = sorted([-4,1,-2,3], key=abs, reverse=True)
x

[-4, 3, -2, 1]

## List Comprehensions

In [70]:
even_numbers = [x for x in range(5) if x % 2 == 0]
squares = [x * x for x in range(5)]
even_squares = [x * x for x in even_numbers]

print(even_numbers)
print(squares)
print(even_squares)

[0, 2, 4]
[0, 1, 4, 9, 16]
[0, 4, 16]


In [71]:
square_dict = { x : x * x for x in range(5) }
square_set = { x * x for x in [1, -1] }

print(square_dict)
print(square_set)

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
{1}


In [72]:
zeroes = [0 for _ in even_numbers]
print(zeroes)

[0, 0, 0]


In [73]:
pairs = [(x, y)
        for x in range(10)
        for y in range(10)]

# 100 pairs (0,0) (0,1) ... (9,8), (9,9)

In [74]:
increasing_pairs = [(x, y)
                    for x in range(10)
                    for y in range(x + 1, 10)]
increasing_pairs

[(0, 1),
 (0, 2),
 (0, 3),
 (0, 4),
 (0, 5),
 (0, 6),
 (0, 7),
 (0, 8),
 (0, 9),
 (1, 2),
 (1, 3),
 (1, 4),
 (1, 5),
 (1, 6),
 (1, 7),
 (1, 8),
 (1, 9),
 (2, 3),
 (2, 4),
 (2, 5),
 (2, 6),
 (2, 7),
 (2, 8),
 (2, 9),
 (3, 4),
 (3, 5),
 (3, 6),
 (3, 7),
 (3, 8),
 (3, 9),
 (4, 5),
 (4, 6),
 (4, 7),
 (4, 8),
 (4, 9),
 (5, 6),
 (5, 7),
 (5, 8),
 (5, 9),
 (6, 7),
 (6, 8),
 (6, 9),
 (7, 8),
 (7, 9),
 (8, 9)]

## Generators and Iterators

In [75]:
def lazy_range(n):
    """Yield 0, 1, 2, ... one value at a time for as long as the value is < n."""
    counter = 0
    while True:
        if not counter < n:
            return
        yield counter
        counter += 1

for i in lazy_range(10):
    print(i)

0
1
2
3
4
5
6
7
8
9


In [76]:
def natural_numbers():
    """Yield 1, 2, 3, ... without end; the caller decides when to stop."""
    current = 1
    while True:
        yield current
        current = current + 1

lazy_evens_below_20 = (i for i in lazy_range(20) if i % 2 == 0)
lazy_evens_below_20

<generator object <genexpr> at 0x7fa764060888>

In [77]:
import random
four_uniform_randoms = [random.random() for _ in range(4)]
four_uniform_randoms

[0.06157116248937067,
 0.2222817634832338,
 0.923165196025376,
 0.2905130697970104]

In [78]:
random.seed(10)
print (random.random())
random.seed(10)
print (random.random())

0.5714025946899135
0.5714025946899135


In [79]:
print(random.randrange(10))
print(random.randrange(3, 6))

6
4


In [80]:
my_best_friend = random.choice(["Alice", "Bob", "Charlie"])
my_best_friend

'Charlie'

In [81]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers, 6)

print(lottery_numbers, winning_numbers)

range(0, 60) [0, 13, 29, 52, 31, 56]


In [82]:
four_with_replacement = [random.choice(range(10)) for _ in range(4)]
four_with_replacement

[4, 2, 0, 8]

## Regular Expressions

In [83]:
import re

all([
    not re.match("a", "cat"),
    re.search("a", "cat"),
    not re.search("c", "dog"),
    3 == len(re.split("[ab]", "carbs")),
    "R-D-" == re.sub("[0-9]", "-", "R2D2")
    ])

True

## Object-Oriented Programming

In [84]:
class Set:
    """A minimal set implemented on top of a dict whose keys are the members."""

    def __init__(self, values=None):
        """Create the set, adding every item of ``values`` when it is given."""
        # A value is in the Set if it's a key in this dictionary.
        self.dict = {}
        if values is not None:
            for value in values:
                self.add(value)

    def __repr__(self):
        """Return a string such as ``Set: [1, 2, 3]``."""
        # list(...) is required on Python 3: str() of a dict keys view would
        # render as "dict_keys([...])" rather than as a plain list.
        return "Set: " + str(list(self.dict.keys()))

    def add(self, value):
        """Add ``value``; adding an existing member leaves membership unchanged."""
        self.dict[value] = True

    def contains(self, value):
        """Return True if ``value`` is a member, False otherwise."""
        return value in self.dict

    def remove(self, value):
        """Remove ``value``; raises KeyError if it is not a member."""
        del self.dict[value]

In [85]:
s = Set([1,2,3])
s.add(4)
print(s.contains(4))
s.remove(3)
print(s.contains(3))

True
False


## Functional Tools

In [86]:
def exp(base, power):
    """Return ``base`` raised to ``power``."""
    return pow(base, power)

In [87]:
def two_to_the(power):
    """Return 2 raised to ``power`` by calling ``exp`` with a base of 2."""
    result = exp(2, power)
    return result

In [88]:
from functools import partial
two_to_the = partial(exp, 2)
print (two_to_the(3))

8


In [89]:
square_of = partial(exp, power=2)
print (square_of(3))

9


In [90]:
def double(x):
    """Return twice ``x``."""
    return 2 * x


xs = [1, 2, 3, 4]
twice_xs = [double(x) for x in xs]      # [2, 4, 6, 8]
# On Python 3 map() returns a lazy iterator, so wrap it in list() to get
# the list the examples describe.
twice_xs = list(map(double, xs))        # same as above
list_doubler = partial(map, double)     # *function* that doubles a list (lazily)
twice_xs = list(list_doubler(xs))       # again [2, 4, 6, 8]

In [91]:
def multiply(x, y):
    """Return the product of ``x`` and ``y``."""
    return x * y


# Given several iterables, map() passes one item from each to the function.
products = map(multiply, [1, 2], [4, 5])
products  # a lazy map object; list(products) is [1 * 4, 2 * 5] = [4, 10]

<map at 0x7fa79021d978>

In [92]:
def is_even(x):
    """Return True when ``x`` is even and False when it is odd."""
    remainder = x % 2
    return remainder == 0

In [93]:
x_evens = [x for x in xs if is_even(x)]   # [2, 4]
# On Python 3 filter() returns a lazy iterator, so wrap it in list() to get
# the list the examples describe.
x_evens = list(filter(is_even, xs))       # same as above
list_evener = partial(filter, is_even)    # *function* that filters a list (lazily)
x_evens = list(list_evener(xs))           # again [2, 4]

## enumerate

In [94]:
documents = ["ali", "bob"]
# not Pythonic
for i in range(len(documents)):
    document = documents[i]
    print(i, document)
# also not Pythonic
i = 0
for document in documents:
    print(i, document)
    i += 1

0 ali
1 bob
0 ali
1 bob


In [95]:
for i, document in enumerate(documents):
    print(i, document)

0 ali
1 bob


In [96]:
for i in range(len(documents)): print(i)
for i, _ in enumerate(documents): print(i)

0
1
0
1


## zip and Argument Unpacking

In [99]:
# zip pairs up the corresponding elements of its arguments.
list1 = ['a', 'b', 'c']
list2 = [1, 2, 3]
zip(list1, list2) # a lazy zip object; list(...) of it is [('a', 1), ('b', 2), ('c', 3)]

<zip at 0x7fa790217308>

In [102]:
pairs = [('a', 1), ('b', 2), ('c', 3)]
letters, numbers = zip(*pairs)
letters

('a', 'b', 'c')

In [107]:
# zip() is lazy on Python 3; list() materializes the transposed pairs.
a = list(zip(('a', 1), ('b', 2), ('c', 3)))  # [('a', 'b', 'c'), (1, 2, 3)]

In [113]:
def add(a, b):
    """Return ``a + b``."""
    total = a + b
    return total
add(1, 2)
add(*[1, 2])

3

## args and kwargs

In [115]:
def doubler(f):
    """Return a one-argument function that computes ``2 * f(x)``.

    The returned function accepts exactly one argument ``x``.
    """
    def doubled_f(x):
        inner_result = f(x)
        return 2 * inner_result

    return doubled_f

In [117]:
def f1(x):
    """Return ``x`` plus one."""
    incremented = x + 1
    return incremented

g = doubler(f1)
print (g(3))
print (g(-1))

8
0


In [122]:
def magic(*args, **kwargs):
    """Print the positional arguments (a tuple), then the keyword arguments (a dict)."""
    for label, captured in (("unnamed args:", args), ("keyword args:", kwargs)):
        print(label, captured)
magic(1, 2, key="word", key2="word2")

unnamed args: (1, 2)
keyword args: {'key': 'word', 'key2': 'word2'}


In [125]:
def other_way_magic(x, y, z):
    """Return ``x + y + z``."""
    partial_sum = x + y
    return partial_sum + z

x_y_list = [1, 2]
z_dict = { "z" : 3 }
print (other_way_magic(*x_y_list, **z_dict))

6


In [127]:
def doubler_correct(f):
    """Return a function that calls ``f`` and doubles its result.

    Works no matter what kind of inputs ``f`` expects, because the wrapper
    accepts arbitrary positional and keyword arguments.
    """

    def g(*args, **kwargs):
        """whatever arguments g is supplied, pass them through to f"""
        return 2 * f(*args, **kwargs)

    return g


# f2 was never defined in the notebook (it only existed as leftover kernel
# state), so this cell failed with NameError on a fresh kernel.
def f2(x, y):
    """Return ``x + y``; a two-argument function to wrap with doubler_correct."""
    return x + y


g = doubler_correct(f2)
print (g(1, 2)) # 6

6
