# Chapter 2 - A Crash Course in Python

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

### Whitespace Formatting

In [2]:
for i in [1, 2, 3, 4, 5]:
    print(i)
    for j in [1, 2, 3, 4, 5]:
        print(j)
        print(i + j)
    print(i)
print("Done looping")

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
Done looping


In [3]:
# Parentheses let a single expression continue across lines. There must be no
# comma after the first operand, otherwise the result is a tuple, not a sum.
long_winded_computation = (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 +
                           17 + 18 + 19 + 20)

In [4]:
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [5]:
easier_to_read_list_of_lists = [[1, 2, 3],
                                [4, 5, 6],
                                [7, 8, 9]]

In [6]:
two_plus_three = 2 + \
                 3

In [7]:
for i in [1, 2, 3, 4, 5]:
    print(i)

1
2
3
4
5


### Modules

In [3]:
import re as regex

In [9]:
# my_regex = regex.compile("[0-9]+", regex.I)

In [10]:
# plt.plot(...)
# plt.show()

In [11]:
from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

### Functions

In [12]:
def double(x):
    """Return `x` multiplied by 2."""
    doubled = x * 2
    return doubled

In [13]:
double(3)

6

In [14]:
def apply_to_one(f):
    """Call the function `f` with the argument 1 and return its result."""
    result = f(1)
    return result

In [15]:
my_double = double
x = apply_to_one(my_double)

In [16]:
y = apply_to_one(lambda x: x + 4)

In [17]:
y

5

In [18]:
# another_double = lambda x: 2 * x -> DON'T DO THIS

In [19]:
def another_double(x):
    """Return twice `x`; a named `def` is preferred over assigning a lambda."""
    twice = 2 * x
    return twice

In [20]:
another_double(4)

8

In [21]:
def my_print(message = "my default message"):
    """Print `message`, falling back to a default text when none is given."""
    text = message
    print(text)

In [22]:
my_print()

my default message


In [23]:
my_print("Hello")

Hello


In [24]:
def full_name(first = "What's-his-name", last = "Something"):
    """Join a first and last name with a single space.

    Both parameters have placeholder defaults, so the function can be called
    with either, both, or neither argument (positionally or by keyword).
    """
    return first + " " + last

In [25]:
full_name("Joaquin", "Romero Flores")

'Joaquin Romero Flores'

### Strings

In [26]:
single_quoted_string = 'data science'
double_quoted_string = "data science"

In [27]:
tab_string = "\t"
len(tab_string)

1

In [28]:
not_tab_string = r"\t"
len(not_tab_string)

2

In [29]:
# Triple quotes allow a string literal to span several lines; the embedded
# newlines are part of the resulting string.
multi_line_string = """ This is the first line
and this is the second line
and this is the third line"""

In [30]:
first_name = "Joaquin"
last_name = "Romero"

In [31]:
full_name1 = first_name + " " + last_name
full_name2 = "{0} {1}".format(first_name, last_name)

In [32]:
full_name3 = f"{first_name} {last_name}"
full_name3

'Joaquin Romero'

### Exceptions

In [33]:
try:
    print(0 / 0)
except ZeroDivisionError:
    print("Cannot divide by zero")

Cannot divide by zero


### List

In [34]:
integer_list = [1, 2, 3]
heterogeneous_list = ["string", 0.1, True]
list_of_lists = [integer_list, heterogeneous_list, []]

In [35]:
list_length = len(integer_list)
list_length

3

In [36]:
list_sum = sum(integer_list)
list_sum

6

In [37]:
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [38]:
zero = x[0]
zero

0

In [39]:
one = x[1]
one

1

In [40]:
nine = x[-1]
nine

9

In [41]:
eight = x[-2]

In [42]:
x[0] = -1

In [43]:
x[0]

-1

In [44]:
first_three = x[:3]
first_three

[-1, 1, 2]

In [45]:
three_to_end = x[3:]
three_to_end

[3, 4, 5, 6, 7, 8, 9]

In [46]:
one_to_four = x[1:5]
one_to_four

[1, 2, 3, 4]

In [47]:
last_three = x[-3:]
last_three

[7, 8, 9]

In [48]:
without_first_and_last = x[1:-1]
without_first_and_last

[1, 2, 3, 4, 5, 6, 7, 8]

In [49]:
copy_of_x = x[:]
copy_of_x

[-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [50]:
every_three = x[::3]

In [51]:
five_to_three = x[5:2:-1]
five_to_three

[5, 4, 3]

In [52]:
1 in [1, 2, 3]

True

In [53]:
0 in [1, 2, 3]

False

In [54]:
x = [1, 2, 3]
x.extend([4, 5, 6])

In [55]:
x = [1, 2, 3]
y = x + [4, 5, 6]

In [56]:
y

[1, 2, 3, 4, 5, 6]

In [57]:
x = [1, 2, 3]

In [58]:
x.append(0)

In [59]:
y = x[-1]
y

0

In [60]:
z = len(x)
z

4

In [61]:
x, y = [1, 2]

In [62]:
_, y = [1, 2]

### Tuples

In [63]:
my_list = [1, 2]
my_tuple = (1, 2)
other_tuple = 3, 4

In [64]:
my_list[1] = 3

In [65]:
try:
    my_tuple[1] = 3
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [66]:
def sum_and_product(x, y):
    """Return the pair (x + y, x * y) as a tuple."""
    added = x + y
    multiplied = x * y
    return added, multiplied

In [67]:
sp = sum_and_product(2, 3)

In [68]:
s, p = sum_and_product(5, 10)

In [69]:
x, y = 1, 2
x, y = y, x

### Dictionaries

In [70]:
empty_dict = {}
empty_dict2 = dict()
grades = {"Joel": 80, "Tim": 95}

In [71]:
joels_grade = grades["Joel"]

In [72]:
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("No grade for Kate!")

No grade for Kate!


In [73]:
joel_has_grade = "Joel" in grades
joel_has_grade

True

In [74]:
kate_has_grade = "Kate" in grades
kate_has_grade

False

In [75]:
joels_grade = grades.get("Joel", 0)
Kate_grade = grades.get("Kate", 0)
no_ones_grade = grades.get("No One")

In [76]:
grades["Tim"] = 99
grades["kate"] = 100
num_students = len(grades)

In [77]:
tweet = {
    "user" : "joelgrus",
    "text" : "Data Science is Awesome",
    "retweet_count" : 100,
    "hashtags" : ["#data", '#science', '#datascience', '#awesome', '#yolo']
}

In [78]:
tweet_keys = tweet.keys()
tweet_keys

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])

In [79]:
tweet_values = tweet.values()
tweet_values

dict_values(['joelgrus', 'Data Science is Awesome', 100, ['#data', '#science', '#datascience', '#awesome', '#yolo']])

In [80]:
tweet_items = tweet.items()
tweet_items

dict_items([('user', 'joelgrus'), ('text', 'Data Science is Awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome', '#yolo'])])

#### defaultdict

In [81]:
# word_counts = {}
# for word in document:
#     if word in word_counts:
#         word_counts[word] += 1
#     else:
#         word_counts[word] = 1

In [82]:
# word_counts = {}
# for word in document:
#     try:
#         word_counts[word] += 1
#     except KeyError:
#         word_counts[word] = 1

In [83]:
# word_counts = {}
# for word in document:
#     previous_count = word_counts.get(word, 0)
#     word_counts[word] = previous_count + 1

In [4]:
from collections import defaultdict

In [85]:
# word_counts = defaultdict(int)
# for word in document:
#     word_counts[word] += 1

In [86]:
dd_list = defaultdict(list)

In [87]:
dd_list[2].append(1)
dd_list

defaultdict(list, {2: [1]})

In [88]:
dd_dict = defaultdict(dict)
dd_dict["joel"]["City"] = "Seattle"
dd_dict

defaultdict(dict, {'joel': {'City': 'Seattle'}})

In [89]:
dd_pair = defaultdict(lambda: [0, 0])
dd_pair[2][1] = 1
dd_pair

defaultdict(<function __main__.<lambda>()>, {2: [0, 1]})

### Counter

In [5]:
from collections import Counter
c = Counter([0, 1, 2, 0])

In [91]:
# word_counts = Counter(document)

In [92]:
# for word, count in word_counts.most_common(10):
#     print(word, count)

### Sets

In [93]:
prime_below_10 = {2, 3, 5, 7}

In [94]:
s = set()
s.add(1)
s.add(2)
s.add(3)
x = len(s)
y = 2 in s
z = 3 in s

In [95]:
# stopwords_list = ["a", "an", "at"] + hundreds_of_other_words + ["yet", "you"]
# "zip" in stopwords_set

# stopwords_set = set(stopwords_list)
# "zip" in stopwords_set

In [96]:
item_list = [1, 2, 3, 1, 2, 3]
num_items = len(item_list)
item_set = set(item_list)
num_distinct_items = len(item_set)
distinct_item_list = list(item_set)

### Control Flow

In [97]:
# Only the first branch whose condition is true runs. Both conditions below
# are false, so the `else` branch provides the value. Every branch must use
# `=`: writing `message: "..."` is a bare annotation and assigns nothing.
if 1 > 2:
    message = "if only 1 were greater than two..."
elif 1 > 3:
    message = "elif stands for 'else if' "
else:
    message = "when all else fails use else (if you want to)"

In [98]:
parity = "even" if x % 2 == 0 else "odd"

In [99]:
x = 0
while x < 10:
    print(f"{x} is less than 10")
    x += 1

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [100]:
for x in range(10):
    print(f"{x} is less than 10")

0 is less than 10
1 is less than 10
2 is less than 10
3 is less than 10
4 is less than 10
5 is less than 10
6 is less than 10
7 is less than 10
8 is less than 10
9 is less than 10


In [101]:
for x in range(10):
    if x == 3:
        continue
    if x == 5:
        break
    print(x)

0
1
2
4


### Truthiness

In [102]:
one_is_less_than_two = 1 < 2
true_equals_false = True == False

In [103]:
x = None
assert x == None, "this is not the Pythonic way to check for None"
assert x is None, "this is the Pythonic way to check for None"

In [104]:
# s = some_function_that_returns_a_string()
# if s:
#     first_char = s[0]
# else:
#     first_char = ""

In [105]:
# first_char = s and s[0]

In [106]:
safe_x = x or 0

In [107]:
safe_x = x if x is not None else 0

In [108]:
all([True, 1, [3]])
all([True, 1, {}])
any([True, 1, {}])
all([])
any([])

False

### Sorting

In [109]:
x = [4, 1, 2, 3]
y = sorted(x)
x.sort()

In [110]:
x = sorted([-4, 1, -2, 3], key=abs, reverse=True)

In [111]:
# wc = sorted(word_counts.items(),
#             key=lambda word_and_count: word_and_count[1],
#             reverse=True
#            )

### List Comprehension

In [112]:
# A list comprehension builds a new list from an iterable, optionally
# keeping only the elements that satisfy a condition.
even_numbers = [n for n in range(5) if n % 2 == 0]
squares = [n * n for n in range(5)]
even_squares = [n * n for n in even_numbers]

In [113]:
# The same comprehension syntax with braces produces dicts and sets.
square_dict = {n: n * n for n in range(5)}
square_set = {n * n for n in [1, -1]}

In [114]:
zeros = [0 for _ in even_numbers]

In [115]:
pairs = [(x, y)
         for x in range(10)
         for y in range(10)
        ]

In [117]:
increasing_pairs = [(x, y)
                    for x in range(10)
                    for y in range(x + 1, 10)]

### Automated Testing & assert

In [6]:
assert 1 + 1 == 2
assert 1 + 1 == 2, "1 + 1 should equal 2 but didn't"

In [7]:
def smallest_item(xs):
    """Return the minimum element of `xs`."""
    lowest = min(xs)
    return lowest

# Assertions double as lightweight tests for the function above.
assert smallest_item([10, 20, 5, 40]) == 5
assert smallest_item([1, 0, -1, 2]) == -1

In [8]:
def smallest_item(xs):
    """Return the minimum element of `xs`.

    An assertion checks the input first, so an empty `xs` fails with an
    AssertionError carrying an explanatory message.
    """
    assert xs, "empty list has no smallest item"
    lowest = min(xs)
    return lowest

### Object-Oriented Programming

In [9]:
class CountingClicker:
    """A class can/should have a docstring, just like a function"""

    # All methods must be indented inside the class body; defined at top level
    # they would be plain functions and the class would not have them.

    def __init__(self, count = 0):
        """Create a clicker whose counter starts at `count` (0 by default)."""
        self.count = count

    def __repr__(self):
        """Return the string shown when the instance is displayed or printed."""
        return f"CountingClicker(count={self.count})"

    def click(self, num_times = 1):
        """click the clicker some number of times."""
        self.count += num_times

    def read(self):
        """Return the current count."""
        return self.count

    def reset(self):
        """Set the count back to 0."""
        self.count = 0


# Three ways to construct an instance: default count, positional, keyword.
clicker1 = CountingClicker()
clicker2 = CountingClicker(100)
clicker3 = CountingClicker(count=100)

In [15]:
# clicker = CountingClicker()
# assert clicker.read() == 0, "clicker should start with count 0"
# clicker.click()
# clicker.click()
# assert clicker.read() == 2, "after two clicks, clicker should have count 2"
# clicker.reset()
# assert clicker.read() == 0, "after reset, clicker should be back to 0"

In [16]:
# class NoResetClicker(CountingClicker):
#     # This class has all the same methods as CountingClicker
    
#     # Except that it has a reset method that does nothing.
#     def reset(self):
#         pass
    
# clicker2 = NoResetClicker()
# assert clicker2.read() == 0
# clicker2.click()
# assert clicker2.read() == 1
# clicker2.reset()
# assert clicker2.read() == 1, "reset shouldn't do anything"

### Iterables & Generators

In [17]:
def generate_range(n):
    """Yield 0, 1, 2, ... for as long as the value is less than `n`."""
    current = 0
    while True:
        if not current < n:
            break
        yield current
        current += 1
        
for i in generate_range(10):
    print(f"i:{i}")

i:0
i:1
i:2
i:3
i:4
i:5
i:6
i:7
i:8
i:9


In [18]:
def natural_numbers():
    """returns 1, 2, 3, ...

    The generator never terminates on its own; the consumer decides when to
    stop pulling values.
    """
    previous = 0
    while True:
        previous += 1
        yield previous

In [19]:
even_below_20 = (i for i in generate_range(20) if i % 2 == 0)

In [20]:
data = natural_numbers()
evens = (x for x in data if x % 2 == 0)
even_squares = (x ** 2 for x in evens)
even_squares_ending_in_six = (x for x in even_squares if x % 10 == 6)

In [21]:
names = ["Alice", "Bob", "Charlie", "Debbie"]

In [22]:
for i, name in enumerate(names):
    print(f"name {i} is {name}")

name 0 is Alice
name 1 is Bob
name 2 is Charlie
name 3 is Debbie


### Randomness

In [23]:
import random 

In [24]:
random.seed(10)

In [26]:
four_uniform_randoms = [random.random() for _ in range(4)]
four_uniform_randoms

[0.81332125135732, 0.8235888725334455, 0.6534725339011758, 0.16022955651881965]

In [28]:
random.seed(10)
print(random.random())

0.5714025946899135


In [29]:
random.randrange(10)

6

In [30]:
random.randrange(3, 6)

4

In [32]:
up_to_ten = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
random.shuffle(up_to_ten)
print(up_to_ten)

[7, 9, 5, 8, 1, 3, 10, 4, 2, 6]


In [34]:
my_best_friend = random.choice(["Alice", "Bob", "Charlie"])
my_best_friend

'Bob'

In [36]:
lottery_numbers = range(60)
winning_numbers = random.sample(lottery_numbers, 6)
winning_numbers

[43, 19, 42, 23, 8, 29]

In [37]:
four_with_replacement = [random.choice(range(10)) for _ in range(4)]
print(four_with_replacement)

[3, 7, 9, 6]


### Regular Expressions

In [38]:
import re

In [41]:
# Each entry is truthy when the corresponding `re` call behaves as described,
# so `all(re_examples)` holds.
re_examples = [
    not re.match("a", "cat"),                # match anchors at the start; "cat" starts with "c"
    re.search("a", "cat"),                   # search scans the whole string; "cat" contains "a"
    not re.search("c", "dog"),               # "dog" has no "c"
    3 == len(re.split("[ab]", "carbs")),     # splitting on a or b gives ['c', 'r', 's']
    "R-D-" == re.sub("[0-9]", "-", "R2D2")   # every digit is replaced, including the last one
]

In [43]:
# assert all(re_examples)

### Zip & Argument Unpacking

In [44]:
list1 = ['a', 'b', 'c'] 
list2 = [1, 2, 3]

In [45]:
[pair for pair in zip(list1, list2)]

[('a', 1), ('b', 2), ('c', 3)]

In [46]:
pairs = [('a', 1), ('b', 2), ('c', 3)]
letters, numbers = zip(*pairs)

In [47]:
letters, numbers = zip(('a', 1), ('b', 2), ('c', 3))

In [48]:
def add(a, b):
    """Return `a + b`; exactly two arguments are required."""
    combined = a + b
    return combined

In [50]:
add(1, 2)
try:
    add([1, 2])
except TypeError:
    print("add expects two inputs")
add(*[1, 2])

add expects two inputs


3

### args & kwargs

In [51]:
def doubler(f):
    """Return a one-argument function that computes 2 * f(x)."""
    def g(x):
        # Evaluate the wrapped function first, then double its result.
        inner_result = f(x)
        return 2 * inner_result

    return g

In [54]:
def f1(x):
    """Return `x` plus one."""
    incremented = x + 1
    return incremented
    
g = doubler(f1)
assert g(3) == 8, "(3 + 1) * 2 should equal 8"
assert g(-1) == 0, "(-1 + 1) * 2 should equal 0"

In [55]:
def f2(x, y):
    """Return the sum of the two arguments."""
    summed = x + y
    return summed

In [56]:
g = doubler(f2)
try:
    g(1, 2)
except TypeError:
    print("as defined, g only takes one argument")

as defined, g only takes one argument


In [58]:
def magic(*args, **kwargs):
    """Print the positional arguments (a tuple) and the keyword arguments (a dict)."""
    labelled = (("unnamed args:", args), ("keyword args:", kwargs))
    for label, received in labelled:
        print(label, received)
    
magic(1, 2, key="word", key2 = "word2")

unnamed args: (1, 2)
keyword args: {'key': 'word', 'key2': 'word2'}


In [59]:
def other_way_magic(x, y, z):
    """Return x + y + z; used to demonstrate unpacking a list and a dict into arguments."""
    first_two = x + y
    return first_two + z

In [63]:
# x_y_list = [1, 2]
# z_dict = {"z": 3}
# assert other_way_magic(*x_y_list, **z_dict) == 6, "1 + 2 + 3 should be 6"

### Type Annotations

In [64]:
def add(a, b):
    """Return `a + b` for any pair of operands that support `+`."""
    combined = a + b
    return combined

In [68]:
# assert add(10, 5) == 15
# assert add([1, 2], [3]) == [1, 2, 3]
# assert add("hi ", "there") == "hi there"

# try:
#     add(10, "five")
# except TypeError:
#     print("cannot add an int to a string")

In [69]:
def add(a: int, b: int) -> int:
    """Return `a + b`.

    The annotations document the intended types; the function body performs
    no type checking of its own.
    """
    combined = a + b
    return combined

In [70]:
add(10, 5)

15

In [73]:
add("hi ", "there")

'hi there'

In [75]:
from typing import Union

In [76]:
def secretly_ugly_function(value, operation):
    """Placeholder with no annotations and no body; always returns None."""


In [78]:
def ugly_function(value: int,
                  operation: Union[str, int, float, bool]) -> int:
    """Placeholder whose signature shows a wide `Union` annotation.

    The body is empty, so the call returns None regardless of the annotated
    return type.
    """

### How to write Type Annotations

In [79]:
def total(xs: list) -> float:
    """Return the sum of the numbers in `xs` (0 for an empty list).

    The bare `list` annotation says nothing about the element type.
    """
    # Sum the argument, not the function object itself.
    return sum(xs)

In [80]:
from typing import List

In [81]:
def total(xs: List[float]) -> float:
    """Return the sum of the floats in `xs` (0 for an empty list).

    `List[float]` documents the expected element type as well as the container.
    """
    # Sum the argument, not the function object itself.
    return sum(xs)

In [83]:
x: int = 5

In [84]:
values = []
best_so_far = None

In [85]:
from typing import Optional

In [87]:
values: List[int] = []
best_so_far: Optional[float] = None