# CHAPTER 02 - A CRASH COURSE IN PYTHON

## THE ZEN OF PYTHON

In [2]:
import this

The Zen of Python, by Tim Peters

Beautiful is better than ugly.
Explicit is better than implicit.
Simple is better than complex.
Complex is better than complicated.
Flat is better than nested.
Sparse is better than dense.
Readability counts.
Special cases aren't special enough to break the rules.
Although practicality beats purity.
Errors should never pass silently.
Unless explicitly silenced.
In the face of ambiguity, refuse the temptation to guess.
There should be one-- and preferably only one --obvious way to do it.
Although that way may not be obvious at first unless you're Dutch.
Now is better than never.
Although never is often better than *right* now.
If the implementation is hard to explain, it's a bad idea.
If the implementation is easy to explain, it may be a good idea.
Namespaces are one honking great idea -- let's do more of those!


## GETTING PYTHON

[Anaconda Individual Edition](https://www.anaconda.com/products/individual)

## VIRTUAL ENVIRONMENTS

In [9]:
# create a Python 3.6 environment named "dsfs"
# !conda create -n dsfs python=3.6

^C


In [None]:
#
# To activate this environment, use:
# > source activate dsfs
#
# To deactivate an active environment, use:
# > source deactivate
#

In [None]:
# !source activate dsfs

In [None]:
# !python -m pip install ipython

In [None]:
## WHITESPACE FORMATTING

In [14]:
# Everything after a "#" is a comment: the interpreter skips it, but it
# documents the code for whoever reads it later.
for i in range(1, 6):
    print(i)                        # opens the body of the outer "for i" loop
    for j in range(1, 6):
        print(j)                    # opens the body of the inner "for j" loop
        print(i + j)                # closes the body of the inner "for j" loop
    print(i)                        # closes the body of the outer "for i" loop
print('done looping')

1
1
2
2
3
3
4
4
5
5
6
1
2
1
3
2
4
3
5
4
6
5
7
2
3
1
4
2
5
3
6
4
7
5
8
3
4
1
5
2
6
3
7
4
8
5
9
4
5
1
6
2
7
3
8
4
9
5
10
5
done looping


In [15]:
# Whitespace is ignored inside parentheses, so a long expression can be
# wrapped over several lines without any continuation character.
long_winded_computation = (
    1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10
    + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20
)

In [17]:
list_of_lists = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [18]:
easier_to_read_list_of_lists = [[1, 2, 3],
                                [4, 5, 6],
                                [7, 8, 9]]

In [19]:
two_plus_three = 2 + \
                 3

## MODULES

In [22]:
import re
my_regex = re.compile("[0-9]+", re.I)

In [23]:
import re as regex
my_regex = regex.compile("[0-9]+", regex.I)

In [32]:
from collections import defaultdict, Counter
lookup = defaultdict(int)
my_counter = Counter()

In [33]:
# Cautionary example: a wildcard import can silently overwrite names that
# already exist in the current namespace.
match = 10
from re import *        # uh oh, re has a match function, which now replaces our `match`
print(match)            # prints a function repr such as "<function match at 0x10281e6a8>", not 10

<function match at 0x000001F30CEA61F0>


## FUNCTIONS

In [35]:
def double(x):
    """
    Return the input multiplied by 2.

    A docstring like this one is optional; it sits directly under the
    `def` line and describes what the function does.
    """
    doubled = x * 2
    return doubled

In [36]:
def apply_to_one(f):
    """Invoke the callable f with the single argument 1 and return its result"""
    result = f(1)
    return result

In [37]:
my_double = double # refers to the previously defined function
x = apply_to_one(my_double) # equals 2

In [38]:
y = apply_to_one(lambda x: x + 4) # equals 5

In [39]:
another_double = lambda x: 2 * x # don't do this

In [40]:
def another_double(x):
    """Preferred form: a named `def` instead of a lambda bound to a variable"""
    doubled = 2 * x
    return doubled

In [41]:
def my_print(message="my default message"):
    """Print message; when called with no argument, print the default text"""
    print(message)

In [42]:
my_print("hello") # prints 'hello'

hello


In [43]:
my_print() # prints 'my default message'

my default message


In [44]:
def full_name(first="What's-his-name", last="Something"):
    """Join first and last with a single space, using the defaults for any omitted part"""
    separator = " "
    return first + separator + last

In [45]:
full_name("Joel", "Grus") # "Joel Grus"

'Joel Grus'

In [46]:
full_name("Joel") # "Joel Something"

'Joel Something'

In [47]:
full_name(last="Grus") # "What's-his-name Grus"

"What's-his-name Grus"

## STRINGS

In [50]:
single_quoted_string = 'data science'

In [51]:
double_quoted_string = "data science"

In [52]:
tab_string = "\t" # represents the tab character

In [53]:
len(tab_string) # is 1

1

In [54]:
not_tab_string = r"\t" # represents the characters '\' and 't'

In [55]:
len(not_tab_string) # is 2

2

In [65]:
# Triple quotes let a string literal span several source lines. The trailing
# backslashes here escape the line breaks, so the resulting string contains
# no newline characters and evaluates to one single line of text.
multi_line_string = """This is the first line. \
and this is the second line \
and this is the third line"""

In [66]:
multi_line_string

'This is the first line. and this is the second line and this is the third line'

In [67]:
first_name = "Joel"

In [68]:
last_name = "Grus"

In [69]:
full_name1 = first_name + " " + last_name # string addition

In [70]:
full_name2 = "{0} {1}".format(first_name, last_name) # string.format

In [71]:
full_name3 = f"{first_name} {last_name}"

## EXCEPTIONS

In [73]:
try:
    print(0 / 0)
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


## LISTS

In [75]:
integer_list = [1, 2, 3]

In [76]:
heterogeneous_list = ["string", 0.1, True]

In [77]:
list_of_lists = [integer_list, heterogeneous_list, []]

In [86]:
list_length = len(integer_list) # equals 3
list_length

3

In [87]:
list_sum = sum(integer_list) # equals 6
list_sum

6

In [88]:
x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
x

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [89]:
zero = x[0] # equals 0, lists are 0-indexed
zero

0

In [90]:
one = x[1] # equals 1
one

1

In [91]:
nine = x[-1] # equals 9, 'Pythonic' for last element
nine

9

In [92]:
eight = x[-2] # equals 8, 'Pythonic' for next-to-last element
eight

8

In [93]:
x[0] = -1 # now x is [-1, 1, 2, 3, ..., 9]
x

[-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [99]:
first_three = x[:3] # [-1, 1, 2]
first_three

[-1, 1, 2]

In [101]:
three_to_end = x[3:] # [3, 4, ..., 9]
three_to_end

[3, 4, 5, 6, 7, 8, 9]

In [102]:
one_to_four = x[1:5] # [1, 2, 3, 4]
one_to_four

[1, 2, 3, 4]

In [103]:
last_three = x[-3:] # [7, 8, 9]
last_three

[7, 8, 9]

In [104]:
without_first_and_last = x[1:-1] # [1, 2, ..., 8]
without_first_and_last

[1, 2, 3, 4, 5, 6, 7, 8]

In [105]:
copy_of_x = x[:] # [-1, 1, 2, ..., 9]
copy_of_x

[-1, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [107]:
every_third = x[::3] # [-1, 3, 6, 9]
every_third

[-1, 3, 6, 9]

In [108]:
five_to_three = x[5:2:-1] # [5, 4, 3]
five_to_three

[5, 4, 3]

In [109]:
1 in [1, 2, 3] # True

True

In [110]:
0 in [1, 2, 3] # False

False

In [111]:
x = [1, 2, 3]

In [112]:
y = x + [4, 5, 6] # y is [1, 2, 3, 4, 5, 6]; x is unchanged

In [113]:
y

[1, 2, 3, 4, 5, 6]

In [116]:
x = [1, 2, 3]
x

[1, 2, 3]

In [117]:
x.append(0) # x is now [1, 2, 3, 0]
x

[1, 2, 3, 0]

In [118]:
y = x[-1] # equals 0
y

0

In [119]:
z = len(x) # equals 4

In [120]:
x, y = [1, 2] # now x is 1, y is 2

In [121]:
x

1

In [122]:
y

2

In [123]:
_, y = [1, 2] # now y == 2, didn't care about the first element

In [124]:
y

2

## TUPLES

In [127]:
my_list = [1, 2]
my_list

[1, 2]

In [128]:
my_tuple = (1, 2)
my_tuple

(1, 2)

In [130]:
other_tuple = 3, 4
other_tuple

(3, 4)

In [131]:
my_list[1] = 3

In [132]:
try:
    my_tuple[1] = 3
except TypeError:
    print("cannot modify a tuple")

cannot modify a tuple


In [133]:
def sum_and_product(x, y):
    """Return two values at once as a tuple: (x + y, x * y)"""
    total = x + y
    product = x * y
    return total, product

In [134]:
sp = sum_and_product(2, 3) # sp is (5, 6)
sp

(5, 6)

In [135]:
s, p = sum_and_product(5, 10) # s is 15, p is 50

In [136]:
s

15

In [137]:
p

50

In [142]:
x, y = 1, 2 # now x is 1, y is 2
x, y

(1, 2)

In [143]:
x, y = y, x # Pythonic way to swap variables; now x is 2, y is 1

In [144]:
x, y

(2, 1)

## DICTIONARIES

In [150]:
empty_dict = {} # Pythonic
empty_dict

{}

In [151]:
empty_dict2 = dict() # less Pythonic
empty_dict2

{}

In [152]:
grades = {"Joel": 80, "Tim": 95} # dictionary literal
grades

{'Joel': 80, 'Tim': 95}

In [153]:
joels_grade = grades["Joel"] # equals 80
joels_grade

80

In [154]:
try:
    kates_grade = grades["Kate"]
except KeyError:
    print("no grade for Kate!")

no grade for Kate!


In [155]:
joel_has_grade = "Joel" in grades # True

In [156]:
joel_has_grade

True

In [157]:
kate_has_grade = "Kate" in grades # False
kate_has_grade

False

In [158]:
joels_grade = grades.get("Joel", 0) # equals 80
joels_grade

80

In [159]:
kates_grade = grades.get("Kate", 0) # equals 0: "Kate" has no entry, so the supplied default is returned
kates_grade

0

In [160]:
no_ones_grade = grades.get("No One") # default is None
no_ones_grade

In [161]:
grades["Tim"] = 99 # replaces the old value
grades["Kate"] = 100 # adds a third entry

In [162]:
num_students = len(grades) # equals 3
num_students

3

In [163]:
tweet = {
    "user" : "joelgrus",
    "text" : "Data Science is Awesome",
    "retweet_count" : 100,
    "hashtags" : ["#data", "#science", "#datascience", "#awesome", "#yolo"]
}

In [164]:
tweet_keys = tweet.keys() # iterable for the keys
tweet_keys

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])

In [165]:
tweet_values = tweet.values() # iterable for the values
tweet_values

dict_values(['joelgrus', 'Data Science is Awesome', 100, ['#data', '#science', '#datascience', '#awesome', '#yolo']])

In [166]:
tweet_items = tweet.items() # iterable for the (key, value) tuples
tweet_items

dict_items([('user', 'joelgrus'), ('text', 'Data Science is Awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome', '#yolo'])])

In [167]:
"user" in tweet_keys # True, but not Pythonic

True

In [168]:
"user" in tweet # Pythonic way of checking for keys

True

In [169]:
"joelgrus" in tweet_values # True (slow but the only way to check)

True

In [173]:
"joelgrus" in tweet.values()

True

## DEFAULTDICT

In [175]:
# Sample input so this cell runs on a fresh kernel; replace it with any
# list of words to count.
document = ["data", "science", "is", "awesome", "data", "is", "data"]

# Count occurrences with a plain dict. Membership must be checked first,
# because `word_counts[word] += 1` raises KeyError for a word not yet seen.
word_counts = {}
for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1