# The basics of Python 
----------------------
### (little experience is required though)

## Variables pass by reference

In [3]:
# Everything is an object: assignment binds another name, it never copies.
a = [1, 2, 3]
b = a                 # b refers to the same list object as a
a.append(4)           # mutate the list through a ...
print(b)              # ... and the change is visible through b

[1, 2, 3, 4]


## Functions
functions are objects

In [1]:
from matplotlib import pyplot as plt

In [2]:
def double(x):
    """Return ``x`` multiplied by two (the docstring itself is optional)."""
    doubled = x * 2
    return doubled

Python functions are *first-class*, which means that we can assign
them to variables and pass them into functions

In [5]:
def apply_to_one(f):
    """Call ``f`` with the single argument 1 and hand back its result."""
    result = f(1)
    return result

# A function is passed around like any other value.
x = apply_to_one(double)
x

2

Default arguments, only need to be specified when you want a value other than the default parameter

In [6]:
def my_print(message="default message"):
    """Print ``message``; the default text is used when no argument is given."""
    print(message)


my_print(message="hello")   # an explicit value overrides the default
my_print()                  # falls back to "default message"

hello
default message


## Lambda functions 
(anonymous functions)

In [7]:
# An anonymous function can be passed directly: 1 + 4 gives 5.
y = apply_to_one(lambda n: n + 4)

In [29]:
# Sort a collection of strings by the number of distinct letters
# in each string (list.sort is stable, so ties keep their original order).
strings = ['foo', 'card', 'bar', 'aaaa', 'abab']
strings.sort(key=lambda word: len(set(word)))   # set() iterates the characters itself
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

## Exceptions

When something goes wrong, Python raises an *exception*.

In [8]:
try:
    print(0/0)                      # the division raises before print is called
except ZeroDivisionError:
    print("cannot divide by zero")

cannot divide by zero


    f = open(path, 'w')

    try:
        write_to_file(f)  
    except:
        print 'Failed'
    else:
        print 'Succeeded'
    finally:
        f.close()

## Lists  
(~arrays)

In [9]:
integer_list = [1, 2, 3]
list_lenght = len(integer_list)   # number of elements -> 3
list_sim = sum(integer_list)      # total of the elements -> 6

In [8]:
# list() keeps x a real list on Python 3, where range() is lazy
# (the equivalent of Python 2's xrange).
x = list(range(10))
print(x)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


In [14]:
first_three = x[:3]                  # from the start up to (not including) index 3
last_three = x[-3:]                  # from the third-to-last element to the end
without_first_and_last = x[1:-1]     # drop one element at each end
print("{} {} {}".format(first_three, last_three, without_first_and_last))

[0, 1, 2] [7, 8, 9] [1, 2, 3, 4, 5, 6, 7, 8]


In [16]:
# Membership test: `in` checks whether a value occurs in the list
print("{} {}".format(1 in x, -1 in x))

True False


#### Concatenate (append) lists
extend, append, insert

In [26]:
x = [1, 2, 3]
x.extend([4, 5])      # add each element of the iterable to the end of x
print(x)

[1, 2, 3, 4, 5]


In [22]:
x = [1, 2, 3]
x.append([4, 5])      # append adds its argument as ONE element, so the list ends up nested
                      # (append mutates in place, whereas + builds a new list)
print(x)

[1, 2, 3, [4, 5]]


In [28]:
x = [1, 2, 3]
x.insert(0, [4, 5])   # insert the object (here a whole list) before index 0
print(x)

[[4, 5], 1, 2, 3]


In [20]:
# Multiple assignment: unpack a two-element list into two names
x, y = [1, 2]
print("{} {}".format(x, y))

1 2


In [21]:
# The underscore is a conventional throwaway name: only y (== 2) matters here.
_, y = [1, 2]

### Binary search

In [6]:
# bisect implements binary search and insertion for an already-sorted list
import bisect

c = [1, 2, 2, 2, 3, 4, 7]
insert_at = bisect.bisect(c, 6)   # index where 6 would go to keep c sorted (kept instead of discarded)
bisect.insort(c, 6)               # insert 6 in place, preserving the order
c

[1, 2, 2, 2, 3, 4, 6, 7]

## Tuples
(are lists' **immutable** cousins)

In [24]:
my_tuple = (1, 2)
try:
    my_tuple[1] = 3                  # item assignment is not supported on tuples
except TypeError:
    print('cannot modify a tuple')

cannot modify a tuple


Convenient way to return multiple values from functions

In [None]:
def sum_and_product(x, y):
    """Return the pair ``(x + y, x * y)`` as a tuple."""
    total = x + y
    product = x * y
    return total, product

# Tuple unpacking splits the returned pair: s is 5, p is 6.
s, p = sum_and_product(2, 3)

In [25]:
x, y = 1, 2
# Idiomatic swap: the right-hand tuple is built first, then unpacked.
x, y = y, x

In [17]:
# Multiplying a tuple by an integer repeats its contents
a = ('foo', 'bar') * 4
a.count('foo')

4

## Dictionaries
Associate *values* with *keys* and allow you to quickly retrieve the
value corresponding to a given key

In [33]:
empty_dict = {}
grades = {"Joel": 80, "Tim": 95}
joel_grade = grades["Joel"]               # 80
beto = {'tuple': (2, 1)}                  # a value can be any object, e.g. a tuple
print(beto['tuple'][1])                   # second element of the stored tuple

1


Check for the existence of a key using in

In [37]:
# `in` applied to a dict tests its keys
joel_has_grade = ("Joel" in grades)     # True
kate_has_grade = ("Kate" in grades)     # False

#### get
Dictionaries have a *get* method that returns a default value 
(instead of raising an exception)

In [43]:
# The second argument of get() is returned when the key is absent.
joels_grade = grades.get("Joel", 0)     # key present -> 80
kates_grade = grades.get("Kate", 0)     # key missing -> 0

Assigning key-value pairs

In [None]:
grades["Tim"] = 99            # existing key: the old value is replaced
grades["Kate"] = 100          # new key: adds a third entry
num_students = len(grades)    # 3

Use dictionaries as a simple way to represent structured data

In [1]:
# One dict holding a record with mixed value types (str, int, list)
tweet = {
    "user": "joelgrus",
    "text": "Data Science",
    "retweet_count": 100,
    "hashtags": ["#data", "#science", "#yolo"],
}

In [2]:
# list() keeps these real lists on Python 3, where the dict methods return views.
tweet_keys   = list(tweet.keys())      # list of keys
tweet_values = list(tweet.values())    # list of values
tweet_items  = list(tweet.items())     # list of (key, value) tuples
print(tweet_items)

[('text', 'Data Science'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#yolo']), ('user', 'joelgrus')]


In [None]:
"user" in tweet_keys
"user" in tweet                    #Pythonic way 

#### Valid dict key types
The keys have to be immutable objects such as scalar types (int, float, string)
or tuples whose elements are all immutable too. The technical term here is **hashability**

In [10]:
# A tuple is hashable only if every element is; the inner list is not.
# The error is caught and shown so the cell does not abort the notebook.
try:
    hash('string'), hash((1, 2, [2, 3]))
except TypeError as err:
    print("TypeError: {}".format(err))

TypeError: unhashable type: 'list'

## defaultdict

Count the words in a document

```
word_counts = {}
for word in document:
    # a word already seen is incremented; a new word starts at 1
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1
```

defaultdict: when you try to look up a key it doesn't contain,
it first adds a value for it using a zero-argument function you
provided when you created it.

```
from collections import defaultdict

word_counts = defaultdict(int)       #int() produces 0
for word in document:
    word_counts[word] += 1
```

Can also be useful with *list* or *dict* or even your own functions

In [47]:
from collections import defaultdict

dd_list = defaultdict(list)          # list() produces an empty list
dd_list[2].append(1)                 # the missing key 2 is created on first access

dd_dict = defaultdict(dict)          # dict() produces an empty dict
dd_dict["Joel"]["City"] = "Seatle"

dd_pair = defaultdict(lambda: [0, 0])   # any zero-argument callable works as the factory
dd_pair[2][1] = 1

print("{} {} {}".format(dd_list, dd_dict, dd_pair))

defaultdict(<type 'list'>, {2: [1]}) defaultdict(<type 'dict'>, {'Joel': {'City': 'Seatle'}}) defaultdict(<function <lambda> at 0x106a00b90>, {2: [0, 1]})


## Counter
Turns a sequence of values into a **defaultdict(int)**-like object mapping
keys to counts

In [48]:
from collections import Counter

# Each distinct value maps to how many times it occurs: {0: 2, 1: 1, 2: 1}
c = Counter([0, 1, 2, 0])

solves our **word_counts** problem:
```
word_counts = Counter(document)
```

has a frequently useful **most_common** method
```
for word, count in word_counts.most_common(10):
    print("{} {}".format(word, count))
```

## Sets
unordered collection of *unique* elements

In [15]:
s = set([5, 3, 4, 1, 1, 1])   # duplicates collapse into a single element
s.add(1)                      # 1 is already present, so nothing changes
print(s)

x = len(s)             # 4
y = 2 in s             # False

set([1, 3, 4, 5])


**in** is a very fast operation on sets.
```
stopwords_list = ["a","an"] + hundreds_of_other_words + ["you"]

"zip" in stopwords_list     #False, but have to check every element

stopwords_set = set(stopwords_list)
"zip" in stopwords_set       # Very fast to check
```

To find *distinct* items in a collection

In [None]:
items_list = [1, 2, 3, 1, 2, 3]
num_items = len(items_list)             # 6 -- duplicates are counted
items_set = set(items_list)             # {1, 2, 3} -- duplicates removed
distinct_item_list = list(items_set)    # [1, 2, 3] -- back to a list

In [18]:
a_set = {1, 2, 3, 4, 5}

# The same containment relation, asked from each side
{1, 2, 3}.issubset(a_set), a_set.issuperset({1, 2, 3})

(True, True)

## Loops 
(continue, pass and break)

In [11]:
for x in range(10):
    if x == 3:
        continue                 # skip the rest of the body, go to the next iteration
    if x == 4:
        pass                     # do nothing, execution falls through to print
    if x == 5:
        break                    # quit the loop entirely
    print(x)

0
1
2
4


## Ternary Expressions

You write a *ternary* if-then-else on one line

In [12]:
# A remainder of 0 means x is even; the explicit comparison keeps the labels the right way round.
parity = "even" if x % 2 == 0 else "odd"

## Truthiness

In [None]:
# A comparison evaluates to a bool; this one is True.
one_is_less_that_two = (1 < 2)

In [51]:
x = None
print(x is None)      # identity check with `is`

True


The following are Falsy:
```
False, None, [], {}, "", set(), 0, 0.0
```

```
first_char = s and s[0]
```
**and** returns its second value when the first is "truthy", 
the first when it's not.
Similarly, if x is either a number or possibly **None**:
```
safe_x = x or 0
```

In [None]:
all([True, 1, {3}])     # True: every element is truthy
any([True, 1, {}])      # True: at least one element is truthy
any([])                 # False: there is no truthy element