# The Not-so basics of Python 
----------------------

## Sorting

In [12]:
x = [4, 1, 2, 3]
y = sorted(x)              # y == [1, 2, 3, 4]; sorted() returns a new list and leaves x unchanged
x.sort()                   # sorts in place: now x == [1, 2, 3, 4]

list(reversed(x))          # reversed() walks x back to front without modifying it

[4, 3, 2, 1]

In [3]:
# Sort by absolute value, largest first: x == [-4, 3, -2, 1]
x = sorted([-4, 1, -2, 3], key=abs, reverse=True)

Sort the words by their counts, from highest to lowest:

In [8]:
from collections import Counter

Document = ['hi', 'bye', 'hi', 'morning', 'hi']
word_counts = Counter(Document)          # maps each word to its number of occurrences

# Sort the (word, count) pairs by count, highest first. The pair is indexed
# explicitly because tuple-unpacking lambda parameters
# (`lambda (word, count): ...`) are Python 2-only syntax.
wc = sorted(word_counts.items(),
            key=lambda pair: pair[1], reverse=True)
print(wc)

[('hi', 3), ('bye', 1), ('morning', 1)]


## List Comprehensions
Transform a list into another list.

In [9]:
even_numbers = [x for x in range(5) if x%2 == 0]    # [0, 2, 4]
even_square = [x * x for x in even_numbers]         # [0, 4, 16]

In [10]:
# Dict comprehension: maps each x in range(4) to its square
square_dict = {x: x*x for x in range(4)}     # {0: 0, 1: 1, 2: 4, 3: 9}

In [12]:
# One zero per element of even_numbers; `_` marks the loop value as unused
zeros = [0 for _ in even_numbers]
zeros

[0, 0, 0]

A list comprehension can include multiple `for` clauses:

In [25]:
# Two `for` clauses: every combination of x and y, with y varying fastest
pairs = [(x, y)
         for x in range(10)
         for y in range(10)]        # 100 pairs: [(0, 0), (0, 1), ..., (9, 9)]

#### Nested list

In [16]:
all_data = [['Tom', 'Billy', 'Andrew', 'Steven'], ['Susie', 'Casey', 'Jill', 'Eva']]

# Flatten the nested lists and keep only the names containing at least two 'e's.
# The `for` clauses are written in the same order as the equivalent nested loops.
result = [name for names in all_data for name in names
        if name.count('e') >= 2]
result

['Steven']

## Generator
A *generator* is something that you can iterate over but
whose values are produced only as needed (*lazily*). Python comes
with a *lazy range* function called *xrange* (Python 2 only; in Python 3 the built-in *range* is itself lazy).

To create a generator, use the **yield** keyword instead of **return** in a function.

In [15]:
def lazy_range(n):
    """Lazily yield the integers 0, 1, ..., n - 1, one per iteration step."""
    current = 0
    while current < n:
        yield current      # hand back one value, then pause until the next one is requested
        current += 1
x = xrange(3)       # the built-in lazy range (Python 2 only; Python 3's range is already lazy)
x

xrange(3)

### itertools module
see also: imap, ifilter, combinations, permutations, groupby

In [27]:
# NOTE: this also rebinds all_data to the same flat list.
names = all_data = ['Tom', 'Billy', 'Andrew', 'Steven', 'Susie', 'Casey', 'Jill', 'Eva']

import itertools


def first_letter(x):
    """Return the first character of x; used as the grouping key."""
    return x[0]


# groupby only merges *consecutive* items that share a key: 'Steven' and
# 'Susie' end up together because they are adjacent in the list. Sort by the
# key first if equal keys must be grouped regardless of position.
for letter, group in itertools.groupby(names, first_letter):
    # `group` is a one-shot iterator over the members of the current group;
    # a separate loop name keeps `names` bound to the original list.
    print("{0} {1}".format(letter, list(group)))

T ['Tom']
B ['Billy']
A ['Andrew']
S ['Steven', 'Susie']
C ['Casey']
J ['Jill']
E ['Eva']


## Randomness

In [30]:
import random

random.seed(10)            # fix the seed so the "random" numbers are reproducible
four_uniform_randoms = [random.random() for _ in range(4)]   # floats in [0.0, 1.0)
print(four_uniform_randoms)

[0.5714025946899135, 0.4288890546751146, 0.5780913011344704, 0.20609823213950174]


In [33]:
random.randrange(10)                    # choose from [0, 1, ..., 9]
random.randrange(3,6)                   # choose from [3, 4, 5]; only this last value is displayed

4

In [34]:
# random.shuffle reorders its argument in place, so it needs a mutable
# sequence; list(...) guarantees one even where range() is lazy (Python 3).
up_to_ten = list(range(10))
random.shuffle(up_to_ten)               # e.g. [2, 5, 4, 9, ...]

In [36]:
my_best_friend = random.choice(["Alice","Bob","Charlie"])    # one element picked at random

Choose a sample of elements *without* and *with* replacement:

In [40]:
lottery_numbers = range(10)
# sample() draws distinct elements, i.e. without replacement
winning_numbers = random.sample(lottery_numbers, 5)
# independent choice() calls may pick the same element again, i.e. with replacement
four_with_replacement = [random.choice(lottery_numbers)
                         for _ in range(4)]
# format() keeps the "list list" output identical on Python 2 and 3
print("{0} {1}".format(winning_numbers, four_with_replacement))

[1, 2, 3, 0, 8] [8, 6, 5, 8]


## Regular Expressions

In [21]:
import re
# Each entry is one small regex check; every one of them should be truthy.
regex_checks = [
    not re.match("a", "cat"),                  # match() only looks at the start: 'cat' does not start with 'a'
    re.search("a", "cat"),                     # search() scans the whole string: 'cat' contains 'a'
    3 == len(re.split("[ab]", "carbs")),       # splitting on 'a' or 'b' gives ['c', 'r', 's']
    "R-D-" == re.sub("[0-9]", "-", "R2D2"),    # every digit is replaced with a dash
]
print(all(regex_checks))

True


In [22]:
states = ['   Alabama', 'Georgia!', 'Georgia', 'georgia',
          'carolina#', 'virginia?']


def remove_punctuation(value):
    """Return `value` with every '!', '#' and '?' character removed."""
    stripped = re.sub('[!#?]', '', value)
    return stripped


# Cleaning steps, applied to each string in this order.
clean_ops = [str.strip, remove_punctuation, str.title]


def clean_strings(strings, ops):
    """Apply each function in `ops`, in order, to every string in `strings`.

    Returns a new list holding the transformed strings.
    """
    def run_pipeline(text):
        # Feed the output of each step into the next one.
        for step in ops:
            text = step(text)
        return text

    return [run_pipeline(item) for item in strings]


clean_strings(states, clean_ops)

['Alabama', 'Georgia', 'Georgia', 'Georgia', 'Carolina', 'Virginia']

In [23]:
# Wrapped in list() because map() is lazy in Python 3: the bare call would
# display a map object rather than the cleaned strings.
list(map(remove_punctuation, states))

['   Alabama', 'Georgia', 'Georgia', 'georgia', 'carolina', 'virginia']

## Object-Oriented Programming

In [10]:
class Det(object):
    """A minimal set of hashable values, backed by a dict.

    Only the dict's keys matter; every stored value is simply True.
    """

    def __init__(self, values=None):
        """Create the set, optionally pre-filled from the iterable `values`."""
        self.dict = {}
        if values is not None:
            for value in values:
                self.add(value)

    def __repr__(self):
        """Return the string shown when the object is printed or echoed."""
        # list(...) keeps the text a plain list even where keys() returns a view
        return "Set:" + str(list(self.dict.keys()))

    def add(self, value):
        """Insert `value`; adding a value that is already present changes nothing."""
        self.dict[value] = True

    def contains(self, value):
        """Return True if `value` is currently stored."""
        return value in self.dict

    def remove(self, value):
        """Delete `value`; raises KeyError if it is not stored."""
        del self.dict[value]

# and use like
# s = Det([1, 2, 3])
# s.add(4)
# s.remove(3)

## Functional Tools

In [14]:
def exp(base, power):
    """Return base raised to power."""
    return base**power

from functools import partial

# partial() pre-fills some arguments and returns a new callable
two_to_the = partial(exp, 2)            # base fixed to 2:  two_to_the(p) == 2 ** p
square_of  = partial(exp, power = 2)    # power fixed to 2: square_of(b) == b ** 2

# format() keeps the "8 9" output identical on Python 2 and 3
print("{0} {1}".format(two_to_the(3), square_of(3)))

8 9


### map, reduce, filter, partial
currying: deriving new functions from existing ones by
*partial argument application.*

In [16]:
# reduce is a builtin only in Python 2; functools.reduce exists in 2.6+ and 3.
from functools import partial, reduce


def double(x):
    """Return twice x."""
    return 2*x


xs = [1, 2, 3, 4]

# map applies a function to every element. Results are wrapped in list()
# because map (like filter) is lazy in Python 3.
twice_xs = [double(x) for x in xs]           # [2, 4, 6, 8]
twice_xs = list(map(double, xs))
# or
list_doubler = partial(map, double)          # a callable that doubles every element
twice_xs = list(list_doubler(xs))


Celsius = [39.2, 36.5, 37.3, 37.8]
Fahrenheit = list(map(lambda x: (float(9)/5)*x + 32, Celsius))


def multiply(x, y):
    """Return the product of x and y."""
    return x*y


# Given several iterables, map passes one element from each per call.
products = list(map(multiply, [1,2], [4,5]))       # [1*4, 2*5] = [4, 10]


def is_even(x):
    """Return True when x is divisible by 2."""
    return x%2 == 0


# filter keeps only the elements for which the function returns True.
x_evens = [x for x in xs if is_even(x)]       # [2, 4]
x_evens = list(filter(is_even, xs))
# or
list_evener = partial(filter, is_even)
x_evens = list(list_evener(xs))


# reduce combines the first two elements, then that result with the third, and so on.
x_product = reduce(multiply, xs)               # 1*2*3*4 = 24
# or
list_product = partial(reduce, multiply)
x_product = list_product(xs)


# Determining the maximum of a list of numerical values
f = lambda a,b: a if (a > b) else b
reduce(f, [47,11,42,102,13])                  # 102

# Calculating the sum of the numbers from 1 to 100
sum_num = reduce(lambda x, y: x+y, range(1,101))   # 5050



## Enumerate

In [2]:
some_list = ['foo', 'bar', 'baz']
# Invert the list: map every value to the position at which it appears
mapping = {value: index for index, value in enumerate(some_list)}
mapping

{'bar': 1, 'baz': 2, 'foo': 0}

```python
for i, document in enumerate(documents):
    do_something(i, document)

for i, _ in enumerate(documents):
    do_something(i)
```

## zip and Argument Unpacking

In [34]:
list1 = ['a','b','c']
list2 = [1,2,3]
# zip pairs up corresponding elements; list() materializes the pairs because
# zip is lazy in Python 3 and would otherwise display as a zip object.
list(zip(list1, list2))

[('a', 1), ('b', 2), ('c', 3)]

In [35]:
# Unzip the sequence: *pairs passes each pair to zip as a separate argument,
# so zip regroups all first items together and all second items together

pairs = [('a', 1), ('b', 2), ('c', 3)]
letters, numbers = zip(*pairs)      # letters == ('a', 'b', 'c'), numbers == (1, 2, 3)

In [38]:
def add(a, b):
    """Return the sum of a and b."""
    total = a + b
    return total

add(1, 2)                           # 3
# add([1, 2])                       # Error: the list counts as one argument, so b is missing
add(*[1, 2])                        # 3 -- the * unpacks the list into a=1, b=2

3

In [3]:
variance = [1,2,4,8,16,32,64,128,256]
bias_squared = [256,128,64,32,16,8,4,2,1]
# Element-wise sum of the two lists, pairing values by position:
# [257, 130, 68, 40, 32, 40, 68, 130, 257]
total_error = [x + y for x,y in zip(variance, bias_squared)]

## Decorators
Further reading: [A guide to Python's function decorators (thecodeship.com)](http://thecodeship.com/patterns/guide-to-python-function-decorators/)

In [17]:

from functools import wraps


def p_decorate(func):
    """Decorator: wrap the string returned by func in <p>...</p>."""
    @wraps(func)    # keep func's __name__ and docstring on the wrapper
    def func_wrapper(name):
        return "<p>{0}</p>".format(func(name))
    return func_wrapper


def strong_decorate(func):
    """Decorator: wrap the string returned by func in <strong>...</strong>."""
    @wraps(func)
    def func_wrapper(name):
        return "<strong>{0}</strong>".format(func(name))
    return func_wrapper


def div_decorate(func):
    """Decorator: wrap the string returned by func in <div>...</div>."""
    @wraps(func)
    def func_wrapper(name):
        return "<div>{0}</div>".format(func(name))
    return func_wrapper

In [18]:
# We could apply the decorators by hand:
#     get_text = div_decorate(p_decorate(strong_decorate(get_text)))
# but the @ syntax does the same thing. Decorators are applied bottom-up, so
# strong_decorate wraps first and div_decorate ends up outermost.

@div_decorate
@p_decorate
@strong_decorate
def get_text(name):
    """Return the sample sentence with name inserted."""
    return "lorem ipsum, {0} dolor sit amet".format(name)

print(get_text("John"))

<div><p><strong>lorem ipsum, John dolor sit amet</strong></p></div>


## Dates and times

In [1]:
from datetime import datetime, date, time

In [2]:
# Arguments: year, month, day, hour, minute, second
dt =  datetime(2011, 10, 29, 20, 30, 21)

In [4]:
# Individual fields are available as plain attributes
dt.day, dt.minute

(29, 30)

In [6]:
# Split the datetime into its calendar-date part and its time-of-day part
dt.date(), dt.time()

(datetime.date(2011, 10, 29), datetime.time(20, 30, 21))

In [7]:
# Format a datetime as a string (month/day/4-digit year, 24-hour clock hour:minute)
dt.strftime('%m/%d/%Y %H:%M')

'10/29/2011 20:30'

In [8]:
# Strings can be converted (parsed) into datetime objects; the format must
# describe the string exactly. Fields absent from the string (here the time) default to 0.
datetime.strptime('20091031', '%Y%m%d')

datetime.datetime(2009, 10, 31, 0, 0)

In [9]:
# replace() returns a new datetime with the given fields substituted;
# dt itself is left unchanged
dt.replace(minute = 0, second= 0)

datetime.datetime(2011, 10, 29, 20, 0)

In [12]:
# Subtracting two datetime objects gives a timedelta (days plus a clock-time remainder)
dt2 = datetime(2011, 11, 15, 22, 30)

delta = dt2 - dt
print(delta)

17 days, 1:59:39
