# Default arguments

In [None]:
def default_arg(x, exponent=2):
    """Raise ``x`` to the power ``exponent``.

    ``exponent`` is optional and defaults to 2, so calling the function with
    a single argument squares it.
    """
    return pow(x, exponent)

In [None]:
default_arg(2)

In [None]:
default_arg(2, 3)

In [None]:
default_arg(2, exponent = 5)

In [None]:
def return_two():
    """Return the two values 1 and 2 packed into a single tuple."""
    pair = (1, 2)
    return pair

In [None]:
return_two()

In [None]:
foo = 12
bar = 42

In [None]:
foo, bar = bar, foo

In [None]:
foo, bar

In [None]:
one, two, three = range(1, 4)

In [None]:
one, two, three

# Dictionaries

In [None]:
my_dict = {1000: 'a', 1024: 'b'}
my_dict

In [None]:
my_dict[1000]

In [None]:
my_dict.keys()

In [None]:
my_dict.values()

In [None]:
my_dict[500] = []

In [None]:
my_dict

In [None]:
hash(12)

In [None]:
hash(2.3)

In [None]:
hash('hello')

In [None]:
hash((1, 'foo'))

In [None]:
hash([1, 'foo'])

In [None]:
hash({'foo': 12})

In [None]:
fns = {'sum': sum,
       'len': len}

In [None]:
fns['sum']([1,2,3])

In [None]:
fns['len']([1,2,3])

In [None]:
my_dict.pop(500)

In [None]:
my_dict

In [None]:
my_dict['foo']

In [None]:
my_dict.get('foo', "Not Here!")

In [None]:
my_dict

In [None]:
my_dict.setdefault(5, 5*5)

In [None]:
my_dict

In [None]:
my_dict.setdefault(5, 5*5 - 1)

In [None]:
5 in my_dict

# Dict comprehensions

In [None]:
[i*2 for i in [1,2,3]]

In [None]:
{i: i**2 for i in [2,3,4]}

# Special dictionaries

In [None]:
from collections import defaultdict, Counter

In [None]:
int

In [None]:
int()

In [None]:
float()

In [None]:
count_dict = defaultdict(int)

In [None]:
count_dict['apple']

In [None]:
count_dict['orange'] += 1

In [None]:
count_dict

In [None]:
counter = Counter([1,2,2,2,3,3,5])

In [None]:
counter

In [None]:
counter.most_common(2)

# Gotchas

In [None]:
def add_one(some_list):
    """Append the integer 1 to ``some_list`` in place.

    The object passed in by the caller is mutated directly; the function
    itself returns ``None``.
    """
    some_list.append(1)
    return None

In [None]:
my_list = []

In [None]:
add_one(my_list)

In [None]:
my_list

In [None]:
def default_list(li=[]):
    """Append 1 to ``li`` and return it.

    Gotcha (kept on purpose for demonstration): the default ``[]`` is created
    once, when the ``def`` statement runs, and that same list object is
    reused by every call that omits ``li``. Successive calls without an
    argument therefore return an ever-growing list.
    """
    li.append(1)
    return li

In [None]:
default_list()

In [None]:
default_list()

In [None]:
def better_list(li=None):
    """Append 1 to ``li`` and return it, creating a fresh list when omitted.

    Using ``None`` as the default (instead of ``[]``) means every call that
    omits ``li`` gets its own new list rather than a shared one.

    Parameters:
        li: the list to append to, or ``None`` to start from a new empty list.

    Returns:
        The list that was appended to (the caller's object when one is given).
    """
    # Identity check, not equality: ``== None`` would invoke a custom
    # ``__eq__`` on the argument and could wrongly discard the caller's list.
    if li is None:
        li = []
    li.append(1)
    return li

In [None]:
better_list(), better_list(), better_list()

In [None]:
my_dict[[1,2]]

In [None]:
{sum: 5}

In [None]:
def my_sum(*args):
    """Return the sum of all positional arguments (0 when none are given)."""
    total = sum(args)
    return total

In [None]:
my_sum(1)

In [None]:
my_sum(1,2,3,4,5,6)

In [None]:
range(10)

In [None]:
my_sum(*range(10))

In [None]:
def print_many(*args):
    """Print every positional argument on its own line.

    Nothing is printed when called without arguments. Returns ``None``.
    """
    for item in args:
        # Call form with a single argument behaves the same on Python 2 and 3,
        # whereas the bare ``print item`` statement is a SyntaxError on 3.
        print(item)

In [None]:
print_many(1,4,'foo')

In [None]:
def apply_to_many(fn, *args):
    """Call ``fn`` once with all extra positional arguments as a single tuple.

    Note that the tuple is passed as one argument (``fn(args)``), not
    unpacked, so ``fn`` must accept an iterable, as ``sum`` and ``len`` do.
    """
    collected = args
    return fn(collected)

In [None]:
apply_to_many(sum, 1, 2, 3)

In [None]:
def many_named_args(**args):
    """Print the dict of all keyword arguments the function was called with.

    ``args`` collects every ``name=value`` pair passed by the caller; an empty
    dict is printed when no keyword arguments are given. Returns ``None``.
    """
    # Call form with a single argument works on both Python 2 and Python 3.
    print(args)

In [None]:
many_named_args(arg1=42, arg2=9, name="James")

In [None]:
def wrapped(fn, *args, **kwargs):
    """Forward the call to ``fn`` only when ``fn`` is ``sum`` or ``len``.

    All positional and keyword arguments are passed through unchanged.
    For any other callable nothing is invoked and ``None`` is returned.
    """
    is_allowed = fn == sum or fn == len
    if not is_allowed:
        return None
    return fn(*args, **kwargs)

In [None]:
wrapped(sum, [1,2,3])

In [None]:
wrapped(int, 3, key=5)

# Decorators

In [None]:
def cached(fn):
    """Decorator that memoizes ``fn`` by its positional arguments.

    Results are stored in a dict private to the returned wrapper, keyed by
    the tuple of positional arguments, so all arguments must be hashable
    (an unhashable argument raises ``TypeError`` on the dict lookup).
    Keyword arguments are not supported by the wrapper.

    The current cache contents are printed before every lookup so the
    memoization can be observed while experimenting.

    Parameters:
        fn: the callable to wrap.

    Returns:
        A wrapper that calls ``fn`` at most once per distinct argument tuple.
    """
    # Local import keeps this cell runnable on its own.
    import functools

    result_cache = {}

    # wraps() copies fn's name and docstring onto the wrapper, so the
    # decorated function does not show up as "inner".
    @functools.wraps(fn)
    def inner(*args):
        # Call form with a single argument works on Python 2 and Python 3.
        print(result_cache)
        if args in result_cache:
            return result_cache[args]
        result = fn(*args)
        result_cache[args] = result
        return result

    return inner

In [None]:
my_cached_sum = cached(my_sum)

In [None]:
my_cached_sum(1,2,3)

In [None]:
my_cached_sum(1,2,3)

In [None]:
@cached
def my_cached_sum(*args):
    """Return the sum of all positional arguments, memoized via ``cached``."""
    total = sum(args)
    return total

In [None]:
def my_cached_sum(*args):
    """Return the sum of all positional arguments."""
    return sum(args)

# Rebind the name to the wrapped version by hand; this is the explicit
# spelling of what putting ``@cached`` above the ``def`` does.
my_cached_sum = cached(my_cached_sum)

# More on reading .csv

In [None]:
import csv

In [None]:
# Read every row of the CSV into a list; DictReader yields one mapping per
# row, keyed by the column names taken from the file's header line.
with open('data/trends.csv') as trends:
    data = list(csv.DictReader(trends))

In [None]:
data[:5]

# JSON

In [None]:
import json

In [None]:
json.loads("""
{"foo": 12,
 "bar": [1,2,5]}""")

In [None]:
data = json.loads("""
{"foo": 12,
 "bar": [1,2,5]}""")

In [None]:
data, type(data)

In [None]:
# Pretty-print the parsed data as JSON with two-space indentation.
# Call form with a single argument works on both Python 2 and Python 3.
print(json.dumps(data, indent=2))

# Objects

In [None]:
class Person(object):
    """Empty class; attributes can be attached to each instance after creation."""

In [None]:
kirk = Person()

In [None]:
kirk

In [None]:
kirk.firstname = "James"

In [None]:
kirk.middlename = "Tiberius"

In [None]:
kirk.lastname = "Kirk"

In [None]:
kirk.firstname

In [None]:
spock = Person()

In [None]:
spock.lastname

In [None]:
class BetterPerson(object):
    """Class whose instances all start out with ``is_better`` set to True."""

    # Defined on the class, so it is shared by every instance until an
    # instance assigns its own ``is_better``; that assignment only affects
    # the one instance, not the class or other instances.
    is_better = True

In [None]:
guy = BetterPerson()

In [None]:
guy.is_better

In [None]:
guy.is_better = False

In [None]:
guy.is_better

In [None]:
other_guy = BetterPerson()

In [None]:
other_guy.is_better

In [None]:
class EvenBetterPerson(object):
    """A person whose first and last name must be supplied at construction."""

    def __init__(self, firstname, lastname):
        """Store both name parts as attributes of the new instance."""
        self.firstname, self.lastname = firstname, lastname

In [None]:
kirk_v2 = EvenBetterPerson("James T.", "Kirk")

In [None]:
kirk_v2.firstname, kirk_v2.lastname

In [None]:
EvenBetterPerson()

In [None]:
class EvenBetterPersonWithPrint(object):
    """A person with a first and last name that can print itself."""

    def __init__(self, firstname, lastname):
        """Store both name parts as attributes of the new instance."""
        self.firstname = firstname
        self.lastname = lastname

    def print_me(self):
        """Print the person as ``Person: <lastname>, <firstname>``."""
        # Alternative spelling using str.format:
        #   print("Person: {me.lastname}, {me.firstname}".format(me=self))
        # Call form with a single argument works on Python 2 and Python 3.
        print("Person: " + self.lastname + ", " + self.firstname)

In [None]:
p = EvenBetterPersonWithPrint("Stephen", "Hawking")
p.print_me()

In [None]:
p.firstname

In [None]:
p.lastname

In [None]:
p_prime = p

In [None]:
p_prime.lastname, p_prime.firstname

In [None]:
del p_prime

In [None]:
p_prime

# Inheritance

In [None]:
class PersonWithFullName(EvenBetterPersonWithPrint):
    """Extends ``EvenBetterPersonWithPrint`` with a full-name accessor."""

    def get_full_name(self):
        """Return the first and last name joined by a single space."""
        first, last = self.firstname, self.lastname
        return first + " " + last

In [None]:
p = PersonWithFullName("Donald", "Trump")

In [None]:
p.print_me()

In [None]:
p.get_full_name()

In [None]:
type(p)

In [None]:
isinstance(p, EvenBetterPersonWithPrint)

# Web scraping

- Be nice.
- Follow the rules.
- Read the terms and conditions.
- Read the robots.txt.

In [None]:
# Read the whole example page into a string; the ``with`` block guarantees
# the file handle is closed again, even if reading fails.
with open('example.html') as html_file:
    html = html_file.read()

In [None]:
# Call form with a single argument works on both Python 2 and Python 3.
print(html)

In [None]:
from bs4 import BeautifulSoup

In [None]:
soup = BeautifulSoup(html, 'lxml')

In [None]:
soup.body

In [None]:
soup.head

In [None]:
soup.li

In [None]:
soup('li')

In [None]:
soup.ul

In [None]:
soup.ul.li

In [None]:
soup.li.text

In [None]:
soup.img['src']

In [None]:
soup.img.name

In [None]:
soup.img.parent.name

In [None]:
soup.ul

In [None]:
list(soup.ul.children)

In [None]:
soup('h2')

In [None]:
soup('h2', {'id': 'list-header'})

In [None]:
soup('a')

# Scrapy

You will find the scraping examples at https://github.com/dhesse/stk_inf_scraping.

In [None]:
# ``urljoin`` lives in ``urllib.parse`` on Python 3 and in ``urlparse`` on
# Python 2; try the modern location first and fall back to the old one.
try:
    from urllib.parse import urljoin
except ImportError:
    from urlparse import urljoin

In [None]:
urljoin('http://localhost:8888/files/example.html', 'example2.html')