# Python Functions

In [1]:
import numpy as np

## Custom functions

### Anatomy

name, arguments, docstring, body, return statement

In [2]:
def func_name(arg1, arg2):
    """Docstring starts with a short description.

    May have more information here.

    arg1 = something
    arg2 = something

    Returns something

    Example usage:

    func_name(1, 2)
    """
    # body
    total = arg1 + arg2

    # return statement
    return total

In [3]:
help(func_name)

Help on function func_name in module __main__:

func_name(arg1, arg2)
    Docstring starts wtih a short description.
    
    May have more information here.
    
    arg1 = something
    arg2 = somehting
    
    Returns something
    
    Example usage:
    
    func_name(1, 2)



### Function arguments

positional, keyword, keyword-only, defaults, mutable and immutable arguments

In [2]:
def f(a, b, c, *args, **kwargs):
    """Return every argument as received, to show how arguments are bound.

    a, b and c are filled first; surplus positional values are collected in
    the tuple args and surplus keyword values in the dict kwargs.
    """
    extra_positional, extra_keyword = args, kwargs
    return (a, b, c, extra_positional, extra_keyword)

In [3]:
f(1, 2, 3, 4, 5, 6, x=7, y=8, z=9)

(1, 2, 3, (4, 5, 6), {'x': 7, 'y': 8, 'z': 9})

In [4]:
def g(a, b, c, *, x, y, z):
    """Return the six arguments as one tuple, in declaration order.

    The bare * makes x, y and z keyword-only: they must be passed by name.
    """
    positional = (a, b, c)
    keyword_only = (x, y, z)
    return positional + keyword_only

In [7]:
# x, y and z of g are keyword-only, so passing them positionally raises a
# TypeError; catch it and show the message instead of a traceback.
try:
    g(1,2,3,4,5,6)
except TypeError as e:
    print(e)

g() takes 3 positional arguments but 6 were given


In [5]:
g(1,2,3,x=4,y=5,z=6)

(1, 2, 3, 4, 5, 6)

In [9]:
def h(a=1, b=2, c=3):
    """Return (a, b, c); every parameter has a default, so any may be omitted."""
    values = (a, b, c)
    return values

In [10]:
h()

(1, 2, 3)

In [11]:
h(b=9)

(1, 9, 3)

In [12]:
h(7,8,9)

(7, 8, 9)

In [12]:
def add(x, y):
    """Adds the arguments."""
    total = x + y
    return total

In [13]:
add(3,4)

7

### Default mutable argument

binding is fixed at function definition, the default=None idiom

In [14]:
def f(a, x=[]):
    """Append a to x and return x.

    The default list is created once, when the function is defined, and that
    same list object is reused by every call that omits x.
    """
    x.append(a)
    return x  # not deterministic and has a side effect, because the default list x is mutable

In [15]:
f(1)

[1]

In [15]:
f(2)

[1, 2]

In [None]:
f(2,[])

In [16]:
def f(a, x=None):
    """Append a to x and return x, starting from a fresh list when x is omitted.

    None is an immutable sentinel default, so separate calls no longer share
    a list the way a mutable default would.
    """
    target = [] if x is None else x
    target.append(a)
    return target

In [17]:
f(1)

[1]

In [18]:
f(2)

[2]

## Pure functions

deterministic, no side effects

In [18]:
def f1(x):
    """Pure: computes x**2 as a new value; the caller's x is left unchanged."""
    return x**2

In [19]:
def f2(x):
    """Pure if we ignore local state change.

    The x in the function behaves like a copy: the assignment rebinds the
    local name to a new object instead of modifying the caller's object.
    """
    x = x**2
    return x

In [20]:
def f3(x):
    """Impure if x is mutable.

    Augmented assignment is an in-place operation for mutable structures,
    so the caller's object is modified as well.
    """
    x **= 2
    return x

In [22]:
import numpy as np
a = 2
b = np.array([1,2,3])

In [23]:
f1(a), a

(4, 2)

In [24]:
f1(b), b

(array([1, 4, 9]), array([1, 2, 3]))

In [25]:
f2(a), a

(4, 2)

In [26]:
f2(b), b

(array([1, 4, 9]), array([1, 2, 3]))

In [27]:
f3(a), a

(4, 2)

In [28]:
f3(b), b

(array([1, 4, 9]), array([1, 4, 9]))

In [30]:
x=1
x **=2
x

1

In [29]:
def f4():
    """Return a random integer between 0 and 10, both ends included.

    Stochastic functions are technically impure, since the state of the
    global random number generator changes between function calls.
    """
    import random

    drawn = random.randint(0, 10)
    return drawn

In [30]:
f4(), f4(), f4()

(1, 0, 7)

## Recursive functions

A recursive function is defined in terms of itself.


It needs a base case plus a recursive case.


Euclidean GCD algorithm
```
gcd(a, 0) = a
gcd(a, b) = gcd(b, a mod b)
```

In [31]:
def factorial(n):
    """Simple recursive function: return n! for a non-negative integer n.

    Base case: factorial(0) == 1.
    Recursive case: factorial(n) == n * factorial(n - 1).

    Raises ValueError if n is negative. Without this guard the recursion
    would never reach the base case and would only stop with a
    RecursionError.
    """
    # A non-integral n (e.g. 2.5) steps past zero and is rejected here too.
    if n < 0:
        raise ValueError("factorial() is only defined for non-negative integers")
    if n == 0:
        return 1
    return n * factorial(n - 1)

In [32]:
factorial(4)
# Python cannot recurse very deeply: every pending call keeps its own stack
# frame until it returns, and the interpreter caps the recursion depth.

24

In [33]:
def factorial1(n):
    """Non-recursive version: build the product 1 * 2 * ... * n in a loop."""
    product = 1
    # Multiplying by 1 changes nothing, so the factors can start at 2.
    for factor in range(2, n + 1):
        product = product * factor
    return product

In [34]:
factorial1(4)

24

In [35]:
def gcd(a, b):
    """Greatest common divisor of a and b by the Euclidean algorithm.

    Base case: gcd(a, 0) == a.
    Recursive case: gcd(a, b) == gcd(b, a mod b).
    """
    return a if b == 0 else gcd(b, a % b)

In [36]:
gcd(16, 24)

8

## Generators

yield and laziness, infinite streams

In [37]:
def count(n=0):
    """Generate n, n + 1, n + 2, ... without end.

    The yield statement makes this a generator function: values are produced
    one at a time, only when the caller asks for the next one.
    """
    current = n
    while True:
        yield current
        current += 1

In [38]:
# count(10) never finishes on its own, so the loop has to break out explicitly.
for i in count(10):
    print(i)
    if i >= 15:
        break

10
11
12
13
14
15


In [39]:
from itertools import islice

In [40]:
list(islice(count(), 10, 15))

[10, 11, 12, 13, 14]

In [41]:
def updown(n):
    """Generate 0, 1, ..., n - 1 followed by n, n - 1, ..., 1."""
    ascending = range(n)
    descending = range(n, 0, -1)
    yield from ascending
    yield from descending

In [42]:
updown(5)

<generator object updown at 0x7fc5698a85c8>

In [43]:
list(updown(5))

[0, 1, 2, 3, 4, 5, 4, 3, 2, 1]

## First class functions

functions as arguments, functions as return values

In [44]:
def double(x):
    """Return x multiplied by two."""
    doubled = x * 2
    return doubled

def twice(x, func):
    """Apply func to x, then apply func again to that result."""
    once = func(x)
    return func(once)

In [45]:
twice(3, double)

12

Example from standard library

In [46]:
xs = 'banana apple guava'.split()

In [47]:
xs

['banana', 'apple', 'guava']

In [48]:
sorted(xs)

['apple', 'banana', 'guava']

In [52]:
sorted(xs, key=lambda s: s.count('a'))  # a lambda is a short function with no name -- handy when it is used only once

['apple', 'guava', 'banana']

In [51]:
lambda(1)

SyntaxError: invalid syntax (<ipython-input-51-37e878958abf>, line 1)

In [54]:
# Functions can be passed in as arguments and handed back as return values
def f(n):
    """Return a function: one printing "hello" when n == 0, otherwise one printing "goodbye"."""
    def g():
        print("hello")

    def h():
        print("goodbye")

    return g if n == 0 else h

In [55]:
fool = f(0)
fool()

hello


In [56]:
fool2 = f(1)
fool2()

goodbye


## Function dispatch

Poor man's switch statement

In [57]:
def add(x, y):
    """Return the sum of x and y."""
    total = x + y
    return total

def mul(x, y):
    """Return the product of x and y."""
    product = x * y
    return product

In [58]:
# Dispatch table: one-letter operation code -> function that implements it
ops = dict(a=add, m=mul)

In [59]:
items = zip('aammaammam', range(10), range(10))

In [62]:
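#zip(xs, ys, zs) pairs up the i-th elements of its arguments
#output:
#(x0,y0,z0)
#(x1,y1,z1)
#and it stops at the shortest input
#print(items) only shows the zip object; list(items) shows the tuples, but it also uses up the iterator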

<zip object at 0x7fc569860188>


In [56]:
# Look up each operation by its key and apply it to the paired operands
for key, x, y in items:
    print(key, x, y, ops[key](x, y))

a 0 0 0
a 1 1 2
m 2 2 4
m 3 3 9
a 4 4 8
a 5 5 10
m 6 6 36
m 7 7 49
a 8 8 16
m 9 9 81


## Closure

Capture of argument in enclosing scope

In [63]:
def f(x):
    """Return a function that adds x to its argument.

    The returned function still has access to x after f has returned; this
    captured variable is what makes it a closure.
    """
    def g(y):
        """Add the captured x to y."""
        total = x + y
        return total

    return g

In [64]:
f1 = f(0)
f2 = f(10)

In [65]:
f1(5), f2(5)

(5, 15)

## Decorators

A timing decorator

In [66]:
def timer(f):
    """Decorator that makes f return (result, elapsed_seconds).

    f       -- the function to time
    Returns a wrapper that accepts the same arguments as f, calls it, and
    returns a 2-tuple: f's own return value and the time the call took in
    seconds.

    The wrapper keeps f's name and docstring (functools.wraps), so help()
    and tracebacks still refer to the decorated function.
    """
    import functools
    import time

    @functools.wraps(f)
    def g(*args, **kwargs):
        # perf_counter is a monotonic, high-resolution clock meant for
        # measuring durations; time.time() can jump if the system clock changes.
        tic = time.perf_counter()
        res = f(*args, **kwargs)
        toc = time.perf_counter()
        return res, toc - tic
    return g
# A decorator is a closure over a function: g captures f

In [67]:
def f(n):
    """Return 0 + 1 + ... + (n - 1), accumulated in an explicit loop."""
    total = 0
    for k in range(n):
        total = total + k
    return total

In [62]:
timed_f = timer(f)

In [63]:
timed_f(100000)

(4999950000, 0.008098125457763672)

Decorator syntax

In [64]:
@timer
def g(n):
    """Sum 0 + 1 + ... + (n - 1) in a loop; @timer wraps the call to time it."""
    total = 0
    for k in range(n):
        total = total + k
    return total

In [65]:
g(100000)  # @timer replaced g with the timing wrapper when g was defined, so g now returns (result, elapsed seconds)

(4999950000, 0.008046865463256836)

In [None]:
#@numba.jit

## Anonymous functions

Short, one-use lambdas

In [66]:
f = lambda x: x**2

In [67]:
f(3)

9

In [68]:
g = lambda x, y: x+y #name the anonymous function

In [69]:
g(3,4)

7

## Map, filter and reduce

Functional building blocks

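map works like a for loop over the items -- it is lazy, like a generator -- similar to map in R

filter works like filter in R

reduce folds a sequence into a single value (a reduction)

In most cases filter first, then map: filtering early can save a lot of computation.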

In [68]:
xs = range(10)
list(map(lambda x: x**2, xs))

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [69]:
list(filter(lambda x: x%2 == 0, xs))

[0, 2, 4, 6, 8]

In [73]:
from functools import reduce

In [73]:
reduce(lambda x, y: x+y, xs)

45

In [74]:
reduce(lambda x, y: x+y, xs, 100)

145

## Functional modules in the standard library

itertools, functools and operator

In [74]:
import operator as op

In [75]:
reduce(op.add, range(10))

45

In [76]:
import itertools as it

In [77]:
list(it.islice(it.cycle([1,2,3]), 1, 10))

[2, 3, 1, 2, 3, 1, 2, 3, 1]

In [79]:
list(it.permutations('abc', 2))

[('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'c'), ('c', 'a'), ('c', 'b')]

In [80]:
list(it.combinations('abc', 2))

[('a', 'b'), ('a', 'c'), ('b', 'c')]

In [81]:
from functools import partial, lru_cache

In [82]:
def f(a, b, c):
    """Return the sum of the three arguments."""
    subtotal = a + b
    return subtotal + c

In [84]:
g = partial(f, b = 2, c=3)  # fix b and c ahead of time, so g needs only the remaining argument a

In [85]:
g(1)

6

In [86]:
def fib(n, trace=False):
    """Return the n-th Fibonacci number, where fib(1) == fib(2) == 1.

    With trace=True every call first prints "fib(n),", which shows how often
    the plain recursion recomputes the same values.
    """
    if trace:
        print("fib(%d)" % n, end=',')
    return 1 if n <= 2 else fib(n - 1, trace) + fib(n - 2, trace)

In [86]:
fib(10, True)

fib(10),fib(9),fib(8),fib(7),fib(6),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(4),fib(3),fib(2),fib(1),fib(2),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(6),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(4),fib(3),fib(2),fib(1),fib(2),fib(7),fib(6),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(4),fib(3),fib(2),fib(1),fib(2),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(8),fib(7),fib(6),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(4),fib(3),fib(2),fib(1),fib(2),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(6),fib(5),fib(4),fib(3),fib(2),fib(1),fib(2),fib(3),fib(2),fib(1),fib(4),fib(3),fib(2),fib(1),fib(2),

55

In [87]:
%timeit -r1 -n100 fib(20)

2.9 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


In [88]:
@lru_cache(3)
def fib1(n, trace=False):
    """Fibonacci with memoization: results of recent calls are kept by lru_cache.

    A call answered from the cache never runs the body, so with trace=True
    only the calls that actually compute something are printed.
    """
    if trace:
        print("fib(%d)" % n, end=',')
    return 1 if n <= 2 else fib1(n - 1, trace) + fib1(n - 2, trace)

In [89]:
fib1(10, True)

fib(10),fib(9),fib(8),fib(7),fib(6),fib(5),fib(4),fib(3),fib(2),fib(1),

55

In [90]:
%timeit -r1 -n100 fib1(20)

343 ns ± 0 ns per loop (mean ± std. dev. of 1 run, 100 loops each)


## Using `toolz`

functional power tools

In [91]:
import toolz as tz
import toolz.curried as c

Find the 5 most common sequences of length 3 in the dna variable.

In [92]:
dna = np.random.choice(list('ACTG'), (10,80), p=[.1,.2,.3,.4])

In [93]:
dna

array([['C', 'G', 'A', 'T', 'C', 'G', 'T', 'A', 'C', 'G', 'A', 'G', 'G',
        'A', 'G', 'T', 'G', 'G', 'C', 'C', 'G', 'G', 'T', 'G', 'C', 'A',
        'G', 'T', 'G', 'T', 'T', 'A', 'C', 'G', 'G', 'A', 'G', 'G', 'T',
        'G', 'T', 'G', 'T', 'A', 'G', 'A', 'G', 'G', 'C', 'C', 'G', 'C',
        'T', 'T', 'T', 'G', 'A', 'T', 'G', 'C', 'T', 'C', 'G', 'T', 'G',
        'G', 'T', 'C', 'G', 'G', 'T', 'G', 'T', 'G', 'C', 'A', 'G', 'C',
        'G', 'T'],
       ['G', 'G', 'G', 'G', 'G', 'T', 'G', 'T', 'A', 'C', 'G', 'G', 'C',
        'C', 'G', 'A', 'T', 'C', 'T', 'G', 'G', 'G', 'T', 'T', 'A', 'C',
        'T', 'G', 'G', 'T', 'T', 'G', 'C', 'T', 'G', 'G', 'A', 'G', 'G',
        'G', 'T', 'T', 'G', 'G', 'G', 'C', 'C', 'T', 'G', 'G', 'G', 'C',
        'G', 'A', 'A', 'C', 'G', 'T', 'G', 'T', 'A', 'G', 'T', 'T', 'C',
        'C', 'A', 'C', 'G', 'G', 'A', 'G', 'T', 'G', 'G', 'T', 'G', 'C',
        'T', 'T'],
       ['T', 'C', 'A', 'G', 'G', 'C', 'G', 'G', 'T', 'C', 'G', 'C', 'C',
        'A', 

In [94]:
tz.pipe(
    dna,
    c.map(lambda s: ''.join(s)),
    list
)

['CGATCGTACGAGGAGTGGCCGGTGCAGTGTTACGGAGGTGTGTAGAGGCCGCTTTGATGCTCGTGGTCGGTGTGCAGCGT',
 'GGGGGTGTACGGCCGATCTGGGTTACTGGTTGCTGGAGGGTTGGGCCTGGGCGAACGTGTAGTTCCACGGAGTGGTGCTT',
 'TCAGGCGGTCGCCAGGGCAGACGGGGTCCCGGGGGGTTGTGTAACACCTTGGTGAGATGGCGAGTCTGCCTCTGCGGGGG',
 'CGGCGTGACGTGGCAGTGTGGGGGGGGCTTTGAGCGTTATGGGCCACCGGGGCATTGTTTCTGCGGCGTCGCTGTAGCAG',
 'GCTAGGGGTGGGTAGCGCGTTGGGCTGAGTGGCTCACTGAGGGTTGCGGAGGGTTCAGGCGTTGATGGGTGGCGGTCAGT',
 'CCGTCCTCGGCAAACATGTAGGGGATGGCCAGCGTTGGTAGCATTAGGGCGTGTAGCGCGTGCCCAGCGCGTTGTTGTGG',
 'CAGCGGAAGCTGTCGCTGCCCGGTTCTTGGTCGGGGTCTGGTTGTTGCGGGGGGGTCTTGATATCTGCCGGTATGTACCT',
 'GGTGATCGTTGGTGCCTTTGGCTCTTGCGGCTTTGGGTCGGTGGTCAATGGTGTTGGGACGCGTCCTCGGGTCGCGTGGG',
 'GGCTGCTTTGTGACGTACGCAGATGTTTAGTTTTGTGCGGCGTGGGGTGCGTGTGGGTTAGGTGGTTGCACACTGTAGTG',
 'GCTTATTGGGTGTGCTCGCCGTTAGGGAGAACGACGGTTCGGCGCGGCGTTGTGCTCGCCCGGTGCCCATTTTAGGGGCT']

In [95]:
# Count every length-3 subsequence; dna is threaded through the stages in order.
res = tz.pipe(
    dna,
    c.map(lambda s: ''.join(s)),  # each row of letters -> one string
    lambda s: ''.join(s),         # all row strings -> one long string
    # NOTE(review): after the join above, a window can straddle the boundary
    # between two rows -- confirm that such cross-row triples should be counted.
    c.sliding_window(3),          # every run of 3 consecutive letters
    c.map(lambda s: ''.join(s)),  # each window -> a 3-letter string
    tz.frequencies                # presumably a {triple: count} mapping -- see toolz docs
)

In [96]:
# Sort the (sequence, count) pairs by descending count and keep the first five
sorted(res.items(), key=lambda pair: -pair[1])[:5]

[('GGG', 56), ('GTG', 42), ('GGT', 40), ('TGG', 37), ('GGC', 32)]