In [1]:
### DUKE Computational Statistics Course
https://people.duke.edu/~ccc14/sta-663/index.html

In [1]:
# The factorial function is perhaps the simplest classic example of recursion.

In [2]:
def fact(n):
    if n==1:
        return 1
    return(n*fact(n-1))

In [3]:
fact(4)

24

In [4]:
[fact(n) for n in list(range(1,10))]

[1, 2, 6, 24, 120, 720, 5040, 40320, 362880]

In [5]:
# The Fibonacci sequence is another classic recursion example

In [6]:
def fib(n):
    if n==1 or n==0:
        return 1;   
    return (fib(n-1)+fib(n-2))

In [7]:
[fib(n) for n in list(range(1,10))]

[1, 2, 3, 5, 8, 13, 21, 34, 55]

In [8]:
### Without recursion

In [9]:
f=1
s=1
def fibo(n,f,s):
    for i in list(range(3,n)):
        t=f+s;
        f=s
        s=t
    return t
        

In [10]:
[fibo(n,f,s) for n in list(range(4,10))]

[2, 3, 5, 8, 13, 21]

In [11]:
%timeit fib(20)
%timeit fibo(20,f,s)

100 loops, best of 3: 3.66 ms per loop
1000000 loops, best of 3: 1.68 µs per loop


### Iterators

In [12]:
# Iterators can be created from sequences with the built-in function iter()

xs = [1,2,3]
x_iter = iter(xs)

In [13]:
print(x_iter.__next__())
print(x_iter.__next__())
print(x_iter.__next__())

1
2
3


In [14]:
# Most commonly, iterators are used (automatically) within a for loop
# which terminates when it encouters a StopIteration exception

x_iter = iter(xs)
for x in x_iter:
    print (x)

1
2
3


### Generators
Generators create iterator streams.

In [15]:
# Functions containing the 'yield' keyword return iterators
# After yielding, the function retains its previous state
def count_down(n):
    for i in range(n,0,-1):
        print(i,end=",")
    

In [16]:
count_down(10)

10,9,8,7,6,5,4,3,2,1,

In [17]:
def count_down(n):
    for i in range(n, 0, -1):
        yield i

In [18]:
counter=count_down(10)
print(counter.__next__())
print(counter.__next__())

10
9


In [87]:
for count in counter:
     print(count,end=",")

8,7,6,5,4,3,2,1,

In [92]:
# Iterators can also be created with 'generator expressions'
# which can be coded similar to list generators but with parenthesis
# in place of square brackets

xs1 = [x*x for x in range(5)]
print (xs1)

xs2 = (x*x for x in range(5))

[0, 1, 4, 9, 16]


In [93]:
for xs in xs2:
    print(xs,end=",")

0,1,4,9,16,

In [94]:
# Iterators can be used for infinte functions

def fib():
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a+b

In [95]:
fib()

<generator object fib at 0x104ae4d58>

In [97]:
for i in fib():
    if i>1000:
        break;
    print(i,end=",")

0,1,1,2,3,5,8,13,21,34,55,89,144,233,377,610,987,

### Generators and comprehensions

In [103]:
# A generator expression
print(x for x in range(10))

# A list Comprehension
print([x for x in range(10)])

# A set Comprehension
print({x for x in range(10)})

# A dictionary Comprehension
print({x:x*x for x in range(10)})

<generator object <genexpr> at 0x104ae49e8>
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}
{0: 0, 1: 1, 2: 4, 3: 9, 4: 16, 5: 25, 6: 36, 7: 49, 8: 64, 9: 81}


In [113]:
deepak=(x for x in range(10))
print(deepak.__next__())

for d in deepak:
    print(d,end=",")

0
1,2,3,4,5,6,7,8,9,

## Utilites - enumerate, zip and the ternary if-else operator
Two useful functions and an unusual operator.

In [117]:
# In many programming languages, loops use an index.
# This is possible in Python, but it is more
# idiomatic to use the enumerate function.

# using and index in a loop
xs=[10,20,30,40]
for i in range(len(xs)):
    print(i,xs[i])

0 10
1 20
2 30
3 40


In [120]:
for x in xs:
    print(x,end=",")

10,20,30,40,

In [121]:
# using enumerate
for index,value in enumerate(xs):
    print(index,value)

0 10
1 20
2 30
3 40


In [123]:
# zip is useful when you need to iterate over matched elements of
# multiple lists

xs = [1, 2, 3, 4]
ys = [10, 20, 30, 40]
zs = ['a', 'b', 'c', 'd', 'e']

for x,y,z in zip(xs,ys,zs):
    print(x,y,z)
    
# Note that zip stops when the shortest list is exhausted

1 10 a
2 20 b
3 30 c
4 40 d


In [124]:
# For list comprehensions, the ternary if-else operator is sometimes very useful
[x**2 if x%2 == 0 else x**3 for x in range(10)]

[0, 1, 4, 27, 16, 125, 36, 343, 64, 729]

In [125]:
### Decorators

Decorators are a type of HOF that take a function and return a wrapped function that provides additional useful properties.

Examples:

* logging
* profiling
* Just-In-Time (JIT) compilation

In [128]:
# Here is a simple decorator to time an arbitrary function

def func_timer(func):
    """Times how long the function took."""

    def f(*args, **kwargs):
        import time
        start = time.time()
        results = func(*args, **kwargs)
        print ("Elapsed: %.2fs" % (time.time() - start))
        return results

    return f

In [130]:
# There is a special shorthand notation for decorating functions

@func_timer
def sleepy(msg, sleep=1.0):
    """Delays a while before answering."""
    import time
    time.sleep(sleep)
    print (msg)

sleepy("Hello", 1.5)

Hello
Elapsed: 1.50s


### The operator module
The operator module provides “function” versions of common Python operators (+, *, [] etc) that can be easily used where a function argument is expected.

In [139]:
import operator as op
import functools
# Here is another way to express the sum function
print (functools.reduce(op.add, range(10)))

# The pattern can be generalized
print (functools.reduce(op.mul, range(1, 10)))

45
362880


In [140]:
my_list = [('a', 1), ('bb', 4), ('ccc', 2), ('dddd', 3)]

# standard sort
print (sorted(my_list))

# return list sorted by element at position 1 (remember Python counts from 0)
print (sorted(my_list, key=op.itemgetter(1)))

# the key argument is quite flexible
print (sorted(my_list, key=lambda x: len(x[0]), reverse=True))

[('a', 1), ('bb', 4), ('ccc', 2), ('dddd', 3)]
[('a', 1), ('ccc', 2), ('dddd', 3), ('bb', 4)]
[('dddd', 3), ('ccc', 2), ('bb', 4), ('a', 1)]


In [147]:
sorted(list([1,2,33,3,4]))

[1, 2, 3, 4, 33]

In [148]:
# This is extremely useful to create functions
# that expect a fixed number of arguments

import scipy.stats as stats

def compare(x, y, func):
    """Returne p-value for some appropriate comparison test."""
    return func(x, y)[1]

In [152]:
import numpy as np

x, y = np.random.normal(0, 1, (100,2)).T
print ("p value assuming equal variance    =%.8f" % compare(x, y, stats.ttest_ind))
test = partial(stats.ttest_ind, equal_var=False)
print ("p value not assuming equal variance=%.8f" % compare(x, y, test))


p value assuming equal variance    =0.11390918
p value not assuming equal variance=0.11394092


In [157]:
stats.ttest_ind?

### The itertools module
This provides many essential functions for working with iterators. The permuations and combinations generators may be particularly useful for simulations, and the groupby gnerator is useful for data analyiss.

In [161]:
from itertools import permutations,combinations,islice,cycle,groupby
print (list(islice(cycle('abcd'), 0, 10)))
print (list(islice(cycle('1234'), 0, 10)))

['a', 'b', 'c', 'd', 'a', 'b', 'c', 'd', 'a', 'b']
['1', '2', '3', '4', '1', '2', '3', '4', '1', '2']


In [167]:
animals = sorted(['pig', 'cow', 'giraffe', 'elephant',
                  'dog', 'cat', 'hippo', 'lion', 'tiger'], key=len)
for k,g in groupby(animals,key=len):
    print(k,list(g))

3 ['pig', 'cow', 'dog', 'cat']
4 ['lion']
5 ['hippo', 'tiger']
7 ['giraffe']
8 ['elephant']


In [176]:
print ([''.join(p) for p in permutations('abc')])
print([''.join(num) for num in permutations(list(["cat ","dog ","kitten "]))])

['abc', 'acb', 'bac', 'bca', 'cab', 'cba']
['cat dog kitten ', 'cat kitten dog ', 'dog cat kitten ', 'dog kitten cat ', 'kitten cat dog ', 'kitten dog cat ']


In [184]:
print ([p for p in combinations('abcdef',r=2)])
print([''.join(num) for num in combinations(list(["cat ","dog ","kitten "]),r=3)])

[('a', 'b'), ('a', 'c'), ('a', 'd'), ('a', 'e'), ('a', 'f'), ('b', 'c'), ('b', 'd'), ('b', 'e'), ('b', 'f'), ('c', 'd'), ('c', 'e'), ('c', 'f'), ('d', 'e'), ('d', 'f'), ('e', 'f')]
['cat dog kitten ']


### The toolz, fn and funcy modules


In [185]:
# Here is a small example to convert the DNA of a
# bacterial enzyme into the protein sequence
# using the partition function to generate
# cddons (3 nucleotides) for translation.

codon_table = {
    'ATA':'I', 'ATC':'I', 'ATT':'I', 'ATG':'M',
    'ACA':'T', 'ACC':'T', 'ACG':'T', 'ACT':'T',
    'AAC':'N', 'AAT':'N', 'AAA':'K', 'AAG':'K',
    'AGC':'S', 'AGT':'S', 'AGA':'R', 'AGG':'R',
    'CTA':'L', 'CTC':'L', 'CTG':'L', 'CTT':'L',
    'CCA':'P', 'CCC':'P', 'CCG':'P', 'CCT':'P',
    'CAC':'H', 'CAT':'H', 'CAA':'Q', 'CAG':'Q',
    'CGA':'R', 'CGC':'R', 'CGG':'R', 'CGT':'R',
    'GTA':'V', 'GTC':'V', 'GTG':'V', 'GTT':'V',
    'GCA':'A', 'GCC':'A', 'GCG':'A', 'GCT':'A',
    'GAC':'D', 'GAT':'D', 'GAA':'E', 'GAG':'E',
    'GGA':'G', 'GGC':'G', 'GGG':'G', 'GGT':'G',
    'TCA':'S', 'TCC':'S', 'TCG':'S', 'TCT':'S',
    'TTC':'F', 'TTT':'F', 'TTA':'L', 'TTG':'L',
    'TAC':'Y', 'TAT':'Y', 'TAA':'_', 'TAG':'_',
    'TGC':'C', 'TGT':'C', 'TGA':'_', 'TGG':'W',
    }


In [187]:
gene = """
>ENA|BAE76126|BAE76126.1 Escherichia coli str. K-12 substr. W3110 beta-D-galactosidase
ATGACCATGATTACGGATTCACTGGCCGTCGTTTTACAACGTCGTGACTGGGAAAACCCT
GGCGTTACCCAACTTAATCGCCTTGCAGCACATCCCCCTTTCGCCAGCTGGCGTAATAGC
GAAGAGGCCCGCACCGATCGCCCTTCCCAACAGTTGCGCAGCCTGAATGGCGAATGGCGC
TTTGCCTGGTTTCCGGCACCAGAAGCGGTGCCGGAAAGCTGGCTGGAGTGCGATCTTCCT
GAGGCCGATACTGTCGTCGTCCCCTCAAACTGGCAGATGCACGGTTACGATGCGCCCATC
TACACCAACGTGACCTATCCCATTACGGTCAATCCGCCGTTTGTTCCCACGGAGAATCCG
ACGGGTTGTTACTCGCTCACATTTAATGTTGATGAAAGCTGGCTACAGGAAGGCCAGACG
CGAATTATTTTTGATGGCGTTAACTCGGCGTTTCATCTGTGGTGCAACGGGCGCTGGGTC
GGTTACGGCCAGGACAGTCGTTTGCCGTCTGAATTTGACCTGAGCGCATTTTTACGCGCC
GGAGAAAACCGCCTCGCGGTGATGGTGCTGCGCTGGAGTGACGGCAGTTATCTGGAAGAT
CAGGATATGTGGCGGATGAGCGGCATTTTCCGTGACGTCTCGTTGCTGCATAAACCGACT
ACACAAATCAGCGATTTCCATGTTGCCACTCGCTTTAATGATGATTTCAGCCGCGCTGTA
CTGGAGGCTGAAGTTCAGATGTGCGGCGAGTTGCGTGACTACCTACGGGTAACAGTTTCT
TTATGGCAGGGTGAAACGCAGGTCGCCAGCGGCACCGCGCCTTTCGGCGGTGAAATTATC
GATGAGCGTGGTGGTTATGCCGATCGCGTCACACTACGTCTGAACGTCGAAAACCCGAAA
CTGTGGAGCGCCGAAATCCCGAATCTCTATCGTGCGGTGGTTGAACTGCACACCGCCGAC
GGCACGCTGATTGAAGCAGAAGCCTGCGATGTCGGTTTCCGCGAGGTGCGGATTGAAAAT
GGTCTGCTGCTGCTGAACGGCAAGCCGTTGCTGATTCGAGGCGTTAACCGTCACGAGCAT
CATCCTCTGCATGGTCAGGTCATGGATGAGCAGACGATGGTGCAGGATATCCTGCTGATG
AAGCAGAACAACTTTAACGCCGTGCGCTGTTCGCATTATCCGAACCATCCGCTGTGGTAC
ACGCTGTGCGACCGCTACGGCCTGTATGTGGTGGATGAAGCCAATATTGAAACCCACGGC
ATGGTGCCAATGAATCGTCTGACCGATGATCCGCGCTGGCTACCGGCGATGAGCGAACGC
GTAACGCGAATGGTGCAGCGCGATCGTAATCACCCGAGTGTGATCATCTGGTCGCTGGGG
AATGAATCAGGCCACGGCGCTAATCACGACGCGCTGTATCGCTGGATCAAATCTGTCGAT
CCTTCCCGCCCGGTGCAGTATGAAGGCGGCGGAGCCGACACCACGGCCACCGATATTATT
TGCCCGATGTACGCGCGCGTGGATGAAGACCAGCCCTTCCCGGCTGTGCCGAAATGGTCC
ATCAAAAAATGGCTTTCGCTACCTGGAGAGACGCGCCCGCTGATCCTTTGCGAATACGCC
CACGCGATGGGTAACAGTCTTGGCGGTTTCGCTAAATACTGGCAGGCGTTTCGTCAGTAT
CCCCGTTTACAGGGCGGCTTCGTCTGGGACTGGGTGGATCAGTCGCTGATTAAATATGAT
GAAAACGGCAACCCGTGGTCGGCTTACGGCGGTGATTTTGGCGATACGCCGAACGATCGC
CAGTTCTGTATGAACGGTCTGGTCTTTGCCGACCGCACGCCGCATCCAGCGCTGACGGAA
GCAAAACACCAGCAGCAGTTTTTCCAGTTCCGTTTATCCGGGCAAACCATCGAAGTGACC
AGCGAATACCTGTTCCGTCATAGCGATAACGAGCTCCTGCACTGGATGGTGGCGCTGGAT
GGTAAGCCGCTGGCAAGCGGTGAAGTGCCTCTGGATGTCGCTCCACAAGGTAAACAGTTG
ATTGAACTGCCTGAACTACCGCAGCCGGAGAGCGCCGGGCAACTCTGGCTCACAGTACGC
GTAGTGCAACCGAACGCGACCGCATGGTCAGAAGCCGGGCACATCAGCGCCTGGCAGCAG
TGGCGTCTGGCGGAAAACCTCAGTGTGACGCTCCCCGCCGCGTCCCACGCCATCCCGCAT
CTGACCACCAGCGAAATGGATTTTTGCATCGAGCTGGGTAATAAGCGTTGGCAATTTAAC
CGCCAGTCAGGCTTTCTTTCACAGATGTGGATTGGCGATAAAAAACAACTGCTGACGCCG
CTGCGCGATCAGTTCACCCGTGCACCGCTGGATAACGACATTGGCGTAAGTGAAGCGACC
CGCATTGACCCTAACGCCTGGGTCGAACGCTGGAAGGCGGCGGGCCATTACCAGGCCGAA
GCAGCGTTGTTGCAGTGCACGGCAGATACACTTGCTGATGCGGTGCTGATTACGACCGCT
CACGCGTGGCAGCATCAGGGGAAAACCTTATTTATCAGCCGGAAAACCTACCGGATTGAT
GGTAGTGGTCAAATGGCGATTACCGTTGATGTTGAAGTGGCGAGCGATACACCGCATCCG
GCGCGGATTGGCCTGAACTGCCAGCTGGCGCAGGTAGCAGAGCGGGTAAACTGGCTCGGA
TTAGGGCCGCAAGAAAACTATCCCGACCGCCTTACTGCCGCCTGTTTTGACCGCTGGGAT
CTGCCATTGTCAGACATGTATACCCCGTACGTCTTCCCGAGCGAAAACGGTCTGCGCTGC
GGGACGCGCGAATTGAATTATGGCCCACACCAGTGGCGCGGCGACTTCCAGTTCAACATC
AGCCGCTACAGTCAACAGCAACTGATGGAAACCAGCCATCGCCATCTGCTGCACGCGGAA
GAAGGCACATGGCTGAATATCGACGGTTTCCATATGGGGATTGGTGGCGACGACTCCTGG
AGCCCGTCAGTATCGGCGGAATTCCAGCTGAGCGCCGGTCGCTACCATTACCAGTTGGTC
TGGTGTCAAAAATAA
"""
from toolz import partition

# convert FASTA into single DNA sequence
dna = ''.join(line for line in gene.strip().split('\n')
              if not line.startswith('>'))

# partition DNA into codons (of length 3) and translate to amino acid
codons = (''.join(c) for c in partition(3, dna))
''.join(codon_table[codon] for codon in codons)


'MTMITDSLAVVLQRRDWENPGVTQLNRLAAHPPFASWRNSEEARTDRPSQQLRSLNGEWRFAWFPAPEAVPESWLECDLPEADTVVVPSNWQMHGYDAPIYTNVTYPITVNPPFVPTENPTGCYSLTFNVDESWLQEGQTRIIFDGVNSAFHLWCNGRWVGYGQDSRLPSEFDLSAFLRAGENRLAVMVLRWSDGSYLEDQDMWRMSGIFRDVSLLHKPTTQISDFHVATRFNDDFSRAVLEAEVQMCGELRDYLRVTVSLWQGETQVASGTAPFGGEIIDERGGYADRVTLRLNVENPKLWSAEIPNLYRAVVELHTADGTLIEAEACDVGFREVRIENGLLLLNGKPLLIRGVNRHEHHPLHGQVMDEQTMVQDILLMKQNNFNAVRCSHYPNHPLWYTLCDRYGLYVVDEANIETHGMVPMNRLTDDPRWLPAMSERVTRMVQRDRNHPSVIIWSLGNESGHGANHDALYRWIKSVDPSRPVQYEGGGADTTATDIICPMYARVDEDQPFPAVPKWSIKKWLSLPGETRPLILCEYAHAMGNSLGGFAKYWQAFRQYPRLQGGFVWDWVDQSLIKYDENGNPWSAYGGDFGDTPNDRQFCMNGLVFADRTPHPALTEAKHQQQFFQFRLSGQTIEVTSEYLFRHSDNELLHWMVALDGKPLASGEVPLDVAPQGKQLIELPELPQPESAGQLWLTVRVVQPNATAWSEAGHISAWQQWRLAENLSVTLPAASHAIPHLTTSEMDFCIELGNKRWQFNRQSGFLSQMWIGDKKQLLTPLRDQFTRAPLDNDIGVSEATRIDPNAWVERWKAAGHYQAEAALLQCTADTLADAVLITTAHAWQHQGKTLFISRKTYRIDGSGQMAITVDVEVASDTPHPARIGLNCQLAQVAERVNWLGLGPQENYPDRLTAACFDRWDLPLSDMYTPYVFPSENGLRCGTRELNYGPHQWRGDFQFNISRYSQQQLMETSHRHLLHAEEGTWLNIDGFHMGIGGDDS

###  Exercises
1. Rewrite the following nested loop as a list comprehension

In [189]:
ans = []
for i in range(3):
    for j in range(4):
        ans.append((i, j))
print (ans)

[(0, 0), (0, 1), (0, 2), (0, 3), (1, 0), (1, 1), (1, 2), (1, 3), (2, 0), (2, 1), (2, 2), (2, 3)]


In [190]:
[(i,j) for i in range(3) for j in range(4)]

[(0, 0),
 (0, 1),
 (0, 2),
 (0, 3),
 (1, 0),
 (1, 1),
 (1, 2),
 (1, 3),
 (2, 0),
 (2, 1),
 (2, 2),
 (2, 3)]

In [194]:
### 2. Rewrite the following as a list comprehension

ans = map(lambda x: x*x, filter(lambda x: x%2 == 0, range(5)))
print(ans.)

<map object at 0x10b843b38>


In [196]:
## differntiate a polynomial
poly=[1, 0, 0, 5, 0, -29]
#     1 + 5x3 - 29x5

In [206]:
[((power)*coef) for power,coef in enumerate(poly)]

[0, 0, 0, 15, 0, -145]

In [205]:
[(power,coef) for power,coef in enumerate(poly)]

[(0, 1), (1, 0), (2, 0), (3, 5), (4, 0), (5, -29)]