# The Itertools Module

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import itertools
import numpy as np
import operator

In [14]:
import numpy as np


def sigmoid(x):
    """
    Logistic function 1 / (1 + e^(-x)).

    The exponential is evaluated with np.exp, so x may be a plain number
    or a NumPy array (in which case the result is computed element-wise).
    """
    exp_neg_x = np.exp(-x)
    return 1 / (1 + exp_neg_x)


# Inputs and initial weights for the backpropagation cell further down.
x = np.array([0.5, 0.1, -0.2])  # input vector (3 values)
target = 0.6  # value the network output is subtracted from to get the error
learnrate = 0.5  # factor applied to the error terms when computing the weight changes

# 3x2 matrix: combined with x through np.dot to produce the hidden layer input
weights_input_hidden = np.array([[0.5, -0.6],
                                 [0.1, -0.2],
                                 [0.1, 0.7]])

# 2-element vector: combined with the hidden layer output through np.dot
# to produce the output layer input
weights_hidden_output = np.array([0.1, -0.3])

In [43]:
weights_input_hidden

array([[ 0.5, -0.6],
       [ 0.1, -0.2],
       [ 0.1,  0.7]])

In [47]:
output_error_term

0.028730669543515018

In [None]:
# ---- Forward pass ----
# input layer -> hidden layer
hidden_layer_input = np.dot(x, weights_input_hidden)
hidden_layer_output = sigmoid(hidden_layer_input)

# hidden layer -> output
output_layer_in = np.dot(hidden_layer_output, weights_hidden_output)
output = sigmoid(output_layer_in)

# ---- Backward pass ----
# Output error: difference between the target and the network output
error = target - output

# Error term for the output layer: the error scaled by output * (1 - output)
output_error_term = error * output * (1 - output)

# Error term for the hidden layer: the output error term weighted by the
# hidden-to-output weights, then scaled by hidden * (1 - hidden)
hidden_error = output_error_term * weights_hidden_output
hidden_error_term = hidden_error * hidden_layer_output * (1 - hidden_layer_output)

# Change in weights for hidden layer to output layer
output_step = learnrate * output_error_term
delta_w_h_o = output_step * hidden_layer_output

# Change in weights for input layer to hidden layer; x is turned into a
# column so the product has one row per input and one column per hidden unit
hidden_step = learnrate * hidden_error_term
delta_w_i_h = hidden_step * x[:, np.newaxis]

for label, delta in (
        ('Change in weights for hidden layer to output layer:', delta_w_h_o),
        ('Change in weights for input layer to hidden layer:', delta_w_i_h)):
    print(label)
    print(delta)



In [6]:
# Sample lists shared by the itertools examples below.
letters = ['a', 'b', 'c', 'd', 'e', 'f']
booleans = [1, 0, 1, 0, 0, 1]  # 1/0 flags; used as the selector list for compress()
numbers = [23, 20, 44, 32, 7, 12]
decimals = [0.1, 0.7, 0.4, 0.4, 0.5]  # only 5 items (the other lists have 6), so pairing it with numbers yields 5 results

**list(itertools.chain(list1, list2))** is used to combine multiple lists, in order, into one list

In [37]:
print list(itertools.chain(letters,booleans,numbers,decimals))

['a', 'b', 'c', 'd', 'e', 'f', 1, 0, 1, 0, 0, 1, 23, 20, 44, 32, 7, 12, 0.1, 0.7, 0.4, 0.4, 0.5]


In [38]:
print list(itertools.chain(decimals,booleans))

[0.1, 0.7, 0.4, 0.4, 0.5, 1, 0, 1, 0, 0, 1]


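**list(itertools.ifilter(lambda x: f(x), list))** <br>filters the list and returns only the elements xj for which f(xj) evaluates to true.  f(x) does not have to return True or False (1 or 0): any non-zero, non-empty result counts as true.  ifilter never returns f(x) itself, so a function such as x+2, which is true for every element of the example list, simply returns the original elements unchanged.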

In [39]:
print list(itertools.ifilter(lambda x: x>20, numbers))

[23, 44, 32]


In [41]:
print list(itertools.ifilter(lambda x: x+2, numbers))

[23, 20, 44, 32, 7, 12]


**itertools.count(start, step)** <br>returns an iterator (not a list) that starts at _start_, and whose subsequent elements are incremented by _step_, such that it yields <br>
start, start+step, start+2*step, etc.<br>
This iterator goes on infinitely, so never wrap it directly in list(); you need a for loop with a break, as shown below.

In [42]:
# Collect the values 10, 10.25, 10.5, ... that are below 20.
x = []
for i in itertools.count(10, 0.25):
    if i >= 20:
        # count() never stops on its own, so the loop has to be ended by hand
        break
    x.append(i)
# Parenthesised print (already used by other cells in this notebook) gives the
# same output on Python 2 and also runs on Python 3.
print(x)

[10, 10.25, 10.5, 10.75, 11.0, 11.25, 11.5, 11.75, 12.0, 12.25, 12.5, 12.75, 13.0, 13.25, 13.5, 13.75, 14.0, 14.25, 14.5, 14.75, 15.0, 15.25, 15.5, 15.75, 16.0, 16.25, 16.5, 16.75, 17.0, 17.25, 17.5, 17.75, 18.0, 18.25, 18.5, 18.75, 19.0, 19.25, 19.5, 19.75]


**list(itertools.compress(numbers,boolean))** <br>returns values in the first list for which values in the second list are True

In [45]:
list(itertools.compress(numbers,booleans))

[23, 44, 12]

**list(itertools.imap(function, list1, list2))** <br>
applies the function to each pair of elements (xj, yj) from the two lists and returns the list of results [f(x1,y1), f(x2,y2), ...], stopping at the end of the shorter list.  If function=None, imap instead groups the elements into tuples (xj, yj).

In [60]:
#Multiply (xj,yj) from the two lists using the operator module
print list(itertools.imap(operator.mul, numbers, decimals))

[2.3000000000000003, 14.0, 17.6, 12.8, 3.5]


# The Counter Module

In [4]:
from collections import Counter 

In [18]:
#you can use the for loop to update values in a dictionary
dict_words={'red':0, 'blue':0,'green':0}
words=['red', 'blue', 'red', 'green', 'blue', 'blue']
for j in words:
    dict_words[j]+=1
dict_words

{'blue': 3, 'green': 1, 'red': 2}

In [19]:
#with a plain dict you CAN'T use += on a key which doesn't already exist:
#dict_words[j] has to be read before 1 can be added, and that lookup raises KeyError
dict_words={}
words=['red', 'blue', 'red', 'green', 'blue', 'blue']
for j in words:
    dict_words[j]+=1
dict_words

KeyError: 'red'

Counters are a subclass of dictionaries and consist of an unordered collection of elements stored as dictionary keys and their counts stored as dictionary values.  Counters return a zero count for missing items instead of an error, unlike dictionaries.  

In [5]:
# when adding to a Counter object, you can use += even if the key j doesn't exist yet
# (a missing key is read as a count of zero instead of raising KeyError)
cnt=Counter()
words=['red', 'blue', 'red', 'green', 'blue', 'blue']
for j in words:
    cnt[j]+=1
cnt

Counter({'blue': 3, 'green': 1, 'red': 2})

In [32]:
print Counter("We're going out!")
Counter(a=1,b=2)
Counter({'a':1,'b':3})

Counter({' ': 2, 'e': 2, 'g': 2, 'o': 2, '!': 1, "'": 1, 'i': 1, 'n': 1, 'r': 1, 'u': 1, 't': 1, 'W': 1})


Counter({'a': 1, 'b': 2})

Counter({'a': 1, 'b': 3})

In [35]:
c=Counter({'a':1,'b':3})
del c['a'] #setting a count to zero would leave the key in the Counter; 'del' removes the entry entirely
c

Counter({'b': 3})

# elements() method

Returns each element repeated as many times as its count.

In [9]:
Counter(a=4,b=2).elements()
list(Counter(a=4,b=2).elements())

<itertools.chain at 0x1100a9450>

['a', 'a', 'a', 'a', 'b', 'b']

# most_common() method

Returns the n most common elements and their counts, ordered from most to least common.  If n is not specified, all elements are returned.

In [12]:
words=['red', 'blue', 'red', 'green', 'blue', 'blue']
Counter(words).most_common(1)

[('blue', 3)]

# subtract() method

In [25]:
c = Counter(a=4, b=2, c=0, d=-2)
d = Counter(a=1, b=2, c=3, d=4)
c.subtract(d) #produces no output: c itself is updated, and counts may end up zero or negative
c

Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})

In [None]:
# Further reading:
# http://blog.lerner.co.il/is-it-hashable-fun-and-games-with-hashing-in-python/
# https://docs.python.org/2/library/collections.html#collections.Counter

# Learning Python - Chapter 20 - Lists and Comprehensions

ord() returns the integer character code (the ASCII value, for ASCII characters) of a single character. <br>
chr() returns the character associated with an integer character code. <br>
ASCII assigns an integer code to every letter, digit and symbol in its character set.

In [9]:
ord('h')
chr(104)

104

'h'

List comprehensions:  <br>
[function(x) for x in iterable] <br>
For each value in the iterable, apply the function function(x) to that value and collect the results in a new list.

In [12]:
map(ord,'spam')
[ord(x) for x in 'spam']

[115, 112, 97, 109]

[115, 112, 97, 109]

In [23]:
[x**2 for x in range(0,5)] #this accomplishes the below


def f(x):
    """Return x raised to the power 2."""
    return pow(x, 2)
map(f,range(0,5))

map(lambda x: x**2,range(0,5))  #the lambda function is typically used though

[0, 1, 4, 9, 16]

[0, 2, 4]

[0, 1, 4, 9, 16]

[0, 2, 4]

[0, 1, 4, 9, 16]

In [28]:
[x for x in range(0,5) if x%2==0]

def f1(x):
    """Return True when x is even and False otherwise.

    The comparison is returned directly, so an odd x produces an explicit
    False rather than the implicit None of a function that falls off its end.
    filter() treats both as "drop this element", so filtering is unaffected.
    """
    return x % 2 == 0
filter(f1,range(0,5))

filter(lambda x: x%2==0, range(0,5)) #the lambda function returns True when that condition holds

[0, 2, 4]

[0, 2, 4]

[0, 2, 4]

In [8]:
#map the lambda function to the filtered list which only includes even numbers
map(   (lambda x: x**2)   ,   filter((lambda x: x % 2 == 0), range(10))   )

[0, 4, 16, 36, 64]

## Nesting for clauses in a list comprehension

In [13]:
[x+y for x in [0,1,2] for y in [100,200,300]]

# the comprehension above is the same as the nested loops below:
# its leftmost `for` is the outer loop and the next `for` is the inner loop
temp=[]
for x in [0,1,2]:
    for y in [100,200,300]:
        temp.append(x+y)
temp

[100, 200, 300, 101, 201, 301, 102, 202, 302]

[100, 200, 300, 101, 201, 301, 102, 202, 302]

In [4]:
[x + y + z for x in 'spam' if x in 'sm'
           for y in 'SPAM' if y in ('P', 'A')
           for z in '123' if z > '1']

[(x,y) for x in range(5) if x%2==0 
       for y in range(5) if y%2==1]

['sP2', 'sP3', 'sA2', 'sA3', 'mP2', 'mP3', 'mA2', 'mA3']

[(0, 1), (0, 3), (2, 1), (2, 3), (4, 1), (4, 3)]

##  Using list comprehensions to create matrices as nested list structures.

In [33]:
M=[[1, 2, 3], 
   [4, 5, 6], 
   [7, 8, 9]]

N=[[2, 2, 2], 
   [3, 3, 3], 
   [4, 4, 4]]

[M[row][col] * N[row][col] for row in range(3) for col in range(3)]
[[M[row][col] * N[row][col] for col in range(3)] for row in range(3)]
[[(mj*nj) for (mj,nj) in zip(m,n)] for (m,n) in zip(M,N)]

[2, 4, 6, 12, 15, 18, 28, 32, 36]

[[2, 4, 6], [12, 15, 18], [28, 32, 36]]

[[2, 4, 6], [12, 15, 18], [28, 32, 36]]

## Using list comps and map() to pull the jth element from every row of a list of equal-length rows (tuples or lists)

In [36]:
M=[[1, 2, 3], 
   [4, 5, 6], 
   [7, 8, 9]]

[y for (x,y,z) in M]
map(lambda x: x[1], M)

[2, 5, 8]

[2, 5, 8]

# Generator Functions

In [114]:
x=[1,2,3]
x==iter(x)
x=iter(x)
next(x),next(x),next(x)

False

(1, 2, 3)

In [117]:
def gensquares(N):
    """Generator that yields the square of every integer in range(N), in order."""
    for base in range(N):
        square = base ** 2
        yield square

x=gensquares(5) #generators are iterators 
x==iter(x)
next(x),next(x),next(x),next(x) #        

True

(0, 1, 4, 9)

In [17]:
def ups(line):
    """Generator that yields each comma-separated piece of line in upper case."""
    pieces = line.split(',')
    for piece in pieces:
        shouted = piece.upper()
        yield shouted

x='aaa,bbb,ccc,ddd,eee,fff'
x=ups(x)
next(x),next(x),next(x)

('AAA', 'BBB', 'CCC')

In [37]:
t=gensquares(10) #0,1,2,3 -> 0, 1, 4
next(t)
t.send(0)
next(t)

0

1

4

### We can include two generators.  The output of the first generator is shown first:

In [83]:
def both(N):
    """Generator that yields 0..N-1 first and then the squares of 0..N-1."""
    # First pass: the plain integers.
    for plain in range(N):
        yield plain
    # Second pass: a generator expression feeds the loop, so the squares are
    # computed one at a time as they are requested.
    squares = (base ** 2 for base in range(N))
    for squared in squares:
        yield squared
list(both(5))

[0, 1, 2, 3, 4, 0, 1, 4, 9, 16]

# Generator Expressions

Generator expressions are written just like list comprehensions, but are enclosed in parentheses.  Building a generator expression returns just a generator object that supports the iteration protocol (it can be used in a for loop or passed to next()).  Thus, unlike a list comprehension, which builds the whole result list and keeps all of it in memory, a generator expression does not build the result list: it saves space and delivers its first results sooner, and its elements are produced one at a time, on demand, via next() or a for loop, rather than all at once.

Generator expressions thus:
   1. save memory space because they do not require the entire result list to be generated at once.
   2. divide the output into smaller slices so you don't have to wait for the full output.
   3. are potentially slightly slower than list comprehensions when the dataset is small.

Generator expressions are comprehension expressions enclosed in parentheses.  Generator functions are functions that contain a yield statement.  Both return a generator object whose `__iter__()` method simply returns the generator object itself (since it's already an iterator); having this method is what makes the object an iterable that can be looped over.  <br>

The returned object also has the `next` method (`__next__` in Python 3), which starts the implied loop or resumes where it last left off, and raises StopIteration when it has finished producing results. <br>

list() forces a generator to produce all of its results, meaning to do automatic iteration.  <br> next() forces a generator to produce results one at a time, meaning to do manual iteration.

In [36]:
a=(x ** 2 for x in range(4))
iter(a)==a
next(a),next(a),next(a)

True

(0, 1, 4)

In [46]:
a=(x ** 2 for x in range(4))
for j in a:
    print(j)

0
1
4
9


In [17]:
a=(j.upper() for j in 'aaa,bbb,ccc'.split(','))
next(a)
','.join(a)

'AAA'

'BBB,CCC'

## Generator expressions do not require their own parentheses when they are the sole argument of a function call:

In [24]:
sum(x for x in [1,2,3])

6

# Comparing Generator Expressions to other methods

In [34]:
list(map(lambda x: x * 2, (1, 2, 3, 4)))   #map
list(x * 2 for x in (1, 2, 3, 4))          #generator expression excluding ()
[x * 2 for x in (1, 2, 3, 4)]              #list comprehension

','.join(map(str.upper, 'aaa,bbb,ccc'.split(',')))      #map
','.join(x.upper() for x in 'aaa,bbb,ccc'.split(','))   #generator expression
','.join([x.upper() for x in 'aaa,bbb,ccc'.split(',')]) #list comprehension

#split -> ['aaa','bbb','ccc']    upper-> ['AAA','BBB','CCC']   
#','.join() -> 'AAA,BBB,CCC'

[2, 4, 6, 8]

[2, 4, 6, 8]

[2, 4, 6, 8]

'AAA,BBB,CCC'

'AAA,BBB,CCC'

'AAA,BBB,CCC'

map and generator expressions can be nested within one another!

In [41]:
map(lambda x: x**2, map(abs,[-2,-1,0,1,2]))              #map
list(x**2 for x in (abs(x) for x in [-2,-1,0,1,2]))      #generator expression
[x**2 for x in [abs(x) for x in [-2,-1,0,1,2]]]          #list comprehension
map(lambda x: x**2, (abs(x) for x in [-2,-1,0,1,2]))     #generator expression within map

[4, 1, 0, 1, 4]

[4, 1, 0, 1, 4]

[4, 1, 0, 1, 4]

[4, 1, 0, 1, 4]

In [32]:
# Generator expression: abs() is applied to one more element each time the
# generator is advanced.
x = (abs(x) for x in [-2, -1, 0, 1, 2])
# Advance it with the next() builtin, as the other cells in this notebook do,
# instead of the Python-2-only .next() method; the result is the same.
next(x), next(x), next(x)

(2, 1, 0)

In [56]:
import pandas as pd
df=pd.DataFrame({'hi':[1,2,3,4],'bye':[22,33,44,55]})
df.columns.tolist()
# NOTE(review): df only has the columns 'hi' and 'bye', so looking up 'do'
# raises the KeyError shown in this cell's output. The cell also looks unrelated
# to the generator material around it - confirm whether it should stay.
df['do'].any()

KeyError: 'do'

# Generator Functions vs Expressions

Both support automatic (via list) and manual (via next) iteration.  They are very similar.  Expressions are often shorter. <br>

Generators are **single iteration** objects.  After one iteration, they can't be iterated through again.  Assigning a generator to another name does not copy it (the new name just points to the same, possibly already consumed, generator object).  

In [66]:
line = 'aa bbb c'

''.join(x.upper() for x in line.split() if len(x)>1) #generator expression is used 

def upper(y):
    """Generator that yields the upper-cased words of y longer than one character.

    y is split on whitespace. The words are taken from the argument itself,
    not from the module-level variable `line`, so the function works for any
    string passed in.
    """
    for word in y.split():
        if len(word) > 1:
            yield word.upper()
''.join(upper(line))    #generator function is used


'AABBB'

'AABBB'

In [78]:
G = (c * 4 for c in 'SPAM')
I=G
A=iter(G) #using iter makes no difference since iter(G) ==G
next(A),next(G), next(I),next(A)

('SSSS', 'PPPP', 'AAAA', 'MMMM')

## Dictionaries are iterables that can be turned into iterators

In [85]:
D = {'a':1, 'b':2, 'c':3}
iter(D)==D

False

Notice below that D has the \__iter\__ method, so it is an iterable that can be looped over and can also be turned into an *iterator* (using iter).  As an iterator, it has state retention and manual iteration can be done using next().

In [96]:
print dir(D) #notice __iter__ but no next (or __next__) method, so it's an iterable, which can
             #return an iterator, but is itself not actually an iterator

['__class__', '__cmp__', '__contains__', '__delattr__', '__delitem__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getitem__', '__gt__', '__hash__', '__init__', '__iter__', '__le__', '__len__', '__lt__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__setitem__', '__sizeof__', '__str__', '__subclasshook__', 'clear', 'copy', 'fromkeys', 'get', 'has_key', 'items', 'iteritems', 'iterkeys', 'itervalues', 'keys', 'pop', 'popitem', 'setdefault', 'update', 'values', 'viewitems', 'viewkeys', 'viewvalues']


In [105]:
D2=D.__iter__() #create iterator
D3=iter(D) #create a second iterator over the same dictionary
next(D2),next(D2),next(D3) #D2 has already advanced two keys, yet D3 still starts at the first key: each iterator keeps its own position

('a', 'c', 'a')