### Using Dictionary Objects

Containers are objects that can hold any number of arbitrary objects.

In [1]:
# 1.Load a variable with sentences
sentence = "Peter Piper picked a peck of pickled peppers A peck of pickled\
peppers Peter Piper picked If Peter Piper picked a peck of pickled\
peppers Wheres the peck of pickled peppers Peter Piper picked"

In [2]:
# 2.Initialize a dictionary object
word_dict = {}

In [3]:
# 3.Perform the word count
for word in sentence.split():    
    if word not in word_dict:        
        word_dict[word] = 1    
    else:          
        word_dict[word]+=1

In [4]:
# 4.print the output
print (word_dict)

{'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1}


Rewriting using a default dict

In [5]:
del word_dict
word_dict = {}
for word in sentence.split():
    word_dict.setdefault(word,0)
    word_dict[word]+=1

In [6]:
print (word_dict)

{'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1}


Can use the defaultdict class instead

In [7]:
from collections import defaultdict

In [8]:
word_dict = defaultdict(int)
for word in sentence.split():    
    word_dict[word]+=1
print word_dict

defaultdict(<type 'int'>, {'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1})


Loop through keys and values of a dict

In [10]:
for key, value in word_dict.items():
    print key,value

a 2
A 1
Peter 4
of 4
Piper 4
pickled 2
pickledpeppers 2
picked 4
peppers 2
the 1
peck 4
Wheres 1
If 1


Another option is the Counter function

In [11]:
from collections import Counter

In [14]:
words = sentence.split()
word_count = Counter(words)
print word_count
print word_dict  

Counter({'Peter': 4, 'of': 4, 'Piper': 4, 'picked': 4, 'peck': 4, 'a': 2, 'pickled': 2, 'pickledpeppers': 2, 'peppers': 2, 'A': 1, 'the': 1, 'Wheres': 1, 'If': 1})
defaultdict(<type 'int'>, {'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1})


### Working with a dictionary of dictionaries

User movie ratings data

In [15]:
from collections import defaultdict
user_movie_rating = defaultdict(lambda :defaultdict(int))
# Initialize ratings for Alice
user_movie_rating["Alice"]["LOR1"] =  4
user_movie_rating["Alice"]["LOR2"] =  5
user_movie_rating["Alice"]["LOR3"] =  3
user_movie_rating["Alice"]["SW1"]  =  5
user_movie_rating["Alice"]["SW2"]  =  3
print user_movie_rating

defaultdict(<function <lambda> at 0x0000000003ABBF28>, {'Alice': defaultdict(<type 'int'>, {'SW1': 5, 'SW2': 3, 'LOR1': 4, 'LOR3': 3, 'LOR2': 5})})


### Working with tuples

In [16]:
# 1.Ways of creating a tuple
a_tuple = (1,2,'a')
b_tuple =1,2,'c'

In [17]:
# 2.Accessing elements of a tuple through index
print b_tuple[0]
print b_tuple[-1]

1
c


In [18]:
# 3.It is not possible to change the value of an item in a tuple,
# for example the next statement will result in an error.
try:    
    b_tuple[0] = 20
except:    
    print "Cannot change value of tuple by index"   

Cannot change value of tuple by index


In [19]:
# 4.Though tuples are immutable
# But elements of a tuple can be mutable objects,
# for instance a list, as in the following line of code
c_tuple =(1,2,[10,20,30])
c_tuple[2][0] = 100

In [20]:
c_tuple

(1, 2, [100, 20, 30])

In [21]:
# 5.Tuples once created cannot be extended like list, 
# however two tuples can be concatenated.
print a_tuple + b_tuple

(1, 2, 'a', 1, 2, 'c')


In [22]:
# 6 Slicing of tuples
a =(1,2,3,4,5,6,7,8,9,10)
print a[1:]
print a[1:3]
print a[1:6:2]
print a[:-1]

(2, 3, 4, 5, 6, 7, 8, 9, 10)
(2, 3)
(2, 4, 6)
(1, 2, 3, 4, 5, 6, 7, 8, 9)


In [23]:
# 7.Tuple min max
print min(a),max(a)

1 10


In [24]:
# 8.in and not in
if 1 in a:    
    print "Element 1 is available in tuple a"
else:
    print "Element 1 is available in tuple a"

Element 1 is available in tuple a


Tuples are immutable

In [25]:
print a[::-1]

(10, 9, 8, 7, 6, 5, 4, 3, 2, 1)


namedtuple

In [27]:
from collections import namedtuple

vector = namedtuple("Dimension",'x y z')
vec_1 = vector(1,1,1)
vec_2 = vector(1,0,1)
manhattan_distance = abs(vec_1.x - vec_2.x) + abs(vec_1.y - vec_2.y)+ abs(vec_1.z - vec_2.z)
print "Manhattan distance between vectors = %d"%(manhattan_distance)

Manhattan distance between vectors = 1


### Using sets

In [1]:
# 1.Initialize two sentences.
st_1 = "dogs chase cats"
st_2 = "dogs hate cats"

In [2]:
# 2.Create set of words from strings
st_1_wrds = set(st_1.split())
st_2_wrds = set(st_2.split())

In [3]:
# 3.Find out the number of unique words in each set, vocabulary size.
no_wrds_st_1 = len(st_1_wrds)
no_wrds_st_2 = len(st_2_wrds)

In [4]:
# 4.Find out the list of common words between the two sets.
# Also find out the count of common words.
cmn_wrds = st_1_wrds.intersection(st_2_wrds)
no_cmn_wrds = len(st_1_wrds.intersection(st_2_wrds))

In [5]:
# 5.Get a list of unique words between the two sets.
# Also find out the count of unique words.
unq_wrds = st_1_wrds.union(st_2_wrds)
no_unq_wrds = len(st_1_wrds.union(st_2_wrds))

In [6]:
# 6.Calculate Jaccard similarity 
similarity = no_cmn_wrds / (1.0 * no_unq_wrds)

In [7]:
# 7.Let us now print to grasp our output.
print "No words in sent_1 = %d"%(no_wrds_st_1)
print "Sentence 1 words =", st_1_wrds
print "No words in sent_2 = %d"%(no_wrds_st_2)
print "Sentence 2 words =", st_2_wrds
print "No words in common = %d"%(no_cmn_wrds)
print "Common words =", cmn_wrds
print "Total unique words = %d"%(no_unq_wrds)
print "Unique words=",unq_wrds
print "Similarity = No words in common/No unique words, %d/%d = %.2f"%(no_cmn_wrds,no_unq_wrds,similarity)

No words in sent_1 = 3
Sentence 1 words = set(['cats', 'dogs', 'chase'])
No words in sent_2 = 3
Sentence 2 words = set(['cats', 'hate', 'dogs'])
No words in common = 2
Common words = set(['cats', 'dogs'])
Total unique words = 4
Unique words= set(['cats', 'hate', 'dogs', 'chase'])
Similarity = No words in common/No unique words, 2/4 = 0.50


In [8]:
a =(1,2,1)
set(a)

{1, 2}

In [9]:
b =[1,2,1]
set(b)

{1, 2}

Use sklearn library instead

In [11]:
# Load libraries
from sklearn.metrics import jaccard_similarity_score
# 1.Initialize two sentences.
st_1 = "dogs chase cats"
st_2 = "dogs hate cats"

In [14]:
# 2.Create set of words from strings
st_1_wrds = set(st_1.split())
st_2_wrds = set(st_2.split())
unq_wrds = st_1_wrds.union(st_2_wrds)
a  = [ 1 if w in st_1_wrds else 0 for w in unq_wrds ]
b  = [ 1 if w in st_2_wrds else 0 for w in unq_wrds]
print a
print b
print jaccard_similarity_score(a,b)

[1, 0, 1, 1]
[1, 1, 1, 0]
0.5


### Writing a list

In [1]:
# 1.Let us look at a quick example of list creation. 
a = range(1,10)
print a
b = ["a","b","c"]
print b

[1, 2, 3, 4, 5, 6, 7, 8, 9]
['a', 'b', 'c']


In [2]:
# 2.List can be accessed through indexing. Indexing starts at 0.
print a[0]

1


In [3]:
# 3.With negative indexing the elements of a list are accessed from backwards.
a[-1]

9

In [4]:
# 4.Slicing is accessing a subset of list by providing two indices.
print a[1:3]  # prints [2, 3]
print a[1:]   # prints [2, 3, 4, 5, 6, 7, 8, 9]
print a[-1:]  # prints [9]
print a[:-1]  # prints [1, 2, 3, 4, 5, 6, 7, 8]

[2, 3]
[2, 3, 4, 5, 6, 7, 8, 9]
[9]
[1, 2, 3, 4, 5, 6, 7, 8]


In [5]:
#5.List concatenation
a = [1,2]
b = [3,4]
print a + b # prints [1, 2, 3, 4]

[1, 2, 3, 4]


In [6]:
# 6.List  min max
print min(a),max(a)

1 2


In [7]:
# 7.in and not in
if 1 in a:    
    print "Element 1 is available in list a"
else:    
    print "Element 1 is available in tuple a"

Element 1 is available in list a


In [8]:
# 8. Appending and extending list
a = range(1,10)
print a
a.append(10)
print a

[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [9]:
# 9.List as a stack
a_stack = []
a_stack.append(1)
a_stack.append(2)
a_stack.append(3)
print a_stack.pop()
print a_stack.pop()
print a_stack.pop()

3
2
1


In [10]:
# 10.List as queue
a_queue = []
a_queue.append(1)
a_queue.append(2)
a_queue.append(3)
print a_queue.pop(0)
print a_queue.pop(0)
print a_queue.pop(0)

1
2
3


In [11]:
# 11.List sort and reverse
from random import shuffle
a = range(1,20)
shuffle(a)
print a
a.sort()
print a
a.reverse()
print a

[13, 6, 16, 4, 3, 15, 17, 19, 8, 1, 7, 2, 14, 10, 5, 9, 18, 12, 11]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]


### Creating a list from another list - list comprehension

In [1]:
# 1.Let us define a simple list with some positive and negative numbers.
a = [1,2,-1,-2,3,4,-3,-4]

In [2]:
# 2.Now let us write our list comprehension.
# pow() a power function takes two input and
# its output is the first variable raised to the power of the second.
b = [pow(x,2) for x in a if x < 0]

In [3]:
# 3.Finally let us see the output, i.e. the newly created list b.
print b

[1, 4, 9, 16]


For dictionaries

In [4]:
a = {'a':1,'b':2,'c':3}
b = {x:pow(y,2) for x,y in a.items()}
print b

{'a': 1, 'c': 9, 'b': 4}


Tuple Comprehension

In [6]:
def process(x):    
    if isinstance(x,str):        
        return x.lower()    
    elif isinstance(x,int):        
        return x*x    
    else:        
        return -9
    
a = (1,2,-1,-2,'D',3,4,-3,'A')
b = tuple(process(x) for x in a )
print b

(1, 4, 1, 4, 'd', 9, 16, 9, 'a')


### Using iterators

In [1]:
# 1.Let us write a simple iterator.
class SimpleCounter(object):    
    def __init__(self, start, end):        
        self.current = start        
        self.end = end    
    def __iter__(self):        
        'Returns itself as an iterator object'        
        return self    
    def next(self):        
        'Returns the next value till current is lower than end'        
        if self.current > self.end:            
            raise StopIteration        
        else:            
            self.current += 1            
            return self.current - 1

In [2]:
# 2.Now let us try to access the iterator
c = SimpleCounter(1,3)
print c.next()
print c.next()
print c.next()
print c.next()

1
2
3


StopIteration: 

In [3]:
# 3.Another way to access
for entry in iter(c):    
    print entry     

In [7]:
#f = open(some_file_of_interest)
f = open('file_of_interest.txt')
for l in iter(f):
    print l
f.close()

a

b

c

d


### Generating an iterator and a generator

In [8]:
SimpleCounter  = (x**2 for x in range(1,10))
tot = 0
for val in SimpleCounter:    
    tot+=val
print tot    

285


In [9]:
def my_gen(low,high):    
    for x in range(low,high):        
        yield x**2
        
tot = 0        
for val in my_gen(1,10):    
        tot+=val
print tot    

285


Both generators and iterables produce iterators

In [1]:
gen = (x**2 for x in range(1,10))
for val in iter(gen):    
    print val

1
4
9
16
25
36
49
64
81


Generator - can only iterate over data once!

### Using iterables

Similar to generators except can iterate on and on

In [3]:
# 1.Let us define a simple class with __iter__ method.
class SimpleIterable(object):    
    def __init__(self, start, end):        
        self.start = start        
        self.end = end    
    def __iter__(self):        
        for x in range(self.start,self.end):            
            yield x**2
            
#  Now let us invoke this class and iterate over its values two times.
c = SimpleIterable(1,10)

In [4]:
# First iteration
tot = 0
for val in iter(c):    
    tot+=val
print tot

285


In [5]:
# Second iteration
tot =0
for val in iter(c):    
    tot+=val
print tot

285


### Passing a function as a variable

In [1]:
# 1.Let us define a simple function.
def square_input(x):    
    return x*x

# We will follow it by assigning that function to a variable
square_me = square_input
# And finally invoke the variable
print square_me(5)    

25


### Embedding functions in another function

In [2]:
# 1.Let us define a function of function to find the sum of squares of the given input
def sum_square(x):    
    def square_input(x):        
        return x*x    
    return sum([square_input(x1) for x1 in x])

# Print the output to check for correctness
print sum_square([2,4,5])   

45


### Passing a function as a parameter

In [3]:
from math import log

def square_input(x):    
    return x*x

# 1.Define a generic function, which will take another function as input
# and will apply it on the given input sequence.
def apply_func(func_x,input_x):    
    return map(func_x,input_x)

# Let us try to use the apply_func() and verify the results  
a = [2,3,4]

print apply_func(square_input,a)
print apply_func(log,a)    

[4, 9, 16]
[0.6931471805599453, 1.0986122886681098, 1.3862943611198906]


### Returning a function

In [4]:
# 1.Let us define a function which will explain our
#  concept of function returning a function.
def cylinder_vol(r):    
    pi = 3.141    
    def get_vol(h):        
        return pi * r**2 * h    
    return get_vol

In [5]:
# 2.Let us define a radius and find get a volume function,
#  which can now find out the volume for the given radius and any height.
radius = 10
find_volume = cylinder_vol(radius)

In [6]:
# 3.Let us try to find out the volume for different heights
height = 10
print "Volume of cylinder of radius %d and height %d = %.2f  cubic units" \
%(radius,height,find_volume(height))        

height = 20
print "Volume of cylinder of radius %d and height %d = %.2f  cubic units" \
%(radius,height,find_volume(height))  

Volume of cylinder of radius 10 and height 10 = 3141.00  cubic units
Volume of cylinder of radius 10 and height 20 = 6282.00  cubic units


### Altering the function behavior with decorators

In [7]:
from string import punctuation

def pipeline_wrapper(func):    
    def to_lower(x):        
        return x.lower()        
    def remove_punc(x):        
        for p in punctuation:            
            x = x.replace(p,'')        
        return x            
    def wrapper(*args,**kwargs):        
        x = to_lower(*args,**kwargs)        
        x = remove_punc(x)        
        return func(x)
    return wrapper        

In [8]:
@pipeline_wrapper        
def tokenize_whitespace(inText):    
    return inText.split()

In [9]:
s = "string. With. Punctuation?"    
print tokenize_whitespace(s)

['string', 'with', 'punctuation']


### Creating anonymous functions with lambda

In [1]:
# 1.Create a simple list and a function similar to the
# one in functions as parameter section.
a = [10,20,30]

def do_list(a_list,func):    
    total = 0    
    for element in a_list:        
        total+=func(element)    
    return total

print do_list(a,lambda x:x**2)   
print do_list(a,lambda x:x**3)   

b = [lambda x: x%3 ==0  for x in a]

1400
36000


### Using the map function

In [4]:
#First let us declare a list.
a =[10,20,30]
# Let us now call the map function in our Print statement.
print map(lambda x:x**2,a)   

[100, 400, 900]


In [5]:
print map(lambda x:x**3,a)

[1000, 8000, 27000]


In [6]:
print sum(map(lambda x:x**2,a))
print sum(map(lambda x:x**3,a))

1400
36000


Map expects an N-argument function if we have N-sequences.

In [7]:
a =[10,20,30]
b = [1,2,3]
print map(pow,a,b) 

[10, 400, 27000]


### Working with filters

In [8]:
# Let us declare a list.
a = [10,20,30,40,50]
# Let us apply Filter function on all the elements of the list.
print filter(lambda x:x>10,a)

[20, 30, 40, 50]


### Using zip and izip

In [10]:
print zip(range(1,5), range(1,5))

[(1, 1), (2, 2), (3, 3), (4, 4)]


In [12]:
out = zip(range(1,5), range(1,5))

\* unpacks a collection. \** unpacks a dictionary

In [13]:
x, y = zip(*out)
print x, y

(1, 2, 3, 4) (1, 2, 3, 4)


In [16]:
a =(2,3)
print pow(*a)

8


In [17]:
a_dict = {"x":10,"y":10,"z":10,"x1":10,"y1":10,"z1":10}
print a_dict

{'z': 10, 'y1': 10, 'y': 10, 'x': 10, 'x1': 10, 'z1': 10}


In [19]:
def dist(x,y,z,x1,y1,z1):
    return abs((x-x1)+(y-y1)+(z-z1))

print dist(**a_dict) 

0


Can now write a function without restrictions on number of variables it can ingest

In [None]:
def any_sum(*args):
    tot = 0
    for arg in args:
        tot+=arg
    return tot

print any_sum(1,2)
print any_sum(1,2,3)

### Processing arrays from the tabular data

In [2]:
# 1.Let us simulate a small tablular input using StringIO
import numpy as np
from StringIO import StringIO
in_data = StringIO("10,20,30\n56,89,90\n33,46,89")

In [3]:
# 2.Read the input using numpy's genfromtext to create a nummpy array.
data = np.genfromtxt(in_data,dtype=int,delimiter=",")
print data

# cases where we may not need to use some columns.
in_data = StringIO("10,20,30\n56,89,90\n33,46,89")
data = np.genfromtxt(in_data,dtype=int,delimiter=",",usecols=(0,1))
print data

# providing column names
in_data = StringIO("10,20,30\n56,89,90\n33,46,89")
data = np.genfromtxt(in_data,dtype=int,delimiter=",",names="a,b,c")
print data

# using column names from data
in_data = StringIO("a,b,c\n10,20,30\n56,89,90\n33,46,89")
data = np.genfromtxt(in_data,dtype=int,delimiter=",",names=True)
print data

[[10 20 30]
 [56 89 90]
 [33 46 89]]
[[10 20]
 [56 89]
 [33 46]]
[(10, 20, 30) (56, 89, 90) (33, 46, 89)]
[(10, 20, 30) (56, 89, 90) (33, 46, 89)]


Default values

genfromtxt(fname, dtype=<type 'float'>, comments='#', delimiter=None, skiprows=0, skip_header=0, skip_footer=0, converters=None, missing='', missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True)

The csv library can be very useful. For example, with large CSV files.

In [4]:
import csv
help(csv.Sniffer.sniff)

Help on method sniff in module csv:

sniff(self, sample, delimiters=None) unbound csv.Sniffer method
    Returns a dialect (or None) corresponding to the sample



In [6]:
with open('csv_file.csv') as t:
    dialect = csv.Sniffer().sniff(t.read(1024))
    t.seek(0)
    reader = csv.reader(t, dialect)

In [9]:
print dialect
print reader

csv.dialect
<_csv.reader object at 0x0000000003FECAC8>


In [11]:
print dialect.delimiter
print dialect.doublequote
print dialect.escapechar
print dialect.lineterminator
print dialect.quotechar
print dialect.quoting
print dialect.skipinitialspace

,
False
None


"
0
False


In [14]:
with open('csv_file.csv') as t:
    print csv.Sniffer().has_header(t.read(1024))

True


### Preprocessing the columns

In [15]:
in_data = StringIO("30kg,inr2000,31.11,56.33,1\n52kg,inr8000.35,12,16.7,2")
data = np.genfromtxt(in_data,delimiter=",")
print data

[[   nan    nan  31.11  56.33   1.  ]
 [   nan    nan  12.    16.7    2.  ]]


In [22]:
import numpy as np
from StringIO import StringIO

# Define a data set
in_data = StringIO("30kg,inr2000,31.11,56.33,1\n52kg,inr8000.35,12,16.7,2")

In [23]:
# 1.Let us define two data pre-processing using lambda functions,
strip_func_1 = lambda x : float(x.rstrip("kg"))
strip_func_2 = lambda x : float(x.lstrip("inr"))

In [24]:
# 2.Let us now create a dictionary of these functions,
convert_funcs = {0:strip_func_1,1:strip_func_2}

In [25]:
# 3.Now provide this dictionary of functions to genfromtxt.
data = np.genfromtxt(in_data,delimiter=",", converters=convert_funcs)
print data

[[  3.00000000e+01   2.00000000e+03   3.11100000e+01   5.63300000e+01
    1.00000000e+00]
 [  5.20000000e+01   8.00035000e+03   1.20000000e+01   1.67000000e+01
    2.00000000e+00]]


In [26]:
# Using a lambda function to handle conversions
in_data = StringIO("10,20,30\n56,,90\n33,46,89")
mss_func = lambda x: float(x.strip() or -999)
data = np.genfromtxt(in_data,delimiter=",", converters={1:mss_func})
print data

[[  10.   20.   30.]
 [  56. -999.   90.]
 [  33.   46.   89.]]


### Sorting lists

In [27]:
# Let us look at a very small code snippet, which does sorting of a given list.
a = [8, 0, 3, 4, 5, 2, 9, 6, 7, 1]
b = [8, 0, 3, 4, 5, 2, 9, 6, 7, 1]

print a
a.sort()
print a

print b
b_s = sorted(b)
print b_s

[8, 0, 3, 4, 5, 2, 9, 6, 7, 1]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
[8, 0, 3, 4, 5, 2, 9, 6, 7, 1]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]


Use reverse=True in .sort to get descending order

Can only use .sort with lists - need to use sorted with other iterables, e.g. tuples

### Sorting with a key

In [29]:
#1.The first step is to create a list of tuples, which we will use to test our sorting.

employee_records = [ ('joe',1,53),('beck',2,26), \
                    ('ele',6,32),('neo',3,45),  \
                    ('christ',5,33),('trinity',4,29), \
                   ]
print employee_records

[('joe', 1, 53), ('beck', 2, 26), ('ele', 6, 32), ('neo', 3, 45), ('christ', 5, 33), ('trinity', 4, 29)]


In [31]:
#2.Let us now sort it by employee name
print sorted(employee_records,key=lambda emp : emp[0])
"""It prints as follows [('beck', 2, 26), ('christ', 5, 33), ('ele', 6, 32), ('joe', 1, 53), ('neo', 3, 45), ('trinity', 4, 29)]"""

[('beck', 2, 26), ('christ', 5, 33), ('ele', 6, 32), ('joe', 1, 53), ('neo', 3, 45), ('trinity', 4, 29)]


"It prints as follows [('beck', 2, 26), ('christ', 5, 33), ('ele', 6, 32), ('joe', 1, 53), ('neo', 3, 45), ('trinity', 4, 29)]"

In [32]:
# 3.Let us now sort it by employee id
print sorted(employee_records,key=lambda emp : emp[1])
"""It prints as follows[('joe', 1, 53), ('beck', 2, 26), ('neo', 3, 45), ('trinity', 4, 29), ('christ', 5, 33), ('ele', 6, 32)]"""

[('joe', 1, 53), ('beck', 2, 26), ('neo', 3, 45), ('trinity', 4, 29), ('christ', 5, 33), ('ele', 6, 32)]


"It prints as follows[('joe', 1, 53), ('beck', 2, 26), ('neo', 3, 45), ('trinity', 4, 29), ('christ', 5, 33), ('ele', 6, 32)]"

In [33]:
# 4.Finally we sort it with employee age
print sorted(employee_records,key=lambda emp : emp[2])
"""Its prints as follows[('beck', 2, 26), ('trinity', 4, 29), ('ele', 6, 32), ('christ', 5, 33), ('neo', 3, 45), ('joe', 1, 53)]"""

[('beck', 2, 26), ('trinity', 4, 29), ('ele', 6, 32), ('christ', 5, 33), ('neo', 3, 45), ('joe', 1, 53)]


"Its prints as follows[('beck', 2, 26), ('trinity', 4, 29), ('ele', 6, 32), ('christ', 5, 33), ('neo', 3, 45), ('joe', 1, 53)]"

operator module has itemgetter, attrgetter and methodcalled functions

In [35]:
from operator import itemgetter
employee_records = [ ('joe',1,53),('beck',2,26), \
                    ('ele',6,32),('neo',3,45),  \
                    ('christ',5,33),('trinity',4,29), \
                   ]
print sorted(employee_records,key=itemgetter(0))
print sorted(employee_records,key=itemgetter(1))
print sorted(employee_records,key=itemgetter(2))

[('beck', 2, 26), ('christ', 5, 33), ('ele', 6, 32), ('joe', 1, 53), ('neo', 3, 45), ('trinity', 4, 29)]
[('joe', 1, 53), ('beck', 2, 26), ('neo', 3, 45), ('trinity', 4, 29), ('christ', 5, 33), ('ele', 6, 32)]
[('beck', 2, 26), ('trinity', 4, 29), ('ele', 6, 32), ('christ', 5, 33), ('neo', 3, 45), ('joe', 1, 53)]


In [37]:
sorted(employee_records,key=itemgetter(0,1)) # sorts by the 1st columns (name) and then the 2nd column (id)

[('beck', 2, 26),
 ('christ', 5, 33),
 ('ele', 6, 32),
 ('joe', 1, 53),
 ('neo', 3, 45),
 ('trinity', 4, 29)]

The attrgetter and methodcaller come in handy when the elements of our iterable are class objects

In [38]:
# Let us now enclose the employee records as class objects,
class employee(object):    
    def __init__(self,name,id,age):        
        self.name = name        
        self.id = id        
        self.age = age    
    def pretty_print(self):       
        print self.name,self.id,self.age

In [39]:
# Now let us populate a list with these class objects.
employee_records = []
emp1 = employee('joe',1,53)
emp2 = employee('beck',2,26)
emp3 = employee('ele',6,32)

employee_records.append(emp1)
employee_records.append(emp2)
employee_records.append(emp3)

In [40]:
# Print the records
for emp in employee_records:    
    emp.pretty_print()

joe 1 53
beck 2 26
ele 6 32


In [41]:
from operator import attrgetter
employee_records_sorted = sorted(employee_records,key=attrgetter('age'))
# Now let us print the sorted list,
for emp in employee_records_sorted:    
    emp.pretty_print()

beck 2 26
ele 6 32
joe 1 53


Methodcaller

In [42]:
class employee(object):    
    def __init__(self,name,id,age):        
        self.name = name        
        self.id = id        
        self.age = age    
    def pretty_print(self):       
        print self.name,self.id,self.age
    def random_method(self):       
        return self.age / self.id 

In [43]:
# Populate data
employee_records = []

emp1 = employee('joe',1,53)
emp2 = employee('beck',2,26)
emp3 = employee('ele',6,32)

employee_records.append(emp1)
employee_records.append(emp2)
employee_records.append(emp3)

In [44]:
from operator import methodcaller
employee_records_sorted = sorted(employee_records,key=methodcaller('random_method'))
for emp in employee_records_sorted:    
    emp.pretty_print() 

ele 6 32
beck 2 26
joe 1 53


### Working with itertools

In [1]:
# Load libraries
from itertools import chain,compress,combinations,count,izip,islice

In [2]:
# 1.Chain example, where different iterables can be combined together.
a = [1,2,3]
b = ['a','b','c']
print list(chain(a,b)) # prints [1, 2, 3, 'a', 'b', 'c']

[1, 2, 3, 'a', 'b', 'c']


In [3]:
# 2.Compress example, a data selector, where the data in the first iterator
#  is selected based on the second iterator.
a = [1,2,3]
b = [1,0,1]
print list(compress(a,b)) # prints [1, 3]

[1, 3]


In [4]:
# 3.From a given list, return n length sub sequences.
a = [1,2,3,4]
print list(combinations(a,2)) # prints [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]

[(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]


In [5]:
# 4.A counter which produces infinite consequent integers, given a start integer,
a = range(5)
b = izip(count(1),a)
for element in b:    
    print element

(1, 0)
(2, 1)
(3, 2)
(4, 3)
(5, 4)


In [55]:
# 5.Extract an iterator from another iterator, 
# let us say we want an iterator which only returns every 
# alternate elements from the input iterator
a = range(100)
b = islice(a,0,100,2)
print list(b)

[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98]
