### Using Dictionary Objects

Containers are objects that can hold any number of arbitrary objects.

In [1]:
# 1.Load a variable with sentences
sentence = "Peter Piper picked a peck of pickled peppers A peck of pickled\
peppers Peter Piper picked If Peter Piper picked a peck of pickled\
peppers Wheres the peck of pickled peppers Peter Piper picked"

In [2]:
# 2.Initialize a dictionary object
word_dict = {}

In [3]:
# 3.Perform the word count
for word in sentence.split():    
    if word not in word_dict:        
        word_dict[word] = 1    
    else:          
        word_dict[word]+=1

In [4]:
# 4.print the output
print (word_dict)

{'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1}


Rewriting using a default dict

In [5]:
del word_dict
word_dict = {}
for word in sentence.split():
    word_dict.setdefault(word,0)
    word_dict[word]+=1

In [6]:
print (word_dict)

{'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1}


Can use the defaultdict class instead

In [7]:
from collections import defaultdict

In [8]:
word_dict = defaultdict(int)
for word in sentence.split():    
    word_dict[word]+=1
print word_dict

defaultdict(<type 'int'>, {'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1})


Loop through keys and values of a dict

In [10]:
for key, value in word_dict.items():
    print key,value

a 2
A 1
Peter 4
of 4
Piper 4
pickled 2
pickledpeppers 2
picked 4
peppers 2
the 1
peck 4
Wheres 1
If 1


Another option is the Counter function

In [11]:
from collections import Counter

In [14]:
words = sentence.split()
word_count = Counter(words)
print word_count
print word_dict  

Counter({'Peter': 4, 'of': 4, 'Piper': 4, 'picked': 4, 'peck': 4, 'a': 2, 'pickled': 2, 'pickledpeppers': 2, 'peppers': 2, 'A': 1, 'the': 1, 'Wheres': 1, 'If': 1})
defaultdict(<type 'int'>, {'a': 2, 'A': 1, 'Peter': 4, 'of': 4, 'Piper': 4, 'pickled': 2, 'pickledpeppers': 2, 'picked': 4, 'peppers': 2, 'the': 1, 'peck': 4, 'Wheres': 1, 'If': 1})


### Working with a dictionary of dictionaries

User movie ratings data

In [15]:
from collections import defaultdict
user_movie_rating = defaultdict(lambda :defaultdict(int))
# Initialize ratings for Alice
user_movie_rating["Alice"]["LOR1"] =  4
user_movie_rating["Alice"]["LOR2"] =  5
user_movie_rating["Alice"]["LOR3"] =  3
user_movie_rating["Alice"]["SW1"]  =  5
user_movie_rating["Alice"]["SW2"]  =  3
print user_movie_rating

defaultdict(<function <lambda> at 0x0000000003ABBF28>, {'Alice': defaultdict(<type 'int'>, {'SW1': 5, 'SW2': 3, 'LOR1': 4, 'LOR3': 3, 'LOR2': 5})})


### Working with tuples

In [16]:
# 1.Ways of creating a tuple
a_tuple = (1,2,'a')
b_tuple =1,2,'c'

In [17]:
# 2.Accessing elements of a tuple through index
print b_tuple[0]
print b_tuple[-1]

1
c


In [18]:
# 3.It is not possible to change the value of an item in a tuple,
# for example the next statement will result in an error.
try:    
    b_tuple[0] = 20
except:    
    print "Cannot change value of tuple by index"   

Cannot change value of tuple by index


In [19]:
# 4.Though tuples are immutable
# But elements of a tuple can be mutable objects,
# for instance a list, as in the following line of code
c_tuple =(1,2,[10,20,30])
c_tuple[2][0] = 100

In [20]:
c_tuple

(1, 2, [100, 20, 30])

In [21]:
# 5.Tuples once created cannot be extended like list, 
# however two tuples can be concatenated.
print a_tuple + b_tuple

(1, 2, 'a', 1, 2, 'c')


In [22]:
# 6 Slicing of tuples
a =(1,2,3,4,5,6,7,8,9,10)
print a[1:]
print a[1:3]
print a[1:6:2]
print a[:-1]

(2, 3, 4, 5, 6, 7, 8, 9, 10)
(2, 3)
(2, 4, 6)
(1, 2, 3, 4, 5, 6, 7, 8, 9)


In [23]:
# 7.Tuple min max
print min(a),max(a)

1 10


In [24]:
# 8.in and not in
if 1 in a:    
    print "Element 1 is available in tuple a"
else:
    print "Element 1 is available in tuple a"

Element 1 is available in tuple a


Tuples are immutable

In [25]:
print a[::-1]

(10, 9, 8, 7, 6, 5, 4, 3, 2, 1)


namedtuple

In [27]:
from collections import namedtuple

vector = namedtuple("Dimension",'x y z')
vec_1 = vector(1,1,1)
vec_2 = vector(1,0,1)
manhattan_distance = abs(vec_1.x - vec_2.x) + abs(vec_1.y - vec_2.y)+ abs(vec_1.z - vec_2.z)
print "Manhattan distance between vectors = %d"%(manhattan_distance)

Manhattan distance between vectors = 1


### Using sets

In [1]:
# 1.Initialize two sentences.
st_1 = "dogs chase cats"
st_2 = "dogs hate cats"

In [2]:
# 2.Create set of words from strings
st_1_wrds = set(st_1.split())
st_2_wrds = set(st_2.split())

In [3]:
# 3.Find out the number of unique words in each set, vocabulary size.
no_wrds_st_1 = len(st_1_wrds)
no_wrds_st_2 = len(st_2_wrds)

In [4]:
# 4.Find out the list of common words between the two sets.
# Also find out the count of common words.
cmn_wrds = st_1_wrds.intersection(st_2_wrds)
no_cmn_wrds = len(st_1_wrds.intersection(st_2_wrds))

In [5]:
# 5.Get a list of unique words between the two sets.
# Also find out the count of unique words.
unq_wrds = st_1_wrds.union(st_2_wrds)
no_unq_wrds = len(st_1_wrds.union(st_2_wrds))

In [6]:
# 6.Calculate Jaccard similarity 
similarity = no_cmn_wrds / (1.0 * no_unq_wrds)

In [7]:
# 7.Let us now print to grasp our output.
print "No words in sent_1 = %d"%(no_wrds_st_1)
print "Sentence 1 words =", st_1_wrds
print "No words in sent_2 = %d"%(no_wrds_st_2)
print "Sentence 2 words =", st_2_wrds
print "No words in common = %d"%(no_cmn_wrds)
print "Common words =", cmn_wrds
print "Total unique words = %d"%(no_unq_wrds)
print "Unique words=",unq_wrds
print "Similarity = No words in common/No unique words, %d/%d = %.2f"%(no_cmn_wrds,no_unq_wrds,similarity)

No words in sent_1 = 3
Sentence 1 words = set(['cats', 'dogs', 'chase'])
No words in sent_2 = 3
Sentence 2 words = set(['cats', 'hate', 'dogs'])
No words in common = 2
Common words = set(['cats', 'dogs'])
Total unique words = 4
Unique words= set(['cats', 'hate', 'dogs', 'chase'])
Similarity = No words in common/No unique words, 2/4 = 0.50


In [8]:
a =(1,2,1)
set(a)

{1, 2}

In [9]:
b =[1,2,1]
set(b)

{1, 2}

Use sklearn library instead

In [11]:
# Load libraries
from sklearn.metrics import jaccard_similarity_score
# 1.Initialize two sentences.
st_1 = "dogs chase cats"
st_2 = "dogs hate cats"

In [14]:
# 2.Create set of words from strings
st_1_wrds = set(st_1.split())
st_2_wrds = set(st_2.split())
unq_wrds = st_1_wrds.union(st_2_wrds)
a  = [ 1 if w in st_1_wrds else 0 for w in unq_wrds ]
b  = [ 1 if w in st_2_wrds else 0 for w in unq_wrds]
print a
print b
print jaccard_similarity_score(a,b)

[1, 0, 1, 1]
[1, 1, 1, 0]
0.5


### Writing a list

In [1]:
# 1.Let us look at a quick example of list creation. 
a = range(1,10)
print a
b = ["a","b","c"]
print b

[1, 2, 3, 4, 5, 6, 7, 8, 9]
['a', 'b', 'c']


In [2]:
# 2.List can be accessed through indexing. Indexing starts at 0.
print a[0]

1


In [3]:
# 3.With negative indexing the elements of a list are accessed from backwards.
a[-1]

9

In [4]:
# 4.Slicing is accessing a subset of list by providing two indices.
print a[1:3]  # prints [2, 3]
print a[1:]   # prints [2, 3, 4, 5, 6, 7, 8, 9]
print a[-1:]  # prints [9]
print a[:-1]  # prints [1, 2, 3, 4, 5, 6, 7, 8]

[2, 3]
[2, 3, 4, 5, 6, 7, 8, 9]
[9]
[1, 2, 3, 4, 5, 6, 7, 8]


In [5]:
#5.List concatenation
a = [1,2]
b = [3,4]
print a + b # prints [1, 2, 3, 4]

[1, 2, 3, 4]


In [6]:
# 6.List  min max
print min(a),max(a)

1 2


In [7]:
# 7.in and not in
if 1 in a:    
    print "Element 1 is available in list a"
else:    
    print "Element 1 is available in tuple a"

Element 1 is available in list a


In [8]:
# 8. Appending and extending list
a = range(1,10)
print a
a.append(10)
print a

[1, 2, 3, 4, 5, 6, 7, 8, 9]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [9]:
# 9.List as a stack
a_stack = []
a_stack.append(1)
a_stack.append(2)
a_stack.append(3)
print a_stack.pop()
print a_stack.pop()
print a_stack.pop()

3
2
1


In [10]:
# 10.List as queue
a_queue = []
a_queue.append(1)
a_queue.append(2)
a_queue.append(3)
print a_queue.pop(0)
print a_queue.pop(0)
print a_queue.pop(0)

1
2
3


In [11]:
# 11.List sort and reverse
from random import shuffle
a = range(1,20)
shuffle(a)
print a
a.sort()
print a
a.reverse()
print a

[13, 6, 16, 4, 3, 15, 17, 19, 8, 1, 7, 2, 14, 10, 5, 9, 18, 12, 11]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
[19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1]


### Creating a list from another list - list comprehension

In [1]:
# 1.Let us define a simple list with some positive and negative numbers.
a = [1,2,-1,-2,3,4,-3,-4]

In [2]:
# 2.Now let us write our list comprehension.
# pow() a power function takes two input and
# its output is the first variable raised to the power of the second.
b = [pow(x,2) for x in a if x < 0]

In [3]:
# 3.Finally let us see the output, i.e. the newly created list b.
print b

[1, 4, 9, 16]


For dictionaries

In [4]:
a = {'a':1,'b':2,'c':3}
b = {x:pow(y,2) for x,y in a.items()}
print b

{'a': 1, 'c': 9, 'b': 4}


Tuple Comprehension

In [6]:
def process(x):    
    if isinstance(x,str):        
        return x.lower()    
    elif isinstance(x,int):        
        return x*x    
    else:        
        return -9
    
a = (1,2,-1,-2,'D',3,4,-3,'A')
b = tuple(process(x) for x in a )
print b

(1, 4, 1, 4, 'd', 9, 16, 9, 'a')


### Using iterators

In [1]:
# 1.Let us write a simple iterator.
class SimpleCounter(object):    
    def __init__(self, start, end):        
        self.current = start        
        self.end = end    
    def __iter__(self):        
        'Returns itself as an iterator object'        
        return self    
    def next(self):        
        'Returns the next value till current is lower than end'        
        if self.current > self.end:            
            raise StopIteration        
        else:            
            self.current += 1            
            return self.current - 1

In [2]:
# 2.Now let us try to access the iterator
c = SimpleCounter(1,3)
print c.next()
print c.next()
print c.next()
print c.next()

1
2
3


StopIteration: 

In [3]:
# 3.Another way to access
for entry in iter(c):    
    print entry     

In [7]:
#f = open(some_file_of_interest)
f = open('file_of_interest.txt')
for l in iter(f):
    print l
f.close()

a

b

c

d


### Generating an iterator and a generator

In [8]:
SimpleCounter  = (x**2 for x in range(1,10))
tot = 0
for val in SimpleCounter:    
    tot+=val
print tot    

285


In [9]:
def my_gen(low,high):    
    for x in range(low,high):        
        yield x**2
        
tot = 0        
for val in my_gen(1,10):    
        tot+=val
print tot    

285
