# Python for Data Analysis, 2nd Edition

Python for Data Analysis by Wes McKinney (O’Reilly). Copyright 2017 Wes McKinney, 978-1-491-95766-0.

## Chapter 3: Built-in Data Structures, Functions, and Files

## 3.1 Data Structures and Sequences
- data structures: tuples, lists, dicts, and sets

- ## a) Tuple

In [421]:
tup = 1,2,3
print(tup)
print(type(tup))

(1, 2, 3)
<class 'tuple'>


In [422]:
nested_tup = (4,5,6), (7,8)
nested_tup

((4, 5, 6), (7, 8))

In [423]:
value = 'string'
tup1 = tuple(value)
print(tup1)

value1 = ['string']
tup2 = tuple(value1)
print(tup2)

value2 = [1,2,3],[5,6]
tup3 = tuple(value2)
print(tup3)

#only objects with __iter__ method can be converted into tuple
value3 = 1
#tup4 = tuple(value3)
#print(tup4)                   ----> TypeError: 'int' object is not iterable
 
try:
    iterator = iter(value3)
except TypeError:
    print ('not iterable')


('s', 't', 'r', 'i', 'n', 'g')
('string',)
([1, 2, 3], [5, 6])
not iterable


In [424]:
#accessing element
print(nested_tup[0])  # zero indexing 
print(nested_tup[0][1])

(4, 5, 6)
5


While the objects stored in a tuple may be mutable themselves,      once the tuple is created it’s not possible to modify which object is stored in each      slot:

In [425]:
tup_modify = tuple(['foo', [1,2,3], 1, True])
print(tup_modify)
tup_modify[1][1]=9
#tup_modify[3] = False                      #-->TypeError: 'tuple' object does not support item assignment
print(tup_modify)

('foo', [1, 2, 3], 1, True)
('foo', [1, 9, 3], 1, True)


In [426]:
#concatinating 
tupe = 'sam', 'li'
print(tupe)
tupe2 = 'avi', 'adi'
print(tupe + tupe2)
print(tupe *4)

('sam', 'li')
('sam', 'li', 'avi', 'adi')
('sam', 'li', 'sam', 'li', 'sam', 'li', 'sam', 'li')


In [427]:
#unpacking 
a,b,c,d = ('1', 2, [4,5], 'six')
c

[4, 5]

In [428]:
a,b,c,d = '1', 2, [4,5], 'six'

In [429]:
d

'six'

In [430]:
#swapping in other languages 
a= 7
b = 8

tmp = a 
a = b
b = tmp 
print(a,b)

#in python 
a,b = 4,5
b,a = a,b
print(a,b)

8 7
5 4


In [431]:
iter_tuple = [(1,2,3),(4,5,6),(7,8,9)]
for i in iter_tuple: 
    print('a = {}, b = {}, c = {}'.format(i[0], i[1], i[2]))
    print(f'a = {i[0]}, b = {i[1]}, c = {i[2]}')

a = 1, b = 2, c = 3
a = 1, b = 2, c = 3
a = 4, b = 5, c = 6
a = 4, b = 5, c = 6
a = 7, b = 8, c = 9
a = 7, b = 8, c = 9


In [432]:
#unpacking multiple elements from tuple 
a,b,*rest = 1,2,3,4,5,6
c,d,*_ = 'a','b',3,4,5,6

In [433]:
rest

[3, 4, 5, 6]

In [434]:
_

[3, 4, 5, 6]

## tuple methods
Tuple objects have only two methods
- .count (value in a tuple)
- .index () 

In [435]:
#tuple methods 
a = 1,2,3,4,5,6
print(a)
print(type(a))
a.count(3) 

(1, 2, 3, 4, 5, 6)
<class 'tuple'>


1

 ## b) List

   - In contrast with tuples, lists are variable-length and their contents can be modified in-place. You can define them using square brackets [] or using the list type function
   - Checking whether a list contains a value is a lot slower than doing so with dicts and sets (using in or not in operation)
   - concatination using + is expensive, use .extend() method 

In [436]:
a_list = [2,3,4,7]
tup = ('foo', 'moo', 'doo')
tup_list = list(tup)
print(tup_list)

['foo', 'moo', 'doo']


In [437]:
print(a_list[1])

3


In [438]:
a_list[1] = '6'
a_list

[2, '6', 4, 7]

In [439]:
gen = range(10)
print(gen)
value = list(gen)
value

range(0, 10)


[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [440]:
#adding and removing elements 
#1. add
a_list.append('9')
print(a_list)

#2. removing 
del(a_list[4])
print(a_list)

[2, '6', 4, 7, '9']
[2, '6', 4, 7]


In [441]:
a_list.insert(1, 6)
a_list

[2, 6, '6', 4, 7]

In [442]:
a_list.pop(1)
a_list

[2, '6', 4, 7]

In [443]:
a_list.remove('6')
a_list

[2, 4, 7]

### list methods
    - .append(val)            ==>  appends val at the last position
    - .clear()                ==> empty list 
    - .copy()                 ==> returns a copy of list 
    - .remove(val)            ==> remove first occurance of val 
    - .pop(index)             ==> remove elemnet at the mentioned index 
    - .insert(index, val)     ==> inserts val element at index 
    - .index(val)             ==> gives index of the mentioned val 
    - .reverse()              ==> reverses the list 
    - .count(val)             ==> count the total number of val in list
    - .sort(key = len)        ==> sorts the list in ascending order 

In [444]:
a_list.reverse()
a_list.

SyntaxError: invalid syntax (<ipython-input-444-12ed977698c5>, line 2)

In [445]:
b = a_list.copy()
b[1] = 10
print(a_list)
print(b)

[2, 4, 7]
[2, 10, 7]


In [446]:
#in and not in 
b_list =  ['foo', 'red', 'baz', 'dwarf', 'foo']
print('dwarf' in b_list)
print('avinash' not in b_list)

True
True


In [447]:
#concatinating and combining list
[4, None, 'foo'] + [1,2,3]

[4, None, 'foo', 1, 2, 3]

In [448]:
x_list = [4, None, 'foo']
x_list.extend([1,2,(4,5), 'six'])
x_list

[4, None, 'foo', 1, 2, (4, 5), 'six']

In [449]:
x_list.append([1,2,(4,5), 'six']) #appends take only one argument

In [450]:
x_list

[4, None, 'foo', 1, 2, (4, 5), 'six', [1, 2, (4, 5), 'six']]

In [451]:
everything = []
for i in list_of_lists:
    everything.extend(i) #is fater than

for i in list_of_lists:
    everything += i #this 

NameError: name 'list_of_lists' is not defined

### sorting 

In [452]:
#sorting 
a = [4,2,6,7,91,31,23]
b = ['avi', 'anu' , 'ikshit', 'adi', ' chirag', 'bhandari', 'sumit', 'rupali']
a.sort()
#sort has a few options that will occasionally come in handy. One is the ability to pass a secondary sort key—that is, 
#- a function that produces a value to use to sort the objects. 
b.sort(key=len)
print(a)
print(b)

[2, 4, 6, 7, 23, 31, 91]
['avi', 'anu', 'adi', 'sumit', 'ikshit', 'rupali', ' chirag', 'bhandari']


### binary search and maintaining a sorted list 


In [453]:
#binary search and maintaining a sorted list 
#- it simply means going through each list elemets and finding appropriate location that is suitable for a new elements
import bisect 

c = [1,2,2,2,4,7,8]

bisect.bisect(c, 3) # Out[100]: 4  ==> > 3 needs to be added in c list at index 4 
bisect.insort(c, 3) #bisect operations does not check the sorting 
c

[1, 2, 2, 2, 3, 4, 7, 8]

### indexing & slicing

In [454]:
#indexing & slicing
seq = [7, 2, 3, 7, 5, 6, 0, 1]

# 0- indexing
print(seq[0]) 

#slicing
print(seq [3:4]) # stop index is not included in the result 

7
[7]


In [455]:
#slicing (WITHOUT START OR END INDEX)
print (seq[3:]) # startindex=3 ;stopindex = last ==> therefore elements from 3rd index till last will be dispalyed 

print (seq[:5]) #startindex=first ;stopindex =5 ==> therefore elements from start till index 4 will be displayed   

[7, 5, 6, 0, 1]
[7, 2, 3, 7, 5]


In [456]:
#slicing (WITH NEGATIVE INDEX)
print(seq[-4:-2]) #4th element from end along with 3rd element 
print(seq[-3:]) #3rd element from end and all te rest 

[5, 6]
[6, 0, 1]


In [457]:
#slicing (WITH STEPS)
print(seq[::2])
print(seq[::-1]) #reversing list

[7, 3, 5, 0]
[1, 0, 6, 5, 7, 3, 2, 7]


### Built in sequence function 
- ENUMERATE() - to keep tracking the index along with value 
- SORTED() - to sort the list 
- ZIP() - to zip two or more objects [zip “pairs” up the elements        of a number of lists, tuples, or other sequences to        create a list of tuples:]
- REVERSED() - reversed is a generator which reverses the object. but it will not materialize unless list or dict functions are applied.

In [458]:
#ENUMERATE
collection = ['jake', 'take' , 'make']
#normal way of tracking the index

j = 0
for v in collection:
    #do something here
    j +=1 #index counter 
    
for i, v in enumerate(collection):
    print(f'{i} : {v}')

0 : jake
1 : take
2 : make


In [459]:
#SORTED
name = 'avinash'
sorted(name)

['a', 'a', 'h', 'i', 'n', 's', 'v']

In [460]:
sorted(collection)

['jake', 'make', 'take']

In [461]:
dict1 = {3:'c', 2:'b', 1:'a'}
sort_dict_key = sorted(dict1)
for k in sort_dict_key:
    print(sort_dict_key[k])

2
3


IndexError: list index out of range

In [462]:
#ZIP
list1 = ['a' , 'ab' , 'ac']
list2 = list(range(0,6)) 

zip_list = zip(list1 , list2)

list(zip_list) #==> only 3 obj can be zipped 

[('a', 0), ('ab', 1), ('ac', 2)]

In [463]:
dict(zip_list)

{}

In [464]:
zipped = [('a', 'avinash') ,('b' , 'binod' ), ('c' , 'chand')]
initial, firstname = zip(*zipped)
#this confirms that zip created by collection of tuples
print(initial, firstname)

('a', 'b', 'c') ('avinash', 'binod', 'chand')


In [465]:
#REVERSED - generator object
list (reversed(range(0,10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

## c) DICTIONARIES 
- aka hash map or associative array 
- collection of key value pair 
- ways to create dict:
    - use {} and :
    - use dict ()

In [466]:
empty_dict = {}
a = {'a': 'some_value' , 'b':[1,2,3,4,5], 'c':{'aa':1, 'bb':2} }

In [467]:
#creating dict form tuples 
dict(zip(range(0,9) , reversed(range(0,9))))
#zip creates tuples, 

{0: 8, 1: 7, 2: 6, 3: 5, 4: 4, 5: 3, 6: 2, 7: 1, 8: 0}

In [468]:
#access insert or set elements
#access
print(a['a'])

#insert
a['d'] = None
print(a)

#set
a['b'] = range(0,4)
print(a)

some_value
{'a': 'some_value', 'b': [1, 2, 3, 4, 5], 'c': {'aa': 1, 'bb': 2}, 'd': None}
{'a': 'some_value', 'b': range(0, 4), 'c': {'aa': 1, 'bb': 2}, 'd': None}


In [469]:
#del or pop 
del(a['d'])
print(a)

a.pop('c')
print(a)

{'a': 'some_value', 'b': range(0, 4), 'c': {'aa': 1, 'bb': 2}}
{'a': 'some_value', 'b': range(0, 4)}


### built in dict methods 
- a.clear
- a.copy 
- a.fromkeys
- a.get            ==> if key is in some dictioanry return the same value else return default 
- a.items
- a.keys
- a.pop
- a.popitem
- a.setdefault     ==> suppose you want to a form a new dictionary that has keys as the first letter of value in a list
- a.update         ==> can merge one dict into another using the update method
- a.values

In [470]:
#a.clear()                        ==> clears the dictionary
a.clear()
print(a)

#a.copy()                         ==> copies the dictionary
a = {'a': 'some_value' , 'b':[1,2,3,4,5], 'c':{'aa':1, 'bb':2} }
av = a.copy()
print(av)

#a.fromkeys()                     ==> creates new dictionary 
print(a.fromkeys(range(0,9), 5)) #a.fromkeys(iterable, value= None, /)

#a.get()                          ==> if key is in some dictioanry return the same value else return default 
print(a.get('c', 5))
#.get() can also be replaced by a['b']
print(a['b'])

#a.items(), a.keys() , a.values()==> returns all elements, keys , values 
print(a.items())
print(a.keys())
print(a.values())

#a.popitem()                      ==> pops out or deletes last element 
print(a.popitem())
print(a)

#a.pop()                          ==> pops out or delets an element with the specidied key 
a.pop('b')
print(a)

#a.update()                       ==> can merge one dict into another using the update method
a.update({'b':'foo', 'c':7})
print(a)

{}
{'a': 'some_value', 'b': [1, 2, 3, 4, 5], 'c': {'aa': 1, 'bb': 2}}
{0: 5, 1: 5, 2: 5, 3: 5, 4: 5, 5: 5, 6: 5, 7: 5, 8: 5}
{'aa': 1, 'bb': 2}
[1, 2, 3, 4, 5]
dict_items([('a', 'some_value'), ('b', [1, 2, 3, 4, 5]), ('c', {'aa': 1, 'bb': 2})])
dict_keys(['a', 'b', 'c'])
dict_values(['some_value', [1, 2, 3, 4, 5], {'aa': 1, 'bb': 2}])
('c', {'aa': 1, 'bb': 2})
{'a': 'some_value', 'b': [1, 2, 3, 4, 5]}
{'a': 'some_value'}
{'a': 'some_value', 'b': 'foo', 'c': 7}


In [471]:
#creating dict form tuples 
dict(zip(range(0,9) , reversed(range(0,9))))
#zip creates tuples, 

{0: 8, 1: 7, 2: 6, 3: 5, 4: 4, 5: 3, 6: 2, 7: 1, 8: 0}

In [472]:
a = {'a': 'some_value' , 'b':[1,2,3,4,5], 'c':{'aa':1, 'bb':2} }

#a.get()                        ==> if key is in some dictioanry return the same value else return default 
print(a.get('a', 5))

#a.setdefault()                 ==> suppose you want to a form a new dictionary that has keys as the first letter of value in a list
a1 = ['avinash', 'anukriti', 'sandeep', 'rupali' , 'chaitanya', 'mitul', 'meenal']
a1_intialbaseddict={}
for word in a1:
    letter = word[0]
    a1_intialbaseddict.setdefault(letter , []).append(word)
print(a1_intialbaseddict)


#another way of doing the same thing
from collections import defaultdict
a1_intialbaseddict2 = defaultdict(list)
for word in a1:    
    a1_intialbaseddict2[word[0]].append(word)
print(a1_intialbaseddict2)

some_value
{'a': ['avinash', 'anukriti'], 's': ['sandeep'], 'r': ['rupali'], 'c': ['chaitanya'], 'm': ['mitul', 'meenal']}
defaultdict(<class 'list'>, {'a': ['avinash', 'anukriti'], 's': ['sandeep'], 'r': ['rupali'], 'c': ['chaitanya'], 'm': ['mitul', 'meenal']})


### Valid dict key types:
- val = anytype 
- key = immutable objects 
- While the values of a dict can be any Python object, the keys generally        have to be immutable objects like scalar types (int, float, string) or        tuples (all the objects in the tuple need to be immutable, too). The        technical term here is hashability. You can        check whether an object is hashable (can be used as a key in a dict)        with the hash        function:

In [473]:
hash((1, 2, (2, 3)))

-9209053662355515447

In [474]:
hash('string')

-8114448630589539248

In [475]:
hash((1, 2, [2, 3]))

TypeError: unhashable type: 'list'

## d) set 
- A set is an unordered collection of unique elements. 
- You can think of them like dicts, but keys only, no values. A set can be created in two ways: 
        - via the set function or a set literal 
        - via curly braces
- set supports operations like union, intersection, difference, and symmetric difference.

In [476]:
{1,2,3,4,5,6} 
type({1,2,3,4,5,6})

set

In [477]:
set ([1,2,2,1,1,3,4])

{1, 2, 3, 4}

In [478]:
a = {1,2,3,4,5,6}
b= {7,8,9,10,11,12}

#union
print(a.union(b))
print(a | b )

#intersection 
print(a.intersection(b))
print(a & b )

{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
set()
set()



- a.add(x) - Add element x to              the set a 
- a.clear() - Reset the set a to an empty state,              discarding all of its elements
- a.remove(x) - Remove element x from              the set a
- a.pop() - Remove an arbitrary element from the set a, raising KeyError if the              set is empty
- a.union(b) - a | b - All of the unique elements in a and b
- a.update(b) - a |= b - Set the contents of a to be the union              of the elements in a and  b 
- a.intersection(b) - a & b - All of the elements in both  a and b
- a.intersection_update(b) - a &= b - Set the contents of a to be the              intersection of the elements in a and              b
- a.difference(b) - a - b - The elements in a              that are not in b
- a.difference_update(b) - a -= b - Set a to the elements in a that are not in b
- a.symmetric_difference(b) - a ^ b - All of the elements in either a or b but not              both
- a.symmetric_difference_update(b) - a ^= b - Set a to contain the elements in either a or b but not              both
- a.issubset(b) - <= - True if the elements of a              are all contained in b
- a.issuperset(b)- >= - True if the elements of b              are all contained in a
- a.isdisjoint(b) - True if a and b have no elements in common

In [479]:
a

{1, 2, 3, 4, 5, 6}

In [480]:
a.add(7)
a

{1, 2, 3, 4, 5, 6, 7}

In [481]:
a.clear()
a

set()

In [482]:
a = {1,2,3,4,5,6}
a.remove(2)
a

{1, 3, 4, 5, 6}

In [483]:
a.pop() #raises error if set is empty (DIFFEREENCE BETWEEN POP AND REMOVE)
a

{3, 4, 5, 6}

In [484]:
a = {1,2,3,4,5,6}
b= {6,7,8,9,10,11,12}
print("union:" , a.union(b))
print("union:" , a | b)

print("intersection:" , a.intersection(b))
print("intersection:" , a & b)

a.update(b)
print("union and update:",a)
a |= b
print("union and update:", a)



union: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
union: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
intersection: {6}
intersection: {6}
union and update: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}
union and update: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}


In [485]:
a = {1,2,3,4,5,6}
b= {6,7,8,9,10,11,12}
a.intersection_update(b)
print("intersection and update:", a)
a &= b
print("intersection and update:", a)

intersection and update: {6}
intersection and update: {6}


In [486]:
a = {1,2,3,4,5,6}
b= {6,7,8,9,10,11,12}
a.difference(b) # remove common and display unique of a 

{1, 2, 3, 4, 5}

In [487]:
a = {1,2,3,4,5,6}
b= {6,7,8,9,10,11,12}
a.symmetric_difference(b) # unique elements from a and b 

{1, 2, 3, 4, 5, 7, 8, 9, 10, 11, 12}

In [488]:
a = {1,2,3}
b= {6,1,2,3,7,8,9,10,11,12}
a.issubset(b) # all a are in b 

True

In [489]:
a.issuperset(b) # all b are in a 

False

In [490]:
a.isdisjoint(b) #no elements in common

False

## List, Set, and Dict Comprehensions

- List comprehensions are one of the most-loved Python language features. 
- They allow      you to concisely form a new list by filtering the elements of a      collection, transforming the elements passing the filter in one concise      expression.
        - [expr for val in collection if condition]
        - dict_comp = {key-expr : value-expr for value in collection             if condition}
        - set_comp = {expr for value in collection if condition}
  

In [491]:
#This is equivalent to the following for loop:
result = []
for val in collection:
    if condition: 
        result.append(expr)

NameError: name 'condition' is not defined

In [492]:
#find elements with len > 2 and convert them in uppercase using list comprehension :
strings = ['a', 'as', 'bat', 'car', 'dove', 'python']

[ x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [493]:
#create a set containing just the lengths of the strings using set comprehension:
{len(x) for x in strings}

{1, 2, 3, 4, 6}

In [494]:
#the above operation can also be achieved by 
set(map(len, strings))

{1, 2, 3, 4, 6}

In [495]:
#dict comprehension
{len(val): val for index, val in enumerate (strings)}

{1: 'a', 2: 'as', 3: 'car', 4: 'dove', 6: 'python'}

## Nested List Comprehension 
- Keep in mind that the order of the for expressions would be the same if you        wrote a nested for loop instead of        a list comprehension

In [496]:
all_data = [['John', 'Emily', 'Michael', 'Mary', 'Steven'],
            ['Maria', 'Juan', 'Javier', 'Natalia', 'Pilar']]

#display only name with 2 e's 
# usual way 
final = []
for names in all_data:
    enough_es = [name for name in names if name.count('e') >=2]
    final.extend(enough_es)
print (final)

#nested list comprehension 
print([name for names in all_data for name in names if name.count('e') >=2])

['Steven']
['Steven']


In [497]:
# “flatten” a list of tuples of integers into a simple list of integers
some_tuples = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]
[j for i in some_tuples for j in i] 

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [498]:
#list comprehension inside a list comprehension - This produces a list of lists, rather than a flattened list of all of the inner elements
[[j for j in i]for i in some_tuples]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# 3.2 Functions 
- important method of coding 
- improves readbility and organisation 
- no need to rewrite similar set of code instead create function 
- created using def and vlaues are returned using return statement 
        - def function_name (par1, par 2):
             - #do something here 
             - return final_result 
- paramters can be combination of positional arguments and keyword arguments(i.e having a default value)
- global and local viables or namespace - After the function is finished, the local namespace is destroyed 

#### Concept of local and global variable 
- local = specifed insde a function and distroyed once the function has done executing
- global = specified outside the function or inside with GLOBAL keyword

NOTE: discourage use of the global keyword. Typically global variables        are used to store some kind of state in a system. If you find yourself        using a lot of them, it may indicate a need for object-oriented        programming (using classes).

In [499]:
# concept of local and global variable 

def func():
    aa1 = [] #-- a is local 
    for i in range(5):
        aa1.append(i)
    
func()

In [500]:
aa1 #local objects are distroyed Hence the error 

[0, 1, 2, 3, 4]

In [501]:
aa1 = [] #--> a is global  
def func():
    for i in range(5):
        aa1.append(i)
        
func()

In [502]:
aa1 

[0, 1, 2, 3, 4]

In [503]:
def func():
    global a2  #--> defined inside but global 
    a2 = []
    for i in range(5):
        a2.append(i)
func()

In [504]:
a2

[0, 1, 2, 3, 4]

#### Returning multiple values:

In [505]:
def f():    
    a = 5    
    b = 6    
    c = 7    
    return a, b, c

a, b, c = f()
print(a,b,c)

5 6 7


## functions are objects 

In [506]:
states = ['   Alabama ', 'Georgia!', 'Georgia', 'georgia', 'FlOrIda','south   carolina##', 'West virginia?']

In [507]:
import re 

def clean_string(strings):
    result = []
    for value in strings:        
        value = value.strip()                   #stripping whitespace
        value = re.sub('[!#?]', '', value)      #removing punctuation symbols  
        value = value.title()                   #standardizing on proper capitalization
        result.append(value)    
    return result

clean_string(states)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [508]:
x = '   Alabama '
x.strip()

y = 'georgia!'
val = re.sub('[!]', '', y)
final =val.title()
final 

'Georgia'

In [509]:
def remove_punctuation(value):
    return re.sub('[!#?]','',value)

ops = [str.strip, remove_punctuation, str.title]

def clean_string(strings, ops1):
    result = [] 
    for value in strings:
        for function in ops1:
            value = function(value)
        result.append(value)
    return result

clean_string(states, ops)  



['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

## Anonymous (Lambda) Functions
- Python has support for so-called anonymous or      lambda functions, which are a way of writing      functions consisting of a single statemen

- NOTE : One reason lambda functions are called anonymous functions is        that , unlike functions declared with the def keyword, the function object        itself is never given an explicit __name__        attribute.

In [510]:
def short_function(x):    
    return x * 2

short_function(2)

4

In [511]:
equiv_anon = lambda x : x*2  #converting easy fucntion on the fly 
equiv_anon(2)

4

In [512]:
def apply_to_list(some_list, f):    
    return [f(x) for x in some_list]

ints = [4, 0, 1, 5, 6]
apply_to_list(ints, lambda x: x * 2) #passing lambda function to a fucntion 

[8, 0, 2, 10, 12]

#### Currying: Partial Argument Application
- Currying is computer science jargon (named after the mathematician Haskell      Curry) that means deriving new functions from existing ones by      partial argument application

In [513]:
def add_numbers(x, y):    
    return x + y

In [514]:
add_five = lambda y: add_numbers(5,y) #calling add_numbers fucntion using lambda function 

In [515]:
add_five(6)

11

In [516]:
add_numbers(2,3)

5

## Generator 
- Having a consistent way to iterate over sequences, like objects in a list or      lines in a file, is an important Python feature. This is accomplished by      means of the iterator protocol, a generic way      to make objects iterable. For example, iterating over a dict yields the      dict keys:

- A generator is a concise way to construct a      new iterable object. Whereas normal functions execute and return a      single result at a time, generators return a sequence of multiple      results lazily, pausing after each one until the next one is requested.      To create a generator, use the yield keyword      instead of return in a      function:

In [517]:
# iterating over dictionary 
some_dict = {'a': 1, 'b': 2, 'c': 3}

for key in some_dict:
    print(key)

a
b
c


- Yield - generators return a sequence of multiple results lazily, pausing after each one until the next one is requested
- return - return one or more final value only once 
- print - prints wherever it is called 

In [518]:
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++yeild
def squares(n=10):    
    print('Generating squares from 1 to {0}'.format(n ** 2))    
    for i in range(1, n + 1):        
        yield i ** 2

In [519]:
gen = squares()
gen

<generator object squares at 0x7fdc669dd820>

In [520]:
for x in gen:
    print(x, end = ' ')

Generating squares from 1 to 100
1 4 9 16 25 36 49 64 81 100 

In [521]:
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++return 
def squares1(n=10):    
    print('Generating squares from 1 to {0}'.format(n ** 2))    
    for i in range(1, n + 1):        
        return i ** 2
gen1 = squares1()
gen1

Generating squares from 1 to 100


1

In [522]:
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++print
def squares1(n=10):    
    print('Generating squares from 1 to {0}'.format(n ** 2))    
    for i in range(1, n + 1):        
        print(i ** 2)
gen1 = squares1()
gen1

Generating squares from 1 to 100
1
4
9
16
25
36
49
64
81
100


### generator expressions 
- concise way of wrting generator expression 
- This is a generator        analogue to list, dict, and set comprehensions;

In [523]:
#appraoch - 1 line using generator expression pay attention of ()
gen = (i ** 2 for i in range (10))

#verbose similar function  - 4 lines 
def _make_gen():    
    for x in range(100):        
        yield x ** 2
gen = _make_gen()

print(gen)
for x in gen: 
    print(x)

<generator object _make_gen at 0x7fdc66953890>
0
1
4
9
16
25
36
49
64
81
100
121
144
169
196
225
256
289
324
361
400
441
484
529
576
625
676
729
784
841
900
961
1024
1089
1156
1225
1296
1369
1444
1521
1600
1681
1764
1849
1936
2025
2116
2209
2304
2401
2500
2601
2704
2809
2916
3025
3136
3249
3364
3481
3600
3721
3844
3969
4096
4225
4356
4489
4624
4761
4900
5041
5184
5329
5476
5625
5776
5929
6084
6241
6400
6561
6724
6889
7056
7225
7396
7569
7744
7921
8100
8281
8464
8649
8836
9025
9216
9409
9604
9801


In [524]:
sum(x ** 2 for x in range(100))

328350

In [525]:
dict((i, i **2) for i in range(100))

{0: 0,
 1: 1,
 2: 4,
 3: 9,
 4: 16,
 5: 25,
 6: 36,
 7: 49,
 8: 64,
 9: 81,
 10: 100,
 11: 121,
 12: 144,
 13: 169,
 14: 196,
 15: 225,
 16: 256,
 17: 289,
 18: 324,
 19: 361,
 20: 400,
 21: 441,
 22: 484,
 23: 529,
 24: 576,
 25: 625,
 26: 676,
 27: 729,
 28: 784,
 29: 841,
 30: 900,
 31: 961,
 32: 1024,
 33: 1089,
 34: 1156,
 35: 1225,
 36: 1296,
 37: 1369,
 38: 1444,
 39: 1521,
 40: 1600,
 41: 1681,
 42: 1764,
 43: 1849,
 44: 1936,
 45: 2025,
 46: 2116,
 47: 2209,
 48: 2304,
 49: 2401,
 50: 2500,
 51: 2601,
 52: 2704,
 53: 2809,
 54: 2916,
 55: 3025,
 56: 3136,
 57: 3249,
 58: 3364,
 59: 3481,
 60: 3600,
 61: 3721,
 62: 3844,
 63: 3969,
 64: 4096,
 65: 4225,
 66: 4356,
 67: 4489,
 68: 4624,
 69: 4761,
 70: 4900,
 71: 5041,
 72: 5184,
 73: 5329,
 74: 5476,
 75: 5625,
 76: 5776,
 77: 5929,
 78: 6084,
 79: 6241,
 80: 6400,
 81: 6561,
 82: 6724,
 83: 6889,
 84: 7056,
 85: 7225,
 86: 7396,
 87: 7569,
 88: 7744,
 89: 7921,
 90: 8100,
 91: 8281,
 92: 8464,
 93: 8649,
 94: 8836,
 95: 9025,


### itertools module 
- The standard library itertools module has a collection of generators for many common data        algorithms. For example, groupby        takes any sequence and a function, grouping consecutive elements in        the sequence by return value of the function.

In [526]:
import itertools 

In [527]:
first_letter = lambda x : x[0]
names = ['Alan', 'Adam', 'Wes', 'Will', 'Albert', 'Steven']

for l, n in itertools.groupby(names, first_letter):
    print(l, list(n)) #grouping consecutive elements in the sequence

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


- 1) combinations(iterable, k) __=__ Generates a sequence of all possible k-tuples of elements in the                iterable, ignoring order and without replacement (see also the                companion function combinations_with_replacement)
- 2) permutations(iterable, k) __=__ Generates a sequence of all possible k-tuples of elements in the                iterable, respecting order
- 3) groupby(iterable[, keyfunc]) __=__ Generates (key, sub-iterator) for each unique key
- 4) product(*iterables, repeat=1)   __=__ Generates the Cartesian product of the input iterables as                tuples, similar to a nested for loop

In [528]:
itertools.groupby(names, first_letter)

<itertools.groupby at 0x7fdc65fda770>

## Errors and Exception Handling
- In data analysis      applications, many functions only work on certain kinds of input. As an      example, Python’s float function is      capable of casting a string to a floating-point number, but fails with ValueError      on improper inputs:

In [529]:
float ('123')

123.0

In [530]:
float('something')

ValueError: could not convert string to float: 'something'

In [531]:
#Suppose we wanted a version of float that fails gracefully, returning the input argument.

def float1 (x):
    try: 
        return float(x)
    except:  # suppress all types of errors
        return x 
float1('something')

'something'

In [532]:
float1 ((1,2))

(1, 2)

In [533]:
#You might want to only suppress ValueError, since a TypeError (the input was not a string or numeric value) 
#might indicate a legitimate bug in your program.
def attempt_float(x):
    try:
        return float(x)
    except ValueError: #suppressing value error only throw typeError 
        return x
    
attempt_float('something')

'something'

In [534]:
attempt_float((1,2)) #now it will only throw Typeerror 

TypeError: float() argument must be a string or a number, not 'tuple'

In [535]:
def attempt_float(x):
    try:
        return float(x)
    except (TypeError, ValueError): # handling multiple type of errors 
        return x
attempt_float('something')

'something'

In [536]:
attempt_float((1,2))

(1, 2)

In some cases, you may not want to suppress an exception, but you want some code to be executed regardless of whether the code in the try block succeeds or not. To do this, use finally:

In [537]:
f = open(path, 'w')

try:
    write_to_file(f)
finally:
    f.close()

NameError: name 'write_to_file' is not defined

In [538]:
#Here, the file handle f will always get closed. Similarly, you can have code that executes only if the try: block succeeds using else:
f = open(path, 'w')

try:
    write_to_file(f)
except:
    print('Failed')
else:
    print('Succeeded')
finally:
    f.close()

Failed


# 3.3 Files and the Operating System

In [539]:
path = '/Users/avinashtripathi/Desktop/segismundo.txt'

In [540]:
#To open a file for reading or writing, use the built-in open function
f = open(path)

In [541]:
for line in f:
    print(line)

In [542]:
[x.rstrip() for x in open(path)]

[]

In [543]:
#When you use open to create file objects, it is important to explicitly close the file 
with open(path) as f:
    lines = [x.rstrip() for x in f]
#with - This will automatically close the file f when exiting the with block.    

In [544]:
#read returns a certain number of characters from the file
f = open(path, 'w')

In [545]:
f.write('my name is avinash')
f.close()

In [546]:
f = open(path)

In [547]:
f.read()

'my name is avinash'

In [548]:
for line in f:
    print(line)

In [549]:
#tell gives you the current position:
f.tell()

18

In [550]:
#if you want to check the default encoding - to understand how the charcters are taken 
import sys
sys.getdefaultencoding()

'utf-8'

In [551]:
f.seek(4) #- will set your pointer to a position 

4

In [552]:
f.read(6) #will read the number of charcters 

'ame is'

In [553]:
with open(path, 'w') as f:
    f.write("Sueña el rico en su riqueza,\nque más cuidados le ofrece;\n\nsueña el pobre que padece\nsu miseria y su pobreza;\n\nsueña el que a medrar empieza,\nsueña el que afana y pretende,\nsueña el que agravia y ofende,\n\ny en el mundo, en conclusión,\ntodos sueñan lo que son,\naunque ninguno lo entiende.\n")
    
f = open(path)
for lines in f: 
    print(lines)
    
[l.rstrip() for l in open(path)]

Sueña el rico en su riqueza,

que más cuidados le ofrece;



sueña el pobre que padece

su miseria y su pobreza;



sueña el que a medrar empieza,

sueña el que afana y pretende,

sueña el que agravia y ofende,



y en el mundo, en conclusión,

todos sueñan lo que son,

aunque ninguno lo entiende.



['Sueña el rico en su riqueza,',
 'que más cuidados le ofrece;',
 '',
 'sueña el pobre que padece',
 'su miseria y su pobreza;',
 '',
 'sueña el que a medrar empieza,',
 'sueña el que afana y pretende,',
 'sueña el que agravia y ofende,',
 '',
 'y en el mundo, en conclusión,',
 'todos sueñan lo que son,',
 'aunque ninguno lo entiende.']

In [554]:
#Beware using seek when opening files in any mode other than binary. If the file position falls in the middle 
#of the bytes defining a Unicode character, then subsequent reads will result in an error

#encoding option in open command will convert the encoding of file from one to another file. 