# Data Structures, Functions and Files from "Python for Data Analysis" by Wes McKinney

***Tuples***

In [1]:
tup = 4,5,6

In [2]:
tup

(4, 5, 6)

In [4]:
nested_tup = (4,5,6),(7,8)

In [5]:
nested_tup

((4, 5, 6), (7, 8))

In [6]:
tuple([4,5,6])

(4, 5, 6)

In [7]:
tup = tuple('string')

In [8]:
tup

('s', 't', 'r', 'i', 'n', 'g')

In [9]:
tup[0]

's'

### While the objects stored in a tuple may be mutable themselves, once the tuple is created it's not possible to modify which object is stored in each slot.

In [10]:
tup = tuple(['foo',[1,2],True])

In [11]:
tup[2] = False

TypeError: 'tuple' object does not support item assignment

If an object inside a tuple is mutable, such as a list, you can modify it in place.

In [12]:
tup[1].append(3)

In [14]:
tup

('foo', [1, 2, 3], True)

In [15]:
(4,None,'foo') + (6,0) + ('bar',)

(4, None, 'foo', 6, 0, 'bar')

In [16]:
('foo','bar') * 4

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

# Unpacking tuples

If we try to assign to a tuple-like expression of variables, Python will attempt to unpack the value on the right-hand side of the equals sign.

In [17]:
tup = (4,5,6)

In [18]:
a,b,c = tup

In [19]:
tup

(4, 5, 6)

In [20]:
b

5

Even sequences with nested tuples can be unpacked:

In [21]:
tup = 4,5,(6,7)

In [22]:
a,b,(c,d) = tup

In [23]:
d

7

In [24]:
tmp = a

In [25]:
a = b

In [26]:
b= tmp

In [27]:
a,b = 1,2

In [28]:
a

1

In [29]:
b

2

**In Python, the swap can be done like this:**

In [30]:
b,a = a,b

In [31]:
a

2

In [32]:
b

1

A common use of variable unpacking is iterating over sequences of tuples or lists:

In [33]:
seq = [(1,2,3),(4,5,6),(7,8,9)]

In [34]:
for a,b,c in seq:
    print('a={0},b={1},c={2}'.format(a,b,c))

a=1,b=2,c=3
a=4,b=5,c=6
a=7,b=8,c=9


In [35]:
values = 1,2,3,4,5

**The special syntax `*rest`, which is also used in function signatures to capture an arbitrarily long list of positional arguments, collects the remaining values:**

In [36]:
a,b,*rest = values

In [37]:
a,b

(1, 2)

In [38]:
rest

[3, 4, 5]

**This rest bit is sometimes something we want to discard; there is nothing special about the `rest` name. As a matter of convention, many Python programmers use the underscore (`_`) for unwanted variables.**

In [41]:
a,b, *_ = values

In [43]:
a,b

(1, 2)

# Tuple method

In [44]:
a = (1,2,2,2,3,4,2)

In [45]:
a.count(2)

4

# List

In [46]:
a_list = [2,3,7,None]

In [47]:
tup = ('foo','bar','baz')

In [48]:
b_list = list(tup)

In [49]:
b_list

['foo', 'bar', 'baz']

In [50]:
b_list[1] = 'peekaboo'

In [51]:
b_list

['foo', 'peekaboo', 'baz']

**Lists and tuples are semantically similar (though tuples cannot be modified) and can be used interchangeably in many functions.**

In [52]:
gen = range(10)

In [53]:
gen

range(0, 10)

In [54]:
list(gen)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

# Adding and removing elements

In [56]:
b_list.append('dwarf')

In [57]:
b_list

['foo', 'peekaboo', 'baz', 'dwarf']

In [59]:
b_list.insert(1,'red') #insert an element at a specific location

In [61]:
b_list.pop(2) #removes and returns element at a particular index

'red'

In [62]:
b_list

['foo', 'red', 'peekaboo', 'baz', 'dwarf']

In [63]:
b_list.append('foo')

In [64]:
b_list

['foo', 'red', 'peekaboo', 'baz', 'dwarf', 'foo']

In [65]:
b_list.remove('foo')

In [66]:
b_list

['red', 'peekaboo', 'baz', 'dwarf', 'foo']

If performance is not a concern, by using append and remove you can use a Python list as a perfectly suitable "multiset" data structure.

In [67]:
'dwarf' in b_list

True

In [68]:
'dwarf' not in b_list

False

# Concatenating and combining lists

In [69]:
[4,None,'foo'] + [7,8,(2,3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [70]:
x = [4,None,'foo']

In [71]:
x.extend([7,8,(2,3)])

In [72]:
x

[4, None, 'foo', 7, 8, (2, 3)]

In [77]:
# Sample input so the cell actually runs; any list of lists works here.
list_of_lists = [[1, 2], [3, 4, 5], [6]]

# Flatten by appending each chunk onto the same list in place.
everything = []
for chunk in list_of_lists:
    everything.extend(chunk)

In [78]:
# Sample input so the cell actually runs; any list of lists works here.
list_of_lists = [[1, 2], [3, 4, 5], [6]]

everything = []
for chunk in list_of_lists:
    # Concatenation builds a brand-new list on every pass and copies all the
    # elements gathered so far, so it is slower than extend(), which appends
    # to the existing list in place.
    everything = everything + chunk

**Sorting**

In [79]:
a = [7,2,5,1,3]

In [80]:
a.sort()

In [81]:
a

[1, 2, 3, 5, 7]

In [82]:
# Order the words by length; the sort is stable, so words of equal length
# keep their original relative order.
b = sorted(['saw', 'small', 'He', 'foxes', 'six'], key=len)

In [83]:
b

['He', 'saw', 'six', 'small', 'foxes']

Binary Search and maintaining a sorted list

In [84]:
import bisect

In [85]:
c = [1,2,2,2,2,3,4,7]

In [86]:
bisect.bisect(c,2)

5

In [87]:
bisect.bisect(c,5)

7

In [88]:
bisect.insort(c,6)

In [89]:
c

[1, 2, 2, 2, 2, 3, 4, 6, 7]

**Slicing**

In [90]:
seq = [7,2,3,7,5,6,0,1]

In [91]:
seq[1:5]

[2, 3, 7, 5]

In [93]:
seq[3:4] = [6,3]

In [94]:
seq

[7, 2, 3, 6, 3, 5, 6, 0, 1]

In [95]:
seq[:5]

[7, 2, 3, 6, 3]

In [96]:
seq[-4:]

[5, 6, 0, 1]

In [98]:
seq[-6:-2]

[6, 3, 5, 6]

In [99]:
seq[::2]

[7, 3, 3, 6, 1]

In [101]:
seq[::-1] #reversing a list or tuple

[1, 0, 6, 5, 3, 6, 3, 2, 7]

# Built in Sequence Functions

**Enumerate**

In [102]:
# Sample input so the cell actually runs; any iterable works here.
collection = ['foo', 'bar', 'baz']

# Do-it-yourself index tracking: bump a counter by hand on every pass.
# enumerate() exists to replace exactly this pattern.
i = 0
for value in collection:
    # ... do something with value ...
    i += 1

In [103]:
some_list = ['foo','bar','baz']
mapping = {}

In [104]:
for i,v in enumerate(some_list):
    mapping[v] = i

In [105]:
mapping

{'foo': 0, 'bar': 1, 'baz': 2}

In [106]:
sorted([7,1,2,6,0,3,2])

[0, 1, 2, 2, 3, 6, 7]

In [107]:
sorted('horse race')

[' ', 'a', 'c', 'e', 'e', 'h', 'o', 'r', 'r', 's']

**zip**

In [108]:
seq1 = ['foo','bar','baz']
seq2 = ['one','two','three']
zipped = zip(seq1,seq2)

In [109]:
list(zipped)

[('foo', 'one'), ('bar', 'two'), ('baz', 'three')]

In [110]:
seq3 = [False,True]

In [112]:
for i,(a,b) in enumerate(zip(seq1,seq2)):
    print('{0}: {1},{2}'.format(i,a,b))

0: foo,one
1: bar,two
2: baz,three


In [113]:
pitchers = [('Nolan','Ryan'),('Roger','Clemens'),('Curt','Schilling')]

In [114]:
first_names,last_names = zip(*pitchers)

In [115]:
first_names

('Nolan', 'Roger', 'Curt')

In [116]:
last_names

('Ryan', 'Clemens', 'Schilling')

**reversed**

reversed iterates over the elements of a sequence in reverse order

In [117]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

# Dict 

**Hash Map or Associative array**
It is a flexibly sized collection of key-value pairs, where key and value are Python objects.

In [118]:
empty_dict = {}

In [121]:
d1 = {'a' : 'some value','b':[1,2,3,4,5]}

In [122]:
d1

{'a': 'some value', 'b': [1, 2, 3, 4, 5]}

You can access, insert, or set elements using the same syntax as for accessing elements of a list or tuple:

In [123]:
d1[7] = 'an integer'

In [124]:
d1

{'a': 'some value', 'b': [1, 2, 3, 4, 5], 7: 'an integer'}

In [125]:
d1['b']

[1, 2, 3, 4, 5]

In [126]:
'b' in d1

True

**You can delete values using either the `del` keyword or the `pop` method (which simultaneously returns the value and deletes the key).**

In [127]:
d1[5] = 'some value'

In [128]:
d1

{'a': 'some value', 'b': [1, 2, 3, 4, 5], 7: 'an integer', 5: 'some value'}

In [129]:
d1['dummy'] = 'another value'

In [130]:
d1

{'a': 'some value',
 'b': [1, 2, 3, 4, 5],
 7: 'an integer',
 5: 'some value',
 'dummy': 'another value'}

In [131]:
del d1[5]

In [132]:
d1

{'a': 'some value',
 'b': [1, 2, 3, 4, 5],
 7: 'an integer',
 'dummy': 'another value'}

In [133]:
ret = d1.pop('dummy')

In [134]:
ret

'another value'

In [135]:
d1

{'a': 'some value', 'b': [1, 2, 3, 4, 5], 7: 'an integer'}

In [136]:
list(d1.keys())

['a', 'b', 7]

In [137]:
list(d1.values())

['some value', [1, 2, 3, 4, 5], 'an integer']

In [138]:
d1.update({'b':'foo','c':12})

In [139]:
d1

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

Creating dicts from sequences

In [140]:
mapping = {}

In [142]:
key_list = list(d1.keys())
value_list = list(d1.values())
for key,value in zip(key_list,value_list):
    mapping[key] = value

In [143]:
mapping

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

**Default values**

In [178]:
# Sample data so the cell actually runs.
some_dict = {'a': 1, 'b': 2}
key = 'c'
default_value = 0

# dict.get folds the "if key in some_dict: ... else: ..." lookup into one
# call: it returns some_dict[key] when the key is present and default_value
# otherwise.
value = some_dict.get(key, default_value)

In [145]:
words = ['apple','bat' , 'bar' , 'atom' ,'book']

In [146]:
by_letter = {}

In [147]:
# Group the words by their first letter.
for word in words:
    letter = word[0]
    if letter in by_letter:
        # Letter already seen: add to its existing group.
        by_letter[letter].append(word)
    else:
        # First word for this letter: start a new group.
        by_letter[letter] = [word]

In [148]:
by_letter

{'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']}

# Valid dict key types

In [149]:
hash('string')

329361747

In [150]:
hash((1,2,(2,3)))

1387206534

In [152]:
hash((1,2,[3,4])) # fails because the tuple contains a list, and lists are mutable and therefore unhashable

TypeError: unhashable type: 'list'

In [153]:
d = {}

In [154]:
d[tuple([1,2,3])] = 5

In [155]:
d

{(1, 2, 3): 5}

# Set 

A set is an unordered collection of unique elements. You can think of them like dicts, but keys only, no values. A set can be created in two ways: via the `set` function or via a set literal with curly braces:

In [156]:
set([2,2,2,1,3,3])

{1, 2, 3}

In [157]:
{2,2,2,3,4,5,5,1}

{1, 2, 3, 4, 5}

Sets support mathematical set operations like union, intersection, difference, and symmetric difference.

In [158]:
a = {1,2,3,4,5}

In [159]:
b = {3,4,5,6,7}

In [160]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7}

In [161]:
a | b

{1, 2, 3, 4, 5, 6, 7}

In [162]:
a.intersection(b)

{3, 4, 5}

In [163]:
a & b

{3, 4, 5}

In [164]:
c = a.copy()

In [165]:
c

{1, 2, 3, 4, 5}

In [166]:
c |= b

In [167]:
c

{1, 2, 3, 4, 5, 6, 7}

In [168]:
d = a.copy()

In [169]:
d &=b
d

{3, 4, 5}

Like dict keys, set elements generally must be immutable.

In [170]:
my_data = [1,2,3,4,5]

In [171]:
my_set = {tuple(my_data)}

In [172]:
my_set

{(1, 2, 3, 4, 5)}

In [173]:
a_set = {1,2,3,4,5}

In [174]:
{1,2,3}.issubset({1,2,3})

True

In [175]:
{1,2,3} == {3,2,1}

True

# List,Set and Dict Comprehension

In [177]:
# [expr for val in collection if condition]

In [180]:
# Loop equivalent of the list comprehension template
#     [expr for val in collection if condition]
# made concrete so the cell actually runs.
collection = ['a', 'as', 'bat', 'car', 'drove', 'python']

result = []
for val in collection:
    if len(val) > 2:                  # condition
        result.append(val.upper())    # expr

In [181]:
strings = ['a','as','bat','car','drove','python']

In [182]:
[x.upper() for x in strings if len(x)>2]

['BAT', 'CAR', 'DROVE', 'PYTHON']

In [183]:
#dict_comp = {key-expr : value expr for value in collection if condition}

In [184]:
#set_comp = {expr for value in collection if condition}

In [185]:
unique_lengths = {len(x) for x in strings}

In [186]:
unique_lengths

{1, 2, 3, 5, 6}

In [187]:
set(map(len,strings))

{1, 2, 3, 5, 6}

In [188]:
loc_mapping = {val : index for index, val in enumerate(strings)}

In [189]:
loc_mapping

{'a': 0, 'as': 1, 'bat': 2, 'car': 3, 'drove': 4, 'python': 5}

**Nested List comprehensions**

In [190]:
all_data = [['John','Emily','Micheal','Mary','Steven'],
           ['Maria','Juan','Javier','Natalia','Pilar']]

In [191]:
names_of_interest = []
for names in all_data:
    enough_es = [name for name in names if name.count('e')>=2]
    names_of_interest.extend(enough_es)

In [192]:
names_of_interest

['Steven']

In [193]:
some_tuples = [(1,2,3),(4,5,6),(7,8,9)]

In [194]:
flattened = [x for tup in some_tuples for x in tup]

In [195]:
flattened

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [197]:
# Nested-loop form of the comprehension
#     [x for tup in some_tuples for x in tup]
# The for clauses of the comprehension appear in the same order as the loops
# nest here: outer loop first, inner loop second.
flattened = []
for tup in some_tuples:
    for x in tup:
        flattened.append(x)

In [198]:
flattened

[1, 2, 3, 4, 5, 6, 7, 8, 9]

We can have arbitrarily many levels of nesting, though if we have more than two or three levels of nesting we should probably start to question whether this makes sense from a code readability standpoint.

In [199]:
[[x for x in tup] for tup in some_tuples]

[[1, 2, 3], [4, 5, 6], [7, 8, 9]]