# Data Structures

## Lists

In [1]:
empty_list = list()  # build an empty list by calling the list constructor
empty_list = []      # same result with literal syntax; this rebinding replaces the object created above

In [2]:
len(empty_list)

0

In [4]:
# Indexing into an empty list raises IndexError. The exception is the point of
# this cell, so catch it and print its message rather than letting it abort a
# "Restart & Run All" of the notebook.
try:
    empty_list[0]
except IndexError as err:
    print('IndexError: {0}'.format(err))

IndexError: list index out of range

In [5]:
# NOTE(review): 16 (4**2) is absent, so this is not the complete run of squares of 1..6.
# Left unchanged here because the recorded outputs of the following cells reflect this list.
squares = [1, 4 ,9 ,25, 36]

In [6]:
squares

[1, 4, 9, 25, 36]

In [7]:
squares[0]

1

In [8]:
squares[-1]

36

In [9]:
squares[1:3]

[4, 9]

In [10]:
squares[:2]

[1, 4]

In [11]:
squares[2:]

[9, 25, 36]

In [12]:
squares[:]

[1, 4, 9, 25, 36]

### Lists of lists

In [13]:
cubes = [1, 8, 27, 64, 125]

In [14]:
powers = [squares, cubes]

In [15]:
powers

[[1, 4, 9, 25, 36], [1, 8, 27, 64, 125]]

In [16]:
powers[0][2]

9

### Assignment (aliasing) vs. shallow copying

In [17]:
# Plain assignment copies nothing: both names now refer to the same list object,
# so a change made through either name is visible through the other.
copy_of_squares = squares

In [18]:
copy_of_squares.append(49)

In [19]:
copy_of_squares

[1, 4, 9, 25, 36, 49]

In [20]:
squares

[1, 4, 9, 25, 36, 49]

In [21]:
# list.copy() creates a new list object, so appending to it leaves `squares` untouched.
# NOTE: despite the variable name, list.copy() is a *shallow* copy -- any nested
# mutable elements would still be shared. A true deep copy is copy.deepcopy(squares).
deep_copy_of_squares = squares.copy()

In [22]:
deep_copy_of_squares

[1, 4, 9, 25, 36, 49]

In [23]:
deep_copy_of_squares.append(64)

In [24]:
deep_copy_of_squares

[1, 4, 9, 25, 36, 49, 64]

In [25]:
squares

[1, 4, 9, 25, 36, 49]

### Append vs. extend

In [26]:
# append() adds its argument as ONE element, so the list [64, 81] ends up nested inside squares.
squares.append([64, 81])

In [27]:
squares

[1, 4, 9, 25, 36, 49, [64, 81]]

In [28]:
# Remove the last element (the nested list added by append) before trying extend().
del squares[-1]

In [29]:
# extend() adds each element of the given iterable individually, keeping the list flat.
squares.extend([64, 81])

In [30]:
squares

[1, 4, 9, 25, 36, 49, 64, 81]

### List comprehensions

In [31]:
# Approach 1: start from an empty list and append inside an explicit loop.
# This rebinds `squares` to the squares of 0..9.
squares = []
for x in range(10):
    squares.append(x**2)

In [32]:
# Approach 2: map() applies the lambda to every value of range(10);
# list() materializes the resulting iterator into a list.
squares = list(map(lambda x: x**2, range(10)))

In [33]:
# Approach 3: a list comprehension -- the same result as the loop and map() versions above.
squares = [x**2 for x in range(10)]

## Tuples

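Similar to lists, but immutable. Immutability matters because a tuple's contents cannot change after it is created, which makes tuples hashable (as long as all of their elements are hashable) and therefore usable as dictionary keys and as members of sets.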

In [34]:
latitude = 42.314081
longitude = 83.036857  # NOTE(review): a positive longitude means east of Greenwich; confirm the sign is intended

### Tuple packing

In [35]:
coord = (latitude, longitude)

In [36]:
coord

(42.314081, 83.036857)

### Sequence unpacking

In [37]:
coord_lat, coord_lon = coord

### Special cases: 1-tuples and 0-tuples

In [38]:
empty_tuple = ()

In [39]:
empty_tuple


()

In [40]:
one_tuple = "mads",  # note trailing comma

In [41]:
one_tuple

('mads',)

## Sets

 A set is an unordered collection with no duplicate elements. 

In [42]:
animals = {'dog', 'cat', 'horse', 'dog'}

In [43]:
animals

{'cat', 'dog', 'horse'}

In [44]:
'dog' in animals

True

In [45]:
'turtle' in animals

False

In [46]:
letters = set('antidisestablishmentarianism') # Longest non-contrived and nontechnical English word

In [47]:
letters

{'a', 'b', 'd', 'e', 'h', 'i', 'l', 'm', 'n', 'r', 's', 't'}

In [48]:
len(letters)

12

## Dictionaries

Dictionaries are indexed by keys, which can be any hashable (in practice, immutable) data type — so lists, sets, and other dictionaries cannot be keys.  Tuples are legitimate keys as long as they don't contain any mutable types.  Strings are commonly used as keys.

In [49]:
# Mapping of city name -> population.
city_population = {
    'Chongqing': 30165500,
    'Shanghai': 24183300,
    'Beijing': 21707000,
    'Istanbul': 15029231,
    'Karachi': 14910352,
    'Dhaka': 14399000,
    'Tokyo': 13515271,
    'Moscow': 13200000,
}

In [50]:
city_population

{'Chongqing': 30165500,
 'Shanghai': 24183300,
 'Beijing': 21707000,
 'Istanbul': 15029231,
 'Karachi': 14910352,
 'Dhaka': 14399000,
 'Tokyo': 13515271,
 'Moscow': 13200000}

In [51]:
city_population['Dhaka']

14399000

In [52]:
'Beijing' in city_population

True

In [53]:
'Ann Arbor' in city_population

False

In [54]:
city_population['Ann Arbor'] = 113934

In [55]:
'Ann Arbor' in city_population

True

In [56]:
# .items() yields (key, value) pairs; each pair is unpacked into `city` and `population`.
for city, population in city_population.items():
    print('The population of {0} is {1}.'.format(city, population))

The population of Chongqing is 30165500.
The population of Shanghai is 24183300.
The population of Beijing is 21707000.
The population of Istanbul is 15029231.
The population of Karachi is 14910352.
The population of Dhaka is 14399000.
The population of Tokyo is 13515271.
The population of Moscow is 13200000.
The population of Ann Arbor is 113934.


## collections
* Counter
* defaultdict
* deque
* namedtuple


### Counter

In [57]:
from collections import Counter

In [58]:
c_empty = Counter()  # a new, empty counter
c_longest_word = Counter('antidisestablishmentarianism')  # a new counter from an iterable (counts each character)
c_colors = Counter({'red': 4, 'blue': 2})  # a new counter from a mapping
c_animals = Counter(cats=4, dogs=8)  # a new counter from keyword arguments

In [59]:
c_longest_word

Counter({'i': 5,
         'a': 4,
         's': 4,
         'n': 3,
         't': 3,
         'e': 2,
         'm': 2,
         'd': 1,
         'b': 1,
         'l': 1,
         'h': 1,
         'r': 1})

In [60]:
c_longest_word.most_common(1)

[('i', 5)]

In [61]:
# popitem() removes one (key, count) pair from the counter and returns it.
# It mutates c_longest_word in place, so re-running this cell removes a further entry each time.
c_longest_word.popitem()

('r', 1)

In [62]:
c_longest_word

Counter({'i': 5,
         'a': 4,
         's': 4,
         'n': 3,
         't': 3,
         'e': 2,
         'm': 2,
         'd': 1,
         'b': 1,
         'l': 1,
         'h': 1})

In [63]:
# NOTE: the key 'u' appears twice in this literal. Python keeps only the last
# value for a repeated key, so the dict ends up with two entries: 'u' -> 8 and 'i' -> 4.
counts = {'u':1,'i':4,'u':8}

In [64]:
Counter(counts).most_common(1)

[('u', 8)]

In [65]:
things_to_count=['a','a','a','a','b','b','b','b','b']

In [66]:
Counter(things_to_count).most_common(1)

[('b', 5)]

### defaultdict

In [67]:
# Sum the counts per fruit with a plain dict. A plain dict raises KeyError on a
# missing key, so each fruit needs an explicit "seen before?" check.
fruit_counts = [
    ('apple', 3),
    ('pear', 2),
    ('orange', 1),
    ('apple', 1),
]
fruit_totals = {}

for fruit, count in fruit_counts:
    if fruit not in fruit_totals:
        # first occurrence: start the running total for this fruit
        fruit_totals[fruit] = count
    else:
        fruit_totals[fruit] += count
print(fruit_totals)

{'apple': 4, 'pear': 2, 'orange': 1}


In [68]:
from collections import defaultdict

# defaultdict(int) supplies int() == 0 for a key that is not present yet,
# so totals can be accumulated without a membership check.
fruit_totals = defaultdict(int)
for fruit, count in fruit_counts:
    fruit_totals[fruit] += count
print(fruit_totals)

defaultdict(<class 'int'>, {'apple': 4, 'pear': 2, 'orange': 1})


In [69]:
Counter(fruit_totals).most_common(1)

[('apple', 4)]

In [70]:
# (defaultdict was already imported in the previous defaultdict cell; repeating the import is harmless.)
from collections import defaultdict
# defaultdict(list) supplies an empty list for a key that is not present yet,
# so each count can be appended directly, grouping the counts per fruit.
# NOTE: this rebinds fruit_totals, which previously held integer totals, to a mapping of lists.
fruit_totals = defaultdict(list)
for fruit,count in fruit_counts:
    fruit_totals[fruit].append(count)
print(fruit_totals)

defaultdict(<class 'list'>, {'apple': [3, 1], 'pear': [2], 'orange': [1]})


### deque

In [71]:
from collections import deque
import itertools # beyond the scope of this course -- 
                 # we are using itertools.islice() to return a slice of an iterator, 
                 # which is like a slice of a list but more memory-efficient

In [72]:
def moving_average(iterable, n=3):
    """Yield the simple moving average over each window of ``n`` consecutive values.

    Example:
        moving_average([40, 30, 50, 46, 39, 44]) --> 40.0 42.0 45.0 43.0

    See http://en.wikipedia.org/wiki/Moving_average

    Args:
        iterable: Any iterable of numbers; it is consumed lazily, one value at a time.
        n: Window size. Must be at least 1. Defaults to 3.

    Yields:
        The window sum divided by ``n``, once for every input value from the
        n-th onward. Nothing is yielded when the input has fewer than ``n`` values.

    Raises:
        ValueError: If ``n`` is less than 1. Because this is a generator
            function, the error surfaces when iteration starts, not at call time.
    """
    if n < 1:
        raise ValueError('n must be a positive integer, got {0!r}'.format(n))
    it = iter(iterable)
    # Prime the window with the first n-1 values; the loop below supplies the n-th.
    d = deque(itertools.islice(it, n-1))
    # Pad with a leading 0 so the first popleft() in the loop subtracts nothing
    # and the window then holds exactly n real values.
    d.appendleft(0)
    s = sum(d)
    for elem in it:
        # Slide the window: add the incoming value, drop the oldest one.
        s += elem - d.popleft()
        d.append(elem)
        yield s / n

In [73]:
for i in moving_average([40, 30, 50, 46, 39, 44]):
    print(i)

40.0
42.0
45.0
43.0


### namedtuple

In [74]:
from collections import namedtuple

In [75]:
Coord = namedtuple('Coord', ['latitude','longitude'])

In [76]:
c = Coord(latitude = 42.314081, longitude = 83.036857)

In [77]:
c[1]

83.036857

In [78]:
lat, lon = c

In [79]:
c.latitude

42.314081

In [80]:
c

Coord(latitude=42.314081, longitude=83.036857)

In [81]:
# NOTE(review): 'longitude' is a str here while 'latitude' is a float. The namedtuple
# accepts it as-is (see the repr in the last cell) -- confirm the quotes are intentional.
d = {'latitude': 42.314081, 'longitude': '83.036857'}

In [82]:
c = Coord(**d)

In [83]:
c

Coord(latitude=42.314081, longitude='83.036857')