# Data Structures

## 1.1 Unpacking a sequence into separate variables

In [1]:
# A record mixing a string, an int, a float and a nested tuple.
data = ['ACME', 50, 91.1, (2012, 12, 21)]
# Unpacking needs exactly one target per element; a mismatch raises ValueError.
name, shares, price, date = data

In [2]:
date

(2012, 12, 21)

In [3]:
# `_` is a conventional throwaway name for the fields that are not needed.
# Note: in IPython `_` normally holds the last output; assigning to it here shadows that.
_, shares, price, _ = data

## 1.2 Unpacking elements from iterables of arbitrary length

In [4]:
def drop_first_last(grades):
    """Return the mean of ``grades`` with the first and last entries dropped.

    Args:
        grades: An iterable of numbers containing at least three items.

    Returns:
        The arithmetic mean of every item except the first and the last.

    Raises:
        ValueError: If ``grades`` has fewer than two items (the unpacking
            fails) or exactly two items (nothing is left to average;
            ``statistics.StatisticsError`` is a ``ValueError`` subclass).
    """
    # Local import keeps this cell self-contained; the previous body called an
    # ``avg`` helper that is not a builtin and is not defined in this notebook.
    from statistics import mean

    _, *middle, _ = grades
    return mean(middle)

In [5]:
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
# The starred target collects every remaining element into a list (possibly empty).
name, email, *phone_numbers = record

In [6]:
record = ('ACME', 50, 123.45, (12, 18, 2012))
# The first `*_` soaks up the unwanted middle fields; the nested pattern unpacks
# the trailing tuple and keeps only its last element.
name, *_, (*_, year) = record
year

2012

## 1.3 Keeping the last N items

In [7]:
from collections import deque

def search(lines, pattern, history=5):
    """Yield every line containing ``pattern`` together with its preceding lines.

    Args:
        lines: Iterable of strings to scan, in order.
        pattern: Substring to look for (tested with ``in``).
        history: Maximum number of preceding lines to remember.

    Yields:
        ``(line, previous)`` tuples, where ``previous`` is a deque holding up
        to ``history`` lines that came immediately before ``line``. Each
        yielded deque is an independent snapshot, so results stay valid when
        they are collected (e.g. with ``list(search(...))``).
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Copy the window: yielding the live deque would make every
            # collected result alias the same, still-mutating object.
            yield line, deque(previous_lines, maxlen=history)
        previous_lines.append(line)

## 1.4 Finding the largest or smallest N items

In [8]:
import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
# nlargest/nsmallest return a new list of N items, already ordered best-first;
# `nums` itself is left untouched.
print(heapq.nlargest(3, nums))  # Prints [42, 37, 23]
print(heapq.nsmallest(3, nums)) # Prints [-4, 1, 2]

[42, 37, 23]
[-4, 1, 2]


In [9]:
# One dict per holding; every record carries the same three keys.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]

# The key function tells heapq which field to rank the records by.
cheap = heapq.nsmallest(3, portfolio, key=lambda stock: stock['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda stock: stock['price'])

In [12]:
# Heapify a copy so that `nums` keeps its original order. The previous version
# heapified `nums` itself and left `heap` as an unordered copy.
heap = list(nums)
heapq.heapify(heap)

In [13]:
heap

[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]

## 1.5 Implementing a priority queue

## 1.6 Mapping keys to multiple values in a dictionary

In [14]:
from collections import defaultdict

# With a list factory, a missing key starts as [] so append() works right away;
# values keep insertion order and may repeat.
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
...

# With a set factory, a missing key starts as set(); duplicates are discarded.
# Note that this rebinds `d`, replacing the list-based mapping above.
d = defaultdict(set)
d['a'].add(1)

In [17]:
pairs = [('a', 1), ('b', 3)]
# defaultdict creates the list on first access, so no "if key not in d" check is needed.
d = defaultdict(list)
for key, value in pairs:
    d[key].append(value)

In [19]:
d['a']

[1]

## 1.7 Keeping dictionaries in order

In [20]:
from collections import OrderedDict

# OrderedDict remembers the order in which keys were first inserted.
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3

## 1.8 Calculating with dictionaries

In [21]:
# Maps a stock name to its price.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

In [22]:
# zip() pairs each price with its name as (value, key) tuples, so min()/max()
# compare on the price first and fall back to the name only on ties.
min_price = min(zip(prices.values(), prices.keys()))
# min_price is (10.75, 'FB')

max_price = max(zip(prices.values(), prices.keys()))
# max_price is (612.78, 'AAPL')

## 1.9 Finding commonalities in two dictionaries

In [24]:
a = {
    'x': 1,
    'y': 2,
    'z': 3,
}

b = {
    'w': 10,
    'x': 11,
    'y': 2,
}

# keys() and items() return set-like views, so set operators work on them directly.

# Keys present in both dictionaries
a.keys() & b.keys()    # {'x', 'y'}

# Keys present in a but missing from b
a.keys() - b.keys()    # {'z'}

# (key, value) pairs that are identical in both
a.items() & b.items()  # {('y', 2)}

{('y', 2)}

In [25]:
# Copy of `a` without the unwanted keys; names absent from `a` ('w') are simply ignored.
c = {key: a[key] for key in a.keys() - {'z', 'w'}}

## 1.10 Removing duplicates from a sequence while maintaining order

In [26]:
def dedupe(items):
    """Lazily yield the elements of ``items`` without repeats, in first-seen order.

    Elements must be hashable because they are tracked in a set.
    """
    already_yielded = set()
    for candidate in items:
        if candidate in already_yielded:
            continue
        yield candidate
        already_yielded.add(candidate)

In [27]:
a = [1, 5, 2, 1, 9, 1, 5, 10]
# dedupe() is a generator, so wrap it in list() to materialize the result.
list(dedupe(a))

[1, 5, 2, 9, 10]

In [28]:
def dedupe(items, key=None):
    """Lazily yield the elements of ``items`` without repeats, in first-seen order.

    Args:
        items: Iterable to filter.
        key: Optional one-argument callable that turns an element into a
            hashable value used for the duplicate check. When omitted, the
            element itself is used and therefore must be hashable.

    Yields:
        The original elements (not the key values), first occurrence only.
    """
    seen = set()
    for item in items:
        if key is None:
            marker = item
        else:
            marker = key(item)
        if marker in seen:
            continue
        yield item
        seen.add(marker)

In [29]:
a = [{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 1, 'y': 2}, {'x': 2, 'y': 4}]
# Dicts are unhashable, so compare rows by a hashable (x, y) tuple instead.
list(dedupe(a, key=lambda row: (row['x'], row['y'])))

[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]

## 1.11 Naming a slice

In [30]:
items = [0, 1, 2, 3, 4, 5, 6]
# A slice object can be stored under a name and used wherever [2:4] would be written.
a = slice(2, 4)
items[a]

[2, 3]

In [31]:
s = "HelloWorld"
# indices(length) returns (start, stop, step) adjusted to fit a sequence of that length.
a.indices(len(s))

(2, 4, 1)

## 1.12 Determining the most frequently occurring items in a sequence

In [32]:
from collections import Counter

In [33]:
words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under',
]
# Counter tallies how many times each hashable item occurs.
word_counts = Counter(words)

In [34]:
type(word_counts)

collections.Counter

In [36]:
word_counts.most_common(2)

[('eyes', 8), ('the', 5)]

## 1.13 Sorting a list of dictionaries by a common key

In [37]:
# Sample records; every dict has the same keys, so any of them can serve as a sort key.
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

In [38]:
from operator import itemgetter

# itemgetter('fname') builds a callable equivalent to `lambda r: r['fname']`.
# sorted() returns a new list, so `rows` keeps its original order.
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_uid = sorted(rows, key=itemgetter('uid'))

In [39]:
rows_by_fname

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
 {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
 {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
 {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]

## 1.14 Sorting objects without native comparison support

In [40]:
class User:
    """Minimal record type holding a ``user_id``; it defines no comparison methods."""

    def __init__(self, user_id):
        """Store ``user_id`` on the instance."""
        self.user_id = user_id

    def __repr__(self):
        """Return a constructor-like representation of the instance."""
        template = 'User({})'
        return template.format(self.user_id)

In [41]:
# Deliberately not in user_id order, so the effect of sorting is visible.
users = [User(23), User(3), User(99)]

In [42]:
from operator import attrgetter
# attrgetter('user_id') builds a callable equivalent to `lambda u: u.user_id`,
# which gives sorted() a sort key for objects that define no ordering of their own.
by_id = sorted(users, key=attrgetter('user_id'))

In [43]:
by_id

[User(3), User(23), User(99)]

## 1.15 Grouping records together based on a field

In [44]:
# Sample records to be grouped by their 'date' field; note that this rebinds `rows`.
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

In [45]:
from operator import itemgetter
from itertools import groupby

In [49]:
# groupby() only merges *consecutive* items with equal keys, so sort by the
# grouping field first. Note that sort() reorders `rows` in place.
rows.sort(key=itemgetter('date'))

for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    # `items` is a one-shot iterator over the rows belonging to this group.
    for item in items:
        print('    ', item)

07/01/2012
     {'address': '5412 N CLARK', 'date': '07/01/2012'}
     {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
     {'address': '5800 E 58TH', 'date': '07/02/2012'}
     {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
     {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
     {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
     {'address': '5148 N CLARK', 'date': '07/04/2012'}
     {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


## 1.16 Filtering sequence elements

In [50]:
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
# Generator expression: the positive values are produced lazily, on demand.
pos = (n for n in mylist if n > 0)
pos

<generator object <genexpr> at 0x120738f50>

In [51]:
# list() exhausts the generator; running this cell a second time would print [].
print(list(pos))

[1, 4, 10, 2, 3]


In [52]:
values = ['1', '2', '-3', '-', '4', 'N/A', '5']

def is_int(val):
    """Return True if ``int(val)`` succeeds, otherwise False.

    Args:
        val: Any object; typically a string taken from raw input.

    Returns:
        True when ``int(val)`` can convert the value, False when the
        conversion is rejected. Because the check is "convertible", a float
        such as 3.7 also returns True (``int()`` truncates it).
    """
    try:
        int(val)
    except (ValueError, TypeError, OverflowError):
        # ValueError: malformed text such as '-' or 'N/A'.
        # TypeError: objects int() cannot accept at all, e.g. None.
        # OverflowError: float infinity.
        # A filter predicate should answer False for these rather than abort.
        return False
    return True

# filter() keeps the original strings; is_int only decides which ones survive.
ivals = list(filter(is_int, values))
print(ivals)

['1', '2', '-3', '4', '5']


## 1.17 Extracting a subset of a dictionary

In [53]:
# Maps a stock name to its price.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

In [54]:
# Keep only the entries whose price is above 200.
p1 = {key: value for key, value in prices.items() if value > 200}
p1

{'AAPL': 612.78, 'IBM': 205.55}

## 1.18 Mapping names to sequence elements

In [55]:
from collections import namedtuple
# namedtuple() creates a tuple subclass whose fields can also be read by name.
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')

In [56]:
sub.addr

'jonesy@example.com'

In [58]:
# Namedtuples are immutable: _replace() returns a new instance with the given
# fields changed, which is rebound to `sub` here.
sub = sub._replace(joined='2012-10-20')
sub

Subscriber(addr='jonesy@example.com', joined='2012-10-20')

## 1.19 Transforming and reducing data at the same time

In [59]:
nums = [1, 2, 3, 4, 5]
# Both lines compute the same sum of squares; when a generator expression is the
# sole argument of a call, its own parentheses may be omitted.
s = sum((x * x for x in nums))    # Pass generator-expr as argument
s = sum(x * x for x in nums)      # More elegant syntax

In [60]:
s

55

## 1.20 Combining multiple mappings into a single mapping

In [61]:
# Two mappings that overlap on the key 'z'.
a = {'x': 1, 'z': 3}
b = {'y': 2, 'z': 4}

In [62]:
from collections import ChainMap
# ChainMap searches its mappings left to right without copying them, so a key
# found in `a` hides the same key in `b`.
c = ChainMap(a,b)
print(c['x'])      # Outputs 1  (from a)
print(c['y'])      # Outputs 2  (from b)
print(c['z'])      # Outputs 3  (from a)

1
2
3
