The collections module
from collections import (
deque, defaultdict, namedtuple,
OrderedDict, Counter
)
Baptiste Mispelon (bmispelon)
I've been using Python for over 5 years.
Currently doing web development with Django at M2BPO.
No advanced python knowledge required.
Lots of small code examples.
Available online.
Python has a "batteries included" philosophy,
Python has a "batteries included" philosophy,
but they are easy to miss if you don't know about them.
The `collections` module.
5 specialized container datatypes.
Abstract base classes (ABCs; not presented today).
List-like container with fast appends and pops on either end.
Introduced in python 2.4.
Short for "double-ended queue".
Operation | list | deque |
---|---|---|
append | ||
pop (right) | ||
prepend | ||
pop (left) |
Operation | list | deque |
---|---|---|
append | O(1) | |
pop (right) | ||
prepend | ||
pop (left) |
Operation | list | deque |
---|---|---|
append | O(1) | |
pop (right) | O(1) | |
prepend | ||
pop (left) |
Operation | list | deque |
---|---|---|
append | O(1) | |
pop (right) | O(1) | |
prepend | O(n) | |
pop (left) |
Operation | list | deque |
---|---|---|
append | O(1) | |
pop (right) | O(1) | |
prepend | O(n) | |
pop (left) | O(n) |
Operation | list | deque |
---|---|---|
append | O(1) | O(1) |
pop (right) | O(1) | O(1) |
prepend | O(n) | O(1) |
pop (left) | O(n) | O(1) |
deque(iterable, maxlen=None)
Both parameters are optional.
Iterable: list, str, dict, generator, ...
deque() # ?
deque() # deque([])
deque() # deque([])
deque('abc') # ?
deque() # deque([])
deque('abc') # deque(['a', 'b', 'c'])
deque() # deque([])
deque('abc') # deque(['a', 'b', 'c'])
deque(xrange(1000, 0, -1), maxlen=3)
# ?
deque() # deque([])
deque('abc') # deque(['a', 'b', 'c'])
deque(xrange(1000, 0, -1), maxlen=3)
# deque([3, 2, 1], maxlen=3)
Print the last 20 lines of a file.
$ tail -n 20 some_file.txt
with open('some_file.txt') as f:
last = list(f)[-20:] ###
print "\n".join(last)
last = []
with open('some_file.txt') as f:
for line in f:
last.append(line)
if len(last) > 20:
last.pop(0) ###
print "\n".join(last)
with open('some_file.txt') as f:
last = deque(f, maxlen=20) ###
print "\n".join(last)
Dict subclass that calls a factory function to supply missing values.
Introduced in python 2.5.
# defaultdict(factory, *a, **kw)
defaultdict(f)
defaultdict(f, {'foo': 'bar'})
defaultdict(f, [('foo', 'bar')])
defaultdict(f, foo='bar')
Any callable without required arguments:
- functions (named or anonymous)
- classes
- instance methods
bool | |
int | |
float | |
complex | |
str | |
list | |
dict |
bool() | False |
int | |
float | |
complex | |
str | |
list | |
dict |
bool() | False |
int() | 0 |
float | |
complex | |
str | |
list | |
dict |
bool() | False |
int() | 0 |
float() | 0.0 |
complex | |
str | |
list | |
dict |
bool() | False |
int() | 0 |
float() | 0.0 |
complex() | 0j |
str | |
list | |
dict |
bool() | False |
int() | 0 |
float() | 0.0 |
complex() | 0j |
str() | '' |
list | |
dict |
bool() | False |
int() | 0 |
float() | 0.0 |
complex() | 0j |
str() | '' |
list() | [] |
dict |
bool() | False |
int() | 0 |
float() | 0.0 |
complex() | 0j |
str() | '' |
list() | [] |
dict() | {} |
# Regular dict:
rd = dict(foo='bar')
rd['foo'] # ?
# Regular dict:
rd = dict(foo='bar')
rd['foo'] # 'bar'
# Regular dict:
rd = dict(foo='bar')
rd['foo'] # 'bar'
rd['missing'] # ?
# Regular dict:
rd = dict(foo='bar')
rd['foo'] # 'bar'
rd['missing'] # KeyError
factory = lambda: 'X'
dd = defaultdict(factory, foo='bar')
dd['foo'] # ?
factory = lambda: 'X'
dd = defaultdict(factory, foo='bar')
dd['foo'] # 'bar'
factory = lambda: 'X'
dd = defaultdict(factory, foo='bar')
dd['foo'] # 'bar'
dd['missing'] # ?
factory = lambda: 'X'
dd = defaultdict(factory, foo='bar')
dd['foo'] # 'bar'
dd['missing'] # 'X'
Given a list of payments (date, amount),
we want to get a mapping of {date: [amounts]}
d = {}
for date, amount in L:
if date not in d:
d[date] = []
d[date].append(amount)
d = {}
for date, amount in L:
l = d.setdefault(date, [])
l.append(amount)
d = defaultdict(list)
for date, amount in L:
d[date].append(amount)
Factory function for creating tuple subclasses with named fields.
Introduced in python 2.6.
It's a subclass of tuple whose attributes can also be accessed by name (not just by position).
It uses as much memory as a regular tuple.
Like a tuple, it's immutable.
# cls = namedtuple(name, fields)
# cls = namedtuple(name, fields)
namedtuple('Point', ['x', 'y', 'z'])
# cls = namedtuple(name, fields)
namedtuple('Point', ['x', 'y', 'z'])
namedtuple('Point', 'x y z')
# cls = namedtuple(name, fields)
namedtuple('Point', ['x', 'y', 'z'])
namedtuple('Point', 'x y z')
namedtuple('Point', 'x,y,z')
# cls = namedtuple(name, fields)
namedtuple('Point', ['x', 'y', 'z'])
namedtuple('Point', 'x y z')
namedtuple('Point', 'x,y,z')
# Creates **classes**,
# not **instances**.
Point = namedtuple('Point', 'x y z')
Point = namedtuple('Point', 'x y z')
p = Point(23, 10, 85)
Point = namedtuple('Point', 'x y z')
p = Point(23, 10, 85)
p = Point(x=23, y=10, z=85)
Point = namedtuple('Point', 'x y z')
p = Point(23, 10, 85)
p = Point(x=23, y=10, z=85)
p = Point(y=10, x=23, z=85)
p = Point(x=23, y=10, z=85)
p = Point(x=23, y=10, z=85)
p[0] # ?
p = Point(x=23, y=10, z=85)
p[0] # 23
p = Point(x=23, y=10, z=85)
p[0] # 23
p.z # ?
p = Point(x=23, y=10, z=85)
p[0] # 23
p.z # 85
p = Point(x=23, y=10, z=85)
p._fields # ?
p = Point(x=23, y=10, z=85)
p._fields # ('x', 'y', 'z')
p1 = Point(x=23, y=10, z=85)
p1 = Point(x=23, y=10, z=85)
p2 = p1._replace(z=56)
p1 = Point(x=23, y=10, z=85)
p2 = p1._replace(z=56)
tuple(p2) # ?
p1 = Point(x=23, y=10, z=85)
p2 = p1._replace(z=56)
tuple(p2) # (23, 10, 56)
p = Point(x=23, y=10, z=85)
p._asdict()
# ?
p = Point(x=23, y=10, z=85)
p._asdict()
# {'x': 23, 'y': 10, 'z': 85}
p = Point(x=23, y=10, z=85)
p._asdict()
# {'x': 23, 'y': 10, 'z': 85}
# Actually an OrderedDict instance
Dict subclass that remembers the order entries were added.
Introduced in python 2.7.
Identical to dict.
OrderedDict(mapping)
OrderedDict(iterable)
OrderedDict(**kwargs)
d = {}
for char in 'abc':
d[char] = None
print ''.join(d) # ?
d = {}
for char in 'abc':
d[char] = None
print ''.join(d) # 'acb'
d = {}
for char in 'abc':
d[char] = None
print ''.join(d) # 'acb'
# Actually,
# it depends on python's version.
d = {}
for char in 'abc':
d[char] = None
print ''.join(d) # 'acb'
# Actually,
# it depends on python's version.
# And it's also affected by -R flag.
Order is consistent between two iterations if no items have been added or deleted.
zip(d.keys(), d.values())
# Same as d.items()
d = {}
for char in 'abc':
d[char] = None
d = {}
for char in 'abc':
d[char] = None
# Can be written as:
d = dict.fromkeys('abc')
OrderedDicts are ordered by insertion order:
d = OrderedDict.fromkeys('abc')
''.join(d) # ?
OrderedDicts are ordered by insertion order:
d = OrderedDict.fromkeys('abc')
''.join(d) # 'abc'
d = OrderedDict.fromkeys('abcda')
''.join(d) # ?
d = OrderedDict.fromkeys('abcda')
''.join(d) # 'abcd'
od1 = OrderedDict.fromkeys('abc')
od2 = OrderedDict.fromkeys('cba')
rd1 = dict.fromkeys('abc')
rd2 = dict.fromkeys('cba')
rd1 == rd2 # ?
od1 = OrderedDict.fromkeys('abc')
od2 = OrderedDict.fromkeys('cba')
rd1 = dict.fromkeys('abc')
rd2 = dict.fromkeys('cba')
rd1 == rd2 # True
od1 = OrderedDict.fromkeys('abc')
od2 = OrderedDict.fromkeys('cba')
rd1 = dict.fromkeys('abc')
rd2 = dict.fromkeys('cba')
rd1 == rd2 # True
od1 == od2 # ?
od1 = OrderedDict.fromkeys('abc')
od2 = OrderedDict.fromkeys('cba')
rd1 = dict.fromkeys('abc')
rd2 = dict.fromkeys('cba')
rd1 == rd2 # True
od1 == od2 # False
od1 = OrderedDict.fromkeys('abc')
od2 = OrderedDict.fromkeys('cba')
rd1 = dict.fromkeys('abc')
rd2 = dict.fromkeys('cba')
rd1 == rd2 # True
od1 == od2 # False
od1 == rd2 # ?
od1 = OrderedDict.fromkeys('abc')
od2 = OrderedDict.fromkeys('cba')
rd1 = dict.fromkeys('abc')
rd2 = dict.fromkeys('cba')
rd1 == rd2 # True
od1 == od2 # False
od1 == rd2 # True
Dict subclass for counting hashable objects.
Introduced in python 2.7.
Basically a defaultdict(int).
With some useful methods for counting stuff.
Counter(iterable)
Iterable: list, str, dict, generator, ...
# First approach, with a plain dict:
counter = {}
for letter in 'ababac':
if letter not in counter:
counter[letter] = 0
counter[letter] += 1
counter['a'] # 3
# Second approach, with a defaultdict
counter = defaultdict(int)
for letter in 'ababac':
counter[letter] += 1
counter['a'] # 3
# Finally, with a Counter
counter = Counter('ababac')
counter['a'] # 3
counter = Counter('aaabbc')
counter['a'] # ?
counter = Counter('aaabbc')
counter['a'] # 3
counter = Counter('aaabbc')
counter['a'] # 3
counter['d'] # ?
counter = Counter('aaabbc')
counter['a'] # 3
counter['d'] # 0
counter = Counter('aaabbc')
sum(counter.values()) # ?
counter = Counter('aaabbc')
sum(counter.values()) # 6
# Total number of elements
counter = Counter('aaabbc')
len(counter) # ?
counter = Counter('aaabbc')
len(counter) # 3
# Total number of unique elements:
counter = Counter('aaabbc')
counter.keys()
# ?
counter = Counter('aaabbc')
counter.keys()
# ['a', 'b', 'c']
# List of unique elements
# List elements, with repetition
c = Counter('ababac')
c.elements()
# ?
# List elements, with repetition
c = Counter('ababac')
c.elements()
# ['a', 'a', 'a', 'b', 'b', 'c']
# List elements, with repetition
c = Counter('ababac')
c.elements()
# ['a', 'a', 'a', 'b', 'b', 'c']
# The order is arbitrary!
# List (element, count)
c = Counter('aaabbc')
c.most_common()
# ?
# List (element, count)
c = Counter('aaabbc')
c.most_common()
# [('a', 3), ('b', 2), ('c', 1)]
c = Counter('aaabbc')
c.most_common(2)
# ?
c = Counter('aaabbc')
c.most_common(2)
# [('a', 3), ('b', 2)]
# Add elements
c = Counter('aaabb')
c.update(a=2, b=1)
c # ?
# Add elements
c = Counter('aaabb')
c.update(a=2, b=1)
c # Counter({'a': 5, 'b': 3})
# subtract elements
c = Counter('aaabb')
c.subtract(a=2, b=1)
c # ?
# subtract elements
c = Counter('aaabb')
c.subtract(a=2, b=1)
c # Counter({'a': 1, 'b': 1})
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # ?
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # Counter({'a': 6, 'b': 5})
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # Counter({'a': 6, 'b': 5})
c1 - c2 # ?
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # Counter({'a': 6, 'b': 5})
c1 - c2 # Counter({'a': 2})
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # Counter({'a': 6, 'b': 5})
c1 - c2 # Counter({'a': 2})
c1 & c2 # ?
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # Counter({'a': 6, 'b': 5})
c1 - c2 # Counter({'a': 2})
c1 & c2 # Counter({'a': 2, 'b': 2})
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # Counter({'a': 6, 'b': 5})
c1 - c2 # Counter({'a': 2})
c1 & c2 # Counter({'a': 2, 'b': 2})
c1 | c2 # ?
c1 = Counter('aaaabb')
c2 = Counter('aabbb')
c1 + c2 # Counter({'a': 6, 'b': 5})
c1 - c2 # Counter({'a': 2})
c1 & c2 # Counter({'a': 2, 'b': 2})
c1 | c2 # Counter({'a': 4, 'b': 3})
- deque: list with fast operations on both sides;
- defaultdict: dict with default values;
- namedtuple: tuple with named attributes;
- OrderedDict: ordered dict;
- Counter: counts things.
- bmispelon@gmail.com
- github.com/bmispelon
- twitter: @bmispelon
- IRC: bmispelon on Freenode (#python, #django)
d = dict(y='yes', n='no')
'%(y)s %(n)s' % d
# ?
d = dict(y='yes', n='no')
'%(y)s %(n)s' % d
# 'yes no'
d = dict(y='yes')
'%(y)s %(n)s' % d
# ?
d = dict(y='yes')
'%(y)s %(n)s' % d
# KeyError: 'n'
factory = lambda: 'no'
d = defaultdict(factory, y='yes')
'%(y)s %(n)s' % d
# ?
factory = lambda: 'no'
d = defaultdict(factory, y='yes')
'%(y)s %(n)s' % d
# 'yes no'
# don't try this at home!
def factory():
return defaultdict(factory)
d = defaultdict(factory)
d['a']['b']['c'] = 42
s = 'a,b.a8a?b'
Counter(c for c in s
if c.isalpha())
# ?
s = 'a,b.a8a?b'
Counter(c for c in s
if c.isalpha())
# Counter({'a': 3, 'b': 2})
s = 'a,b.A8A?B'
Counter(c.lower() for c in s
if c.isalpha())
# ?
s = 'a,b.A8A?B'
Counter(c.lower() for c in s
if c.isalpha())
# Counter({'a': 3, 'b': 2})
text = 'foo bar foo'
Counter(text.split())
# ?
text = 'foo bar foo'
Counter(text.split())
# Counter({'foo': 2, 'bar': 1})
- bmispelon@gmail.com
- github.com/bmispelon
- twitter: @bmispelon
- IRC: bmispelon on Freenode (#python, #django)