In [1]:
from collections import defaultdict, Counter
import timeit

In [2]:
# Sample passage whose word occurrences are counted in the cells below.
# Adjacent string literals are joined by the parser, so the parentheses
# produce one single-line string (no newline is inserted between the parts).
text = (
    "I need to count the number of word occurrences in a piece of text. How could I do that? "
    "Python provides us with multiple ways to do the same thing. But only one way I find beautiful."
)


In [4]:
# Method 1--using dict
# Tally every space-separated token in a plain dict. dict.get supplies 0
# the first time a token is seen, so no separate membership test is needed.
t1 = timeit.default_timer()
word_count_dict = {}
for token in text.split(" "):
    word_count_dict[token] = word_count_dict.get(token, 0) + 1
t1_end = timeit.default_timer()
# print() converts each argument with str() on its own
print('cost time: ', t1_end - t1)
print('dict: ', word_count_dict)

cost time:  0.00020132567032860038
dict:  {'the': 2, 'multiple': 1, 'in': 1, 'way': 1, 'I': 3, 'us': 1, 'occurrences': 1, 'Python': 1, 'need': 1, 'count': 1, 'number': 1, 'with': 1, 'provides': 1, 'How': 1, 'find': 1, 'text.': 1, 'But': 1, 'could': 1, 'ways': 1, 'that?': 1, 'of': 2, 'a': 1, 'do': 2, 'beautiful.': 1, 'only': 1, 'thing.': 1, 'word': 1, 'piece': 1, 'to': 2, 'same': 1, 'one': 1}


In [5]:
# Method 2--using defaultdict
# defaultdict(int) fills in int() == 0 for a key on first access, so each
# token can be incremented unconditionally.
t2 = timeit.default_timer()
word_count_dict = defaultdict(int)
for token in text.split(" "):
    word_count_dict[token] += 1
t2_end = timeit.default_timer()
elapsed = t2_end - t2
print('cost time: ', elapsed)
print('defaultdict: ', word_count_dict)

cost time:  0.00012908528273669617
defaultdict:  defaultdict(<class 'int'>, {'the': 2, 'multiple': 1, 'in': 1, 'way': 1, 'I': 3, 'us': 1, 'occurrences': 1, 'Python': 1, 'need': 1, 'count': 1, 'number': 1, 'with': 1, 'provides': 1, 'How': 1, 'find': 1, 'text.': 1, 'But': 1, 'could': 1, 'ways': 1, 'that?': 1, 'of': 2, 'a': 1, 'do': 2, 'beautiful.': 1, 'only': 1, 'thing.': 1, 'word': 1, 'piece': 1, 'to': 2, 'same': 1, 'one': 1})


In [6]:
# Method 3--using Counter
# A Counter starts empty and is incremented token by token, with the same
# loop shape as the defaultdict version.
t3 = timeit.default_timer()
word_count_dict = Counter()
for token in text.split(" "):
    word_count_dict[token] += 1
t3_end = timeit.default_timer()
elapsed = t3_end - t3
print('cost time: ', elapsed)
print('Counter: ', word_count_dict)

cost time:  0.00016185004869129216
Counter:  Counter({'I': 3, 'the': 2, 'of': 2, 'do': 2, 'to': 2, 'multiple': 1, 'in': 1, 'way': 1, 'us': 1, 'occurrences': 1, 'Python': 1, 'need': 1, 'count': 1, 'number': 1, 'with': 1, 'provides': 1, 'How': 1, 'find': 1, 'text.': 1, 'But': 1, 'could': 1, 'ways': 1, 'that?': 1, 'a': 1, 'beautiful.': 1, 'only': 1, 'thing.': 1, 'word': 1, 'piece': 1, 'same': 1, 'one': 1})


In [7]:
# get the most common word
# most_common(10) is asked for ten (word, count) pairs, ordered from the
# highest count down.
top_words = word_count_dict.most_common(10)
print('most common word: ', top_words)

most common word:  [('I', 3), ('the', 2), ('of', 2), ('do', 2), ('to', 2), ('multiple', 1), ('in', 1), ('way', 1), ('us', 1), ('occurrences', 1)]


In [11]:
# Second method using Counter
# Handing the token list straight to the constructor does the whole tally
# in a single call, with no explicit loop.
t3 = timeit.default_timer()
word_counter = Counter(text.split(" "))
t3_end = timeit.default_timer()
elapsed = t3_end - t3
print('cost time: ', elapsed)
print('Counter: ', word_counter)

cost time:  0.00019579908330058515
Counter:  Counter({'I': 3, 'the': 2, 'of': 2, 'do': 2, 'to': 2, 'multiple': 1, 'in': 1, 'way': 1, 'us': 1, 'occurrences': 1, 'Python': 1, 'need': 1, 'count': 1, 'number': 1, 'with': 1, 'provides': 1, 'How': 1, 'find': 1, 'text.': 1, 'But': 1, 'could': 1, 'ways': 1, 'that?': 1, 'a': 1, 'beautiful.': 1, 'only': 1, 'thing.': 1, 'word': 1, 'piece': 1, 'same': 1, 'one': 1})


In [12]:
# Count Characters
# iterating a string yields its characters one at a time
char_counts = Counter('abccccccddddd')
print(char_counts)
# Count List elements
element_counts = Counter([1, 2, 3, 4, 5, 1, 2])
print(element_counts)

Counter({'c': 6, 'd': 5, 'a': 1, 'b': 1})
Counter({1: 2, 2: 2, 3: 1, 4: 1, 5: 1})


In [13]:
# more defaultdict examples
# Group the second item of every pair under its first item. With list as
# the factory, repeated values and their arrival order are both kept.
s = [
    ('color', 'blue'), ('color', 'orange'), ('color', 'yellow'),
    ('fruit', 'banana'), ('fruit', 'orange'), ('fruit', 'banana'),
]
d = defaultdict(list)
for key, value in s:
    d[key].append(value)
print(d)

defaultdict(<class 'list'>, {'color': ['blue', 'orange', 'yellow'], 'fruit': ['banana', 'orange', 'banana']})


In [14]:
# using set instead of list
# Same grouping as with a list factory, but each group is a set, so a
# value that appears twice for a key is stored only once.
s = [
    ('color', 'blue'), ('color', 'orange'), ('color', 'yellow'),
    ('fruit', 'banana'), ('fruit', 'orange'), ('fruit', 'banana'),
]
d = defaultdict(set)
for key, value in s:
    d[key].add(value)
print(d)

defaultdict(<class 'set'>, {'color': {'blue', 'yellow', 'orange'}, 'fruit': {'banana', 'orange'}})


In [15]:
# A defaultdict created without a default_factory (its repr shows None in
# that position). Explicitly stored keys behave as in a plain dict.
d = defaultdict()
d.update(age=2, name='jonh')
print(d)

defaultdict(None, {'name': 'jonh', 'age': 2})


In [16]:
# Grouping with a plain dict: setdefault returns the list already stored
# under the key, first inserting a new empty list when the key is absent.
a = {}
for key, value in s:
    bucket = a.setdefault(key, [])
    bucket.append(value)
print(a)

{'color': ['blue', 'orange', 'yellow'], 'fruit': ['banana', 'orange', 'banana']}


In [17]:
# Counter built from a tuple: every element of the tuple is tallied
pair_counts = Counter(('2', '3'))
print(pair_counts)

Counter({'2': 1, '3': 1})


In [18]:
# Counter built from a mapping: the mapping's values are used as the counts
mapping_counts = Counter({'a': 2, 'c': 3})
print(mapping_counts)

Counter({'c': 3, 'a': 2})


In [20]:
# A tuple of ints can be hashed; the built-in hash() goes through the
# object's __hash__ method.
a = (2, 1)
hash(a)

3713082714465905806

In [21]:
# A list has no hash: its __hash__ attribute is None, so calling it raises
# TypeError. The error is the point of this cell, so it is caught and
# printed instead of being left to abort a top-to-bottom run of the notebook.
b = [1, 2]
try:
    b.__hash__()
except TypeError as exc:
    print('TypeError:', exc)

TypeError: 'NoneType' object is not callable

In [24]:
# Keyword arguments seed the counter: each name becomes a key and each
# value its starting count.
initial_counts = dict(cats=4, dogs=8)
c = Counter(**initial_counts)
print(c)

Counter({'dogs': 8, 'cats': 4})


In [27]:
# elements() repeats each key as many times as its count; keys whose count
# is zero or negative contribute nothing to the result.
c = Counter(a=4, b=2, c=0, d=-2)
expanded = list(c.elements())
expanded.sort()
expanded

['a', 'a', 'a', 'a', 'b', 'b']

In [33]:
# subtract() changes c in place, key by key, and keeps entries whose
# count drops to zero or below.
c = Counter({'a': 4, 'b': 2, 'c': 0, 'd': -2})
d = Counter({'a': 1, 'b': 2, 'c': 3, 'd': 4})
c.subtract(d)
print(c)

Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})


In [34]:
# Iterating a Counter yields its keys; collect them into a list
[key for key in c]

['c', 'a', 'b', 'd']

In [35]:
# Iterating a Counter yields its keys; collect them into a set
{key for key in c}

{'a', 'b', 'c', 'd'}

In [38]:
# Unary + and - each build a new Counter: +c keeps the entries of c whose
# count is positive, and -c negates the counts and keeps the entries that
# were negative.
# A notebook cell echoes only its last expression, so +c is printed
# explicitly; as a bare statement its result was computed and then discarded.
print(c)
print(+c)
-c

Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})


Counter({'c': 3, 'd': 6})

In [39]:
# Counter arithmetic: every operator below returns a new Counter.
c = Counter(a=3, b=1)
d = Counter(a=1, b=2)
combined = [
    c + d,  # add two counters together:  c[x] + d[x]
    c - d,  # subtract (keeping only positive counts)
    c & d,  # intersection:  min(c[x], d[x])
    c | d,  # union:  max(c[x], d[x])
]
for result in combined:
    print(result)

Counter({'a': 4, 'b': 3})
Counter({'a': 2})
Counter({'a': 1, 'b': 1})
Counter({'a': 3, 'b': 2})


In [29]:
# Re-create the counter this cell was recorded with. The cell ran as In [29],
# when c was Counter(a=4, b=2, c=0, d=-2); in document order c has since been
# rebound by the arithmetic cell above, so without this line a top-to-bottom
# run would show a different counter than the recorded output.
c = Counter(a=4, b=2, c=0, d=-2)
c.items()

dict_items([('c', 0), ('a', 4), ('b', 2), ('d', -2)])

In [30]:
# View of the counts stored in c (keys are not included).
# NOTE(review): the recorded output matches c = Counter(a=4, b=2, c=0, d=-2);
# this cell ran as In [30], out of document order, so a top-to-bottom run
# shows the counts of whatever c is bound to at this point.
c.values()

dict_values([0, 4, 2, -2])

In [31]:
# Total of all counts in c; negative and zero counts are included in the sum.
# NOTE(review): this cell ran as In [31], out of document order, so the
# recorded result reflects the c that was bound at that time.
sum(count for count in c.values())

4