One common use of dictionaries is counting how often we 'see' something. In other words, it is very convenient for counting stuff

In [4]:
# initialize a dictionary object
ccc = dict()

In [5]:
# Here the key automatically initializes a count for dictionary
ccc['csev'] = 1
ccc['cwen'] = 1

In [6]:
print(ccc)

{'csev': 1, 'cwen': 1}


In [7]:
# Let's say we see another instance of cwen, so instead of just appending another instance for cwen, the dictionary dynamically adds another instance of cwen as a frequency
ccc['cwen'] += 1

In [8]:
print(ccc)

{'csev': 1, 'cwen': 2}


* It is an error to reference a key that is not in the dictionary
* We can use the in operator to see if a key is in the dictionary

In [9]:
print(ccc['cwen'])

2


In [10]:
print(ccc['zhen'])

KeyError: 'zhen'

In [11]:
'zhen' in ccc

False

In [12]:
# so this is how you build a kind of histogram code
counts = dict()
names = ['csev', 'cwen', 'csev', 'zqian', 'cwen']
for name in names:
    if name not in counts:
        counts[name] = 1
    else:
        counts[name] += 1
print(counts)

{'csev': 2, 'cwen': 2, 'zqian': 1}


In [13]:
# using get method  for dictionaries does this for us
counts.get?

[1;31mSignature:[0m [0mcounts[0m[1;33m.[0m[0mget[0m[1;33m([0m[0mkey[0m[1;33m,[0m [0mdefault[0m[1;33m=[0m[1;32mNone[0m[1;33m,[0m [1;33m/[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[1;31mDocstring:[0m Return the value for key if key is in the dictionary, else default.
[1;31mType:[0m      builtin_function_or_method


In [14]:
counts.get('zqian', 0)

1

In [15]:
counts.get('funds', 0)

0

In [16]:
# incorporating the get method in the for loop code can be shortened
for name in names:
    counts[name] = counts.get(name, 0) + 1
print(counts)

{'csev': 4, 'cwen': 4, 'zqian': 2}


* Dictionaries are not stored in order
* we can do a word count with a dictionary on some text

In [17]:
with open('data.txt', 'r') as f:
    line = f.readline()
    words = line.split()

    print('Words:', words)

    print('Counting...')
    for word in words:
        counts[word] = counts.get(word, 0) + 1
    print('Counts', counts)

Words: ['lk', 'stark', '-', 'ndis', 'plan', 'ndis', 'number', '412225671', 'my', 'ndis', 'contact', 'nick', 'fury', 'avengers', 'coordinator', 'the', 'avengers', 'society', 'phone', '0400000003', 'email', 'nick.', 'Furyndis.', 'Gov.', 'Au', 'ndis', 'plan', 'start', 'date', '18', 'april', '2020', 'ndis', 'plan', 'review', 'due', 'date', '17', 'april', '2021', 'a', 'national', 'disability', 'insurance', 'agency', 'ndia', 'representative', 'will', 'contact', 'me', 'about', 'my', 'plan', 'review', 'before', 'my', 'plan', 'review', 'due', 'date.', 'Personal', 'information', '-', 'confidential', 'name', 'hulk', 'stark', 'page', '2', 'of', '9', 'ndis', 'number', '412225671', '190408840', 'mapbhoy', 'edi', 'ab', 'april', '2019', '3245', 'bl-', '21466lks', 'profile', 'what', 'i', 'want', 'people', 'to', 'know', 'about', 'me', 'date', 'of', 'birth', '10', 'november', '2006', 'preferred', 'contact', 'details', '—', 'telephone', 'home', 'number', '0403578819', 'work', 'number', '0403578819', 'mobi

In [28]:
# subsetting a dictionary with [key] identifies its value
counts = {'chuck' : 1, 'annie': 42, 'jan': 100}
for key in counts:
    if counts[key] > 10:
        print(key, counts[key])

annie 42
jan 100


In [20]:
lst = list()
for k, v in counts.items():
    newtup = (v, k)
    lst.append(newtup)

In [24]:
lst = sorted(lst, reverse=True)

In [26]:
for k, v in lst[:10]:
    print(k, v)

381 to
330 and
290 my
260 the
205 will
181 be
180 supports
166 ndis
156 plan
155 support
