# Unit 25. 딕셔너리 응용하기

In [1]:
# setdefault with only a key adds that key with the value None.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
x.setdefault('e')
x

{'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': None}

In [2]:
# A second argument to setdefault is stored as the value of the new key.
x.setdefault('f', 100)
x

{'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': None, 'f': 100}

In [3]:
# Change or add a value: update(key=value) overwrites the value of key 'e'.
x.update(e=50)
x

{'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50, 'f': 100}

In [4]:
# Several keyword arguments change several keys in one call.
x.update(a=900, f=60)
x

{'a': 900, 'b': 20, 'c': 30, 'd': 40, 'e': 50, 'f': 60}

In [5]:
# update also accepts a dict: key 1 is overwritten and key 3 is added.
y = {1: 'one', 2: 'two'}
y.update({1: 'ONE', 3: 'THREE'})
y

{1: 'ONE', 2: 'two', 3: 'THREE'}

In [6]:
# update accepts a list of [key, value] pairs as well.
y.update([[2, 'TWO'], [4, 'FOUR']])
y

{1: 'ONE', 2: 'TWO', 3: 'THREE', 4: 'FOUR'}

In [7]:
# update accepts the (key, value) pairs produced by zip.
y.update(zip([1, 2], ['one', 'two']))
y

{1: 'one', 2: 'two', 3: 'THREE', 4: 'FOUR'}

In [8]:
# Delete an element: pop(key) removes the pair and returns its value.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
z = x.pop('a')
x, z

({'b': 20, 'c': 30, 'd': 40}, 10)

In [9]:
# With a default given, pop still returns the stored value when the key exists.
z = x.pop('b', 0)
z

20

In [10]:
# The default (0) is returned when the key is missing.
z = x.pop('z', 0)
z

0

In [11]:
# del removes the key-value pair stored under the given key.
del x['c']
x

{'d': 40}

In [12]:
# popitem removes and returns a (key, value) pair; here it is the last one, ('d', 40).
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
x.popitem()

('d', 40)

In [13]:
# A second call returns the pair that is now last, ('c', 30).
x.popitem()

('c', 30)

In [14]:
# clear removes every pair, leaving an empty dict.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
x.clear()
x

{}

In [15]:
# get(key) returns the value stored under the key, the same value as x['a'].
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
x.get('a')

10

### 딕셔너리에서 키-값 쌍을 모두 가져오기

In [16]:
# items() returns a dict_items view of all (key, value) pairs.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
x.items()

dict_items([('a', 10), ('b', 20), ('c', 30), ('d', 40)])

In [17]:
# Unpack each (key, value) pair while looping over items().
for key, value in x.items():
    print(key, value)

a 10
b 20
c 30
d 40


In [18]:
# keys() returns a dict_keys view of all keys.
x.keys()

dict_keys(['a', 'b', 'c', 'd'])

In [19]:
# values() returns a dict_values view of all values.
x.values()

dict_values([10, 20, 30, 40])

In [20]:
# Loop over the keys and look each value up by key.
for key in x.keys():
    print(key, x[key])

a 10
b 20
c 30
d 40


### 리스트와 튜플로 딕셔너리 만들기

In [21]:
# dict.fromkeys(keys) builds a dict from the keys with every value set to None.
keys = ['a', 'b', 'c', 'd']
x = dict.fromkeys(keys)
x

{'a': None, 'b': None, 'c': None, 'd': None}

In [22]:
# A second argument to fromkeys becomes the value of every key.
y = dict.fromkeys(keys, 100)
y

{'a': 100, 'b': 100, 'c': 100, 'd': 100}

### 딕셔너리와 for 반복문

In [23]:
# Loop over key-value pairs with items().
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
for key, value in x.items():
    print(key, value)

a 10
b 20
c 30
d 40


In [24]:
# Loop over keys() and fetch each value with get().
for key in x.keys():
    print(key, x.get(key))

a 10
b 20
c 30
d 40


In [25]:
# Loop over the values only.
for value in x.values():
    print(value)

10
20
30
40


### 딕셔너리 표현식

In [26]:
# Dict comprehension: rebuild a dict from the pairs of dict.fromkeys(keys, 0).
keys = ['a', 'b', 'c', 'd']
x = {key: value for key, value in dict.fromkeys(keys, 0).items()}
x

{'a': 0, 'b': 0, 'c': 0, 'd': 0}

In [27]:
# Swap keys and values: each original value becomes a key.
{value: key for key, value in {'a': 10, 'b': 20, 'c': 30, 'd': 40}.items()}

{10: 'a', 20: 'b', 30: 'c', 40: 'd'}

In [28]:
# Intentional failure demo: deleting from a dict while iterating over it
# raises "RuntimeError: dictionary changed size during iteration".
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
for key, value in x.items():
    if value == 20:    # when the value is 20
        del x[key]     # delete the key-value pair
x

RuntimeError: dictionary changed size during iteration

In [29]:
# Build a new dict that leaves out the pairs whose value is 20,
# instead of deleting from the dict during iteration.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
x = {key: value for key, value in x.items() if value != 20}
x

{'a': 10, 'c': 30, 'd': 40}

In [30]:
# Filtering: keep only the pairs whose value is below 25.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
x = {key: value for key, value in x.items() if value < 25}
x

{'a': 10, 'b': 20}

### Nested Dictionary

In [31]:
# Nested dictionary: planet name -> dict of that planet's properties
# (mean_radius, mass, orbital_period), in the order Mercury, Venus, Earth, Mars.
terrestrial_planet = dict(
    Mercury=dict(mean_radius=2439.7, mass=3.3022e23, orbital_period=87.969),
    Venus=dict(mean_radius=6051.8, mass=4.8676e24, orbital_period=224.70069),
    Earth=dict(mean_radius=6371.0, mass=5.97219e24, orbital_period=365.25641),
    Mars=dict(mean_radius=3389.5, mass=6.4185e23, orbital_period=686.96),
)

In [32]:
# Index with the outer key first, then the inner key.
print(terrestrial_planet['Venus']['mean_radius'])

6051.8


In [33]:
# Print each planet name, then one tab-indented "property:<TAB>value" line
# per entry of its inner dict.
for planet, properties in terrestrial_planet.items():
    print(planet)
    for prop in properties:
        print(f'\t{prop}:\t{properties[prop]}')

Mercury
	mean_radius:	2439.7
	mass:	3.3022e+23
	orbital_period:	87.969
Venus
	mean_radius:	6051.8
	mass:	4.8676e+24
	orbital_period:	224.70069
Earth
	mean_radius:	6371.0
	mass:	5.97219e+24
	orbital_period:	365.25641
Mars
	mean_radius:	3389.5
	mass:	6.4185e+23
	orbital_period:	686.96


### 딕셔너리의 할당과 복사

In [34]:
# Plain assignment does not copy: y and x are the same dict,
# so the change made through y shows up in x.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
y = x
y['d'] = 99
x

{'a': 10, 'b': 20, 'c': 30, 'd': 99}

In [35]:
# copy() makes a separate dict, so changing y leaves x unchanged.
x = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
y = x.copy()
y['d'] = 99
x

{'a': 10, 'b': 20, 'c': 30, 'd': 40}

In [36]:
# copy() is shallow: the nested dicts are still shared,
# so the change made through y['a'] shows up in x.
x = {'a': {'python': '2.7'}, 'b': {'python': '3.6'}}
y = x.copy()
y['a']['python'] = '2.7.15'
x

{'a': {'python': '2.7.15'}, 'b': {'python': '3.6'}}

In [37]:
# copy.deepcopy also copies the nested dicts, so x is unaffected.
x = {'a': {'python': '2.7'}, 'b': {'python': '3.6'}}
import copy
y = copy.deepcopy(x)
y['a']['python'] = '2.7.15'
x

{'a': {'python': '2.7'}, 'b': {'python': '3.6'}}

### Word Count Top 10 연습문제

In [38]:
lorem = '''
What is Lorem Ipsum?
Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.

Why do we use it?
It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).

Where does it come from?
Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.

The standard chunk of Lorem Ipsum used since the 1500s is reproduced below for those interested. Sections 1.10.32 and 1.10.33 from "de Finibus Bonorum et Malorum" by Cicero are also reproduced in their exact original form, accompanied by English versions from the 1914 translation by H. Rackham.

Where can I get some?
There are many variations of passages of Lorem Ipsum available, but the majority have suffered alteration in some form, by injected humour, or randomised words which don't look even slightly believable. If you are going to use a passage of Lorem Ipsum, you need to be sure there isn't anything embarrassing hidden in the middle of text. All the Lorem Ipsum generators on the Internet tend to repeat predefined chunks as necessary, making this the first true generator on the Internet. It uses a dictionary of over 200 Latin words, combined with a handful of model sentence structures, to generate Lorem Ipsum which looks reasonable. The generated Lorem Ipsum is therefore always free from repetition, injected humour, or non-characteristic words etc.
'''

In [39]:
# Remove punctuation and digits.
import re
import string
# re.escape keeps every punctuation character literal inside the character
# class. Unescaped, the "\" in string.punctuation only escapes the "]" that
# follows it, so literal backslashes were never removed.
# re.MULTILINE was dropped: it only affects ^ and $, which the pattern does not use.
lorem = re.sub(f'[{re.escape(string.punctuation)}0-9]', '', lorem)
print(lorem)


What is Lorem Ipsum
Lorem Ipsum is simply dummy text of the printing and typesetting industry Lorem Ipsum has been the industrys standard dummy text ever since the s when an unknown printer took a galley of type and scrambled it to make a type specimen book It has survived not only five centuries but also the leap into electronic typesetting remaining essentially unchanged It was popularised in the s with the release of Letraset sheets containing Lorem Ipsum passages and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum

Why do we use it
It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout The point of using Lorem Ipsum is that it has a moreorless normal distribution of letters as opposed to using Content here content here making it look like readable English Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text and 

In [40]:
# Convert the text to lowercase.
lorem = lorem.lower()
#print(lorem)

In [41]:
# Split the text on whitespace into a list of words.
lorem_list = lorem.split()
print(lorem_list)

['what', 'is', 'lorem', 'ipsum', 'lorem', 'ipsum', 'is', 'simply', 'dummy', 'text', 'of', 'the', 'printing', 'and', 'typesetting', 'industry', 'lorem', 'ipsum', 'has', 'been', 'the', 'industrys', 'standard', 'dummy', 'text', 'ever', 'since', 'the', 's', 'when', 'an', 'unknown', 'printer', 'took', 'a', 'galley', 'of', 'type', 'and', 'scrambled', 'it', 'to', 'make', 'a', 'type', 'specimen', 'book', 'it', 'has', 'survived', 'not', 'only', 'five', 'centuries', 'but', 'also', 'the', 'leap', 'into', 'electronic', 'typesetting', 'remaining', 'essentially', 'unchanged', 'it', 'was', 'popularised', 'in', 'the', 's', 'with', 'the', 'release', 'of', 'letraset', 'sheets', 'containing', 'lorem', 'ipsum', 'passages', 'and', 'more', 'recently', 'with', 'desktop', 'publishing', 'software', 'like', 'aldus', 'pagemaker', 'including', 'versions', 'of', 'lorem', 'ipsum', 'why', 'do', 'we', 'use', 'it', 'it', 'is', 'a', 'long', 'established', 'fact', 'that', 'a', 'reader', 'will', 'be', 'distracted', 'by',

In [42]:
# Tally how many times each word occurs in lorem_list.
word_dict = {}
for word in lorem_list:
    # get() supplies 0 the first time a word is seen.
    word_dict[word] = word_dict.get(word, 0) + 1
print(word_dict)

{'what': 1, 'is': 8, 'lorem': 19, 'ipsum': 19, 'simply': 2, 'dummy': 2, 'text': 5, 'of': 21, 'the': 28, 'printing': 1, 'and': 10, 'typesetting': 2, 'industry': 1, 'has': 4, 'been': 1, 'industrys': 1, 'standard': 2, 'ever': 1, 'since': 2, 's': 3, 'when': 2, 'an': 1, 'unknown': 1, 'printer': 1, 'took': 1, 'a': 15, 'galley': 1, 'type': 2, 'scrambled': 1, 'it': 11, 'to': 7, 'make': 1, 'specimen': 1, 'book': 2, 'survived': 1, 'not': 2, 'only': 1, 'five': 1, 'centuries': 1, 'but': 2, 'also': 2, 'leap': 1, 'into': 1, 'electronic': 1, 'remaining': 1, 'essentially': 1, 'unchanged': 1, 'was': 1, 'popularised': 1, 'in': 10, 'with': 3, 'release': 1, 'letraset': 1, 'sheets': 1, 'containing': 1, 'passages': 2, 'more': 2, 'recently': 1, 'desktop': 2, 'publishing': 2, 'software': 1, 'like': 3, 'aldus': 1, 'pagemaker': 1, 'including': 1, 'versions': 3, 'why': 1, 'do': 1, 'we': 1, 'use': 3, 'long': 1, 'established': 1, 'fact': 1, 'that': 2, 'reader': 1, 'will': 2, 'be': 2, 'distracted': 1, 'by': 7, 'rea

In [43]:
# Order the distinct words from most to least frequent.
sorted_words = sorted(word_dict, key=word_dict.get, reverse=True)
print(sorted_words)

['the', 'of', 'lorem', 'ipsum', 'a', 'it', 'and', 'in', 'is', 'from', 'to', 'by', 'text', 'has', 'on', 'latin', 'words', 's', 'with', 'like', 'versions', 'use', 'content', 'as', 'making', 'many', 'their', 'over', 'injected', 'humour', 'are', 'simply', 'dummy', 'typesetting', 'standard', 'since', 'when', 'type', 'book', 'not', 'but', 'also', 'passages', 'more', 'desktop', 'publishing', 'that', 'will', 'be', 'readable', 'page', 'at', 'using', 'here', 'look', 'english', 'web', 'model', 'for', 'have', 'years', 'sometimes', 'where', 'popular', 'classical', 'literature', 'bc', 'passage', 'going', 'comes', 'sections', 'de', 'finibus', 'bonorum', 'et', 'malorum', 'cicero', 'this', 'first', 'line', 'reproduced', 'form', 'some', 'there', 'or', 'which', 'you', 'internet', 'what', 'printing', 'industry', 'been', 'industrys', 'ever', 'an', 'unknown', 'printer', 'took', 'galley', 'scrambled', 'make', 'specimen', 'survived', 'only', 'five', 'centuries', 'leap', 'into', 'electronic', 'remaining', 'ess

In [44]:
# Print the ten most frequent words with their counts.
for value in sorted_words[:10]:
    print(value, word_dict[value])

the 28
of 21
lorem 19
ipsum 19
a 15
it 11
and 10
in 10
is 8
from 8


### 파일로부터 읽어서 Word counting

In [45]:
# Open lorem.txt in the default read mode and collect its lines.
with open('c:/temp/lorem.txt') as file:
    line_list = list(file)
# Join the lines with a space and lowercase the result.
lines = ' '.join(line_list).lower()
#print(lines)

In [46]:
import re
import string
# Remove punctuation and digits. re.escape keeps every punctuation character
# literal inside the character class. Unescaped, the "\" in string.punctuation
# only escapes the "]" that follows it, so literal backslashes were never removed.
# re.MULTILINE was dropped: it only affects ^ and $, which the pattern does not use.
lines = re.sub(f'[{re.escape(string.punctuation)}0-9]', '', lines)
#print(lines)

In [47]:
# Split the text on whitespace into a list of words.
words_list = lines.split()
#print(words_list)

In [48]:
# Tally how many times each word occurs in words_list.
word_dict = {}
for word in words_list:
    # get() supplies 0 the first time a word is seen.
    word_dict[word] = word_dict.get(word, 0) + 1
print(word_dict)

{'what': 1, 'is': 8, 'lorem': 19, 'ipsum': 19, 'simply': 2, 'dummy': 2, 'text': 5, 'of': 21, 'the': 28, 'printing': 1, 'and': 10, 'typesetting': 2, 'industry': 1, 'has': 4, 'been': 1, 'industrys': 1, 'standard': 2, 'ever': 1, 'since': 2, 's': 3, 'when': 2, 'an': 1, 'unknown': 1, 'printer': 1, 'took': 1, 'a': 15, 'galley': 1, 'type': 2, 'scrambled': 1, 'it': 11, 'to': 7, 'make': 1, 'specimen': 1, 'book': 2, 'survived': 1, 'not': 2, 'only': 1, 'five': 1, 'centuries': 1, 'but': 2, 'also': 2, 'leap': 1, 'into': 1, 'electronic': 1, 'remaining': 1, 'essentially': 1, 'unchanged': 1, 'was': 1, 'popularised': 1, 'in': 10, 'with': 3, 'release': 1, 'letraset': 1, 'sheets': 1, 'containing': 1, 'passages': 2, 'more': 2, 'recently': 1, 'desktop': 2, 'publishing': 2, 'software': 1, 'like': 3, 'aldus': 1, 'pagemaker': 1, 'including': 1, 'versions': 3, 'why': 1, 'do': 1, 'we': 1, 'use': 3, 'long': 1, 'established': 1, 'fact': 1, 'that': 2, 'reader': 1, 'will': 2, 'be': 2, 'distracted': 1, 'by': 7, 'rea

In [49]:
# Order the distinct words from most to least frequent.
sorted_words = sorted(word_dict, key=word_dict.get, reverse=True)
print(sorted_words)

['the', 'of', 'lorem', 'ipsum', 'a', 'it', 'and', 'in', 'is', 'from', 'to', 'by', 'text', 'has', 'on', 'latin', 'words', 's', 'with', 'like', 'versions', 'use', 'content', 'as', 'making', 'many', 'their', 'over', 'injected', 'humour', 'are', 'simply', 'dummy', 'typesetting', 'standard', 'since', 'when', 'type', 'book', 'not', 'but', 'also', 'passages', 'more', 'desktop', 'publishing', 'that', 'will', 'be', 'readable', 'page', 'at', 'using', 'here', 'look', 'english', 'web', 'model', 'for', 'have', 'years', 'sometimes', 'where', 'popular', 'classical', 'literature', 'bc', 'passage', 'going', 'comes', 'sections', 'de', 'finibus', 'bonorum', 'et', 'malorum', 'cicero', 'this', 'first', 'line', 'reproduced', 'form', 'some', 'there', 'or', 'which', 'you', 'internet', 'what', 'printing', 'industry', 'been', 'industrys', 'ever', 'an', 'unknown', 'printer', 'took', 'galley', 'scrambled', 'make', 'specimen', 'survived', 'only', 'five', 'centuries', 'leap', 'into', 'electronic', 'remaining', 'ess

In [50]:
# Print the ten most frequent words with their counts.
for value in sorted_words[:10]:
    print(value, word_dict[value])

the 28
of 21
lorem 19
ipsum 19
a 15
it 11
and 10
in 10
is 8
from 8
