# 기본 데이터 구조

- 스택과 큐(stack&queue with list)
- 튜플과 집합(tuple & set)
- 사전(dictionary)
- collections 모듈

## 스택

- 나중에 넣은 데이터를 먼저 반환(LIFO)
- Push / Pop

In [2]:
# A plain list used as a stack: items are pushed and popped at the same (right) end.
a = list(range(1, 6))
a += [10, 20]  # push 10, then 20
# LIFO: the most recently pushed value (20) comes off first, then 10.
print(a.pop(), a.pop(), sep='\n')

20
10


In [5]:
# Print the characters of the entered word in reverse order, one per line.
word = input('input a word : ')
word_list = list(word)
# Treat the list as a stack: keep popping the last character until it is empty.
while word_list:
    print(word_list.pop())

input a word :  ㄴㅁㅇㄹ


ㄹ
ㅇ
ㅁ
ㄴ


## 큐

- 먼저 넣은 데이터를 먼저 반환(FIFO)

In [6]:
# A plain list used as a queue: enqueue at the back, dequeue from the front.
a = list(range(1, 6))
a += [10, 20]  # enqueue 10, then 20
# FIFO: the oldest items (1, then 2) leave first.
# Note: list.pop(0) is O(n) because the remaining items shift left.
print(a.pop(0), a.pop(0), sep='\n')

1
2


## 튜플

- 값의 변경이 불가능한 리스트

In [9]:
# Tuples support concatenation (+), repetition (*) and len(), but not item assignment.
t = (1,2,3)
print(t+t, t*2)
len(t)  # value is computed but not printed, so it does not appear in the output
t[1] = 5 # error: deliberately raises TypeError to show that tuples are immutable

(1, 2, 3, 1, 2, 3) (1, 2, 3, 1, 2, 3)


TypeError: 'tuple' object does not support item assignment

## 집합(set)

- 수학에서의 집합과 동일한 연산 가능
- 순서없음
- 중복 불가

In [18]:
# Duplicates collapse when a set is built: only the distinct values remain.
s = {1, 2, 3, 1, 2, 3}
print(s)

{1, 2, 3}


In [19]:
s.add(1) # add a single element (1 is already in the set, so nothing changes)
s

{1, 2, 3}

In [20]:
s.remove(1) # remove one element
s

{2, 3}

In [21]:
s.update([1,2,4,5,5,3]) # add several elements at once; duplicates collapse
s

{1, 2, 3, 4, 5}

In [22]:
s.discard(3) # remove one element
s

{1, 2, 4, 5}

In [23]:
s.clear() # remove every element

In [25]:
# Set algebra: each operation is shown first as a method call, then as an operator.
s1 = {1, 2, 3, 4, 5}
s2 = {3, 4, 5, 6, 7}

# Union: elements in either set.
print(s1.union(s2))
print(s1 | s2)

# Intersection: elements in both sets.
print(s1.intersection(s2))
print(s1 & s2)

# Difference: elements of s1 that are not in s2.
print(s1.difference(s2))
print(s1 - s2)

{1, 2, 3, 4, 5, 6, 7}
{1, 2, 3, 4, 5, 6, 7}
{3, 4, 5}
{3, 4, 5}
{1, 2}
{1, 2}


## 사전(dict)

- key-value 쌍으로 데이터 저장
- hash table

In [26]:
# Build a dict from key-value pairs, then look a value up by its key.
student_info = {99:'d', 33:'a'}
student_info[99]

'd'

In [31]:
# Map country names to their codes. (The original empty-dict initialisation was
# overwritten on the very next line, so the dict is created directly.)
country_code = {"America": 1, "Korea": 82, "China": 86, "Japan": 81}
print(country_code)
# items()/keys()/values() return views over the pairs, the keys and the values.
print(country_code.items())
print(country_code.keys())
print(country_code.values())
# Assigning to a new key inserts a new entry at the end.
country_code["German"] = 49
print(country_code)

{'America': 1, 'Korea': 82, 'China': 86, 'Japan': 81}
dict_items([('America', 1), ('Korea', 82), ('China', 86), ('Japan', 81)])
dict_keys(['America', 'Korea', 'China', 'Japan'])
dict_values([1, 82, 86, 81])
{'America': 1, 'Korea': 82, 'China': 86, 'Japan': 81, 'German': 49}


In [32]:
# Example use of dict: a command analyzer that ranks the values found in the
# second CSV column by how often they occur.
import csv
from collections import Counter


def getKey(item):
    """Return the second element of a ``[key, count]`` pair; used as the sort key."""
    return item[1]


# The csv module documentation asks for newline='' when opening the file so the
# reader handles line endings itself (newlines inside quoted fields stay intact).
command_data = []
with open('data/command_data.csv', 'r', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='"')
    command_data.extend(spamreader)

# Count occurrences of each second-column value. Counter is a dict subclass and
# keeps keys in first-seen order, like the manual dict-based tally it replaces.
command_counter = Counter(data[1] for data in command_data)

# Convert to [key, count] lists so the printed result keeps its list-of-lists shape.
dictlist = [[key, value] for key, value in command_counter.items()]

# Highest count first; sorted() is stable, so ties keep their first-seen order.
sorted_dict = sorted(dictlist, key=getKey, reverse=True)
print(sorted_dict[:100])

[['bookworm', 8500], ['elsa', 7500], ['fillmore', 7394], ['francis', 5978], ['anton_ego', 5819], ['queen_grimhilde', 5000], ['kristoff', 4934], ['brent_mustangburger', 4838], ['emperor_zurg', 4470], ['tarzan', 4193], ['stitch', 3742], ['marlon_the_alligator', 3203], ['faline', 3115], ['meg', 3098], ['fear', 2968], ['roo', 2782], ['claire_wheeler', 2777], ['don_carlton', 2773], ['guido', 2541], ['flynn_rider', 1996], ['mama_odie', 1883], ['darla_sherman', 1861], ['tiger_lily', 1846], ['chick_hicks', 1678], ['louis_the_alligator', 1374], ['the_dodo', 1364], ['ray_the_firefly', 998], ['tigger', 884], ['jane_porter', 852], ['al_mcwhiggin', 777], ['tinker_bell', 696], ['peter_pig', 500], ['rocket_raccoon', 473], ['charlotte_la_bouff', 472], ['peter_pan', 463], ['auto', 458], ['kocoum', 438], ['prince_naveen', 425], ['flik', 424], ['dory', 410], ['bo_peep', 407], ['captain_hook', 403], ['aladdin', 402], ['chatter_telephone', 372], ['django', 371], ['charlie', 363], ['bomb_voyage', 337], ['ri

## collections 

- List, Tuple, Dict에 대한 Python Built-in 확장 자료 구조(모듈)

In [33]:
from collections import deque
from collections import Counter
from collections import OrderedDict
from collections import defaultdict
from collections import namedtuple

### deque

- Stack과 Queue 지원
- List에 비해 효율적인 자료 저장방식 지원

In [35]:
# Fill a deque from the right, then push a single item on the left.
deque_list = deque()
for value in range(5):
    deque_list.append(value)  # append at the right end
print(deque_list)
deque_list.appendleft(10)  # insert at the left end
print(deque_list)

deque([0, 1, 2, 3, 4])
deque([10, 0, 1, 2, 3, 4])


- rotate, reverse 등 Linked List의 특성을 지원
- 기존 list 형태의 함수를 대부분 지원 (슬라이싱, sort() 등은 미지원)

In [38]:
# rotate(n) shifts the elements n positions to the right
deque_list.rotate(2)
print(deque_list)
deque_list.rotate(2)
print(deque_list)
# reversed() yields the items back-to-front; wrap them in a new deque to print them
print(deque(reversed(deque_list)))

deque([1, 2, 3, 4, 10, 0])
deque([10, 0, 1, 2, 3, 4])
deque([4, 3, 2, 1, 0, 10])


In [40]:
# extend, extendleft
# extend() appends the items on the right in the order given.
deque_list.extend([5,6,7])
print(deque_list)
# extendleft() pushes the items onto the left one by one, so they end up reversed (7, 6, 5).
deque_list.extendleft([5,6,7])
print(deque_list)

deque([7, 6, 5, 10, 0, 1, 2, 3, 4, 5, 6, 7, 5, 6, 7])
deque([7, 6, 5, 7, 6, 5, 10, 0, 1, 2, 3, 4, 5, 6, 7, 5, 6, 7])


In [41]:
import time

In [43]:
# Time 10,000 x 10,000 append/pop pairs at the right end of a deque.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
deque_list = deque()
for i in range(10000):
    for j in range(10000):
        deque_list.append(j)
        deque_list.pop()
print(time.perf_counter()-start_time, 's')

10.552529096603394 s


In [44]:
# Time 10,000 x 10,000 append/pop pairs at the right end of a plain list.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# elapsed time; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
just_list = []
for i in range(10000):
    for j in range(10000):
        just_list.append(j)
        just_list.pop()
print(time.perf_counter()-start_time, 's')

25.15894889831543 s


### OrderedDict

- 순서가 있는 dict

In [46]:
from collections import OrderedDict

# Build the mapping from pairs listed in insertion order: x, y, z, l.
d = OrderedDict([('x', 100), ('y', 200), ('z', 300), ('l', 500)])

# Iteration follows insertion order, not key or value order.
for key, value in d.items():
    print(key, value)

x 100
y 200
z 300
l 500


In [50]:
# Rebuild the OrderedDict from its items sorted by key, then print in that order.
by_key = OrderedDict(sorted(d.items(), key=lambda pair: pair[0]))
for k, v in by_key.items():
    print(k, v)

print()

# Same again, but with the items sorted by value.
by_value = OrderedDict(sorted(d.items(), key=lambda pair: pair[1]))
for k, v in by_value.items():
    print(k, v)

l 500
x 100
y 200
z 300

x 100
y 200
z 300
l 500


### defaultdict

- dict type에 기본값을 지정, 신규값 생성 시 사용

In [53]:
from collections import defaultdict

# The factory is called whenever a missing key is accessed; here it supplies 0.
# (The original also created a defaultdict(object) first, but it was overwritten
# immediately and never used, so that dead assignment is dropped.)
d = defaultdict(lambda: 0)
print(d["first"])  # looking up a missing key creates it with the default value 0

0


In [56]:
# Goal: count how often each word occurs in a passage (a basic text-mining approach).
# Step 1: lowercase the passage and split it on whitespace into tokens.
passage = """A press release is the quickest and easiest way to get free publicity. If well written, a press release can result in multiple published articles about your firm and its products. And that can mean new prospects contacting you asking you to sell to them. ...."""
text = passage.lower().split()
print(text)

['a', 'press', 'release', 'is', 'the', 'quickest', 'and', 'easiest', 'way', 'to', 'get', 'free', 'publicity.', 'if', 'well', 'written,', 'a', 'press', 'release', 'can', 'result', 'in', 'multiple', 'published', 'articles', 'about', 'your', 'firm', 'and', 'its', 'products.', 'and', 'that', 'can', 'mean', 'new', 'prospects', 'contacting', 'you', 'asking', 'you', 'to', 'sell', 'to', 'them.', '....']


In [59]:
# Tally every token; a missing key starts at 0 thanks to the default factory.
count_word = defaultdict(lambda: 0)
for token in text:
    count_word[token] += 1

# Print the tokens from most to least frequent; sorted() is stable, so tokens
# with equal counts keep their first-seen order.
ranked = sorted(count_word.items(), key=lambda pair: pair[1], reverse=True)
for term, freq in OrderedDict(ranked).items():
    print(term, freq)

and 3
to 3
a 2
press 2
release 2
can 2
you 2
is 1
the 1
quickest 1
easiest 1
way 1
get 1
free 1
publicity. 1
if 1
well 1
written, 1
result 1
in 1
multiple 1
published 1
articles 1
about 1
your 1
firm 1
its 1
products. 1
that 1
mean 1
new 1
prospects 1
contacting 1
asking 1
sell 1
them. 1
.... 1


### Counter

Sequence type의 data element들의 개수를 dict 형태로 반환

In [60]:
from collections import Counter

# Counting a string tallies each character. (The original first created an empty
# Counter() that was overwritten immediately; that dead assignment is dropped.)
c = Counter('gallahad')
print(c)

Counter({'a': 3, 'l': 2, 'g': 1, 'h': 1, 'd': 1})


In [61]:
# A Counter can be expanded back into a list: elements() repeats each key
# as many times as its count.
color_counts = {'red': 4, 'blue': 2}
c = Counter(color_counts)
print(c)
print([*c.elements()])

Counter({'red': 4, 'blue': 2})
['red', 'red', 'red', 'red', 'blue', 'blue']


In [63]:
# Keyword arguments work too: each keyword name becomes a string key.
c = Counter(cats=4, dogs=8)
print(c)
print([*c.elements()])

Counter({'dogs': 8, 'cats': 4})
['cats', 'cats', 'cats', 'cats', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs']


In [67]:
# Counters support set-like arithmetic.
c = Counter({'a': 4, 'b': 2, 'c': 0, 'd': -2})
d = Counter({'a': 1, 'b': 2, 'c': 3, 'd': 4})
# subtract() works in place and keeps zero and negative counts, unlike the
# `c - d` operator, which returns a new Counter with only the positive counts.
c.subtract(d)
print(c)

Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})
