## 한글 형태소 분석기
- Reference: https://konlpyko.readthedocs.io/ko/v0.6.0/

In [1]:
%pip install konlpy

Collecting konlpy
  Downloading konlpy-0.6.0-py2.py3-none-any.whl (19.4 MB)
                                              0.0/19.4 MB ? eta -:--:--
                                              0.0/19.4 MB ? eta -:--:--
                                              0.3/19.4 MB 9.9 MB/s eta 0:00:02
                                              0.5/19.4 MB 7.2 MB/s eta 0:00:03
     -                                        0.6/19.4 MB 5.6 MB/s eta 0:00:04
     -                                        0.7/19.4 MB 4.7 MB/s eta 0:00:05
     -                                        0.9/19.4 MB 4.4 MB/s eta 0:00:05
     --                                       1.1/19.4 MB 4.5 MB/s eta 0:00:05
     --                                       1.3/19.4 MB 4.4 MB/s eta 0:00:05
     ---                                      1.5/19.4 MB 4.4 MB/s eta 0:00:05
     ---                                      1.7/19.4 MB 4.4 MB/s eta 0:00:05
     ---                                      1.9/19.4 MB 4.3 MB/s et

### Okt : 한글 형태소 분석기 중 하나

In [2]:
from konlpy.tag import Okt

In [3]:
okt = Okt() # select the morphological analyzer (Okt) used by the cells below

In [4]:
# Sample Korean sentences shared by the analyzer demos below.
sentence_list = [
    '아버지가방에들어가신다',  # written without any spaces
    '나는 밥을 먹는다.',
    '하늘을 나는 자동차',
    '아이폰 기다리다 지쳐 애플공홈에서 언락폰질러버렸다 6+ 128기가실버ㅋ',
]

In [5]:
morphs_list = okt.morphs(sentence_list[1]) # tokenize the second sample sentence into morphemes
morphs_list  # bare last expression so the notebook displays the token list

['나', '는', '밥', '을', '먹는다', '.']

In [6]:
okt.pos(sentence_list[1]) # part-of-speech tagging: (token, tag) pairs for the second sample sentence

[('나', 'Noun'),
 ('는', 'Josa'),
 ('밥', 'Noun'),
 ('을', 'Josa'),
 ('먹는다', 'Verb'),
 ('.', 'Punctuation')]

In [7]:
okt.nouns(sentence_list[1]) # extract only the nouns from the second sample sentence

['나', '밥']

In [8]:
# Phrase extraction on a compound word; the recorded output lists the whole
# word plus its two parts.
okt.phrases('애플공홈')

['애플공홈', '애플', '공홈']

### Mecab

In [9]:
!pip install python-mecab-ko

Collecting python-mecab-ko
  Downloading python_mecab_ko-1.3.3-cp311-cp311-win_amd64.whl (814 kB)
                                              0.0/814.0 kB ? eta -:--:--
     ----------                             235.5/814.0 kB 4.8 MB/s eta 0:00:01
     ------------------------               532.5/814.0 kB 6.7 MB/s eta 0:00:01
     ---------------------------------      727.0/814.0 kB 5.1 MB/s eta 0:00:01
     -------------------------------------- 814.0/814.0 kB 4.7 MB/s eta 0:00:00
Collecting python-mecab-ko-dic (from python-mecab-ko)
  Downloading python_mecab_ko_dic-2.1.1.post2-py3-none-any.whl (34.5 MB)
                                              0.0/34.5 MB ? eta -:--:--
                                              0.4/34.5 MB 13.1 MB/s eta 0:00:03
     -                                        0.9/34.5 MB 11.3 MB/s eta 0:00:03
     -                                        1.4/34.5 MB 11.3 MB/s eta 0:00:03
     --                                       1.9/34.5 MB 11.0 MB/s et

In [10]:
from mecab import MeCab

In [11]:
# Create the MeCab analyzer instance used by the cells below.
mecab = MeCab()

In [12]:
# Tokenize the first sample sentence (the one written without spaces) into morphemes.
mecab.morphs(sentence_list[0])

['아버지', '가', '방', '에', '들어가', '신다']

In [13]:
# Part-of-speech tagging with MeCab: (token, tag) pairs for the first sample sentence.
# The tag set differs from Okt's (e.g. 'NNG'/'JKS' here vs. 'Noun'/'Josa' above).
mecab.pos(sentence_list[0])

[('아버지', 'NNG'),
 ('가', 'JKS'),
 ('방', 'NNG'),
 ('에', 'JKB'),
 ('들어가', 'VV'),
 ('신다', 'EP+EC')]

In [14]:
# Full analysis of a compound word: per the recorded output, each result is a
# Morpheme carrying its character span, surface form and a Feature record.
mecab.parse('애플공홈')

[Morpheme(span=Span(start=0, end=2), surface='애플', feature=Feature(pos='NNP', semantic='인명', has_jongseong=True, reading='애플', type=None, start_pos=None, end_pos=None, expression=None)),
 Morpheme(span=Span(start=2, end=4), surface='공홈', feature=Feature(pos='NNG', semantic=None, has_jongseong=True, reading='공홈', type=None, start_pos=None, end_pos=None, expression=None))]

In [16]:
# Tokenize every sample sentence with MeCab and print one token list per sentence.
# Note: this rebinds `morphs_list`, which cell In [5] assigned from the Okt tokenizer.
for sentence in sentence_list:
    morphs_list = mecab.morphs(sentence)
    print(morphs_list)

['아버지', '가', '방', '에', '들어가', '신다']
['나', '는', '밥', '을', '먹', '는다', '.']
['하늘', '을', '나', '는', '자동차']
['아이폰', '기다리', '다', '지쳐', '애플', '공홈', '에서', '언락', '폰', '질러', '버렸', '다', '6', '+', '128', '기', '가', '실버', 'ㅋ']
