In [1]:
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### WordNet Synset 및 SentiWordNet SentiSynset 클래스

In [2]:
# "corpus" means a body of text; WordNet is imported from NLTK's corpus package.

from nltk.corpus import wordnet

# Look up the WordNet synsets recorded for the query word.
term = 'present'
synsets = wordnet.synsets(term)

In [3]:
# Check the container type and how many synsets came back for `term`.
type(synsets), len(synsets)

(list, 18)

In [4]:
# Print the raw synset list; entries display as Synset('<word>.<pos>.<nn>').
print(synsets)

[Synset('present.n.01'), Synset('present.n.02'), Synset('present.n.03'), Synset('show.v.01'), Synset('present.v.02'), Synset('stage.v.01'), Synset('present.v.04'), Synset('present.v.05'), Synset('award.v.01'), Synset('give.v.08'), Synset('deliver.v.01'), Synset('introduce.v.01'), Synset('portray.v.04'), Synset('confront.v.03'), Synset('present.v.12'), Synset('salute.v.06'), Synset('present.a.01'), Synset('present.a.02')]


In [5]:
# Summarise the first five synsets: name, lexname(), definition and lemma names.
# Note: the 'POS :' line prints lexname(), which yields values such as
# 'noun.time' rather than a bare part-of-speech tag.
for synset in synsets[:5]:
    print(
        f'#### name: {synset.name()} ####',
        f'POS : {synset.lexname()}',
        f'정의 : {synset.definition()}',
        f'표제어 : {synset.lemma_names()}',
        sep='\n',
    )

#### name: present.n.01 ####
POS : noun.time
정의 : the period of time that is happening now; any continuous stretch of time including the moment of speech
표제어 : ['present', 'nowadays']
#### name: present.n.02 ####
POS : noun.possession
정의 : something presented as a gift
표제어 : ['present']
#### name: present.n.03 ####
POS : noun.communication
정의 : a verb tense that expresses actions or states at the time of speaking
표제어 : ['present', 'present_tense']
#### name: show.v.01 ####
POS : verb.perception
정의 : give an exhibition of to an interested audience
표제어 : ['show', 'demo', 'exhibit', 'present', 'demonstrate']
#### name: present.v.02 ####
POS : verb.communication
정의 : bring forward and present to the mind
표제어 : ['present', 'represent', 'lay_out']


* 어휘 간의 유사도

In [6]:
# When only the word itself is given, use synsets() to get its synsets.
for synset in wordnet.synsets('tiger'):
    print(f'{synset.name()} {synset.definition()}')

tiger.n.01 a fierce or audacious person
tiger.n.02 large feline of forests in most of Asia having a tawny coat with black stripes; endangered


In [15]:
# When the word and its part of speech (plus sense number) are known,
# synset() fetches that single synset directly by its identifier.
tiger, tree, lion, cat, dog, tire = (
    wordnet.synset(synset_id)
    for synset_id in (
        'tiger.n.02',
        'tree.n.01',
        'lion.n.01',
        'cat.n.01',
        'dog.n.01',
        'tire.n.01',
    )
)

In [16]:
# Definition of the first synset WordNet returns for 'tire'.
wordnet.synsets('tire')[0].definition()

'hoop that covers a wheel'

In [19]:
# Similarity between words: path similarity of tiger to lion, dog and tree,
# displayed in that order.
tuple(tiger.path_similarity(other) for other in (lion, dog, tree))

(0.3333333333333333, 0.16666666666666666, 0.07142857142857142)

In [20]:
# Pairwise similarity among the six words: each row holds one entity's
# path similarity to every entity in the list, itself included.
entities = [tree, lion, tiger, cat, dog, tire]
similarities = []
for entity in entities:
    similarity = list(map(entity.path_similarity, entities))
    similarities.append(similarity)

similarities

[[1.0,
  0.07142857142857142,
  0.07142857142857142,
  0.07692307692307693,
  0.125,
  0.08333333333333333],
 [0.07142857142857142,
  1.0,
  0.3333333333333333,
  0.25,
  0.16666666666666666,
  0.058823529411764705],
 [0.07142857142857142,
  0.3333333333333333,
  1.0,
  0.25,
  0.16666666666666666,
  0.058823529411764705],
 [0.07692307692307693, 0.25, 0.25, 1.0, 0.2, 0.0625],
 [0.125,
  0.16666666666666666,
  0.16666666666666666,
  0.2,
  1.0,
  0.09090909090909091],
 [0.08333333333333333,
  0.058823529411764705,
  0.058823529411764705,
  0.0625,
  0.09090909090909091,
  1.0]]

In [21]:
# Wrap the similarity matrix in a DataFrame. Column labels follow the order
# of `entities`; rows keep the default integer index.
df = pd.DataFrame(
    data=similarities,
    columns=['tree', 'lion', 'tiger', 'cat', 'dog', 'tire'],
)
df

Unnamed: 0,tree,lion,tiger,cat,dog,tire
0,1.0,0.071429,0.071429,0.076923,0.125,0.083333
1,0.071429,1.0,0.333333,0.25,0.166667,0.058824
2,0.071429,0.333333,1.0,0.25,0.166667,0.058824
3,0.076923,0.25,0.25,1.0,0.2,0.0625
4,0.125,0.166667,0.166667,0.2,1.0,0.090909
5,0.083333,0.058824,0.058824,0.0625,0.090909,1.0


* SentiSynset 객체

In [22]:
from nltk.corpus import sentiwordnet as swn

# The result of senti_synsets() has to be turned into a list here
# (presumably it is a lazy iterable -- TODO confirm).
senti_synsets = [*swn.senti_synsets('slow')]

In [23]:
# Show the container type, the number of entries and the entries themselves,
# one item per line.
print(type(senti_synsets), len(senti_synsets), senti_synsets, sep='\n')

<class 'list'>
11
[SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'), SentiSynset('slow.v.03'), SentiSynset('slow.a.01'), SentiSynset('slow.a.02'), SentiSynset('dense.s.04'), SentiSynset('slow.a.04'), SentiSynset('boring.s.01'), SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'), SentiSynset('behind.r.03')]


In [25]:
# Repeat the lookup for 'father' (rebinds `senti_synsets`) and show the
# container type, entry count and entries, one item per line.
senti_synsets = [*swn.senti_synsets('father')]
print(type(senti_synsets), len(senti_synsets), senti_synsets, sep='\n')

<class 'list'>
9
[SentiSynset('father.n.01'), SentiSynset('forefather.n.01'), SentiSynset('father.n.03'), SentiSynset('church_father.n.01'), SentiSynset('father.n.05'), SentiSynset('father.n.06'), SentiSynset('founder.n.02'), SentiSynset('don.n.03'), SentiSynset('beget.v.01')]


In [26]:
# Positive, negative and objectivity (neutral) sentiment scores for 'father'.

father = swn.senti_synset('father.n.01')
father.pos_score(), father.neg_score(), father.obj_score()

(0.0, 0.0, 1.0)

In [27]:
# Positive, negative and objectivity (neutral) sentiment scores for 'mother'.

mother = swn.senti_synset('mother.n.01')
mother.pos_score(), mother.neg_score(), mother.obj_score()

(0.0, 0.0, 1.0)

In [29]:
# Positive, negative and objectivity (neutral) sentiment scores for 'fabulous'.

fabulous = swn.senti_synset('fabulous.a.01')
fabulous.pos_score(), fabulous.neg_score(), fabulous.obj_score()

(0.875, 0.125, 0.0)

In [31]:
# Positive, negative and objectivity (neutral) sentiment scores for 'precisely'.

precisely = swn.senti_synset('precisely.r.01')
precisely.pos_score(), precisely.neg_score(), precisely.obj_score()

(0.125, 0.0, 0.875)

In [32]:
# Positive, negative and objectivity (neutral) sentiment scores for 'like'.

like = swn.senti_synset('like.v.01')
like.pos_score(), like.neg_score(), like.obj_score()

(0.125, 0.0, 0.875)

In [33]:
# WordNet's part-of-speech constants for noun, adjective, adverb and verb.
wordnet.NOUN, wordnet.ADJ, wordnet.ADV, wordnet.VERB

('n', 'a', 'r', 'v')

* 감성지수 계산

In [34]:
from nltk import word_tokenize, pos_tag

# Split the sample sentence into word and punctuation tokens.
sentence = "It's good to see you again."
word_list = word_tokenize(sentence)
word_list

['It', "'s", 'good', 'to', 'see', 'you', 'again', '.']

In [35]:
# Tag each token with its part of speech; the result is a list of (token, tag) pairs.
pos_tag(word_list)

[('It', 'PRP'),
 ("'s", 'VBZ'),
 ('good', 'JJ'),
 ('to', 'TO'),
 ('see', 'VB'),
 ('you', 'PRP'),
 ('again', 'RB'),
 ('.', '.')]