# Named Entity Recognition with StanfordNERTagger

Named Entity Recognition (NER) is the process of locating named entities in a text and classifying them into pre-defined categories, such as the names of persons, organizations, and locations.

In [11]:
import nltk
from nltk.tag import StanfordNERTagger
from nltk.tokenize import word_tokenize

# Locations of the Stanford NER classifier model and jar handed to the tagger.
# NOTE(review): these are absolute paths into /home/jovyan/work; adjust them
# when the notebook is run in a different environment.
STANFORD_NER_MODEL = '/home/jovyan/work/english.conll.4class.distsim.crf.ser.gz'
STANFORD_NER_JAR = '/home/jovyan/work/stanford-ner.jar'

# word_tokenize relies on NLTK's 'punkt' tokenizer data. Fetch it only when it
# is not installed yet, so this cell also runs on a fresh environment instead
# of failing with a LookupError.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

st = StanfordNERTagger(STANFORD_NER_MODEL, STANFORD_NER_JAR, encoding='utf-8')

text = 'While in France, Christine Lagarde discussed short-term stimulus efforts in a recent interview with the Wall Street Journal.'

# Split the sentence into tokens, then tag every token with its entity class
# ('O' marks tokens that are not part of a named entity).
tokenized_text = word_tokenize(text)
classified_text = st.tag(tokenized_text)

print(classified_text)

[('While', 'O'), ('in', 'O'), ('France', 'LOCATION'), (',', 'O'), ('Christine', 'PERSON'), ('Lagarde', 'PERSON'), ('discussed', 'O'), ('short-term', 'O'), ('stimulus', 'O'), ('efforts', 'O'), ('in', 'O'), ('a', 'O'), ('recent', 'O'), ('interview', 'O'), ('with', 'O'), ('the', 'O'), ('Wall', 'ORGANIZATION'), ('Street', 'ORGANIZATION'), ('Journal', 'ORGANIZATION'), ('.', 'O')]


# Word2vec

In [12]:
!pip install gensim



In [13]:
import nltk

# Identifier of the NLTK data collection to fetch. 'all-corpora' is a whole
# collection of corpus packages (abc, alpino, brown, ...), so this download
# can take a long time.
# NOTE(review): the cells visible in this notebook only import the `abc`
# corpus; confirm whether the full collection is really needed.
NLTK_COLLECTION = 'all-corpora'
nltk.download(NLTK_COLLECTION)

[nltk_data] Downloading collection 'all-corpora'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to /home/jovyan/nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     /home/jovyan/nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package biocreative_ppi to
[nltk_data]    |     /home/jovyan/nltk_data...
[nltk_data]    |   Package biocreative_ppi is already up-to-date!
[nltk_data]    | Downloading package brown to
[nltk_data]    |     /home/jovyan/nltk_data...
[nltk_data]    |   Package brown is already up-to-date!
[nltk_data]    | Downloading package brown_tei to
[nltk_data]    |     /home/jovyan/nltk_data...
[nltk_data]    |   Package brown_tei is already up-to-date!
[nltk_data]    | Downloading package cess_cat to
[nltk_data]    |     /home/jovyan/nltk_data...
[nltk_data]    |   Package cess_cat is already up-to-date!
[nltk_data]    | Downloading

KeyboardInterrupt: 

In [None]:
# Download the 'punkt' tokenizer data package through NLTK's downloader.
# NOTE(review): this cell has no execution count and sits after the cell that
# calls word_tokenize; presumably it has to run before that cell on a fresh
# environment - confirm and consider moving it up.
nltk.download('punkt')

## Find most similar words to a given word 

In [19]:
import gensim
from nltk.corpus import abc

# Train a Word2Vec model on the sentences of NLTK's `abc` corpus.
# NOTE(review): no seed is set, so the similarity scores presumably vary
# from run to run - confirm if reproducible numbers are required.
model = gensim.models.Word2Vec(abc.sents())

# Collect the vocabulary as a list of words. gensim 4.0 removed
# `KeyedVectors.vocab` in favour of `key_to_index`; support both so the cell
# works with whichever gensim version the unpinned install provides.
if hasattr(model.wv, 'key_to_index'):
    X = list(model.wv.key_to_index)
else:
    X = list(model.wv.vocab)

# Words whose vectors are closest to the vector of 'science'.
data = model.wv.most_similar('science')
print(data)

[('law', 0.9421144723892212), ('policy', 0.9247351288795471), ('agriculture', 0.9243161678314209), ('general', 0.9243013262748718), ('media', 0.9220113158226013), ('practice', 0.9166394472122192), ('discussion', 0.9146825671195984), ('reservoir', 0.9133236408233643), ('Cooper', 0.9095713496208191), ('tight', 0.908125638961792)]


## Find similarity between words 

In [20]:
# Word pairs to compare with the trained Word2Vec model, in display order.
word_pairs = [
    ('science', 'computer'),
    ('science', 'fruit'),
    ('fruit', 'apple'),
    ('banana', 'apple'),
]

# Print one labelled similarity score per pair.
for first_word, second_word in word_pairs:
    label = 'Similarity between {} and {}: '.format(first_word, second_word)
    print(label, model.wv.similarity(first_word, second_word))


Similarity between science and computer:  0.7405981
Similarity between science and fruit:  0.48782697
Similarity between fruit and apple:  0.69224894
Similarity between banana and apple:  0.9594686


# Sentiment Analysis

See the [VADER README](https://github.com/cjhutto/vaderSentiment#about-the-scoring) for more details on the VADER scoring methodology. See also: https://www.csc2.ncsu.edu/faculty/healey/tweet_viz/tweet_app/

In [16]:
!pip install vaderSentiment



In [17]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# Single analyzer instance kept at notebook level; the scoring helper
# `sentiment_analyzer_scores` reads this name when it is called.
analyser = SentimentIntensityAnalyzer()

In [18]:
def sentiment_analyzer_scores(sentence):
    """Print `sentence` together with its VADER polarity scores.

    The sentence is left-aligned and padded with '-' up to a width of 40
    characters, followed by a single space and the scores returned by
    ``analyser.polarity_scores``. Nothing is returned.
    """
    polarity = analyser.polarity_scores(sentence)
    padded_sentence = format(sentence, '-<40')
    print(padded_sentence, polarity)
# Demo: print the polarity scores of one example sentence.
sentiment_analyzer_scores("The phone is super cool.")

The phone is super cool.---------------- {'neg': 0.0, 'neu': 0.326, 'pos': 0.674, 'compound': 0.7351}
