# Sentiment Analysis Assessment - Solution

## Task #1: Perform vector arithmetic on your own words
Write code that evaluates vector arithmetic on your own set of related words. The goal is to come as close to an expected word as possible.

In [1]:
# Import spaCy and load the language library. Remember to use a larger model!
# The resulting `nlp` object is shared by the cells below, which read word
# vectors from `nlp.vocab`.
import spacy
nlp = spacy.load('en_core_web_lg')

In [3]:
# Look up the vocabulary vectors for the three words used in the arithmetic below
actor, film, hospital = (
    nlp.vocab[term].vector for term in ('actor', 'film', 'hospital')
)

In [4]:
# Import SciPy's cosine distance; cosine similarity is computed below as 1 - cosine(u, v)
from scipy.spatial.distance import cosine


In [5]:
# Vector arithmetic of the form: new_vector = word1 - word2 + word3
# Here: start from "actor", subtract "film", then add "hospital".
new_vec = (actor - film) + hospital

In [6]:
# List the top ten closest vectors in the vocabulary to the result of the expression above.
# Only lexemes that have a vector and are lowercase and alphabetic are scored;
# the score is cosine similarity, i.e. 1 - cosine distance to new_vec.
similar = [
    (lexeme, 1 - cosine(new_vec, lexeme.vector))
    for lexeme in (nlp.vocab[key] for key in nlp.vocab.vectors)
    if lexeme.has_vector and lexeme.is_lower and lexeme.is_alpha
]

# Negating the score puts the highest similarity first.
similar.sort(key=lambda pair: -pair[1])
print([lexeme.text for lexeme, _ in similar[:10]])

['hospital', 'prehospital', 'hospitalisé', 'hospitalisés', 'hospitalier', 'hospitalisée', 'hospita', 'hospitalisées', 'hospitalist', 'nisi']


#### CHALLENGE: Write a function that takes in 3 strings, performs a-b+c arithmetic, and returns a top-ten result

In [11]:
def vector_math(a, b, c, topn=10):
    """Return the vocabulary words closest to ``vector(a) - vector(b) + vector(c)``.

    Only lexemes that have a vector and are lowercase and alphabetic are
    considered. Candidates are ranked by cosine similarity
    (``1 - cosine distance``) to the resulting vector, best match first.

    Args:
        a, b, c: Words to look up in ``nlp.vocab``.
        topn: How many of the best-ranked words to return. Expected to be a
            non-negative integer; defaults to 10.

    Returns:
        list[str]: Text of the ``topn`` highest-scoring lexemes. The input
        words themselves are not excluded, so they may appear in the result.
    """
    vocab = nlp.vocab
    target = vocab[a].vector - vocab[b].vector + vocab[c].vector

    scored = []
    for key in vocab.vectors:
        lexeme = vocab[key]
        if lexeme.has_vector and lexeme.is_lower and lexeme.is_alpha:
            scored.append((lexeme, 1 - cosine(target, lexeme.vector)))

    # Negating the score sorts by descending similarity.
    scored.sort(key=lambda pair: -pair[1])
    return [lexeme.text for lexeme, _ in scored[:topn]]
    
    
    
    
    
    
    
    
    
    

In [12]:
# Try the function on the classic analogy: king - man + woman
vector_math(a='king', b='man', c='woman')

['king',
 'kings',
 'princes',
 'consort',
 'princeling',
 'monarch',
 'princelings',
 'princesses',
 'prince',
 'kingship']

## Task #2: Perform VADER Sentiment Analysis on your own review
Write code that returns a set of SentimentIntensityAnalyzer polarity scores based on your own written review.

In [13]:
# Import SentimentIntensityAnalyzer and create an sid object
# `sid` is reused by the cells below to obtain polarity scores for review text.
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()


In [14]:
# The review text as a single string, split across adjacent literals
# (one per sentence) that Python joins into one continuous value.
review = (
    'It is one of the most followed comedy movies of the Bollywood industry. '
    'The plot is simple but great. '
    'People still enjoys it and laugh at the jokes after so many years.'
)

In [15]:
# Obtain the sid scores for your review
# The result is a dict with 'neg', 'neu', 'pos' and 'compound' entries (see the output below).
sid.polarity_scores(review)

{'neg': 0.0, 'neu': 0.582, 'pos': 0.418, 'compound': 0.9656}

### CHALLENGE: Write a function that takes in a review and returns a label of "Positive", "Negative" or "Neutral"

In [16]:
def review_rating(string, threshold=0.0):
    """Classify a review as 'Positive', 'Negative' or 'Neutral'.

    The label is derived from the 'compound' value of
    ``sid.polarity_scores(string)``.

    Args:
        string: Review text to score.
        threshold: Non-negative cut-off around zero. Compound scores above
            ``threshold`` are 'Positive', scores below ``-threshold`` are
            'Negative', and everything in between is 'Neutral'. The default
            of 0.0 labels every non-zero score as positive or negative.

    Returns:
        str: 'Positive', 'Negative' or 'Neutral'.
    """
    # Score the text once and reuse the value for both comparisons.
    compound = sid.polarity_scores(string).get('compound')
    if compound > threshold:
        return 'Positive'
    if compound < -threshold:
        return 'Negative'
    return 'Neutral'
    
    

In [17]:
# Classify the review written above
review_rating(string=review)

'Positive'