# Sentiment Analysis
This file performs sentiment analysis both on an article as a whole and on the individual sentences that mention the main topic(s) of that article.

Spacy: Used for NLP and has the machine learning module

SpacyTextBlob: Used for the sentiment analysis

Pandas: Stores the data as a dataframe table

Newspaper (the `newspaper` package): Used for web scraping

Requests: Makes the connection to the URL

In [None]:
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
import pandas as pd
import requests
from newspaper import Article
import numpy as np

# Main Pipeline Function

In [1]:
def main_pipeline_sentiment_analysis(urls):
    """Scrape each URL, run article-level sentiment analysis, and build a DataFrame.

    Args:
        urls: Iterable of article URLs to scrape and analyze.

    Returns:
        The value produced by ``dictionaryToDataFrame`` for the collected
        sentiment dictionary, after ``drop_failed_webscraping_rows`` has been
        applied to discard rows whose web scraping failed.
    """
    # One accumulator dictionary per kind of analysis; declare additional
    # ones here when more analyses are added.
    sentiment_results = {}

    for index, url in enumerate(urls):
        # Crude progress indicator: print the index of every tenth URL.
        if index % 10 == 0:
            print(str(index))

        # Scrape the article behind this URL and get its text back.
        article_text = scrapeData(url)

        # Every analysis type gets its own function and dictionary; each one
        # receives the article text, its accumulator, and the URL.
        sentiment_results = sentimentAnalysis(article_text, sentiment_results, url)

    # Every analysis dictionary is converted to a DataFrame the same way.
    frame = dictionaryToDataFrame(sentiment_results)

    # Drop all rows that failed web scraping before handing the data back.
    return drop_failed_webscraping_rows(frame)

# Article Level Sentiment Analysis
This code block performs sentiment analysis on the entire document. The analysis is done with TextBlob, with the help of spaCy.

In [None]:
def _sentiment_label(score):
    """Map a polarity score onto one of the five sentiment labels.

    The thresholds are the ones this file has always used. The two boundary
    values 0.143 and -0.062 are folded into "Positive" and "Negative"
    respectively so that every score receives a label.
    """
    if score >= 0.143:
        return "Positive"
    if score > 0.043:
        return "Neutral Positive"
    if score >= 0.033:
        return "Neutral"
    if score > -0.062:
        return "Neutral Negative"
    return "Negative"


def sentimentAnalysis(text, dictionary, url):
    """Append the article-level sentiment of ``text`` to ``dictionary``.

    Relies on a module-level ``nlp`` callable that is not defined in this
    section (presumably a spaCy pipeline with the spacytextblob component
    added -- confirm where it is created).

    Args:
        text: Article text, or a string starting with ``"PARERROR"`` when
            scraping/parsing of the article failed.
        dictionary: Column-oriented accumulator (column name -> list). An
            empty value is replaced by a freshly initialised accumulator;
            a non-empty one is extended in place.
        url: URL the text was scraped from.

    Returns:
        The accumulator with exactly one new entry in every column. For a
        failed scrape the label column holds the error text itself, both
        scores are 0.0 and both word columns are empty strings.
    """
    if not dictionary:
        dictionary = {
            "URL": [],
            "Sentiment Score": [],
            "Sentiment Label": [],
            "Subjectivity Score": [],
            "Positive Words": [],
            "Negative Words": [],
            "Text": []
            }

    positive_words = set()
    negative_words = set()

    if text.startswith("PARERROR"):
        # Scraping failed: skip the analysis and record the error text as
        # the label together with neutral placeholder scores.
        sent_label = text
        sentiment = 0.0
        subjectivity = 0.0
    else:
        doc = nlp(text)
        blob = doc._.blob

        sentiment = round(blob.polarity, 2)
        subjectivity = round(blob.subjectivity, 2)
        sent_label = _sentiment_label(sentiment)

        # Each assessment starts with (words, polarity, ...); the first word
        # is filed under the sign of its polarity, zero polarity is ignored.
        for assessment in blob.sentiment_assessments.assessments:
            words, polarity = assessment[0], assessment[1]
            if polarity > 0:
                positive_words.add(words[0])
            elif polarity < 0:
                negative_words.add(words[0])

    # All columns are extended together, and only after the analysis has
    # succeeded, so an exception above can never leave the accumulator with
    # columns of different lengths.
    dictionary["URL"].append(url)
    dictionary["Sentiment Label"].append(sent_label)
    dictionary["Sentiment Score"].append(sentiment)
    dictionary["Subjectivity Score"].append(subjectivity)
    dictionary["Text"].append(text)
    # Sorted so the joined word lists are reproducible between runs.
    dictionary["Positive Words"].append(', '.join(sorted(positive_words)))
    dictionary["Negative Words"].append(', '.join(sorted(negative_words)))

    return dictionary

# Topic Level Sentiment
This code block performs sentiment analysis based on the topic word(s) of an article: it scores the sentences that contain the topic word(s).

In [None]:

def create_topic_words_dict(ldamodel):
    """Return ``{"Topic_<i>": [words]}`` for every topic of ``ldamodel``.

    For each topic index below ``ldamodel.num_topics`` the model is asked for
    its top 10 ``(word, score)`` pairs via ``show_topic``; only the words are
    kept, in the order the model returned them.
    """
    topic_words = {}
    for topic_index in range(ldamodel.num_topics):
        top_terms = ldamodel.show_topic(topic_index, topn=10)
        topic_words['Topic_' + str(topic_index)] = [term for term, _weight in top_terms]

    return topic_words

def get_sentences(doc):
    """Return the sentences of ``doc`` exactly as its ``sents`` attribute exposes them.

    NOTE(review): for a spaCy ``Doc`` this is presumably a lazy iterator of
    spans rather than a list -- confirm before indexing it or iterating twice.
    """
    sentences = doc.sents
    return sentences

def sentence_sentiment_from_doc(doc):
    """Return ``[(sentence_text, polarity), ...]`` for every sentence of ``doc``.

    Each sentence's text is run through the module-level ``nlp`` callable on
    its own, and the polarity of the resulting blob is paired with that text.
    """
    sentence_texts = (span.text for span in get_sentences(doc))
    return [
        (sentence_text, nlp(sentence_text)._.blob.polarity)
        for sentence_text in sentence_texts
    ]

def sentence_sentiment_on_topics(doc, topic_list):
    """Return the average sentence sentiment of every topic for one document.

    Args:
        doc: Parsed document handed to ``sentence_sentiment_from_doc``.
        topic_list: Mapping of topic name -> list of topic words.

    Returns:
        Dict of topic name -> mean sentiment of the sentences containing that
        topic's words, or 0 when no sentence contains any of them. A sentence
        contributes once for every topic word it contains (substring match).
    """
    # Score all sentences once; every topic reuses the same list.
    sentence_sentiments = sentence_sentiment_from_doc(doc)
    topic_averages = {}

    for topic, words in topic_list.items():
        # The score list must start empty for each topic. Sharing one list
        # across topics made every average include the scores collected for
        # all previously processed topics.
        topic_scores = []
        for word in words:
            for sentence, sentiment in sentence_sentiments:
                if word in sentence:
                    topic_scores.append(sentiment)

        if topic_scores:
            topic_averages[topic] = sum(topic_scores) / len(topic_scores)
        else:
            topic_averages[topic] = 0

    return topic_averages

def topic_sentence_sentiment_analysis(df, LDA_model, corpus):
    """Compute per-topic average sentence sentiment for every article in ``df``.

    Args:
        df: Table with ``"URL"`` and ``"Text"`` columns, one row per article.
            Presumably already cleaned of failed scrapes -- confirm with the
            caller.
        LDA_model: Topic model passed to ``create_topic_words_dict``.
        corpus: Not used by this function; kept so existing callers keep
            working.

    Returns:
        Dict of URL -> {topic name: average sentiment} as produced by
        ``sentence_sentiment_on_topics``. If a URL occurs more than once the
        last occurrence wins.
    """
    # Nothing to analyze: return early so the topic model is not queried.
    if len(df["URL"]) == 0:
        return {}

    # The topic words depend only on the model, so build them once instead
    # of once per article.
    topic_words = create_topic_words_dict(LDA_model)

    topic_sentiments = {}
    for url, page_text in zip(df["URL"], df["Text"]):
        article_doc = nlp(page_text)  # parse the article text into a doc object
        topic_sentiments[url] = sentence_sentiment_on_topics(article_doc, topic_words)

    return topic_sentiments

In [None]:
def word_sentiment_per_doc(word, text):
    """Return the average sentiment of the sentences in ``text`` that contain ``word``.

    ``text`` is parsed with the module-level ``nlp`` callable and scored
    sentence by sentence. A sentence counts when ``word`` occurs in it as a
    substring. Returns ``None`` when no sentence contains the word.
    """
    scored_sentences = sentence_sentiment_from_doc(nlp(text))

    running_total = 0
    hits = 0
    for sentence_text, polarity in scored_sentences:
        if word not in sentence_text:
            continue
        running_total += polarity
        hits += 1

    # No sentence mentions the word: there is nothing to average.
    if not hits:
        return None

    return running_total / hits

def topic_sentiment_per_doc(topics, text):
    """Return a relevance-weighted sentiment for each topic of one document.

    Args:
        topics: Iterable of ``(topic_id, words)`` pairs, where ``words`` is an
            iterable of ``(word, score)`` pairs and ``score`` is the word's
            relevance within the topic.
        text: The document text.

    Returns:
        List of ``(topic_id, weighted_sentiment)`` tuples in input order.
        ``weighted_sentiment`` is the sum of ``score * word sentiment`` over
        the topic's words; words that ``word_sentiment_per_doc`` reports as
        absent from the text (``None``) contribute nothing.
    """
    topic_sentiments = []

    for topic_id, topic in topics:
        weighted_topic_sentiment = 0
        for word, score in topic:
            # NOTE: word_sentiment_per_doc parses `text` again on every call.
            word_sentiment = word_sentiment_per_doc(word, text)
            if word_sentiment is not None:
                # Weight the word's sentiment by its relevance to the topic.
                weighted_topic_sentiment += score * word_sentiment
        topic_sentiments.append((topic_id, weighted_topic_sentiment))

    return topic_sentiments