# In[52]:
import spacy
from spacytextblob.spacytextblob import SpacyTextBlob
import pandas as pd
import requests
from newspaper import Article
import numpy as np

# In[53]:
def _sentiment_label(score):
    """Map a polarity score to one of five coarse sentiment labels.

    The bands are contiguous, so every score gets a label.  The two
    boundary values 0.143 and -0.062 are assigned to "Positive" and
    "Negative" respectively.
    """
    if score >= 0.143:
        return "Positive"
    if score > 0.043:
        return "Neutral Positive"
    if score >= 0.033:
        return "Neutral"
    if score > -0.062:
        return "Neutral Negative"
    return "Negative"


def sentimentAnalysis(text, dictionary, url):
    """Append one row of sentiment results for ``text`` to ``dictionary``.

    ``dictionary`` is column-oriented: every key maps to a list and each
    call appends exactly one value to every list.

    Args:
        text: The text to analyse.  A string starting with "PARERROR" is
            treated as a failure marker: it is stored as both the label
            and the text, with zero scores and empty word lists.
        dictionary: The accumulator returned by a previous call.  ``None``
            or an empty mapping starts a fresh accumulator.
        url: Stored in the "URL" column for this row.

    Returns:
        The accumulator with the new row appended (the same object that was
        passed in, unless a fresh one had to be created).

    Relies on a module-level ``nlp`` pipeline whose documents expose
    ``doc._.blob`` (defined elsewhere in the file).
    """
    if dictionary is None or len(dictionary) == 0:
        dictionary = {
            "URL": [],
            "Sentiment Score": [],
            "Sentiment Label": [],
            "Subjectivity Score": [],
            "Positive Words": [],
            "Negative Words": [],
            "Text": [],
        }

    if text.startswith("PARERROR"):
        row = {
            "URL": url,
            "Sentiment Score": 0.0,
            "Sentiment Label": text,
            "Subjectivity Score": 0.0,
            "Positive Words": "",
            "Negative Words": "",
            "Text": text,
        }
    else:
        # Compute the whole row before touching the accumulator so that an
        # exception in the pipeline cannot leave columns of unequal length.
        blob = nlp(text)._.blob
        sentiment = round(blob.polarity, 2)
        subjectivity = round(blob.subjectivity, 2)

        positive_words = []
        negative_words = []
        # Each assessment is indexed as (tokens, polarity, ...); only the
        # first token of an assessment is recorded.
        for assessment in blob.sentiment_assessments.assessments:
            if assessment[1] > 0:
                positive_words.append(assessment[0][0])
            elif assessment[1] < 0:
                negative_words.append(assessment[0][0])

        row = {
            "URL": url,
            "Sentiment Score": sentiment,
            "Sentiment Label": _sentiment_label(sentiment),
            "Subjectivity Score": subjectivity,
            # dict.fromkeys removes duplicates while keeping first-seen
            # order, so the joined string is the same on every run.
            "Positive Words": ", ".join(dict.fromkeys(positive_words)),
            "Negative Words": ", ".join(dict.fromkeys(negative_words)),
            "Text": text,
        }

    for column, value in row.items():
        dictionary[column].append(value)

    return dictionary

# Topic Level Sentiment

# In[54]:
# def get_sentences(doc):
#     return doc.sents

# def get_sentence_list(doc):
#     return [sent for sent in doc.sents]

# #Takes a doc object from spacy and returns a tuple list of form (sentence, sentiment of sentence) for all sentences
# def sentence_sentiment_from_doc(doc):
#     sentences = get_sentences(doc)
#     tuple_list = []
#     for sentence in sentences:
#         sent_doc = nlp(sentence.text)
#         tuple_list.append((sentence.text,sent_doc._.blob.polarity))
#     return tuple_list

# #takes in a single topic word, the word's weight, and the doc, and returns sentiment of that word within the doc
# def sentence_level_sentiment_of_word(word, weight, doc):
#     sentence_sentiment_list = sentence_sentiment_from_doc(doc)
#     sentiment_total = 0
    
#     for (sentence, sentiment) in sentence_sentiment_list:
#              if sentence.find(word) != -1:
#                     #print(name,sentence)
#                     sentiment_total += sentiment
#     return sentiment_total

# def topic_level_sentiment(ldamodel):
#     my_dict = {'Topic_' + str(i): [token for token, score in ldamodel.show_topic(i, topn=10)] for i in range(0, ldamodel.num_topics)}
    
#     return my_dict

def create_topic_words_dict(ldamodel, topn=10):
    """Build a ``{"Topic_<i>": [words]}`` mapping for every topic of a model.

    Args:
        ldamodel: Object exposing ``num_topics`` and
            ``show_topic(topic_id, topn=...)``; the latter must yield
            ``(token, score)`` pairs.
        topn: Number of entries requested from ``show_topic`` per topic.
            Defaults to 10.

    Returns:
        A dict keyed ``"Topic_0"`` .. ``"Topic_<num_topics - 1>"`` whose
        values are the tokens in the order ``show_topic`` returned them
        (the scores are dropped).
    """
    return {
        f"Topic_{topic_id}": [
            token for token, _ in ldamodel.show_topic(topic_id, topn=topn)
        ]
        for topic_id in range(ldamodel.num_topics)
    }

def get_sentences(doc):
    """Return ``doc.sents`` exactly as the document exposes it.

    No copy or list is built here; callers receive whatever object the
    ``sents`` attribute holds (NOTE(review): presumably a one-shot iterator
    of sentence spans for a spaCy document -- confirm before iterating it
    more than once).
    """
    sentences = doc.sents
    return sentences

def sentence_sentiment_from_doc(doc):
    """Pair every sentence of ``doc`` with its own polarity score.

    Each sentence's text is run through the module-level ``nlp`` pipeline
    on its own, so the polarity reflects that sentence alone rather than
    the surrounding document.

    Returns:
        A list of ``(sentence_text, polarity)`` tuples in document order.
    """
    return [
        (span.text, nlp(span.text)._.blob.polarity)
        for span in get_sentences(doc)
    ]

def _topic_id_from_key(key):
    """Return the integer formed by the trailing digits of a topic key.

    ``"Topic_3"`` -> 3 and ``"Topic_12"`` -> 12.  Raises ``ValueError`` when
    the key does not end in a digit.
    """
    digits = key[len(key.rstrip("0123456789")):]
    return int(digits)


def sentence_sentiment_on_topics(doc, topic_list, df_topics):
    """Average sentence-level sentiment per topic for a single document.

    Only topics whose id appears in ``df_topics`` are scored.  A sentence
    counts towards a topic once for every topic word it contains; matching
    is a case-sensitive substring test.

    Args:
        doc: Document passed to ``sentence_sentiment_from_doc``.
        topic_list: Mapping of topic key (ending in the topic id, e.g.
            ``"Topic_3"``) to that topic's words.
        df_topics: Topics attached to this document; the first item of each
            entry is compared with the topic id (presumably
            ``(topic_id, weight)`` pairs -- verify against the caller).

    Returns:
        ``{topic_key: average_sentiment}`` for the matching topics, with
        ``0`` for a topic none of whose words occur in any sentence.
    """
    sentence_sentiments = sentence_sentiment_from_doc(doc)
    article_topic_ids = [topic[0] for topic in df_topics]
    topic_scores = {}
    if not article_topic_ids:
        return topic_scores

    for key, words in topic_list.items():
        # Parse the whole numeric suffix: reading only the last character
        # would make "Topic_10" look like topic 0.
        if _topic_id_from_key(key) not in article_topic_ids:
            continue

        # Start a fresh list per topic so one topic's sentences do not leak
        # into the averages of the topics processed after it.
        matched_scores = []
        for word in words:
            for sentence, sentiment in sentence_sentiments:
                if word in sentence:
                    matched_scores.append(sentiment)

        if matched_scores:
            topic_scores[key] = sum(matched_scores) / len(matched_scores)
        else:
            topic_scores[key] = 0

    return topic_scores

def topic_sentence_sentiment_analysis(df, LDA_model, corpus):
    """Compute per-topic sentence sentiment for every article in ``df``.

    Args:
        df: Table with "URL", "Text" and "Topics" columns, one article per
            row.
        LDA_model: Model handed to ``create_topic_words_dict`` to obtain the
            words of each topic.
        corpus: Unused; kept so existing callers keep working.

    Returns:
        ``{url: {topic_key: average_sentiment}}``.  When a URL occurs more
        than once, the last row wins.
    """
    topic_sentiment_by_url = {}
    if len(df["URL"]) == 0:
        # Nothing to analyse, so skip querying the model altogether.
        return topic_sentiment_by_url

    # The topic -> words mapping depends only on the model, so build it once
    # instead of once per article.
    topic_words = create_topic_words_dict(LDA_model)

    for url, page_text, article_topics in zip(df["URL"], df["Text"], df["Topics"]):
        article_doc = nlp(page_text)
        topic_sentiment_by_url[url] = sentence_sentiment_on_topics(
            article_doc, topic_words, article_topics
        )

    return topic_sentiment_by_url