In [1]:
import pandas as pd
from nltk.stem import WordNetLemmatizer # change words to root form that are recognized by dictionary (vs. stemmer)
from nltk.tokenize import TreebankWordTokenizer
from nltk.probability import FreqDist
from nltk.corpus import stopwords
import gradio as gr

In [2]:
#Taken from EDHEC Coursera Course
word_list=[]
for sentiment_class in ["Negative","Positive","Uncertainty","Litigious","StrongModal","WeakModal","Constraining"]:
    sentiment_list=pd.read_excel("LM Word List.xlsx",sheet_name=sentiment_class,header=None)
    sentiment_list.columns=["Word"]
    sentiment_list["Word"]=sentiment_list["Word"].str.lower()
    sentiment_list[sentiment_class]=1 #1 if the word exists in the sheet, 0 otherwise
    sentiment_list=sentiment_list.set_index("Word")[sentiment_class] #set the words as the index
    word_list.append(sentiment_list)
word_list=pd.concat(word_list,axis=1,sort=True).fillna(0)

In [3]:
def detect_sentiment(input_text):
    #tokenize
    tokenized_text=TreebankWordTokenizer().tokenize(input_text)
    #lemmatize
    lemmatizer=WordNetLemmatizer()
    lemmatized_text=[lemmatizer.lemmatize(w) for w in tokenized_text]
    #remove stop words, lower string case
    final_words=[w.lower() for w in lemmatized_text if not w in stopwords.words()]
    #get frequency/count
    fdist=FreqDist(final_words)
    #change to dataframe
    df=pd.DataFrame.from_dict(fdist, orient='index')
    df.columns=['freq']
    #get % frequency
    df_percent=df/df.sum()
    
    l=[]
    for sentiment in word_list.columns:
        sentiment_list=word_list[word_list[sentiment]==1].index
        sentiment_freq=df_percent.reindex(sentiment_list).dropna().sum() #use words in dictionary as "index", ignore all other words since no category for those
        l.append(sentiment_freq)
        
    sentiment_table=pd.concat(l,axis=1)
    sentiment_table.columns=word_list.columns
    sentiment_table=(sentiment_table*100).round(2)
    return sentiment_table.T.idxmax().freq +' Sentiment'#returns index of maximum frequency

In [33]:
def detect_sentiment2(input_text):
    #tokenize
    tokenized_text=TreebankWordTokenizer().tokenize(input_text)
    #lemmatize
    lemmatizer=WordNetLemmatizer()
    lemmatized_text=[lemmatizer.lemmatize(w) for w in tokenized_text]
    #remove stop words, lower string case
    final_words=[w.lower() for w in lemmatized_text if not w in stopwords.words()]
    #get frequency/count
    fdist=FreqDist(final_words)
    #change to dataframe
    df=pd.DataFrame.from_dict(fdist, orient='index')
    df.columns=['freq']
    #get % frequency
    df_percent=df/df.sum()
    
    l=[]
    for sentiment in word_list.columns:
        sentiment_list=word_list[word_list[sentiment]==1].index
        sentiment_freq=df_percent.reindex(sentiment_list).dropna().sum() #use words in dictionary as "index", ignore all other words since no category for those
        l.append(sentiment_freq)
        
    sentiment_table=pd.concat(l,axis=1)
    sentiment_table.columns=word_list.columns
    return (sentiment_table*100).round(2)

In [38]:
detect_sentiment("Are you happy or depressed")

'Negative Sentiment'

In [37]:
detect_sentiment2("Are you happy or depressed")

Unnamed: 0,Negative,Positive,Uncertainty,Litigious,StrongModal,WeakModal,Constraining
freq,33.33,33.33,0.0,0.0,0.0,0.0,0.0


In [39]:
sentiment_table=detect_sentiment2("Are you happy or depressed")
sentiment_table

Unnamed: 0,Negative,Positive,Uncertainty,Litigious,StrongModal,WeakModal,Constraining
freq,33.33,33.33,0.0,0.0,0.0,0.0,0.0


In [4]:
gr.Interface(detect_sentiment,'textbox',gr.outputs.Textbox(),title="Basic Sentiment Check",description="The output will produce the main sentiment of the input text. For example, an input text that contains several positive words like \"happy\" or \"great\" will be tagged as positive. However, if it primarily has negative words like \"dangers\" or \"dampened\", it will be tagged as negative. The categories and words used for analyzing the sentiment was taken from Loughran-McDonald Sentiment Word Lists.").launch(share=True)

Running locally at: http://127.0.0.1:7860/
This share link will expire in 24 hours. If you need a permanent link, visit: https://gradio.app/introducing-hosted (NEW!)
Running on External URL: https://36231.gradio.app
Interface loading below...


(<Flask 'gradio.networking'>,
 'http://127.0.0.1:7860/',
 'https://36231.gradio.app')