In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
%matplotlib inline

In [None]:
# Load the full text corpus from the local CSV file.
df = pd.read_csv(filepath_or_buffer=r'./shakes_text.csv')

In [None]:
# Keep only the rows that actually carry text in 'PlayerLine'.
# `.copy()` detaches the result from `df`, so later column assignments on
# `df_new` act on an independent frame instead of a slice of the original.
df_new = df[df['PlayerLine'].notnull()].copy()
# `fillna` returns a new frame; unless the result is bound the call has no effect.
df_new = df_new.fillna(0)
df_new.head()

In [None]:
# Load the text of the history plays from the local CSV file.
df1 = pd.read_csv(filepath_or_buffer=r'./hist_text.csv')

In [None]:
# Load the text of the comedy plays from the local CSV file.
df2 = pd.read_csv(filepath_or_buffer=r'./comedy_text.csv')

In [None]:
# Load the text of the tragedy plays from the local CSV file.
df3 = pd.read_csv(filepath_or_buffer=r'./tragedy_text.csv')

**Sentiment Analysis: Complete Theatrical Text**

In [None]:
# List the column labels of the filtered frame.
df_new.columns

In [None]:
# Show the dtype pandas assigned to each column of the filtered frame.
df_new.dtypes

In [None]:
# Build a normalised copy of the 'PlayerLine' text (stage directions and dialogue).
#
# Every pandas string method returns a NEW Series, so the steps are chained and
# the final result is bound; calling them one by one without assignment would
# discard each result. `regex=True` is passed explicitly because the default of
# `Series.str.replace` differs between pandas versions, and the patterns are raw
# strings so that `\d`, `\w` and `\s` are not treated as string escapes.
#
# `df_new` itself is left untouched, so the per-play cells further down keep
# working on the original casing and punctuation.
#
# NOTE: this rebinds `df` (previously the raw DataFrame) to a Series, because
# the following cells inspect `df`.
# Punctuation removal: https://stackoverflow.com/questions/39782418/remove-punctuations-in-pandas
df = (
    df_new['PlayerLine']
    .str.lower()                               # lowercase all text
    .str.split()                               # split on any whitespace ...
    .str.join(" ")                             # ... and re-join with single spaces
    .str.replace(r"\d+", "", regex=True)       # drop digits
    .str.replace(r"[^\w\s]", "", regex=True)   # drop punctuation / symbols
    .str.replace(r"[︰-＠]", "", regex=True)     # drop characters in the range ︰ .. ＠
)
df.head()


In [None]:
# Display the dimensions and dtype information of `df` together as a tuple.
df.shape, df.dtypes

In [None]:
# `Series.to_frame()` returns a new DataFrame instead of converting in place,
# so inspect the dtypes of the returned frame directly. `df` is not rebound,
# which keeps this cell safe to re-run.
df.to_frame().dtypes

**Sentiment Analysis: Macbeth**

In [None]:
# Collect the text of Macbeth as one string for sentiment scoring.
# Only the 'PlayerLine' column is joined: `DataFrame.to_string()` would also
# feed row indices, column headers, the repeated play title and the speaker
# names to the analyzer. Building the string straight from `df_new` also
# keeps the cell safe to re-run.
macbeth = " ".join(
    df_new.loc[df_new['Play'].isin(['macbeth']), 'PlayerLine'].astype(str)
)

In [None]:
# function to print sentiments (https://www.geeksforgeeks.org/python-sentiment-analysis-using-vader/)

def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined again, identically, in later cells of
    the notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `macbeth` text.
sentiment_scores(macbeth)

**Sentiment Analysis: Much Ado About Nothing**

In [None]:
# Collect the text of Much Ado About Nothing as one string for sentiment scoring.
# Only the 'PlayerLine' column is joined: `DataFrame.to_string()` would also
# feed row indices, column headers, the repeated play title and the speaker
# names to the analyzer. Building the string straight from `df_new` also
# keeps the cell safe to re-run.
much = " ".join(
    df_new.loc[df_new['Play'].isin(['Much Ado about nothing']), 'PlayerLine'].astype(str)
)

In [None]:
# function to print sentiments (https://www.geeksforgeeks.org/python-sentiment-analysis-using-vader/)

def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined identically in several cells of the
    notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `much` text.
sentiment_scores(much)

In [None]:
# Related work: a TextBlob-based sentiment analysis of Hamlet:
# https://github.com/cyschneck/Billy-Bot
# Background on choosing a sentiment-analysis library: https://www.iflexion.com/blog/sentiment-analysis-python

**Sentiment Analysis: The Winter's Tale**

In [None]:
# Collect the text of the play labelled 'A Winters Tale' as one string for
# sentiment scoring.
# Only the 'PlayerLine' column is joined: `DataFrame.to_string()` would also
# feed row indices, column headers, the repeated play title and the speaker
# names to the analyzer. Building the string straight from `df_new` also
# keeps the cell safe to re-run.
tale = " ".join(
    df_new.loc[df_new['Play'].isin(['A Winters Tale']), 'PlayerLine'].astype(str)
)

In [None]:
# function to print sentiments (https://www.geeksforgeeks.org/python-sentiment-analysis-using-vader/)

def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined identically in several cells of the
    notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `tale` text.
sentiment_scores(tale)

**Sentiment Analysis: Henry IV**

In [None]:
# Collect the text of Henry IV as one string for sentiment scoring.
# Only the 'PlayerLine' column is joined: `DataFrame.to_string()` would also
# feed row indices, column headers, the repeated play title and the speaker
# names to the analyzer. Building the string straight from `df_new` also
# keeps the cell safe to re-run.
henry = " ".join(
    df_new.loc[df_new['Play'].isin(['Henry IV']), 'PlayerLine'].astype(str)
)

In [None]:
def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined identically in several cells of the
    notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `henry` text.
sentiment_scores(henry)

**Sentiment Analysis: Merry Wives of Windsor**

In [None]:
# Collect the text of Merry Wives of Windsor as one string for sentiment scoring.
# Only the 'PlayerLine' column is joined: `DataFrame.to_string()` would also
# feed row indices, column headers, the repeated play title and the speaker
# names to the analyzer. Building the string straight from `df_new` also
# keeps the cell safe to re-run.
merry = " ".join(
    df_new.loc[df_new['Play'].isin(['Merry Wives of Windsor']), 'PlayerLine'].astype(str)
)

In [None]:
def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined identically in several cells of the
    notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `merry` text.
sentiment_scores(merry)

**Sentiment Analysis: Falstaff**

In [None]:
# Plays in which Falstaff appears: 1 Henry IV, 2 Henry IV, and The Merry Wives of Windsor

In [None]:
# Background note on Falstaff. As a bare string expression it is echoed as the
# cell's output; a markdown cell would be the more conventional home for it.
'''Falstaff, generally held to be Shakespeare's greatest comic character, 
appears in three plays: 1 Henry IV, 2 Henry IV, and The Merry Wives of 
Windsor.'''

In [None]:
# Falstaff's rows in Henry IV. The comparison is exact and case-sensitive, so
# the speaker is spelled 'FALSTAFF', matching the other Falstaff filters in
# this notebook.
Falstaff1 = df_new.loc[(df_new['Play'] == 'Henry IV') & (df_new['Player'] == 'FALSTAFF')]
Falstaff1

In [None]:
# Every row spoken by Falstaff, in any play. The comparison is exact and
# case-sensitive, so the speaker is spelled 'FALSTAFF', matching the other
# Falstaff filters in this notebook.
df_new.loc[(df_new['Player'] == 'FALSTAFF')]

In [None]:
# Alphabetical list of distinct speaker labels. Missing values are dropped and
# the rest cast to `str` first: `sorted()` raises TypeError when it has to
# compare strings with non-string placeholders (NaN or a numeric fill value).
sorted(df_new["Player"].dropna().astype(str).unique())

In [None]:
# Rows of Henry IV spoken by FALSTAFF.
Falstaff1 = df_new.loc[df_new['Play'].eq('Henry IV') & df_new['Player'].eq('FALSTAFF')]

In [None]:
# Turn the filtered rows into one text string for sentiment scoring.
# Only the 'PlayerLine' column is joined: `DataFrame.to_string()` would also
# feed row indices, column headers, the play title and the speaker name to
# the analyzer.
# NOTE: this rebinds `Falstaff1` from a DataFrame to a str, so the filter cell
# has to be re-run before this cell can be run again.
Falstaff1 = " ".join(Falstaff1['PlayerLine'].astype(str))

In [None]:
def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined identically in several cells of the
    notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `Falstaff1` text.
sentiment_scores(Falstaff1)

In [None]:
# Rows of Merry Wives of Windsor spoken by FALSTAFF.
Falstaff2 = df_new.loc[df_new['Play'].eq('Merry Wives of Windsor') & df_new['Player'].eq('FALSTAFF')]

In [None]:
# Turn the filtered rows into one text string for sentiment scoring.
# Only the 'PlayerLine' column is joined: `DataFrame.to_string()` would also
# feed row indices, column headers, the play title and the speaker name to
# the analyzer.
# NOTE: this rebinds `Falstaff2` from a DataFrame to a str, so the filter cell
# has to be re-run before this cell can be run again.
Falstaff2 = " ".join(Falstaff2['PlayerLine'].astype(str))

In [None]:
def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined identically in several cells of the
    notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `Falstaff2` text.
sentiment_scores(Falstaff2)

In [4]:
# Load the text of the comedy plays from the local CSV file.
df_comedy = pd.read_csv(filepath_or_buffer=r'./comedy_text.csv')

In [5]:
# Render the whole comedy frame as a single string for sentiment scoring.
total_com = df_comedy.to_string()

In [None]:
def sentiment_scores(text):
    """Print a VADER sentiment report for ``text``.

    The report shows the raw score dictionary, the negative / neutral /
    positive shares as percentages, and an overall label derived from the
    compound score (>= 0.05 positive, <= -0.05 negative, otherwise neutral).
    Nothing is returned.

    NOTE: this function is defined identically in several cells of the
    notebook; a single definition would be enough.
    """
    scores = SentimentIntensityAnalyzer().polarity_scores(text)
    print("Overall sentiment is : ", scores)
    for key, suffix in (("neg", "% Negative"), ("neu", "% Neutral"), ("pos", "% Positive")):
        print("Text was rated as ", scores[key]*100, suffix)
    print("Text Overall Rated As", end = " ")

    # Map the compound score onto the three-way label.
    compound = scores['compound']
    if compound >= 0.05:
        verdict = "Positive"
    elif compound <= -0.05:
        verdict = "Negative"
    else:
        verdict = "Neutral"
    print(verdict)

In [None]:
# Print the sentiment report for the `total_com` text.
sentiment_scores(total_com)

In [None]:
# Concatenate every 'PlayerLine' in the corpus into one text string.
# Only that column is joined: `DataFrame.to_string()` would also include row
# indices, column headers, play titles and speaker names in the text.
total = " ".join(df_new['PlayerLine'].astype(str))