In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 

plt.style.use('ggplot')

import nltk
nltk.download('punkt')
nltk.download('vader_lexicon')

from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/frankhigley/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/frankhigley/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


initialize sentimentIntensityAnalyser()

In [45]:
sia = SentimentIntensityAnalyzer()

Sentiment analysis function:
- uses tqdm for progress bar
- iterated through all data and if the data is string perform sentiment analysis
- the based of the compounded number the comment is either positive negaive or neutral
- return the sentiment array for each file this array consists of ones, zeros, and negative ones

In [62]:
def sentiment_analysis(data, sia):
   #results
    sent = []

    # run sentiment analysis
    for i, row in tqdm(data.iterrows(), total=len(data)):
        comment = row['Comments']

        #check if comment is a string
        if isinstance(comment, str):
            res = sia.polarity_scores(comment)
            compound = res.get('compound', None)
            if compound is not None:
                if compound > 0:
                    sentiment = 1
                elif compound < 0:
                    sentiment = -1
                else:
                    sentiment = 0

                sent.append(sentiment)    

    return sent

In [63]:
def get_sent_totals(sent):
    pos = 0
    neg = 0
    neu = 0

    for i in sent:
        if i == 1:
            pos += 1
        if i == -1:
            neg += 1
        else:
            neu +=1

    return pos, neu, neg

Runs sentiment analysis on a given file path returns the number of positive, negative, and neutral comments.

In [64]:
def run_sentiment_analysis(path):
    data = pd.read_csv(path)
    sentiment = sentiment_analysis(data, sia)
    numpos, numneu, numneg = get_sent_totals(sentiment)

    return numpos, numneu, numneg


Create data file array

In [65]:
csv_files = ['../data/CleanCSV/bcc_i.csv',
            '../data/CleanCSV/ccc_i.csv',
            '../data/CleanCSV/gcc_i.csv',
            '../data/CleanCSV/mcc_i.csv',
            '../data/CleanCSV/rcc_i.csv',
            '../data/CleanCSV/vcc_i.csv']

Run sentiment analysis on each dataset

In [68]:
beauty_sent_totals = []
contraversial_sent_totals = []
gaming_sent_totals = []
music_sent_totals = []
reaction_sent_totals = []
vlog_sent_totals =[]

blen = len(pd.read_csv(csv_files[0]))
clen = len(pd.read_csv(csv_files[1]))
glen = len(pd.read_csv(csv_files[2]))
mlen = len(pd.read_csv(csv_files[3]))
rlen = len(pd.read_csv(csv_files[4]))
vlen = len(pd.read_csv(csv_files[5]))

beauty_sent_totals = run_sentiment_analysis(csv_files[0])
contraversial_sent_totals= run_sentiment_analysis(csv_files[1])
gaming_sent_totals = run_sentiment_analysis(csv_files[2])
music_sent_totals = run_sentiment_analysis(csv_files[3])
reaction_sent_totals = run_sentiment_analysis(csv_files[4])
vlog_sent_totals = run_sentiment_analysis(csv_files[5])


  0%|          | 0/3136 [00:00<?, ?it/s]

  0%|          | 0/2235 [00:00<?, ?it/s]

  0%|          | 0/33370 [00:00<?, ?it/s]

  0%|          | 0/33501 [00:00<?, ?it/s]

  0%|          | 0/1581 [00:00<?, ?it/s]

  0%|          | 0/24479 [00:00<?, ?it/s]

Print results

In [69]:
print("Beauty:\n \tPositive:" , beauty_sent_totals[0], "\n\tNeutral:", beauty_sent_totals[1], "\n\tNegative:", beauty_sent_totals[2], "\n\tTotal:", blen)
print("Contraversial:\n \tPositive:" , contraversial_sent_totals[0], "\n\tNeutral:", contraversial_sent_totals[1], "\n\tNegative:", contraversial_sent_totals[2], "\n\tTotal:", clen)
print("Gaming:\n \tPositive:" , gaming_sent_totals[0], "\n\tNeutral:", gaming_sent_totals[1], "\n\tNegative:", gaming_sent_totals[2], "\n\tTotal:", glen)
print("Music:\n \tPositive:" , music_sent_totals[0], "\n\tNeutral:", music_sent_totals[1], "\n\tNegative:", music_sent_totals[2], "\n\tTotal:", mlen)
print("Reaction:\n \tPositive:" , reaction_sent_totals[0], "\n\tNeutral:", reaction_sent_totals[1], "\n\tNegative:", reaction_sent_totals[2], "\n\tTotal:", rlen)
print("Vlog:\n \tPositive:" , vlog_sent_totals[0], "\n\tNeutral:", vlog_sent_totals[1], "\n\tNegative:", vlog_sent_totals[2], "\n\tTotal:", vlen)

Beauty:
 	Positive: 1528 
	Neutral: 2843 
	Negative: 293 
	Total: 3136
Contraversial:
 	Positive: 882 
	Neutral: 1995 
	Negative: 240 
	Total: 2235
Gaming:
 	Positive: 14790 
	Neutral: 31543 
	Negative: 1827 
	Total: 33370
Music:
 	Positive: 5706 
	Neutral: 32670 
	Negative: 818 
	Total: 33501
Reaction:
 	Positive: 351 
	Neutral: 1463 
	Negative: 118 
	Total: 1581
Vlog:
 	Positive: 10775 
	Neutral: 22683 
	Negative: 1796 
	Total: 24479
