In [43]:
# Importing Dependencies and Setup
# We don't know what we need, so we'll import a bunch of different modules
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import requests
import json
import time
from scipy.stats import linregress

In [44]:
# Read the three GoEmotions CSV shards, one DataFrame per file.
# Paths are relative to the notebook's working directory.
goOne_df, goTwo_df, goThree_df = map(
    pd.read_csv,
    (
        "Resources/goemotions_1.csv",
        "Resources/goemotions_2.csv",
        "Resources/goemotions_3.csv",
    ),
)

In [45]:
# Stack the three shards row-wise into one DataFrame.
# pd.concat appends rows as-is; an outer merge would instead join on every
# shared column, matching identical rows across files rather than stacking
# them. Exact duplicate rows are therefore retained here -- call
# .drop_duplicates() explicitly if de-duplication is wanted.
dfMerge = pd.concat([goOne_df, goTwo_df, goThree_df], ignore_index=True)

# created_utc is converted as Unix epoch seconds into real timestamps;
# pd.to_datetime with unit='s' states that interpretation explicitly.
dfMerge['created_utc'] = pd.to_datetime(dfMerge['created_utc'], unit='s')
dfMerge

Unnamed: 0,text,id,author,subreddit,link_id,parent_id,created_utc,rater_id,example_very_unclear,admiration,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,eew5j0j,Brdd9,nrl,t3_ajis4z,t1_eew18eq,2019-01-25 01:50:39,1,False,0,...,0,0,0,0,0,0,0,1,0,0
1,>sexuality shouldn’t be a grouping category I...,eemcysk,TheGreen888,unpopularopinion,t3_ai4q37,t3_ai4q37,2019-01-21 15:22:49,37,True,0,...,0,0,0,0,0,0,0,0,0,0
2,"You do right, if you don't care then fuck 'em!",ed2mah1,Labalool,confessions,t3_abru74,t1_ed2m7g7,2019-01-02 11:15:44,37,False,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,eeibobj,MrsRobertshaw,facepalm,t3_ahulml,t3_ahulml,2019-01-20 06:17:34,18,False,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Fa...",eda6yn6,American_Fascist713,starwarsspeculation,t3_ackt2f,t1_eda65q2,2019-01-05 06:10:01,2,False,0,...,0,0,0,0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211220,Everyone likes [NAME].,ee6pagw,Senshado,heroesofthestorm,t3_agjf24,t3_agjf24,2019-01-16 10:25:22,16,False,0,...,1,0,0,0,0,0,0,0,0,0
211221,Well when you’ve imported about a gazillion of...,ef28nod,5inchloser,nottheonion,t3_ak26t3,t3_ak26t3,2019-01-27 01:39:42,15,False,0,...,0,0,0,0,0,0,0,0,0,0
211222,That looks amazing,ee8hse1,springt1me,shittyfoodporn,t3_agrnqb,t3_agrnqb,2019-01-17 00:08:54,70,False,1,...,0,0,0,0,0,0,0,0,0,0
211223,The FDA has plenty to criticize. But like here...,edrhoxh,enamedata,medicine,t3_aejqzd,t1_edrgdtx,2019-01-11 01:07:12,4,False,0,...,0,0,0,0,0,0,0,0,0,0


In [46]:
# Build the "clear" subset in three steps:
#   1) drop the identifier columns,
#   2) keep only rows where example_very_unclear is False,
#   3) drop the example_very_unclear flag, which is constant after step 2.
dfStaging = dfMerge.drop(columns=['id', 'author', 'link_id', 'parent_id'])
dfStagingA = dfStaging[dfStaging['example_very_unclear'].eq(False)]
dfClean = dfStagingA.drop(columns='example_very_unclear')
# dfClean holds only the "clear" examples.
dfClean

Unnamed: 0,text,subreddit,created_utc,rater_id,admiration,amusement,anger,annoyance,approval,caring,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,nrl,2019-01-25 01:50:39,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,"You do right, if you don't care then fuck 'em!",confessions,2019-01-02 11:15:44,37,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,facepalm,2019-01-20 06:17:34,18,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Fa...",starwarsspeculation,2019-01-05 06:10:01,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
5,Right? Considering it’s such an important docu...,TrueReddit,2019-01-23 21:50:08,61,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211219,"Well, I'm glad you're out of all that now. How...",raisedbynarcissists,2019-01-04 15:22:34,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211220,Everyone likes [NAME].,heroesofthestorm,2019-01-16 10:25:22,16,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
211221,Well when you’ve imported about a gazillion of...,nottheonion,2019-01-27 01:39:42,15,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
211222,That looks amazing,shittyfoodporn,2019-01-17 00:08:54,70,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [47]:
# Build the "unclear" subset from the already-trimmed dfStaging frame:
#   1) keep only rows where example_very_unclear is True,
#   2) drop the example_very_unclear flag, which is constant after step 1.
dfStagingB = dfStaging[dfStaging['example_very_unclear'].eq(True)]
dfEVUClean = dfStagingB.drop(columns='example_very_unclear')
# dfEVUClean holds only the "unclear" examples.
dfEVUClean

Unnamed: 0,text,subreddit,created_utc,rater_id,admiration,amusement,anger,annoyance,approval,caring,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
1,>sexuality shouldn’t be a grouping category I...,unpopularopinion,2019-01-21 15:22:49,37,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
62,Oi us cancers don’t want him either! We respec...,Justfuckmyshitup,2019-01-13 22:13:56,22,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
195,I unfortunately can not afford a lawyer. Lucki...,Divorce,2019-01-01 17:28:15,12,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
262,OMG THOSE TINY SHOES! *desire to boop snoot in...,chicago,2019-01-20 05:36:33,61,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
264,I would pay money for [NAME] to not be my nurs...,SoulCalibur,2019-01-17 03:50:29,22,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211037,_____ is ruining _____ because kids these days.,moviescirclejerk,2019-01-10 23:51:59,57,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211082,"yea I agree, and [NAME] is way younger so has ...",Dodgers,2019-01-04 22:09:22,37,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211113,"Huh, assumed he was more of a grindr fella .....",90dayfianceuncensored,2019-01-30 23:05:53,61,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
211214,Well when you’ve imported about a gazillion of...,nottheonion,2019-01-27 01:39:42,61,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [48]:
# Rows flagged as very unclear, taken from the full frame (all columns kept).
dfEVUTrue = dfMerge[dfMerge['example_very_unclear'].eq(True)]

In [49]:
# Work on an independent copy: columns are added to dfEmotions in later cells,
# and a plain assignment would make those writes land on dfClean as well
# (leaving its previously displayed output stale).
dfEmotions = dfClean.copy()

In [50]:
# Derive a coarse 'Sentiment' label for every row from the one-hot emotion columns.
#
# The mapping is ordered: np.select returns the choice for the FIRST condition
# that is true, so a row tagged with several emotions gets the sentiment of the
# emotion listed earliest below. Keeping emotion and sentiment side by side in
# one dict prevents the two lists from drifting out of sync.
emotion_to_sentiment = {
    'amusement': 'Positive',
    'excitement': 'Positive',
    'joy': 'Positive',
    'love': 'Positive',
    'desire': 'Positive',
    'optimism': 'Positive',
    'caring': 'Positive',
    'pride': 'Positive',
    'admiration': 'Positive',
    'gratitude': 'Positive',
    'relief': 'Positive',
    'approval': 'Positive',
    'realization': 'Ambiguous',
    'surprise': 'Ambiguous',
    'curiosity': 'Ambiguous',
    'confusion': 'Ambiguous',
    'fear': 'Negative',
    'nervousness': 'Negative',
    'remorse': 'Negative',
    'embarrassment': 'Negative',
    'disappointment': 'Negative',
    'sadness': 'Negative',
    'grief': 'Negative',
    'disgust': 'Negative',
    'anger': 'Negative',
    'annoyance': 'Negative',
    'disapproval': 'Negative',
    'neutral': 'Neutral',
}

# One boolean Series per emotion column, in priority order.
emo_conditions = [dfEmotions[emotion] == 1 for emotion in emotion_to_sentiment]
sentiments = list(emotion_to_sentiment.values())

# An explicit string default is required: np.select's implicit default is the
# integer 0, which does not share a dtype with the string choices (newer NumPy
# raises a TypeError; older versions silently wrote the string '0').
# Rows with no emotion column set are labelled 'Unlabeled'.
dfEmotions['Sentiment'] = np.select(emo_conditions, sentiments, default='Unlabeled')
dfEmotions.head(5)

Unnamed: 0,text,subreddit,created_utc,rater_id,admiration,amusement,anger,annoyance,approval,caring,...,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral,Sentiment
0,That game hurt.,nrl,2019-01-25 01:50:39,1,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,Negative
2,"You do right, if you don't care then fuck 'em!",confessions,2019-01-02 11:15:44,37,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,Neutral
3,Man I love reddit.,facepalm,2019-01-20 06:17:34,18,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Positive
4,"[NAME] was nowhere near them, he was by the Fa...",starwarsspeculation,2019-01-05 06:10:01,2,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,Neutral
5,Right? Considering it’s such an important docu...,TrueReddit,2019-01-23 21:50:08,61,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Positive


In [51]:
# Add an 'Emotion' column holding the name of the emotion chosen by the rater.
#
# Order matters: np.select returns the choice for the FIRST true condition, so
# a row tagged with several emotions is labelled with the one listed earliest.
emotion_priority = [
    'amusement', 'excitement', 'joy', 'love', 'desire', 'optimism', 'caring',
    'pride', 'admiration', 'gratitude', 'relief', 'approval',
    'realization', 'surprise', 'curiosity', 'confusion',
    'fear', 'nervousness', 'remorse', 'embarrassment', 'disappointment',
    'sadness', 'grief', 'disgust', 'anger', 'annoyance', 'disapproval',
    'neutral',
]

# Conditions: one boolean Series per one-hot emotion column.
emo_map_yin = [dfEmotions[name] == 1 for name in emotion_priority]
# Choices: the display label is the column name with a leading capital.
emo_map_yang = [name.capitalize() for name in emotion_priority]

# An explicit string default is required: np.select's implicit default is the
# integer 0, which does not share a dtype with the string choices (newer NumPy
# raises a TypeError; older versions silently wrote the string '0').
# Rows with no emotion column set are labelled 'Unlabeled'.
dfEmotions['Emotion'] = np.select(emo_map_yin, emo_map_yang, default='Unlabeled')

In [52]:
# The one-hot emotion columns are no longer needed once they have been
# summarised into label columns; drop them into a new frame so dfEmotions
# itself is left untouched.
one_hot_emotion_columns = [
    'amusement', 'excitement', 'joy', 'love', 'desire', 'optimism', 'caring',
    'pride', 'admiration', 'gratitude', 'relief', 'approval',
    'realization', 'surprise', 'curiosity', 'confusion',
    'fear', 'nervousness', 'remorse', 'embarrassment', 'disappointment',
    'sadness', 'grief', 'disgust', 'anger', 'annoyance', 'disapproval',
    'neutral',
]
dfEClean = dfEmotions.drop(columns=one_hot_emotion_columns)

In [53]:
# Load the subreddit category lookup table.
# The path is relative to the notebook's working directory -- check it if the read fails.
SubredditCatDF = pd.read_csv("Resources/CountsSubredditCat.csv")

# Drop the lookup columns we do not use and rename the key column so it
# matches the 'subreddit' column of the main DataFrame.
SubredditCatClean = (
    SubredditCatDF
    .drop(columns=['Subscribers', 'Responses', 'Notes', '77', 'Discussion', '"General" discussion'])
    .rename(columns={"Subreddit": "subreddit"})
)

# Attach the category to every comment via its subreddit.
# This is an inner join: comments whose subreddit is missing from the lookup
# table are dropped from the result.
dfCleanStaging = dfEClean.merge(SubredditCatClean, on='subreddit', how='inner')

# Keep the analysis columns (category placed right after the subreddit) and
# give them readable titles. The rename is chained rather than done in place:
# an in-place rename on a column slice triggers SettingWithCopyWarning.
dfCleanStagingA = (
    dfCleanStaging[['text', 'subreddit', 'Grouping', 'created_utc', 'rater_id', 'Sentiment', 'Emotion']]
    .rename(columns={
        "text": "Text",
        "subreddit": "Subreddit",
        "Grouping": "Group",
        "created_utc": "UTC",
        "rater_id": "Rater ID",
    })
)

# Final column order: show 'Emotion' before 'Sentiment'.
dfCleanCat = dfCleanStagingA[['Text', 'Subreddit', 'Group', 'UTC', 'Rater ID', 'Emotion', 'Sentiment']]

In [54]:
# Preview the first five rows of the cleaned, categorized DataFrame.
dfCleanCat.head(n=5)

Unnamed: 0,Text,Subreddit,Group,UTC,Rater ID,Emotion,Sentiment
0,That game hurt.,nrl,sports,2019-01-25 01:50:39,1,Sadness,Negative
1,My fans on patreon will be rewarded soon,nrl,sports,2019-01-29 07:24:40,33,Neutral,Neutral
2,Lets see how [NAME] goes in Round 6,nrl,sports,2019-01-15 23:34:30,49,Neutral,Neutral
3,Apparently lots,nrl,sports,2019-01-30 21:48:09,35,Neutral,Neutral
4,Support the Sydney ~~Blues~~ Sixers. Loved tha...,nrl,sports,2019-01-16 23:15:46,18,Excitement,Positive


### ========= Q3: Date & Time Relation to Emotion and Sentiment =========

In [61]:
# Take an independent (deep) copy for the date/time analysis so dfCleanCat
# is not affected by anything done to DTdf.
DTdf = dfCleanCat.copy(deep=True)
DTdf

Unnamed: 0,Text,Subreddit,Group,UTC,Rater ID,Emotion,Sentiment
0,That game hurt.,nrl,sports,2019-01-25 01:50:39,1,Sadness,Negative
1,My fans on patreon will be rewarded soon,nrl,sports,2019-01-29 07:24:40,33,Neutral,Neutral
2,Lets see how [NAME] goes in Round 6,nrl,sports,2019-01-15 23:34:30,49,Neutral,Neutral
3,Apparently lots,nrl,sports,2019-01-30 21:48:09,35,Neutral,Neutral
4,Support the Sydney ~~Blues~~ Sixers. Loved tha...,nrl,sports,2019-01-16 23:15:46,18,Excitement,Positive
...,...,...,...,...,...,...,...
207320,"Sadly, yes. You'll only be able to craft weapo...",farcry,games,2019-01-23 19:33:09,62,Sadness,Negative
207321,no its amazing.,farcry,games,2019-01-25 13:45:25,23,Joy,Positive
207322,This should be of interest to you. -,farcry,games,2019-01-11 10:49:44,51,Neutral,Neutral
207323,"I don't know, they'd have to reduce the accura...",farcry,games,2019-01-24 10:15:12,52,Neutral,Neutral


In [62]:
# Display DTdf; this cell does not modify it.
DTdf

Unnamed: 0,Text,Subreddit,Group,UTC,Rater ID,Emotion,Sentiment
0,That game hurt.,nrl,sports,2019-01-25 01:50:39,1,Sadness,Negative
1,My fans on patreon will be rewarded soon,nrl,sports,2019-01-29 07:24:40,33,Neutral,Neutral
2,Lets see how [NAME] goes in Round 6,nrl,sports,2019-01-15 23:34:30,49,Neutral,Neutral
3,Apparently lots,nrl,sports,2019-01-30 21:48:09,35,Neutral,Neutral
4,Support the Sydney ~~Blues~~ Sixers. Loved tha...,nrl,sports,2019-01-16 23:15:46,18,Excitement,Positive
...,...,...,...,...,...,...,...
207320,"Sadly, yes. You'll only be able to craft weapo...",farcry,games,2019-01-23 19:33:09,62,Sadness,Negative
207321,no its amazing.,farcry,games,2019-01-25 13:45:25,23,Joy,Positive
207322,This should be of interest to you. -,farcry,games,2019-01-11 10:49:44,51,Neutral,Neutral
207323,"I don't know, they'd have to reduce the accura...",farcry,games,2019-01-24 10:15:12,52,Neutral,Neutral


In [65]:
# Index a fresh frame by the 'UTC' timestamp so rows can be selected by time
# of day; drop=False keeps 'UTC' available as a regular column too.
DT_df = DTdf.set_index('UTC', drop=False)

In [66]:
# Split the data into "day" (06:00 <= t < 18:00) and "night" (everything else)
# using the hour of the timestamp index.
#
# The windows are half-open on purpose: two both-ends-inclusive between_time
# calls ('6:00'-'18:00' and '18:00'-'6:00') would put rows stamped exactly
# 06:00:00 or 18:00:00 into BOTH frames. With a mask and its complement every
# row lands in exactly one of the two.
#
# NOTE(review): the index comes from the 'UTC' column, so "day"/"night" here is
# presumably UTC clock time, not the poster's local time -- confirm before
# interpreting the result as time-of-day behaviour.
index_hour = DT_df.index.hour
day_mask = (index_hour >= 6) & (index_hour < 18)

# DTDay_df: all rows in the day window.
# DTDayN_df: day rows whose Emotion is not 'Neutral'.
DTDay_df = DT_df.loc[day_mask]
DTDayN_df = DTDay_df.loc[DTDay_df['Emotion'] != 'Neutral', :]

# DTNight_df: all rows in the night window.
# DTNightN_df: night rows whose Emotion is not 'Neutral'.
DTNight_df = DT_df.loc[~day_mask]
DTNightN_df = DTNight_df.loc[DTNight_df['Emotion'] != 'Neutral', :]

# Sanity check: day and night together cover every row exactly once.
assert len(DTDay_df) + len(DTNight_df) == len(DT_df)

# Row counts noted from an earlier run that used both-ends-inclusive windows:
#   day 79968 rows (58627 non-Neutral), night 127849 rows (93892 non-Neutral).
# Observations from that run:
#   - More activity during night hours.
#   - The difference in emotion between day and night isn't drastic; it is in
#     line with the overall averages.

In [67]:
# Share of each sentiment among non-Neutral daytime rows, formatted as percentages.
day_sentiment_share = DTDayN_df['Sentiment'].value_counts(normalize=True)
day_sentiment_pct = (day_sentiment_share * 100).round(1).astype(str) + '%'
print(day_sentiment_pct)
DTDayN_df.head(25)

Positive     53.6%
Negative     30.4%
Ambiguous    15.9%
Name: Sentiment, dtype: object


Unnamed: 0_level_0,Text,Subreddit,Group,UTC,Rater ID,Emotion,Sentiment
UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-21 10:03:17,"Yeah, obviously. But that the joke relies on c...",nrl,sports,2019-01-21 10:03:17,24,Approval,Positive
2019-01-31 06:04:45,maybe post the article since it's already out lol,nrl,sports,2019-01-31 06:04:45,26,Amusement,Positive
2019-01-05 10:58:46,If it makes you feel better they'll probably m...,nrl,sports,2019-01-05 10:58:46,55,Optimism,Positive
2019-01-25 09:04:26,"Yeah, that was terrible. [NAME] laughing and j...",nrl,sports,2019-01-25 09:04:26,49,Amusement,Positive
2019-01-17 07:21:46,[NAME]? Hahha. He'll bring in the 22nd century.,nrl,sports,2019-01-17 07:21:46,50,Approval,Positive
2019-01-13 15:53:18,Nothing wrong with kayo,nrl,sports,2019-01-13 15:53:18,61,Approval,Positive
2019-01-03 07:18:02,Off the top of my head I don't think he lost a...,nrl,sports,2019-01-03 07:18:02,62,Disappointment,Negative
2019-01-31 07:16:05,[NAME] damn I love Coops,nrl,sports,2019-01-31 07:16:05,27,Love,Positive
2019-01-03 07:18:02,Off the top of my head I don't think he lost a...,nrl,sports,2019-01-03 07:18:02,3,Realization,Ambiguous
2019-01-31 12:58:42,Can't believe me and my family were gonna leav...,nrl,sports,2019-01-31 12:58:42,62,Surprise,Ambiguous


In [68]:
# Share of each sentiment among non-Neutral night-time rows, formatted as percentages.
night_sentiment_share = DTNightN_df['Sentiment'].value_counts(normalize=True)
night_sentiment_pct = (night_sentiment_share * 100).round(1).astype(str) + '%'
print(night_sentiment_pct)
DTNightN_df.head(25)

Positive     54.8%
Negative     30.0%
Ambiguous    15.2%
Name: Sentiment, dtype: object


Unnamed: 0_level_0,Text,Subreddit,Group,UTC,Rater ID,Emotion,Sentiment
UTC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2019-01-25 01:50:39,That game hurt.,nrl,sports,2019-01-25 01:50:39,1,Sadness,Negative
2019-01-16 23:15:46,Support the Sydney ~~Blues~~ Sixers. Loved tha...,nrl,sports,2019-01-16 23:15:46,18,Excitement,Positive
2019-01-16 04:49:09,its ok i understood,nrl,sports,2019-01-16 04:49:09,37,Approval,Positive
2019-01-05 00:50:28,That's the longest dimple I've ever seen>,nrl,sports,2019-01-05 00:50:28,55,Amusement,Positive
2019-01-06 23:14:01,Shared misery *always* makes me feel better :D,nrl,sports,2019-01-06 23:14:01,12,Joy,Positive
2019-01-25 04:43:56,remember when he scored 4 tries in a game? tha...,nrl,sports,2019-01-25 04:43:56,78,Admiration,Positive
2019-01-06 22:56:26,In my day I had to wait 20 minutes for the int...,nrl,sports,2019-01-06 22:56:26,49,Curiosity,Ambiguous
2019-01-24 05:08:08,[NAME]? I'd be surprised if he plays.,nrl,sports,2019-01-24 05:08:08,78,Surprise,Ambiguous
2019-01-13 02:58:21,He doesn't know who leaked it. The guy filming...,nrl,sports,2019-01-13 02:58:21,57,Disappointment,Negative
2019-01-17 04:19:43,He is actually good now. No teddy but he was g...,nrl,sports,2019-01-17 04:19:43,5,Admiration,Positive


### ========= Q3: Date & Time Relation to Emotion and Sentiment =========
Notes: The difference between the day and night sentiment distributions is not drastic; we could redefine which hours count as day and which as night.
We could also look at activity in specific subreddit groupings by day / night.