In [1]:
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns


In [2]:
import os, sys

# Put the notebook's parent directory at the front of the import search path so
# the `src` package imported below can be resolved. The membership check keeps
# re-runs of this cell from inserting the same entry again.
rpath = os.path.abspath(os.pardir)
if sys.path.count(rpath) == 0:
    sys.path.insert(0, rpath)

from src.loader import SlackDataLoader
import src.utils as utils

In [3]:
# Root folder of the anonymized Slack export. Defined once so the loader and
# the week-1 parser path below cannot drift apart.
DATA_DIR = "../anonymized"

# Initialize DataLoader
data_loader = SlackDataLoader(DATA_DIR)

# Parse the week-1 export into a DataFrame and preview its first rows
all_week_1_path = os.path.join(DATA_DIR, 'all-week1/')
week1_df = utils.slack_parser(all_week_1_path)
week1_df.head()

Unnamed: 0,msg_type,msg_content,sender_name,msg_sent_time,msg_dist_type,time_thread_start,reply_count,reply_users_count,reply_users,tm_thread_end,channel
0,message,I use vscode extension for notebook and it wor...,Phillip Atkins,1661324676.252859,text,1661318940.506229,0,0,0,0.0,
1,message,you have to fetch and merge first (if you are ...,Judith Bolton,1661324702.743019,text,1661324702.743019,1,1,U03UHB8CXDY,1661325717.923899,
2,message,"Try ""git pull"" first",Steven Garcia,1661324717.409309,text,1661324564.397769,0,0,0,0.0,
3,message,how do I combine handset type and handset manu...,Cheryl Hudson,1661325042.337519,text,1661325042.337519,3,3,"U03UH397319,U03U1J51VFZ,U03UVHCV6KB",1661330120.226419,
4,message,You can select first a subset of the top 3 man...,Brenda Hernandez,1661325357.761809,text,1661325042.337519,0,0,0,0.0,


In [4]:
# Use the text of the first message as a small sample for the experiments below
example = week1_df['msg_content'].iloc[0]
example

'I use vscode extension for notebook and it works great. Maybe you should try that?'

In [17]:
import nltk
# Fetch the NLTK resources used further down in this notebook:
# 'punkt' is the tokenizer data behind nltk.word_tokenize, and 'vader_lexicon'
# is the lexicon loaded by SentimentIntensityAnalyzer.
# nltk.download reports packages that are already present as up-to-date.
nltk.download('punkt')
nltk.download('vader_lexicon')


[nltk_data] Downloading package punkt to /Users/abdi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package vader_lexicon to
[nltk_data]     /Users/abdi/nltk_data...


True

In [15]:
# Split the sample message into word-level tokens and preview the first ten
tokens = nltk.word_tokenize(example)
tokens[:10]

['I',
 'use',
 'vscode',
 'extension',
 'for',
 'notebook',
 'and',
 'it',
 'works',
 'great']

VADER Sentiment Analysis

In [18]:
from nltk.sentiment import SentimentIntensityAnalyzer
from tqdm.notebook import tqdm

# Single VADER analyzer instance, reused by the scoring cells that follow
sia = SentimentIntensityAnalyzer()

In [22]:
# Sanity-check the analyzer on the sample message. polarity_scores returns a
# dict with 'neg', 'neu', 'pos' and 'compound' entries (see the output below).
print(example)
sia.polarity_scores(example)

I use vscode extension for notebook and it works great. Maybe you should try that?


{'neg': 0.0, 'neu': 0.76, 'pos': 0.24, 'compound': 0.6249}

In [36]:
# Run the VADER polarity test on every message and attach the scores to the
# week-1 frame.
#
# Scores are computed once per row and aligned by position instead of being
# looked up by message text and re-attached with an implicit merge: a merge
# without `on=` joins on whatever column names the two frames share, so its
# key silently changes if week1_df ever gains a column named like a score.
score_columns = ['neg', 'neu', 'pos', 'compound']

# Re-indexed copy so the result carries a clean 0..n-1 index, matching what the
# earlier merge-based version of this cell produced.
messages_df = week1_df.reset_index(drop=True)

# A non-string message (e.g. a missing value) cannot be scored; an empty record
# leaves its score columns as NaN instead of aborting the whole cell.
score_records = [
    sia.polarity_scores(text) if isinstance(text, str) else {}
    for text in messages_df['msg_content']
]

# Text -> scores lookup, kept under its original name for any cell that uses it.
results = dict(zip(messages_df['msg_content'], score_records))

scores_df = pd.DataFrame(
    score_records, index=messages_df.index, columns=score_columns, dtype=float
)

# Column layout: message text, the four VADER scores, then every other column
# of week1_df in its original order.
other_columns = [col for col in messages_df.columns if col != 'msg_content']
vaders = pd.concat(
    [messages_df[['msg_content']], scores_df, messages_df[other_columns]],
    axis=1,
)
vaders

Unnamed: 0,msg_content,neg,neu,pos,compound,msg_type,sender_name,msg_sent_time,msg_dist_type,time_thread_start,reply_count,reply_users_count,reply_users,tm_thread_end,channel
0,I use vscode extension for notebook and it wor...,0.000,0.760,0.240,0.6249,message,Phillip Atkins,1661324676.252859,text,1661318940.506229,0,0,0,0,
1,you have to fetch and merge first (if you are ...,0.065,0.778,0.158,0.3779,message,Judith Bolton,1661324702.743019,text,1661324702.743019,1,1,U03UHB8CXDY,1661325717.923899,
2,"Try ""git pull"" first",0.000,1.000,0.000,0.0000,message,Steven Garcia,1661324717.409309,text,1661324564.397769,0,0,0,0,
3,how do I combine handset type and handset manu...,0.000,0.806,0.194,0.3818,message,Cheryl Hudson,1661325042.337519,text,1661325042.337519,3,3,"U03UH397319,U03U1J51VFZ,U03UVHCV6KB",1661330120.226419,
4,You can select first a subset of the top 3 man...,0.000,0.783,0.217,0.3818,message,Brenda Hernandez,1661325357.761809,text,1661325042.337519,0,0,0,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
546,Your final report includes limited number of s...,0.058,0.831,0.111,0.1779,message,Brittany Garcia,1661629467.510289,text,1661628615.670859,0,0,0,0,
547,<https://www.section.io/engineering-education/...,0.000,1.000,0.000,0.0000,message,Miguel Herring,1661638084.234439,link,1661638084.234439,1,1,U03UGB3T3MY,1662015960.997039,
548,okay. thank you,0.000,0.185,0.815,0.5267,message,Joshua Rhodes,1661638632.171329,text,1661628615.670859,0,0,0,0,
549,You African telecomms data,0.000,1.000,0.000,0.0000,message,Katherine Foster,1668158796.869069,text,1667932405.677889,0,0,0,0,
