In [1]:
import re
import pandas as pd
import numpy as np
from collections import defaultdict

# nltk
from nltk import tokenize
from nltk.corpus import stopwords 
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Plotting tools
from wordcloud import WordCloud, STOPWORDS
import matplotlib.pyplot as plt
import matplotlib
plt.style.use('fivethirtyeight')
%matplotlib inline

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

from tqdm import tqdm_notebook as tqdm
from tqdm import trange


In [2]:
# Load the interview answers workbook; the frame is displayed below.
data = pd.read_excel(io='Analysis.xlsx')
data

Unnamed: 0,ID,Q1,Q2,Q3,Q4,Q5,Q6,Q7
0,1,Of course. I am about to complete my degree in...,During my second year at University I started ...,"As I mentioned in my application, following an...",Yes quite recently actually. Since I applied f...,"I am friendly, reliable and a communicator. I...",I have a tendency to be a little bit disorgani...,If I was successful in getting selected for yo...
1,2,First of all I would like to thank you to intr...,This company is the leader in the industry and...,During a summer session I had a student who wa...,"Each day when I arrive to work, I create a to-...","I would describe myself as driven, helpful, an...",One thing I would like to improve is how often...,"After 5 years, I see myself as a valuable empl..."
2,3,"I am Varsha, I am born and raised in Hyderabad...",Indeed it shall be a privilege for any aspirin...,"The most challenging situation for me, I would...",I’d be lost without my daily to-do list! At th...,"Responsible, because I help look after my litt...","I sometimes act too quickly, but I’ve learned ...",I look forward to learn new skills and improve...
3,4,"Hi, I'm Ankita. I am pursuing my under graduat...","Firstly, I would like to take this moment to a...","Late one Friday afternoon at my last job, a cl...",I make a list. I work out what order to do thi...,"First, I am thoughtful. I have a habit of alwa...",I could improve my organisation skills. I thin...,"After 5 years, I want to be an expert in my ro..."
4,5,I am in my final year of studying English Hons...,"Well, there are many reasons why I want to joi...",I take every new task or responsibility of my ...,"""During my summer internship, I was assigned b...","I am curious, insightful, and passionate. By a...",My weakness is thinking I can handle more than...,"After 5 years, I would like to see myself lead..."
5,6,I have completed my Masters in English from TG...,The job opening at your organization suits my ...,"As part of my English degree, we had to cover ...",I experienced this situation again and again w...,"Creative, flexible, and adventurous are three ...",Public speaking has always been a challenge fo...,Well I’m really excited about this opening. In...
6,7,"This is Abhinav, I am 19, born & brought up in...",Apart from an ocean of opportunities this job ...,I started working in a café at the age of 16 a...,"his is my first job application, so I do not r...",The first word I’d use to describe myself is w...,"I am a naturally shy person, and it takes time...",I am certain that the coming five years will b...
7,8,"So, my name is John, and I’m a senior at Schoo...",Honestly speaking I feel like I am now ready t...,"In my previous internship, I was tasked with o...",I had to meet such deadlines on a daily basis...,"I am communicative, meaning that I always want...","Sometimes, I avoid asking others for help beca...","During my internship years, it was quite diffi..."
8,9,"My name is Jane Doe, I’m 22 years old and I re...",The responsibilities which come with this posi...,During my summer internship at a public relati...,"Even if my tasks seemed unachievable, I would ...",am passionate about my work. Because I love w...,I’ve always been a procrastinator. I used to t...,"Over the next few years, I want to explore and..."
9,10,"Sure, I’d be glad to. I’m an tech-focused proj...",As well as my ability to manage and administer...,I’ve had many difficult tasks throughout my ca...,My job has multiple conflicting priorities whe...,I’m a people person. I love meeting new people...,I tend to be a perfectionist and can linger on...,"Judging by my previous assumptions, I have sel..."


In [3]:
# Candidate identifiers, later used as the first column of the combined result.
# NOTE: this name shadows the builtin id(); it is kept because a later cell reads it.
id = data.loc[:, 'ID']
id

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
Name: ID, dtype: int64

In [4]:
# Pull each question's answer column (Q1..Q7) into its own Series.
q1, q2, q3, q4, q5, q6, q7 = (data[f'Q{number}'] for number in range(1, 8))

In [5]:
def sentimentvalue(q1):
    """Return the average per-sentence VADER scores for each answer in ``q1``.

    Every answer is lower-cased, stripped of digits, of punctuation other
    than ``.`` and of single-character words, then split into sentences.
    Each sentence is scored with ``SentimentIntensityAnalyzer.polarity_scores``
    and the four scores are averaged over the sentences of that answer.

    Parameters
    ----------
    q1 : iterable
        The answers to score, one entry per transcript. Entries that are not
        strings (for example ``None`` or NaN) are treated as empty text.

    Returns
    -------
    list of dict
        One dict per answer, in input order, with the keys ``'compound'``,
        ``'pos'``, ``'neg'`` and ``'neu'``. An answer that yields no
        sentences gets 0.0 for every key instead of raising
        ``ZeroDivisionError``.
    """
    score_keys = ('compound', 'pos', 'neg', 'neu')
    # Compiled once rather than once per answer.
    # NOTE(review): the leading \W* also consumes punctuation in front of the
    # single-character word, so a sentence-ending '.' followed by e.g. "i" or
    # "a" is removed and the two sentences are scored as one. Left unchanged
    # to keep existing scores stable - confirm this is intended.
    single_char_word = re.compile(r'\W*\b\w{1}\b')

    cleaned = []
    for text in q1:
        # Non-string entries cannot be lower-cased; score them as empty text.
        if not isinstance(text, str):
            text = ''
        #convert to lowercase
        text = text.lower()
        #remove number
        text = re.sub(r'\d+', '', text)
        #remove punctuation (periods are kept so sentences can still be split)
        text = re.sub(r'[^\w\s.]', '', text)
        #removing words of length 1
        text = single_char_word.sub('', text)
        cleaned.append(text)

    analyzer1 = SentimentIntensityAnalyzer()
    sent1 = []
    for text in tqdm(cleaned, desc='Progress'):
        #tokenize sentence
        sentence_list = tokenize.sent_tokenize(text)
        sentiments_1 = dict.fromkeys(score_keys, 0.0)
        #sentiment analysis for each sentence as vader is more effective on sentence compared to paragraphs
        for sentence in sentence_list:
            vs = analyzer1.polarity_scores(sentence)
            #summing the scores for each sentence
            for key in score_keys:
                sentiments_1[key] += vs[key]
        #taking average; skipped when there are no sentences so the scores stay 0.0
        if sentence_list:
            for key in score_keys:
                sentiments_1[key] = sentiments_1[key] / len(sentence_list)
        #appending in a list to get transcript wise scores
        sent1.append(sentiments_1)
    return sent1

In [6]:
# Readable column stems for the four score keys returned by sentimentvalue.
_SCORE_LABELS = {'compound': 'compound', 'neg': 'negative', 'neu': 'neutral', 'pos': 'positive'}


def _scores_for(answers, tag):
    """Score one question's answers and suffix every score column with ``tag``."""
    renamed = {key: f'{label}_{tag}' for key, label in _SCORE_LABELS.items()}
    return pd.DataFrame(sentimentvalue(answers)).rename(columns=renamed)


# One frame of averaged sentiment scores per question.
df1 = _scores_for(q1, 'q1')
df2 = _scores_for(q2, 'q2')
df3 = _scores_for(q3, 'q3')
df4 = _scores_for(q4, 'q4')
df5 = _scores_for(q5, 'q5')
df6 = _scores_for(q6, 'q6')
df7 = _scores_for(q7, 'q7')

# Place the ID column first, followed by the score columns of every question.
df = pd.DataFrame(id)
result = pd.concat([df, df1, df2, df3, df4, df5, df6, df7], axis=1)


HBox(children=(FloatProgress(value=0.0, description='Progress', max=10.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Progress', max=10.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Progress', max=10.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Progress', max=10.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Progress', max=10.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Progress', max=10.0, style=ProgressStyle(description_widt…




HBox(children=(FloatProgress(value=0.0, description='Progress', max=10.0, style=ProgressStyle(description_widt…




In [7]:
# Display the combined frame: the ID column followed by the score columns for Q1..Q7.
result

Unnamed: 0,ID,compound_q1,positive_q1,negative_q1,neutral_q1,compound_q2,positive_q2,negative_q2,neutral_q2,compound_q3,...,negative_q5,neutral_q5,compound_q6,positive_q6,negative_q6,neutral_q6,compound_q7,positive_q7,negative_q7,neutral_q7
0,1,0.588467,0.144333,0.0,0.855667,0.67405,0.16175,0.0,0.83825,0.15455,...,0.054,0.761667,0.30985,0.0955,0.054,0.851,0.9657,0.192,0.0,0.808
1,2,0.275,0.1605,0.065,0.774667,0.7248,0.184,0.0,0.816,0.0727,...,0.1352,0.7296,0.5081,0.239667,0.096667,0.663667,0.91,0.262,0.0,0.738
2,3,0.25674,0.1598,0.0532,0.787,0.8305,0.323,0.0,0.677,-0.089071,...,0.0,0.636,0.1695,0.054,0.0,0.946,0.35175,0.1665,0.0,0.8335
3,4,0.429557,0.220857,0.033143,0.746,0.83255,0.211,0.008,0.781,0.5383,...,0.0,0.796667,0.18805,0.073,0.0625,0.8645,0.8126,0.202,0.0,0.798
4,5,0.9888,0.332,0.04,0.628,0.7498,0.289,0.0,0.711,0.057825,...,0.0,0.734,0.309825,0.14475,0.04725,0.808,0.7162,0.2785,0.0,0.7215
5,6,0.2074,0.026667,0.006,0.967333,0.8663,0.19,0.023,0.787,-0.083983,...,0.013,0.663,0.7361,0.264667,0.0,0.735333,0.490638,0.217375,0.00825,0.774375
6,7,0.19558,0.0808,0.0,0.9192,0.537367,0.108667,0.0,0.891333,0.65875,...,0.0,0.783167,0.283967,0.130333,0.077333,0.792333,0.402875,0.1685,0.0,0.8315
7,8,0.233025,0.099625,0.00875,0.891625,0.8754,0.252,0.0,0.748,0.202543,...,0.022667,0.723,0.8234,0.264,0.0765,0.6595,0.2943,0.1432,0.0438,0.813
8,9,0.06622,0.032,0.023,0.945,0.673267,0.252,0.0,0.748,0.183883,...,0.016429,0.667143,-0.0484,0.09575,0.10275,0.8015,0.42775,0.1685,0.0,0.8315
9,10,0.255062,0.155,0.0,0.845,0.3304,0.142667,0.031667,0.825667,0.084875,...,0.0178,0.7996,0.08568,0.0922,0.0506,0.8572,0.245357,0.127429,0.037857,0.834714


In [8]:
# Write the combined scores to an Excel workbook, leaving out the row index.
result.to_excel(excel_writer='SentimentValues.xlsx', index=False)