# Insights into the Job Reviews Data Set

In [6]:
import pandas as pd
import sqlite3
import cufflinks as cf
from textblob import TextBlob
# Switch cufflinks to offline mode for the `.iplot()` call used later in this notebook.
cf.go_offline()
# NOTE(review): this passes offline=False immediately after go_offline() and
# marks plots as world-readable — presumably copied from a tutorial snippet.
# Confirm which mode is actually intended before sharing this notebook.
cf.set_config_file(offline=False, world_readable=True)

# Reading the data

In [7]:
# Load every row of the `company_review` table into a DataFrame.
con = sqlite3.connect("collectors/data.sqlite3")
try:
    df = pd.read_sql_query("SELECT * from company_review", con)
finally:
    # Release the database handle even when the query raises, so a failed
    # cell run does not leave an open connection behind in the kernel.
    con.close()

In [8]:
# `title` is not referenced by any of the cells below, so drop it.
# Rebinding (rather than `inplace=True`) together with `errors='ignore'`
# keeps this cell idempotent: running it a second time no longer raises
# KeyError because the column is already gone.
df = df.drop(columns='title', errors='ignore')
df

Unnamed: 0,id,company,rating,author,author_status,location,date,description,source
0,1,bmo financial group,3.0,Credit Analyst (Current Employee),,"Burnaby, BC",11 July 2018,Not a bad job but can get boring easily. Manag...,indeed.com
1,2,bmo financial group,3.0,Project Manager (Former Employee),,"Toronto, ON",7 April 2020,co-workers are friendly but pay is not good at...,indeed.com
2,3,bmo financial group,1.0,Assistant Manager (Former Employee),,"Cambridge, ON",6 April 2020,They give you the illusion of how great of a j...,indeed.com
3,4,bmo financial group,2.0,Software Specialist (Former Employee),,"Toronto, ON",6 April 2020,"Basically, in BMO agile team, they hire 5+ per...",indeed.com
4,5,bmo financial group,5.0,Relationship Manager (Current Employee),,"Nanaimo, BC",4 April 2020,Work/life balance and customer centric. Traini...,indeed.com
...,...,...,...,...,...,...,...,...,...
790,791,amazon,2.0,Order Picker/Forklift Operator (Former Employee),,"Ottawa, ON",18 January 2020,Very fast paced hard on your feet need very co...,indeed.com
791,792,amazon,1.0,Hard worker (Current Employee),,"Brampton, ON",18 January 2020,Alotta bad stuff happened i would report it an...,indeed.com
792,793,amazon,4.0,Risk Analyst • Fraud Risk & Compliance (Former...,,"Barrie, ON",18 January 2020,It is a good company . I would recommend peopl...,indeed.com
793,794,amazon,4.0,seasonal warehouse associate (Former Employee),,"Brampton, ON",17 January 2020,it is a faced paced work environment and compe...,indeed.com


# Data Preprocessing

In [9]:
# Keep only the reviews that actually have text. The explicit `.copy()`
# makes the result an independent frame, so the column assignments in the
# following cells do not trigger pandas' SettingWithCopyWarning.
df = df[df['description'].notna()].copy()

def preprocess(DescriptionText):
    """Strip leftover HTML markup and entities from review text.

    Parameters
    ----------
    DescriptionText : pandas.Series
        String-valued series of raw review descriptions (no missing values).

    Returns
    -------
    pandas.Series
        A new series with the same index in which ``<br/>`` tags,
        ``<a ...>...</a>`` elements and the ``&amp``/``&gt``/``&lt`` entities
        have been removed and non-breaking spaces replaced by plain spaces.
        The input series is not modified.
    """
    # (pattern, replacement) pairs, applied in this order.
    substitutions = [
        (r'<br/>', ''),
        # Non-greedy, one anchor element at a time: a greedy `.*` would also
        # delete all the review text sitting between the first and last link.
        (r'<a\b[^>]*>.*?</a>', ''),
        # The trailing ';' is optional so that a full entity such as '&amp;'
        # does not leave a stray semicolon behind.
        (r'&amp;?', ''),
        (r'&gt;?', ''),
        (r'&lt;?', ''),
        ('\xa0', ' '),
    ]
    for pattern, replacement in substitutions:
        # regex=True must be explicit: since pandas 2.0 `str.replace` treats
        # the pattern as a literal string by default, which would silently
        # turn every substitution above into a no-op.
        DescriptionText = DescriptionText.str.replace(pattern, replacement, regex=True)
    return DescriptionText
# Replace the raw descriptions with their cleaned version. `assign` builds a
# new frame instead of writing into the filtered slice, which avoids pandas'
# SettingWithCopyWarning and keeps the cell safe to re-run.
df = df.assign(description=preprocess(df['description']))



# Sentiment Analysis on Job Reviews given by Employees

In [10]:
# Score every review with TextBlob's sentiment polarity and store the result,
# aligned on the frame's index, in a new `polarity` column.
df['polarity'] = pd.Series(
    [TextBlob(review).sentiment.polarity for review in df['description']],
    index=df.index,
)


In [11]:
# Display the frame to check the newly added `polarity` column.
df

Unnamed: 0,id,company,rating,author,author_status,location,date,description,source,polarity
0,1,bmo financial group,3.0,Credit Analyst (Current Employee),,"Burnaby, BC",11 July 2018,Not a bad job but can get boring easily. Manag...,indeed.com,0.154762
1,2,bmo financial group,3.0,Project Manager (Former Employee),,"Toronto, ON",7 April 2020,co-workers are friendly but pay is not good at...,indeed.com,0.151190
2,3,bmo financial group,1.0,Assistant Manager (Former Employee),,"Cambridge, ON",6 April 2020,They give you the illusion of how great of a j...,indeed.com,0.179167
3,4,bmo financial group,2.0,Software Specialist (Former Employee),,"Toronto, ON",6 April 2020,"Basically, in BMO agile team, they hire 5+ per...",indeed.com,0.000000
4,5,bmo financial group,5.0,Relationship Manager (Current Employee),,"Nanaimo, BC",4 April 2020,Work/life balance and customer centric. Traini...,indeed.com,0.250000
...,...,...,...,...,...,...,...,...,...,...
790,791,amazon,2.0,Order Picker/Forklift Operator (Former Employee),,"Ottawa, ON",18 January 2020,Very fast paced hard on your feet need very co...,indeed.com,0.047667
791,792,amazon,1.0,Hard worker (Current Employee),,"Brampton, ON",18 January 2020,Alotta bad stuff happened i would report it an...,indeed.com,-0.500000
792,793,amazon,4.0,Risk Analyst • Fraud Risk & Compliance (Former...,,"Barrie, ON",18 January 2020,It is a good company . I would recommend peopl...,indeed.com,0.511111
793,794,amazon,4.0,seasonal warehouse associate (Former Employee),,"Brampton, ON",17 January 2020,it is a faced paced work environment and compe...,indeed.com,0.100000


# Sentiment Polarity Distribution

In [12]:
# Interactive histogram (50 bins) of the per-review polarity scores.
df['polarity'].iplot(
    kind='hist',
    bins=50,
    title='Sentiment Polarity Distribution',
    xTitle='polarity',
    yTitle='count',
    color='Blue',
    linecolor='black',
)