In [1]:
# Importing Necessary Library
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize,sent_tokenize
from collections import Counter
import re

In [2]:
# Importing or Loading the Dataset for sentiment analysis
df = pd.read_csv("reviews.csv")

In [5]:
# Checking the Dataset 
df.head()

Unnamed: 0,Review,User_Type
0,"The job search process was smooth, and I found...",Job Seeker
1,The salary is too low compared to industry sta...,Job Seeker
2,Great learning opportunities and a friendly wo...,Job Seeker
3,"The hiring process was quick, but communicatio...",Job Seeker
4,I had a terrible experience with unresponsive HR.,Job Seeker


In [7]:
# Loading stopwords
from nltk.corpus import stopwords
stop_words = set(stopwords.words('english'))

In [9]:
# Cleaning the Text
def clean(text):
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters
    text = text.lower().strip()  # Convert to lowercase and trim spaces
    return text

In [11]:
# Appling Cleaned Text
df["Cleaned_Review"] = df["Review"].astype(str).apply(clean)

In [13]:
# Initialize VADER Sentiment Analyzer
s = SentimentIntensityAnalyzer()

In [15]:
# Creating Function to get sentiment
def get_sentiment(text):
    score = s.polarity_scores(text)["compound"]
    if score >= 0.05:
        return "Positive"
    elif score <= -0.05:
        return "Negative"
    else:
        return "Neutral"

In [17]:
# Appling sentiment analysis
df["Sentiment"] = df["Cleaned_Review"].apply(get_sentiment)

In [19]:
# Checking the Dataset
df.head()

Unnamed: 0,Review,User_Type,Cleaned_Review,Sentiment
0,"The job search process was smooth, and I found...",Job Seeker,the job search process was smooth and i found ...,Positive
1,The salary is too low compared to industry sta...,Job Seeker,the salary is too low compared to industry sta...,Negative
2,Great learning opportunities and a friendly wo...,Job Seeker,great learning opportunities and a friendly wo...,Positive
3,"The hiring process was quick, but communicatio...",Job Seeker,the hiring process was quick but communication...,Positive
4,I had a terrible experience with unresponsive HR.,Job Seeker,i had a terrible experience with unresponsive hr,Negative


In [21]:
# Function to extract top words
def get_common_words(text, n=15):
    words = word_tokenize(text)  # Tokenization
    words = [word for word in words if word not in stop_words and len(word) > 2]  # Remove stopwords & short words
    word_freq = Counter(words)  # Count word frequency
    return word_freq.most_common(n)  # Return top N words

In [23]:
# Extracting positive & negative reviews
positive_reviews = " ".join(df[df["Sentiment"] == "Positive"]["Cleaned_Review"])
negative_reviews = " ".join(df[df["Sentiment"] == "Negative"]["Cleaned_Review"])

In [25]:
# Get top words for positive and negative reviews
positive_words = get_common_words(positive_reviews)
negative_words = get_common_words(negative_reviews)

In [27]:
# Save words to DataFrame
pos_df = pd.DataFrame(positive_words, columns=["Word", "Frequency"])
neg_df = pd.DataFrame(negative_words, columns=["Word", "Frequency"])


In [55]:
# Save results for Excel
df.to_csv("sentiment_results.csv", index=False, encoding='utf-8')  # Main sentiment results
pos_df.to_csv("positive_words.csv", index=False, encoding='utf-8')  # Positive word insights
neg_df.to_csv("negative_words.csv", index=False, encoding='utf-8')  # Negative word insights

In [37]:
# Sentiment Result
df1 = pd.read_csv("sentiment_results.csv")

df1.head(50)

Unnamed: 0,Review,User_Type,Cleaned_Review,Sentiment
0,"The job search process was smooth, and I found...",Job Seeker,the job search process was smooth and i found ...,Positive
1,The salary is too low compared to industry sta...,Job Seeker,the salary is too low compared to industry sta...,Negative
2,Great learning opportunities and a friendly wo...,Job Seeker,great learning opportunities and a friendly wo...,Positive
3,"The hiring process was quick, but communicatio...",Job Seeker,the hiring process was quick but communication...,Positive
4,I had a terrible experience with unresponsive HR.,Job Seeker,i had a terrible experience with unresponsive hr,Negative
5,Long working hours with no proper compensation.,Job Seeker,long working hours with no proper compensation,Negative
6,The interview process was well-organized and t...,Job Seeker,the interview process was wellorganized and tr...,Neutral
7,The company ghosted me after the final interview.,Job Seeker,the company ghosted me after the final interview,Neutral
8,"Very supportive team, and the onboarding proce...",Job Seeker,very supportive team and the onboarding proces...,Positive
9,The job description didn't match the actual role.,Job Seeker,the job description didnt match the actual role,Neutral


In [39]:
# Positive word insights
df2 = pd.read_csv("positive_words.csv")

df2.head(50)

Unnamed: 0,Word,Frequency
0,process,3
1,great,3
2,job,2
3,opportunities,2
4,supportive,2
5,team,2
6,work,2
7,good,2
8,better,2
9,would,2


In [None]:
"""

Key Insights from the Positive Words (Frequency Analysis):-->

1)  Process & Smooth Experience:

"process" (3 mentions) and "smooth" (1 mention) suggest that users found the process of finding a job or interacting with the platform to be smooth and well-organized.

2)  Job Opportunities & Career Growth:

Words like "job" (2 mentions), "opportunities" (2 mentions), and "opportunity" (1 mention) indicate that users are satisfied with the variety and availability of job opportunities.

3)  Team and Support:

"supportive" (2 mentions) and "team" (2 mentions) highlight that users feel the platform has a strong, supportive environment, whether it’s from the team or the community.

4)  Positive Experience with the Platform:

"great" (3 mentions) and "good" (2 mentions) are positive adjectives often used in reviews, showing general satisfaction.
"better" (2 mentions) shows that users feel the platform is improving over time.

5)  Training and Career Preparation:

The term "training" (2 mentions) suggests that users find value in the training programs provided, possibly indicating that they feel more prepared for job opportunities after using the platform.

6)  Job Search and Success:

"search" (1 mention) and "found" (1 mention) indicate that users were able to find jobs through the platform, emphasizing the platform’s role in the job-search process.

"""

In [41]:
# Negative word insights
df3 = pd.read_csv("negative_words.csv")

df3.head(50)

Unnamed: 0,Word,Frequency
0,salary,1
1,low,1
2,compared,1
3,industry,1
4,standards,1
5,terrible,1
6,experience,1
7,unresponsive,1
8,long,1
9,working,1


In [None]:
"""

Key Insights from the Negative Words (Frequency Analysis):-->

1)  Salary and Compensation Issues:

"salary" (1 mention), "low" (1 mention), and "compensation" (1 mention) indicate that users feel dissatisfied with the pay or compensation offered through the platform.
The term "compared" (1 mention) and "industry" (1 mention) suggest that users are comparing the salary offered with industry standards, implying they think the pay is below expectations.

2)  Poor Experience and Communication:

"terrible" (1 mention) and "experience" (1 mention) indicate strong dissatisfaction with the overall user experience on the platform.
The word "unresponsive" (1 mention) indicates a lack of responsiveness from the platform, which could refer to customer support or user interactions.

3)  Long Working Hours:

"long" (1 mention) and "hours" (1 mention) suggest that users are unhappy with the long working hours associated with the job opportunities on the platform.

4)  Lack of Proper Work Environment:

"working" (1 mention) and "proper" (1 mention) indicate dissatisfaction with the work environment or conditions.

5)  Difficulties in Finding Jobs:

The word "difficult" (1 mention) suggests that users find it hard to navigate the platform or face challenges in finding the right job.
"find" (1 mention) indicates frustration with the job search process on the platform.

"""