In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the raw survey responses; the relative path means the CSV must sit in
# the directory the notebook is run from.
data = pd.read_csv('employee_experience_survey_data.csv')

In [3]:
# Preview the first rows to confirm the file parsed into the expected columns.
# NOTE(review): the rendered output includes respondent names — confirm it is
# safe to share before committing the notebook with outputs.
data.head()

Unnamed: 0,Name,Age Bracket,Gender,Ethnicity,Job Title,Department,Date Survey Completed,Job Satisfaction,Work-Life Balance,Management Support,Team Collaboration,Workload Fairness,Career Development Opportunities,Workplace Inclusivity,Company Communication,Compensation Satisfaction,Job Security,Overall Engagement
0,John Doe,25-34,Female,Asian,Product Manager,Product Development,2024-10-05,Disagree,Strongly Agree,Neutral,Neutral,Neutral,Disagree,Agree,Strongly Agree,Strongly Agree,Agree,Strongly Agree
1,Jane Smith,18-24,Female,Middle Eastern,Operations Manager,Sales,2024-10-07,Agree,Strongly Disagree,Strongly Agree,Neutral,Neutral,Disagree,Neutral,Neutral,Neutral,Agree,Neutral
2,Carlos Reyes,45-54,Female,Indian,UX Designer,Consulting,2024-10-08,Neutral,Strongly Disagree,Agree,Disagree,Strongly Agree,Agree,Agree,Strongly Disagree,Neutral,Strongly Agree,Strongly Agree
3,Emily Zhang,35-44,Male,Caucasian,UX Designer,HR,2024-10-07,Neutral,Agree,Agree,Strongly Disagree,Strongly Disagree,Strongly Agree,Agree,Strongly Disagree,Strongly Disagree,Agree,Neutral
4,Michael Johnson,18-24,Female,Caucasian,UX Designer,Product Development,2024-10-07,Agree,Strongly Agree,Disagree,Neutral,Strongly Disagree,Disagree,Disagree,Disagree,Strongly Disagree,Neutral,Disagree


In [4]:
# Column dtypes and non-null counts, used to check for missing responses.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15 entries, 0 to 14
Data columns (total 18 columns):
 #   Column                            Non-Null Count  Dtype 
---  ------                            --------------  ----- 
 0   Name                              15 non-null     object
 1   Age Bracket                       15 non-null     object
 2   Gender                            15 non-null     object
 3   Ethnicity                         15 non-null     object
 4   Job Title                         15 non-null     object
 5   Department                        15 non-null     object
 6   Date Survey Completed             15 non-null     object
 7   Job Satisfaction                  15 non-null     object
 8   Work-Life Balance                 15 non-null     object
 9   Management Support                15 non-null     object
 10  Team Collaboration                15 non-null     object
 11  Workload Fairness                 15 non-null     object
 12  Career Development Oppor

In [5]:
# Likert Scale Mapping

In [6]:
# Ordinal encoding of the five Likert response labels, ordered from the most
# negative answer (1) to the most positive answer (5).
likert_scale = dict(
    zip(
        ('Strongly Disagree', 'Disagree', 'Neutral', 'Agree', 'Strongly Agree'),
        range(1, 6),
    )
)

In [7]:
# Survey columns answered on the Likert scale; these are the columns that get
# converted from text labels to numeric scores.
likert_cols = [
    'Job Satisfaction',
    'Work-Life Balance',
    'Management Support',
    'Team Collaboration',
    'Workload Fairness',
    'Career Development Opportunities',
    'Workplace Inclusivity',
    'Company Communication',
    'Compensation Satisfaction',
    'Job Security',
    'Overall Engagement',
]

In [8]:
# Convert the Likert columns from text labels to their numeric scores.
# Each value is looked up in `likert_scale`; values that are not keys (for
# example scores that were already converted by an earlier run of this cell)
# are passed through unchanged, so re-running the cell is harmless.
# The final cast to int64 makes the numeric dtype explicit instead of relying
# on the implicit downcasting done by `DataFrame.replace`, and it raises if a
# column still holds an unmapped text label rather than letting that label
# reach the averages computed later.
# NOTE(review): the cast also raises on missing values — assumes the Likert
# columns are fully populated; confirm against `data.info()`.
data[likert_cols] = (
    data[likert_cols]
    .apply(lambda responses: responses.map(lambda label: likert_scale.get(label, label)))
    .astype('int64')
)

In [9]:
# Preview the Likert columns to verify they now hold numeric scores.
data[likert_cols].head()

Unnamed: 0,Job Satisfaction,Work-Life Balance,Management Support,Team Collaboration,Workload Fairness,Career Development Opportunities,Workplace Inclusivity,Company Communication,Compensation Satisfaction,Job Security,Overall Engagement
0,2,5,3,3,3,2,4,5,5,4,5
1,4,1,5,3,3,2,3,3,3,4,3
2,3,1,4,2,5,4,4,1,3,5,5
3,3,4,4,1,1,5,4,1,1,4,3
4,4,5,2,3,1,2,2,2,1,3,2


## Descriptive Statistics

In [11]:
# Calculate mean, median, mode, standard deviation
def desc_stat(column, df=None):
    """Summarise one numeric column with basic descriptive statistics.

    Parameters
    ----------
    column : str
        Name of the column to summarise.
    df : pandas.DataFrame, optional
        Frame to read the column from. When omitted, the notebook-level
        ``data`` frame is used, so existing ``desc_stat(column)`` calls keep
        working unchanged.

    Returns
    -------
    dict
        Keys ``'mean'``, ``'median'``, ``'mode'`` and ``'std_dev'``.
        ``'mode'`` is the Series returned by ``Series.mode()`` and holds more
        than one value when several values tie for most frequent.
        ``'std_dev'`` is ``Series.std()`` with its default settings.
    """
    # Fall back to the global frame only when no explicit frame is supplied.
    frame = data if df is None else df
    values = frame[column]
    return {
        'mean': values.mean(),
        'median': values.median(),
        'mode': values.mode(),
        'std_dev': values.std(),
    }

In [12]:
# Summarise the two headline measures: Overall Engagement first, then Job Satisfaction.
overall_eng_stats, job_satisfication_stats = (
    desc_stat(measure) for measure in ('Overall Engagement', 'Job Satisfaction')
)

In [13]:
# Print each summary dict with a label. The variable `job_satisfication_stats`
# keeps its existing spelling because it is assigned under that name in an
# earlier cell; only the printed label is spelled "Satisfaction".
print("Overall Engagement Stats: ", overall_eng_stats)
print("Job Satisfaction Stats: ", job_satisfication_stats)

Overall Engagement Stats:  {'mean': 3.4, 'median': 3.0, 'mode': 0    3
1    5
Name: Overall Engagement, dtype: int64, 'std_dev': 1.2983506020002016}
Job Satisfication Stats:  {'mean': 3.0, 'median': 3.0, 'mode': 0    2
1    4
Name: Job Satisfaction, dtype: int64, 'std_dev': 1.3093073414159542}


The average Overall Engagement score is 3.4 out of 5, while the Job Satisfaction score is 3.0 out of 5. This suggests a 
moderate level of engagement and satisfaction across the organization.

In [15]:
# Mean Job Satisfaction and Overall Engagement score within each age bracket.
age_grp_stats = (
    data
    .groupby('Age Bracket')[['Job Satisfaction', 'Overall Engagement']]
    .mean()
)
print('Job Satisfaction and Overall Engagement by Age Bracket: ')
print(age_grp_stats)

Job Satisfaction and Overall Engagement by Age Bracket: 
             Job Satisfaction  Overall Engagement
Age Bracket                                      
18-24                3.428571            3.428571
25-34                3.000000            3.666667
35-44                2.000000            2.500000
45-54                2.666667            3.666667


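Employees in the 18-24 age bracket report the highest Job Satisfaction (3.4), while the 25-34 and 45-54 brackets report
the highest Overall Engagement (3.7 each). Those in the 35-44 bracket report the lowest scores on both measures
(Job Satisfaction: 2.0, Engagement: 2.5). This indicates a potential need to focus on mid-career professionals, although
with only 15 respondents in total each bracket mean rests on very few responses.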

In [16]:
# Mean Job Satisfaction and Overall Engagement score within each department.
depart_stats = (
    data
    .groupby('Department')[['Job Satisfaction', 'Overall Engagement']]
    .mean()
)
print('Job Satisfaction and Overall Engagement by Department: ')
print(depart_stats)

Job Satisfaction and Overall Engagement by Department: 
                     Job Satisfaction  Overall Engagement
Department                                               
Consulting                   3.000000            4.000000
Design                       5.000000            1.000000
Finance                      4.000000            5.000000
HR                           4.000000            3.500000
IT                           1.000000            2.000000
Operations                   2.000000            3.000000
Product Development          2.666667            3.666667
Sales                        2.750000            3.500000


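The department-wise analysis reveals that Finance has the highest Overall Engagement (5.0) together with high Job
Satisfaction (4.0, tied with HR), while Design reports the highest Job Satisfaction (5.0) but the lowest Overall
Engagement (1.0). IT shows low scores in both categories, with the lowest Job Satisfaction (1.0) and Overall Engagement
at 2.0. With 15 respondents spread over eight departments, these means rest on very few responses each and should be read
as indicative only.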

## Inferential Statistics

### Hypothesis testing 

In [31]:
# Collect the Job Satisfaction scores of the two departments being compared
# (Product Development and HR); each result is a Series of that department's scores.
from scipy import stats
from scipy.stats import ttest_ind
dept1 = data.loc[data['Department'] == 'Product Development', 'Job Satisfaction']
dept2 = data.loc[data['Department'] == 'HR', 'Job Satisfaction']

In [22]:
# Independent two-sample t-test on the two departments' scores.
# equal_var=True is scipy's default, written out here so the pooled-variance
# assumption behind the reported p-value is visible.
t_stat, p_val = ttest_ind(dept1, dept2, equal_var=True)

In [23]:
# Report the test statistic and its p-value.
print("t-stats: {}, p_value: {}".format(t_stat, p_val))

t-stats: -1.17108008753824, p_value: 0.32612059789896747


In [26]:
# Decision at the 0.05 significance level: reject the null hypothesis only
# when the p-value falls below the threshold.
print(
    'Reject the NULL hypothesis. There is a significant difference in Job Satisfaction between two departments'
    if p_val < 0.05
    else 'Failed to reject the NULL hypothesis. There is no significant difference in Job Satisfaction between two departments'
)

Failed to reject the NULL hypothesis. There is no significant difference in Job Satisfaction between two departments


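A t-test comparing Job Satisfaction between the Product Development and HR departments resulted in a p-value of 0.33, so
the null hypothesis of equal means cannot be rejected at the 5% level. With only five respondents across the two groups
the test has very little power, so this result should not be read as evidence that the departments are equally satisfied.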

### Correlation Analysis - Work-Life Balance vs Overall Engagement

In [33]:
from scipy.stats import pearsonr

In [34]:
# Pearson correlation between the Work-Life Balance and Overall Engagement
# scores; pearsonr returns the coefficient and its p-value.
corr_coeff, p_value = pearsonr(
    data['Work-Life Balance'],
    data['Overall Engagement'],
)

In [35]:
# Report the correlation coefficient and its p-value.
print("Pearson correlation coefficient: {}, p_value: {}".format(corr_coeff, p_value))

Pearson correlation coefficient: -0.36104082040916596, p_value: 0.18612659993280437


In [37]:
# Decision at the 0.05 significance level; the coefficient is only quoted
# when the correlation is significant.
print(
    f"There is a significant correlation (r = {corr_coeff}) between Work-Life Balance and Overall Engagement"
    if p_value < 0.05
    else "There is no significant correlation between Work-life Balance and Overall Engagement"
)

There is no significant correlation between Work-life Balance and Overall Engagement


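The correlation analysis shows a weak-to-moderate negative correlation (r = -0.36) between Work-Life Balance and Overall
Engagement, and the p-value (0.19) suggests that this correlation is not statistically significant. The idea that
departments with better work-life balance (e.g., HR) generally have higher engagement is not tested by this analysis and
runs against the sign of the observed correlation; it would need a separate department-level comparison to support it.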