In [14]:
# Does familial support help lower stress? 
# Is there a correlation between working remotely and work stress?


import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

file_path = "corporate_stress_dataset.csv"  # Adjust if needed
df = pd.read_csv(file_path)
df.info()
df.columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50000 entries, 0 to 49999
Data columns (total 30 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   ID                                50000 non-null  int64  
 1   Age                               50000 non-null  int64  
 2   Gender                            50000 non-null  object 
 3   Marital_Status                    50000 non-null  object 
 4   Job_Role                          50000 non-null  object 
 5   Experience_Years                  50000 non-null  int64  
 6   Monthly_Salary_INR                50000 non-null  float64
 7   Working_Hours_per_Week            50000 non-null  int64  
 8   Commute_Time_Hours                50000 non-null  float64
 9   Remote_Work                       50000 non-null  bool   
 10  Stress_Level                      50000 non-null  int64  
 11  Health_Issues                     37459 non-null  object 
 12  Comp

Index(['ID', 'Age', 'Gender', 'Marital_Status', 'Job_Role', 'Experience_Years',
       'Monthly_Salary_INR', 'Working_Hours_per_Week', 'Commute_Time_Hours',
       'Remote_Work', 'Stress_Level', 'Health_Issues', 'Company_Size',
       'Department', 'Sleep_Hours', 'Physical_Activity_Hours_per_Week',
       'Mental_Health_Leave_Taken', 'Manager_Support_Level',
       'Work_Pressure_Level', 'Annual_Leaves_Taken', 'Work_Life_Balance',
       'Family_Support_Level', 'Job_Satisfaction', 'Performance_Rating',
       'Team_Size', 'Training_Opportunities', 'Gender_Bias_Experienced',
       'Discrimination_Experienced', 'Burnout_Symptoms', 'Location'],
      dtype='object')

In [15]:
# Check for missing values in relevant columns
missing_values = df[['Family_Support_Level', 'Stress_Level']].isnull().sum()
print("Missing values:\n", missing_values)

# Compute correlation (Spearman for ordinal data)
correlation = df[['Family_Support_Level', 'Stress_Level']].corr(method='spearman')
print("\nSpearman Correlation between Family Support Level and Stress Level:")
print(correlation)


Missing values:
 Family_Support_Level    0
Stress_Level            0
dtype: int64

Spearman Correlation between Family Support Level and Stress Level:
                      Family_Support_Level  Stress_Level
Family_Support_Level              1.000000      0.000062
Stress_Level                      0.000062      1.000000


In [16]:
# Check unique values in Remote_Work
print("Unique values in Remote_Work:", df['Remote_Work'].unique())

# Convert Remote_Work to numerical (if needed)
df['Remote_Work'] = df['Remote_Work'].map({'Yes': 1, 'No': 0})

# Extract stress levels for remote and non-remote workers
remote_stress = df[df['Remote_Work'] == 1]['Stress_Level']
non_remote_stress = df[df['Remote_Work'] == 0]['Stress_Level']

# Perform an independent t-test
t_stat, p_value = stats.ttest_ind(remote_stress, non_remote_stress, nan_policy='omit')

print("\nT-test results:")
print(f"T-statistic: {t_stat}")
print(f"P-value: {p_value}")

Unique values in Remote_Work: [ True False]

T-test results:
T-statistic: nan
P-value: nan
