# Project 1: Analysis of Various Metrics and How They Relate to Stress and Work-Life Balance

## Summary Statistics for Poor, Good, and Excellent Work-Life Balance Scores

In [3]:
# Dependencies
import pandas as pd
import matplotlib.pyplot as plt

## Data Cleaning 

In [4]:
# Load the wellbeing/lifestyle dataset from disk and preview its first rows
csv_path = 'data/Wellbeing_and_lifestyle_data_Kaggle.csv'
wellbeing_df = pd.read_csv(csv_path)
wellbeing_df.head()

Unnamed: 0,Timestamp,FRUITS_VEGGIES,DAILY_STRESS,PLACES_VISITED,CORE_CIRCLE,SUPPORTING_OTHERS,SOCIAL_NETWORK,ACHIEVEMENT,DONATION,BMI_RANGE,...,SLEEP_HOURS,LOST_VACATION,DAILY_SHOUTING,SUFFICIENT_INCOME,PERSONAL_AWARDS,TIME_FOR_PASSION,WEEKLY_MEDITATION,AGE,GENDER,WORK_LIFE_BALANCE_SCORE
0,7/7/15,3,2,2,5,0,5,2,0,1,...,7,5,5,1,4,0,5,36 to 50,Female,609.5
1,7/7/15,2,3,4,3,8,10,5,2,2,...,8,2,2,2,3,2,6,36 to 50,Female,655.6
2,7/7/15,2,3,3,4,4,10,3,2,2,...,8,10,2,2,4,8,3,36 to 50,Female,631.6
3,7/7/15,3,3,10,3,10,7,2,5,2,...,5,7,5,1,5,2,0,51 or more,Female,622.7
4,7/7/15,5,1,3,3,10,4,2,4,2,...,7,0,0,2,8,1,5,51 or more,Female,663.9


In [5]:
# Convert the DAILY_STRESS column to an integer dtype.
# The column is read as strings because at least one entry is not a number,
# so that entry has to be removed before the cast can succeed.

# List the distinct raw values so any non-numeric entry is visible in the output
stress_scores = wellbeing_df['DAILY_STRESS'].unique()
stress_scores_list = list(stress_scores)
print(stress_scores_list)

# The value in the list above that is not a valid stress score
invalid_response = '1/1/00'

# Boolean mask marking the row(s) holding the invalid response
drop_response = (wellbeing_df['DAILY_STRESS'] == invalid_response)

# Keep every other row. The explicit .copy() makes the result an independent
# frame, so the column assignment below does not raise SettingWithCopyWarning.
wellbeing_df = wellbeing_df.loc[~drop_response].copy()

# Cast the remaining values to int and confirm the dtype via info()
wellbeing_df['DAILY_STRESS'] = wellbeing_df['DAILY_STRESS'].astype(int)
wellbeing_df.info()

['2', '3', '1', '4', '5', '0', '1/1/00']
<class 'pandas.core.frame.DataFrame'>
Int64Index: 15971 entries, 0 to 15971
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Timestamp                15971 non-null  object 
 1   FRUITS_VEGGIES           15971 non-null  int64  
 2   DAILY_STRESS             15971 non-null  int32  
 3   PLACES_VISITED           15971 non-null  int64  
 4   CORE_CIRCLE              15971 non-null  int64  
 5   SUPPORTING_OTHERS        15971 non-null  int64  
 6   SOCIAL_NETWORK           15971 non-null  int64  
 7   ACHIEVEMENT              15971 non-null  int64  
 8   DONATION                 15971 non-null  int64  
 9   BMI_RANGE                15971 non-null  int64  
 10  TODO_COMPLETED           15971 non-null  int64  
 11  FLOW                     15971 non-null  int64  
 12  DAILY_STEPS              15971 non-null  int64  
 13  LIVE_VISION              15971 non-

## Summary Statistics Tables

In [6]:
# Show every row/column when a frame or series is displayed or printed
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Split respondents into three bands by WORK_LIFE_BALANCE_SCORE:
#   poor:      score < 550
#   good:      550 <= score < 680
#   excellent: score >= 680
# The bands are contiguous: a score of exactly 680 lands in "excellent"
# (previously `> 680` left that value out of every band).
poor_upper_bound = 550
excellent_lower_bound = 680

balance_score = wellbeing_df['WORK_LIFE_BALANCE_SCORE']

poor_score = wellbeing_df[balance_score < poor_upper_bound]

good_score = wellbeing_df[(balance_score >= poor_upper_bound) & (balance_score < excellent_lower_bound)]

excellent_score = wellbeing_df[balance_score >= excellent_lower_bound]

poor_score.head()

Unnamed: 0,Timestamp,FRUITS_VEGGIES,DAILY_STRESS,PLACES_VISITED,CORE_CIRCLE,SUPPORTING_OTHERS,SOCIAL_NETWORK,ACHIEVEMENT,DONATION,BMI_RANGE,TODO_COMPLETED,FLOW,DAILY_STEPS,LIVE_VISION,SLEEP_HOURS,LOST_VACATION,DAILY_SHOUTING,SUFFICIENT_INCOME,PERSONAL_AWARDS,TIME_FOR_PASSION,WEEKLY_MEDITATION,AGE,GENDER,WORK_LIFE_BALANCE_SCORE
72,7/19/15,0,4,0,2,2,3,1,0,2,1,0,4,0,4,0,10,1,1,0,0,36 to 50,Female,528.8
79,7/20/15,0,5,0,2,0,0,3,0,2,0,1,1,10,6,10,10,1,2,1,10,21 to 35,Male,538.8
82,7/20/15,0,5,0,2,0,0,3,0,2,0,1,1,10,6,10,10,1,2,1,10,21 to 35,Male,538.8
847,9/19/15,1,4,1,1,3,0,0,0,1,6,0,3,0,8,10,10,1,0,0,3,36 to 50,Female,544.7
872,9/22/15,1,4,0,3,2,1,0,0,2,2,0,1,0,4,0,9,1,2,0,3,21 to 35,Female,533.3


In [7]:
# Per-gender summary statistics for the poor-score group, saved to csv and printed.

# Columns to summarise, in the order they should appear in the result
poor_summary_columns = [
    'FRUITS_VEGGIES', 'DAILY_STRESS', 'PLACES_VISITED', 'CORE_CIRCLE',
    'SUPPORTING_OTHERS', 'SOCIAL_NETWORK', 'ACHIEVEMENT', 'DONATION',
    'BMI_RANGE', 'TODO_COMPLETED', 'FLOW', 'DAILY_STEPS', 'LIVE_VISION',
    'SLEEP_HOURS', 'LOST_VACATION', 'DAILY_SHOUTING', 'SUFFICIENT_INCOME',
    'PERSONAL_AWARDS', 'TIME_FOR_PASSION', 'WEEKLY_MEDITATION',
]

# Every column gets the same four aggregations
poor_summary_spec = {column: ['mean', 'median', 'max', 'min'] for column in poor_summary_columns}

# Aggregate by gender, then reshape into one long result indexed by
# (column, statistic, GENDER)
poor_by_gender = poor_score.groupby('GENDER').agg(poor_summary_spec)
poor_score_summary = poor_by_gender.unstack().transpose()

poor_score_summary.to_csv('data/poor_score_summary.csv')
print(poor_score_summary)

                           GENDER
FRUITS_VEGGIES     mean    Female     1.000000
                           Male       0.566667
                   median  Female     1.000000
                           Male       0.000000
                   max     Female     5.000000
                           Male       5.000000
                   min     Female     0.000000
                           Male       0.000000
DAILY_STRESS       mean    Female     4.477273
                           Male       4.366667
                   median  Female     5.000000
                           Male       5.000000
                   max     Female     5.000000
                           Male       5.000000
                   min     Female     0.000000
                           Male       0.000000
PLACES_VISITED     mean    Female     0.295455
                           Male       0.633333
                   median  Female     0.000000
                           Male       0.000000
                   max    

In [8]:
# Per-gender summary statistics for the good-score group, saved to csv and printed.

# Columns to summarise, in the order they should appear in the result
good_summary_columns = [
    'FRUITS_VEGGIES', 'DAILY_STRESS', 'PLACES_VISITED', 'CORE_CIRCLE',
    'SUPPORTING_OTHERS', 'SOCIAL_NETWORK', 'ACHIEVEMENT', 'DONATION',
    'BMI_RANGE', 'TODO_COMPLETED', 'FLOW', 'DAILY_STEPS', 'LIVE_VISION',
    'SLEEP_HOURS', 'LOST_VACATION', 'DAILY_SHOUTING', 'SUFFICIENT_INCOME',
    'PERSONAL_AWARDS', 'TIME_FOR_PASSION', 'WEEKLY_MEDITATION',
]

# Every column gets the same four aggregations
good_summary_spec = {column: ['mean', 'median', 'max', 'min'] for column in good_summary_columns}

# Aggregate by gender, then reshape into one long result indexed by
# (column, statistic, GENDER)
good_by_gender = good_score.groupby('GENDER').agg(good_summary_spec)
good_score_summary = good_by_gender.unstack().transpose()

good_score_summary.to_csv('data/good_score_summary.csv')
print(good_score_summary)

                           GENDER
FRUITS_VEGGIES     mean    Female     2.637961
                           Male       2.339657
                   median  Female     3.000000
                           Male       2.000000
                   max     Female     5.000000
                           Male       5.000000
                   min     Female     0.000000
                           Male       0.000000
DAILY_STRESS       mean    Female     3.243713
                           Male       2.851910
                   median  Female     3.000000
                           Male       3.000000
                   max     Female     5.000000
                           Male       5.000000
                   min     Female     0.000000
                           Male       0.000000
PLACES_VISITED     mean    Female     4.203066
                           Male       4.019236
                   median  Female     4.000000
                           Male       3.000000
                   max    

In [9]:
# Per-gender summary statistics for the excellent-score group, saved to csv and printed.

# Columns to summarise, in the order they should appear in the result
excellent_summary_columns = [
    'FRUITS_VEGGIES', 'DAILY_STRESS', 'PLACES_VISITED', 'CORE_CIRCLE',
    'SUPPORTING_OTHERS', 'SOCIAL_NETWORK', 'ACHIEVEMENT', 'DONATION',
    'BMI_RANGE', 'TODO_COMPLETED', 'FLOW', 'DAILY_STEPS', 'LIVE_VISION',
    'SLEEP_HOURS', 'LOST_VACATION', 'DAILY_SHOUTING', 'SUFFICIENT_INCOME',
    'PERSONAL_AWARDS', 'TIME_FOR_PASSION', 'WEEKLY_MEDITATION',
]

# Every column gets the same four aggregations
excellent_summary_spec = {column: ['mean', 'median', 'max', 'min'] for column in excellent_summary_columns}

# Aggregate by gender, then reshape into one long result indexed by
# (column, statistic, GENDER)
excellent_by_gender = excellent_score.groupby('GENDER').agg(excellent_summary_spec)
excellent_score_summary = excellent_by_gender.unstack().transpose()

excellent_score_summary.to_csv('data/excellent_score_summary.csv')
print(excellent_score_summary)

                           GENDER
FRUITS_VEGGIES     mean    Female     3.662494
                           Male       3.385055
                   median  Female     4.000000
                           Male       3.000000
                   max     Female     5.000000
                           Male       5.000000
                   min     Female     0.000000
                           Male       0.000000
DAILY_STRESS       mean    Female     2.438908
                           Male       2.101099
                   median  Female     2.000000
                           Male       2.000000
                   max     Female     5.000000
                           Male       5.000000
                   min     Female     0.000000
                           Male       0.000000
PLACES_VISITED     mean    Female     7.053580
                           Male       6.843077
                   median  Female     8.000000
                           Male       7.000000
                   max    