In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

<h1> HASE </h1>

<p> 
Before importing the data, we manually made some adjustments to the data. For commute times we for instance had to remove some unnecessary tokens (e.g. '-10 min' should just be '-10')
Also, for entries with missing data (which happened for 'GPA', the average of all the valid entries was used for that.
</p>

<h2> Testing our hypothesis </h2>

<h3> Predicting Perceived productivity </h3>

In [2]:
df = pd.read_excel('data/survey_results.xlsx', header=0, names=['timestamp', 'consent_form_approval', 'semester', 'time_in_ch', 'in_ch_over_5_years', 'family_lives_in_ch', 'social_life', 'hobbies', 'class_in_person', 'live_online', 'recorded', 'study_place',
                                                            'commute_minutes', 'commute_feeling', 'support', 'schedule', 'task_management', 'discipline', 'time_outside_work', 'seperation', 'workload', 'exams',
                                                            'focus', 'distractions', 'switch', 'gpa', 'sleep', 'physical_health', 'productivity_change', 'well_being_1', 'well_being_2', 'well_being_3', 'well_being_4',
                                                            'well_being_5', 'well_being_6', 'well_being_7', 'well_being_8', 'well_being_9', 'well_being_10', 'well_being_11'])

print(df.columns)
df.drop(['timestamp', 'consent_form_approval', 'time_in_ch'], axis=1, inplace=True)


# Does your family live in Switzerland?
# create dummy variables
family_in_ch = pd.get_dummies(df['family_lives_in_ch'])
# delete last dummy variable to escape the dummy variable trap
family_in_ch.drop(["No"], axis=1, inplace=True)
family_in_ch.rename(columns = {"Yes": "family_in_ch"}, inplace=True)
df = pd.concat([df, family_in_ch], axis=1)
df.drop(['family_lives_in_ch'], axis=1, inplace=True)

df['intercept'] = 1


df.to_excel('data/simple_pre_processed.xlsx')

df.head()

Index(['timestamp', 'consent_form_approval', 'semester', 'time_in_ch',
       'in_ch_over_5_years', 'family_lives_in_ch', 'social_life', 'hobbies',
       'class_in_person', 'live_online', 'recorded', 'study_place',
       'commute_minutes', 'commute_feeling', 'support', 'schedule',
       'task_management', 'discipline', 'time_outside_work', 'seperation',
       'workload', 'exams', 'focus', 'distractions', 'switch', 'gpa', 'sleep',
       'physical_health', 'productivity_change', 'well_being_1',
       'well_being_2', 'well_being_3', 'well_being_4', 'well_being_5',
       'well_being_6', 'well_being_7', 'well_being_8', 'well_being_9',
       'well_being_10', 'well_being_11'],
      dtype='object')


Unnamed: 0,semester,in_ch_over_5_years,social_life,hobbies,class_in_person,live_online,recorded,study_place,commute_minutes,commute_feeling,...,well_being_4,well_being_5,well_being_6,well_being_7,well_being_8,well_being_9,well_being_10,well_being_11,family_in_ch,intercept
0,5,0,2,2,4,1,4,2,-45,2,...,1,3,1,4,1,4,2,4,0,1
1,15,1,2,4,4,3,3,3,-80,4,...,2,3,2,2,2,2,4,2,1,1
2,9,1,2,2,3,4,4,2,0,3,...,3,2,1,2,5,2,2,3,1,1
3,10,1,3,5,2,3,4,4,-240,5,...,4,2,2,3,4,2,3,3,1,1
4,3,1,5,4,2,2,2,5,0,5,...,3,5,3,5,5,3,3,2,1,1


In [3]:
Y = df['productivity_change']

# original, wich all the columns in there
#X = df[['intercept', 'semester', 'in_ch_over_5_years', 'family_in_ch', 'social_life', 'hobbies', 'class_in_person', 'live_online', 'recorded', 'study_place','commute_minutes', 'commute_feeling', 'support', 
#              'schedule', 'task_management', 'discipline', 'time_outside_work', 'seperation', 'workload', 'exams', 'focus', 'switch', 'gpa', 'sleep', 'physical_health']]

X = df[['intercept', 'schedule', 'focus', 'time_outside_work']]



results = sm.OLS(Y,X).fit()
results.summary()  

0,1,2,3
Dep. Variable:,productivity_change,R-squared:,0.729
Model:,OLS,Adj. R-squared:,0.708
Method:,Least Squares,F-statistic:,34.94
Date:,"Fri, 04 Dec 2020",Prob (F-statistic):,3.88e-11
Time:,16:41:28,Log-Likelihood:,-40.036
No. Observations:,43,AIC:,88.07
Df Residuals:,39,BIC:,95.12
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,0.4215,0.241,1.752,0.088,-0.065,0.908
schedule,0.2592,0.087,2.967,0.005,0.083,0.436
focus,0.5093,0.092,5.557,0.000,0.324,0.695
time_outside_work,0.0608,0.027,2.284,0.028,0.007,0.115

0,1,2,3
Omnibus:,1.907,Durbin-Watson:,1.803
Prob(Omnibus):,0.385,Jarque-Bera (JB):,1.73
Skew:,-0.472,Prob(JB):,0.421
Kurtosis:,2.725,Cond. No.,17.0


<h3>Predicting Well-being</h3>

In [4]:
# Predicting well_being
# https://link.springer.com/article/10.1186/1477-7525-11-66

Y = df[['well_being_1', 'well_being_2', 'well_being_3', 'well_being_4', 'well_being_5', 'well_being_6', 'well_being_7', 'well_being_8', 'well_being_9', 'well_being_10', 'well_being_11']]
Y['well_being_10'] = Y['well_being_10'] * -1

well_being_sum = Y.sum(axis=1)
Y.loc[:,'well_being_sum'] = well_being_sum

Y_well_being = Y['well_being_sum']
Y_well_being.head()

#X = df[['intercept', 'semester', 'in_ch_over_5_years', 'family_in_ch', 'social_life', 'hobbies', 'class_in_person', 'live_online', 'recorded', 'study_place','commute_minutes', 'commute_feeling', 'support', 
#              'schedule', 'task_management', 'discipline', 'time_outside_work', 'seperation', 'workload', 'exams', 'focus', 'switch', 'gpa', 'sleep', 'physical_health']]

X = df[['intercept', 'support', 'study_place', 'sleep']]


results = sm.OLS(Y_well_being,X).fit()
results.summary()  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Y['well_being_10'] = Y['well_being_10'] * -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


0,1,2,3
Dep. Variable:,well_being_sum,R-squared:,0.662
Model:,OLS,Adj. R-squared:,0.636
Method:,Least Squares,F-statistic:,25.47
Date:,"Fri, 04 Dec 2020",Prob (F-statistic):,2.71e-09
Time:,16:41:28,Log-Likelihood:,-136.02
No. Observations:,43,AIC:,280.0
Df Residuals:,39,BIC:,287.1
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
intercept,-0.1551,4.026,-0.039,0.969,-8.298,7.988
support,2.7922,1.056,2.644,0.012,0.656,4.929
study_place,5.2870,0.935,5.657,0.000,3.397,7.177
sleep,1.6683,0.827,2.018,0.050,-0.004,3.340

0,1,2,3
Omnibus:,2.242,Durbin-Watson:,1.848
Prob(Omnibus):,0.326,Jarque-Bera (JB):,1.405
Skew:,0.155,Prob(JB):,0.495
Kurtosis:,2.17,Cond. No.,25.8


<h3>Predicting perceived Productivity from Well being</h3>

In [5]:
# Predicting well_being
# https://link.springer.com/article/10.1186/1477-7525-11-66

Y = df['productivity_change']
X = df[['well_being_1', 'well_being_2', 'well_being_3', 'well_being_4', 'well_being_5', 'well_being_6', 'well_being_7', 'well_being_8', 'well_being_9', 'well_being_10', 'well_being_11']]
X['well_being_10'] = X['well_being_10'] * -1

well_being_sum = X.sum(axis=1)
X.loc[:,'well_being_sum'] = well_being_sum

X.head()

X_well_being = X['well_being_sum']

results = sm.OLS(Y, X_well_being).fit()
results.summary()  

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['well_being_10'] = X['well_being_10'] * -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


0,1,2,3
Dep. Variable:,productivity_change,R-squared (uncentered):,0.861
Model:,OLS,Adj. R-squared (uncentered):,0.858
Method:,Least Squares,F-statistic:,260.1
Date:,"Fri, 04 Dec 2020",Prob (F-statistic):,1.33e-19
Time:,16:41:28,Log-Likelihood:,-64.401
No. Observations:,43,AIC:,130.8
Df Residuals:,42,BIC:,132.6
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
well_being_sum,0.0830,0.005,16.126,0.000,0.073,0.093

0,1,2,3
Omnibus:,0.899,Durbin-Watson:,1.813
Prob(Omnibus):,0.638,Jarque-Bera (JB):,0.966
Skew:,-0.283,Prob(JB):,0.617
Kurtosis:,2.531,Cond. No.,1.0
