In [1]:
import pandas as pd

In [2]:
# Load the stress survey data; the file is read with the Shift_JIS codec.
data = pd.read_csv(
    'data/第04章/ストレス.csv',
    encoding='shift_jis',
)

In [3]:
# Overwrite the column labels with ASCII identifiers that can be used in the
# model formulas. The assignment is positional, so the list must follow the
# column order of the CSV.
data.columns = [
    'stress',
    'support',
    'burn_out_1',
    'burn_out_2',
]

In [4]:
# Print the structural summary (dtypes, non-null counts), then show the
# first five rows as the cell's displayed value.
data.info()
data.head(n=5)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 300 entries, 0 to 299
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   stress      300 non-null    float64
 1   support     300 non-null    float64
 2   burn_out_1  300 non-null    float64
 3   burn_out_2  300 non-null    float64
dtypes: float64(4)
memory usage: 9.5 KB


Unnamed: 0,stress,support,burn_out_1,burn_out_2
0,2.9,3.3,2.2,2.3
1,3.1,3.6,2.7,3.0
2,2.3,3.5,3.1,3.3
3,3.7,3.2,3.4,2.7
4,3.7,3.3,3.7,3.5


In [5]:
import statsmodels.formula.api as smf

In [6]:
# Baseline model: regress burn_out_2 on burn_out_1 alone.
result = smf.ols(
    formula='burn_out_2 ~ burn_out_1',
    data=data,
).fit()
result.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.402
Dependent Variable:,burn_out_2,AIC:,217.317
Date:,2022-07-09 11:16,BIC:,224.7245
No. Observations:,300,Log-Likelihood:,-106.66
Df Model:,1,F-statistic:,202.0
Df Residuals:,298,Prob (F-statistic):,2.3800000000000002e-35
R-squared:,0.404,Scale:,0.12002

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,1.0762,0.1184,9.0871,0.0000,0.8431,1.3092
burn_out_1,0.6125,0.0431,14.2111,0.0000,0.5277,0.6973

0,1,2,3
Omnibus:,24.543,Durbin-Watson:,1.979
Prob(Omnibus):,0.0,Jarque-Bera (JB):,68.624
Skew:,-0.307,Prob(JB):,0.0
Kurtosis:,5.261,Condition No.:,18.0


In [7]:
# Extended model: the baseline predictor plus stress and support.
result2 = smf.ols(
    formula='burn_out_2 ~ burn_out_1 + stress + support',
    data=data,
).fit()
result2.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.411
Dependent Variable:,burn_out_2,AIC:,214.7723
Date:,2022-07-09 11:16,BIC:,229.5874
No. Observations:,300,Log-Likelihood:,-103.39
Df Model:,3,F-statistic:,70.52
Df Residuals:,296,Prob (F-statistic):,1.95e-34
R-squared:,0.417,Scale:,0.11822

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,1.1923,0.2091,5.7035,0.0000,0.7809,1.6038
burn_out_1,0.5801,0.0447,12.9894,0.0000,0.4922,0.6679
stress,0.0742,0.0345,2.1490,0.0324,0.0062,0.1421
support,-0.0809,0.0477,-1.6962,0.0909,-0.1748,0.0130

0,1,2,3
Omnibus:,22.823,Durbin-Watson:,1.953
Prob(Omnibus):,0.0,Jarque-Bera (JB):,69.813
Skew:,-0.219,Prob(JB):,0.0
Kurtosis:,5.322,Condition No.:,57.0


In [8]:
# Inputs for the F test on the increment in R² between the two nested models.
r2_1 = result.rsquared    # baseline model (burn_out_1 only)
r2_2 = result2.rsquared   # extended model (adds stress and support)
# Read the predictor counts and sample size from the fitted results instead
# of hard-coding them, so they cannot drift out of sync with the formulas.
# df_model excludes the intercept; nobs is the number of rows actually used
# in the fit.
j1 = int(result.df_model)
j2 = int(result2.df_model)
n = int(result2.nobs)

In [9]:
from scipy.stats import f as sp_f

In [10]:
# F statistic for the R² increment between the nested models:
#   F = ((R2_2 - R2_1) / (j2 - j1)) / ((1 - R2_2) / (n - j2 - 1))
dfn = j2 - j1        # numerator df: number of added predictors
dfd = n - j2 - 1     # denominator df: residual df of the larger model
f = (r2_2 - r2_1) / dfn / (1 - r2_2) * dfd

# Upper-tail p-value. The survival function is used instead of 1 - cdf()
# because the subtraction loses precision when the p-value is small.
sp_f.sf(f, dfn, dfd)

0.03960850449404807

In [11]:
# AIC of the baseline model `result` (burn_out_2 ~ burn_out_1); lower is better.
result.aic

217.3169588000103

In [12]:
# AIC of the extended model `result2` (adds stress and support); lower is better.
result2.aic

214.77227348143902

In [13]:
# BIC of the baseline model `result` (burn_out_2 ~ burn_out_1); lower is better.
result.bic

224.7245237493227

In [14]:
# BIC of the extended model `result2` (adds stress and support); lower is better.
result2.bic

229.58740338006382

In [15]:
# When adding an interaction term, center the variables first to avoid
# multicollinearity.
# Work on a copy so the original `data` frame is left untouched.
data2 = data.copy()

In [16]:
# Product term built from the raw (uncentered) predictors; the correlation
# table shows how collinear it is with its two components.
data2['inter'] = data2['stress'].mul(data2['support'])
data2.loc[:, ['stress', 'support', 'inter']].corr()

Unnamed: 0,stress,support,inter
stress,1.0,0.118076,0.848102
support,0.118076,1.0,0.609125
inter,0.848102,0.609125,1.0


In [17]:
def centerize(x):
    """Return ``x`` shifted so that its mean is zero (mean-centering).

    ``x`` may be any object that provides ``.mean()`` and supports
    subtraction, e.g. a pandas Series handed in by ``DataFrame.apply``.
    """
    center = x.mean()
    return x - center

# Center both predictors first, then form the product term from the centered
# values and inspect its correlation with each component.
data3 = data.loc[:, ['stress', 'support']].apply(centerize)
data3['inter'] = data3['stress'].mul(data3['support'])

data3.corr()

Unnamed: 0,stress,support,inter
stress,1.0,0.118076,-0.099772
support,0.118076,1.0,-0.235947
inter,-0.099772,-0.235947,1.0


In [18]:
# Standardizing (z-scoring) the predictors instead of centering works as well.

from scipy.stats import zscore

data4 = data.loc[:, ['stress', 'support']].apply(zscore)
data4['inter'] = data4['stress'].mul(data4['support'])

data4.corr()

Unnamed: 0,stress,support,inter
stress,1.0,0.118076,-0.099772
support,0.118076,1.0,-0.235947
inter,-0.099772,-0.235947,1.0


In [19]:
# Rebuild `data2` from the original frame (replacing the earlier working copy)
# and mean-center burn_out_1, stress and support. The dependent variable
# burn_out_2 stays on its original scale.
centered_cols = ['burn_out_1', 'stress', 'support']
data2 = data.copy()
data2[centered_cols] = data2[centered_cols].apply(lambda col: col - col.mean())

In [20]:
# Moderation model on the centered data: `stress * support` expands to both
# main effects plus the stress:support interaction term.
# NOTE(review): this rebinds `result`, which cells 8, 11 and 13 read as the
# baseline model -- re-running those cells after this one picks up this fit.
result = smf.ols(
    formula='burn_out_2 ~ burn_out_1 + stress * support',
    data=data2,
).fit()
result.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.422
Dependent Variable:,burn_out_2,AIC:,210.2728
Date:,2022-07-09 11:16,BIC:,228.7917
No. Observations:,300,Log-Likelihood:,-100.14
Df Model:,4,F-statistic:,55.48
Df Residuals:,295,Prob (F-statistic):,7.55e-35
R-squared:,0.429,Scale:,0.11608

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,2.7397,0.0198,138.6733,0.0000,2.7008,2.7786
burn_out_1,0.5715,0.0444,12.8784,0.0000,0.4842,0.6589
stress,0.0696,0.0343,2.0325,0.0430,0.0022,0.1370
support,-0.1101,0.0486,-2.2630,0.0244,-0.2058,-0.0143
stress:support,-0.1556,0.0612,-2.5418,0.0115,-0.2760,-0.0351

0,1,2,3
Omnibus:,23.121,Durbin-Watson:,1.955
Prob(Omnibus):,0.0,Jarque-Bera (JB):,67.951
Skew:,-0.249,Prob(JB):,0.0
Kurtosis:,5.278,Condition No.:,3.0


In [21]:
# Standardized regression coefficients: z-score every column (the dependent
# variable included) and refit the same interaction model.
# NOTE(review): `data2` and `result` are rebound here once more.
data2 = data.apply(zscore)

result = smf.ols(
    formula='burn_out_2 ~ burn_out_1 + stress * support',
    data=data2,
).fit()
result.summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.422
Dependent Variable:,burn_out_2,AIC:,693.0917
Date:,2022-07-09 11:16,BIC:,711.6106
No. Observations:,300,Log-Likelihood:,-341.55
Df Model:,4,F-statistic:,55.48
Df Residuals:,295,Prob (F-statistic):,7.55e-35
R-squared:,0.429,Scale:,0.58037

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,0.0105,0.0442,0.2368,0.8130,-0.0765,0.0974
burn_out_1,0.5931,0.0460,12.8784,0.0000,0.5024,0.6837
stress,0.0934,0.0460,2.0325,0.0430,0.0030,0.1838
support,-0.1045,0.0462,-2.2630,0.0244,-0.1953,-0.0136
stress:support,-0.0886,0.0349,-2.5418,0.0115,-0.1572,-0.0200

0,1,2,3
Omnibus:,23.121,Durbin-Watson:,1.955
Prob(Omnibus):,0.0,Jarque-Bera (JB):,67.951
Skew:,-0.249,Prob(JB):,0.0
Kurtosis:,5.278,Condition No.:,2.0


In [22]:
# Simple slope analysis -- high support.
# After mean-centering, subtracting one SD from `support` moves its zero point
# to (mean + 1 SD), so the `stress` coefficient in this fit is the slope of
# stress at high support.
to_center = ['burn_out_1', 'stress', 'support']
data_h = data.copy()
data_h[to_center] = data_h[to_center].apply(lambda col: col - col.mean())
support_sd = data_h['support'].std()
data_h['support_h'] = data_h['support'] - support_sd
smf.ols(
    formula='burn_out_2 ~ burn_out_1 + stress * support_h',
    data=data_h,
).fit().summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.422
Dependent Variable:,burn_out_2,AIC:,210.2728
Date:,2022-07-09 11:16,BIC:,228.7917
No. Observations:,300,Log-Likelihood:,-100.14
Df Model:,4,F-statistic:,55.48
Df Residuals:,295,Prob (F-statistic):,7.55e-35
R-squared:,0.429,Scale:,0.11608

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,2.6929,0.0283,95.2127,0.0000,2.6372,2.7485
burn_out_1,0.5715,0.0444,12.8784,0.0000,0.4842,0.6589
stress,0.0035,0.0441,0.0788,0.9373,-0.0833,0.0902
support_h,-0.1101,0.0486,-2.2630,0.0244,-0.2058,-0.0143
stress:support_h,-0.1556,0.0612,-2.5418,0.0115,-0.2760,-0.0351

0,1,2,3
Omnibus:,23.121,Durbin-Watson:,1.955
Prob(Omnibus):,0.0,Jarque-Bera (JB):,67.951
Skew:,-0.249,Prob(JB):,0.0
Kurtosis:,5.278,Condition No.:,4.0


In [23]:
# Simple slope analysis -- low support.
# After mean-centering, adding one SD to `support` moves its zero point to
# (mean - 1 SD), so the `stress` coefficient in this fit is the slope of
# stress at low support.
to_center = ['burn_out_1', 'stress', 'support']
data_l = data.copy()
data_l[to_center] = data_l[to_center].apply(lambda col: col - col.mean())
support_sd = data_l['support'].std()
data_l['support_l'] = data_l['support'] + support_sd
smf.ols(
    formula='burn_out_2 ~ burn_out_1 + stress * support_l',
    data=data_l,
).fit().summary2()

0,1,2,3
Model:,OLS,Adj. R-squared:,0.422
Dependent Variable:,burn_out_2,AIC:,210.2728
Date:,2022-07-09 11:16,BIC:,228.7917
No. Observations:,300,Log-Likelihood:,-100.14
Df Model:,4,F-statistic:,55.48
Df Residuals:,295,Prob (F-statistic):,7.55e-35
R-squared:,0.429,Scale:,0.11608

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,2.7865,0.0289,96.3826,0.0000,2.7296,2.8434
burn_out_1,0.5715,0.0444,12.8784,0.0000,0.4842,0.6589
stress,0.1358,0.0419,3.2389,0.0013,0.0533,0.2183
support_l,-0.1101,0.0486,-2.2630,0.0244,-0.2058,-0.0143
stress:support_l,-0.1556,0.0612,-2.5418,0.0115,-0.2760,-0.0351

0,1,2,3
Omnibus:,23.121,Durbin-Watson:,1.955
Prob(Omnibus):,0.0,Jarque-Bera (JB):,67.951
Skew:,-0.249,Prob(JB):,0.0
Kurtosis:,5.278,Condition No.:,4.0


In [24]:
# TODO: variable selection (especially stepAIC)