In [1]:
# 데이터 분석
import pandas as pd
import numpy as np

# 통계
import pingouin as pg
from scipy.stats import chisquare

# 시각화
import seaborn as sns
import matplotlib.pyplot as plt

# 컬럼 출력 생략 해제
pd.set_option('display.max_columns', None)

# 시각화 한글 표시
import koreanize_matplotlib

# 그래프에 retina display 적용
%config InlineBackend.figure_format = 'retina'

# 머신러닝
from sklearn import preprocessing

In [2]:
# 데이터 로드
df_raw = pd.read_csv("https://raw.githubusercontent.com/JounKK/AIS8_Final_HRA/main/dataset/WA_Fn-UseC_-HR-Employee-Attrition.csv", index_col="EmployeeNumber")
df_raw.shape

(1470, 34)

In [3]:
# 불필요한 컬럼 제거
df_raw = df_raw.drop(columns=['Over18', 'EmployeeCount', 'StandardHours','DailyRate', 'HourlyRate', 'MonthlyRate'])
df_raw.shape

(1470, 28)

**언더샘플링**

In [15]:
# 언더샘플링
df_raw['Attrition'].value_counts()

No     1233
Yes     237
Name: Attrition, dtype: int64

In [17]:
df_yes = df_raw[df_raw['Attrition']=='Yes']
df_no = df_raw[df_raw['Attrition']=='No'].sample(237)

df_yes.shape, df_no.shape

((237, 28), (237, 28))

In [18]:
df_under = pd.concat([df_yes, df_no])

In [19]:
df_under['Attrition'].value_counts()

Yes    237
No     237
Name: Attrition, dtype: int64

**인코딩**

In [51]:
# Object 컬럼들
categorical_column = ['Attrition', 'BusinessTravel', 'Department', 'EducationField',
                      'Gender', 'JobRole', 'MaritalStatus', 'OverTime']

# 깊은 복사 해주기
df_encoded = df_raw.copy(deep=True)

# Scikit-learn 사용해서 라벨 인코딩하기
lab_enc = preprocessing.LabelEncoder()
for col in categorical_column:
        df_encoded[col] = lab_enc.fit_transform(df_raw[col])
        le_name_mapping = dict(zip(lab_enc.classes_, lab_enc.transform(lab_enc.classes_)))
        print('Feature', col)
        print('mapping', le_name_mapping)

df_encoded.head()

Feature Attrition
mapping {'No': 0, 'Yes': 1}
Feature BusinessTravel
mapping {'Non-Travel': 0, 'Travel_Frequently': 1, 'Travel_Rarely': 2}
Feature Department
mapping {'Human Resources': 0, 'Research & Development': 1, 'Sales': 2}
Feature EducationField
mapping {'Human Resources': 0, 'Life Sciences': 1, 'Marketing': 2, 'Medical': 3, 'Other': 4, 'Technical Degree': 5}
Feature Gender
mapping {'Female': 0, 'Male': 1}
Feature JobRole
mapping {'Healthcare Representative': 0, 'Human Resources': 1, 'Laboratory Technician': 2, 'Manager': 3, 'Manufacturing Director': 4, 'Research Director': 5, 'Research Scientist': 6, 'Sales Executive': 7, 'Sales Representative': 8}
Feature MaritalStatus
mapping {'Divorced': 0, 'Married': 1, 'Single': 2}
Feature OverTime
mapping {'No': 0, 'Yes': 1}


Unnamed: 0_level_0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,JobInvolvement,JobLevel,JobRole,JobSatisfaction,MaritalStatus,MonthlyIncome,NumCompaniesWorked,OverTime,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
EmployeeNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
1,41,1,2,2,1,2,1,2,0,3,2,7,4,2,5993,8,1,11,3,1,0,8,0,1,6,4,0,5
2,49,0,1,1,8,1,1,3,1,2,2,6,2,1,5130,1,0,23,4,4,1,10,3,3,10,7,1,7
4,37,1,2,1,2,2,4,4,1,2,1,2,3,2,2090,6,1,15,3,2,0,7,3,3,0,0,0,0
5,33,0,1,1,3,4,1,4,0,3,1,6,3,1,2909,1,1,11,3,3,0,8,3,3,8,7,3,0
7,27,0,2,1,2,1,3,1,1,3,1,2,2,1,3468,9,0,12,3,4,1,6,3,3,2,2,2,2


**오버샘플링**

In [26]:
# df_attr = df_raw['Attrition'].to_frame()
# df_attr.head(1)

Unnamed: 0_level_0,Attrition
EmployeeNumber,Unnamed: 1_level_1
1,Yes


In [27]:
# df_num = df_raw[number_cols]
# df_num.head(1)

Unnamed: 0_level_0,Age,DistanceFromHome,MonthlyIncome,NumCompaniesWorked,PercentSalaryHike,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
EmployeeNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,41,1,5993,8,11,8,0,6,4,0,5


In [37]:
# df_over = pd.concat([df_attr, df_num], axis=1)
# df_over

Unnamed: 0_level_0,Attrition,Age,DistanceFromHome,MonthlyIncome,NumCompaniesWorked,PercentSalaryHike,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager
EmployeeNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,Yes,41,1,5993,8,11,8,0,6,4,0,5
2,No,49,8,5130,1,23,10,3,10,7,1,7
4,Yes,37,2,2090,6,15,7,3,0,0,0,0
5,No,33,3,2909,1,11,8,3,8,7,3,0
7,No,27,2,3468,9,12,6,3,2,2,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...
2061,No,36,23,2571,4,17,17,3,5,2,0,3
2062,No,39,6,9991,4,15,9,5,7,7,1,7
2064,No,27,4,6142,1,20,6,0,6,2,0,3
2065,No,49,2,5390,2,14,17,3,9,6,0,8


In [53]:
df_over = df_encoded.copy()

In [54]:
label_name = "Attrition"

X = df_over.drop(label_name, axis=1)
y = df_over[label_name]
X.shape, y.shape

((1470, 27), (1470,))

In [55]:
from imblearn.over_sampling import SMOTE

sm = SMOTE(random_state=42)
X_over, y_over = sm.fit_resample(X, y)

In [56]:
X_over.shape, y_over.shape

((2466, 27), (2466,))

In [57]:
# y의 value_counts
y.value_counts(1)

0    0.838776
1    0.161224
Name: Attrition, dtype: float64

In [58]:
# y_resample의 value_counts
y_over.value_counts(1)

1    0.5
0    0.5
Name: Attrition, dtype: float64

In [61]:
df_over = pd.concat([X_over, y_over], axis=1)
df_over

Unnamed: 0,Age,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EnvironmentSatisfaction,Gender,JobInvolvement,JobLevel,JobRole,JobSatisfaction,MaritalStatus,MonthlyIncome,NumCompaniesWorked,OverTime,PercentSalaryHike,PerformanceRating,RelationshipSatisfaction,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,WorkLifeBalance,YearsAtCompany,YearsInCurrentRole,YearsSinceLastPromotion,YearsWithCurrManager,Attrition
0,41,2,2,1,2,1,2,0,3,2,7,4,2,5993,8,1,11,3,1,0,8,0,1,6,4,0,5,1
1,49,1,1,8,1,1,3,1,2,2,6,2,1,5130,1,0,23,4,4,1,10,3,3,10,7,1,7,0
2,37,2,1,2,2,4,4,1,2,1,2,3,2,2090,6,1,15,3,2,0,7,3,3,0,0,0,0,1
3,33,1,1,3,4,1,4,0,3,1,6,3,1,2909,1,1,11,3,3,0,8,3,3,8,7,3,0,0
4,27,2,1,2,1,3,1,1,3,1,2,2,1,3468,9,0,12,3,4,1,6,3,3,2,2,2,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2461,24,2,1,12,1,4,4,0,3,1,6,4,2,2690,1,0,17,3,1,0,3,2,2,3,2,2,2,1
2462,33,0,2,18,3,2,1,0,3,2,7,3,2,5310,7,0,13,3,2,0,8,3,2,4,2,0,3,1
2463,47,1,1,1,4,3,3,1,3,1,6,3,1,4735,3,1,14,3,3,0,4,3,3,0,0,0,0,1
2464,23,2,1,5,1,2,2,0,3,1,7,1,2,2673,2,0,12,3,2,0,2,3,3,2,0,0,1,1


In [None]:
# 원핫 인코딩 했던걸 다시 문자형으로 바꿔서 해보기. 차이가 있나없나...

**변수 유형별 모음**

In [4]:
# 수치형 변수 모음
number_cols = ['Age', 'DistanceFromHome', 'MonthlyIncome', 'NumCompaniesWorked',
               'PercentSalaryHike', 'TotalWorkingYears', 'TrainingTimesLastYear',
               'YearsAtCompany', 'YearsInCurrentRole', 'YearsSinceLastPromotion',
               'YearsWithCurrManager']
number_cols

['Age',
 'DistanceFromHome',
 'MonthlyIncome',
 'NumCompaniesWorked',
 'PercentSalaryHike',
 'TotalWorkingYears',
 'TrainingTimesLastYear',
 'YearsAtCompany',
 'YearsInCurrentRole',
 'YearsSinceLastPromotion',
 'YearsWithCurrManager']

In [7]:
# 순서형 변수 모음
ordinal_cols = ['Education', 'EnvironmentSatisfaction', 'JobInvolvement',
                'JobLevel', 'JobSatisfaction', 'PerformanceRating',
                'RelationshipSatisfaction', 'StockOptionLevel', 'WorkLifeBalance']
ordinal_cols

['Education',
 'EnvironmentSatisfaction',
 'JobInvolvement',
 'JobLevel',
 'JobSatisfaction',
 'PerformanceRating',
 'RelationshipSatisfaction',
 'StockOptionLevel',
 'WorkLifeBalance']

In [6]:
# 범주형 변수 모음
cat_cols = ['Attrition', 'BusinessTravel', 'Department', 'EducationField', 'Gender', 'JobRole', 'MaritalStatus', 'OverTime']
cat_cols

['Attrition',
 'BusinessTravel',
 'Department',
 'EducationField',
 'Gender',
 'JobRole',
 'MaritalStatus',
 'OverTime']

In [8]:
# 수치형 + 범주형 변수 모음
cat_ord_list = ordinal_cols + cat_cols
cat_ord_list

['Education',
 'EnvironmentSatisfaction',
 'JobInvolvement',
 'JobLevel',
 'JobSatisfaction',
 'PerformanceRating',
 'RelationshipSatisfaction',
 'StockOptionLevel',
 'WorkLifeBalance',
 'Attrition',
 'BusinessTravel',
 'Department',
 'EducationField',
 'Gender',
 'JobRole',
 'MaritalStatus',
 'OverTime']

## 수치형. df_raw. t-test

In [14]:
# Attrition에 따른 수치형 변수간 t-test
# Attrition은 각각의 수치형 변수에 대해 유의미한 차이가 있는가?


try:
    for col in number_cols:
        Attrition_yes = df_raw[df_raw['Attrition'] == 'Yes'][col]
        Attrition_no = df_raw[df_raw['Attrition'] == 'No'][col]
        display(col, pg.ttest(Attrition_yes, Attrition_no))
except:
    pass

'Age'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-5.828012,316.931112,two-sided,1.37976e-08,"[-5.29, -2.62]",0.438225,1181000.0,1.0


'DistanceFromHome'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,2.888183,322.724279,two-sided,0.004137,"[0.55, 2.89]",0.212401,4.71,0.849148


'MonthlyIncome'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-7.482622,412.740748,two-sided,4.433589e-13,"[-2583.05, -1508.24]",0.440018,44710000000.0,1.0


'NumCompaniesWorked'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,1.574651,317.138829,two-sided,0.116334,"[-0.07, 0.66]",0.118305,0.267,0.384891


'PercentSalaryHike'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.504245,326.107117,two-sided,0.61443,"[-0.66, 0.39]",0.03663,0.09,0.081033


'TotalWorkingYears'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-7.019179,350.876864,two-sided,1.159817e-11,"[-4.63, -2.6]",0.471815,1817000000.0,1.0


'TrainingTimesLastYear'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-2.330522,339.557098,two-sided,0.020364,"[-0.38, -0.03]",0.161916,1.135,0.626075


'YearsAtCompany'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-5.282596,338.213101,two-sided,2.285905e-07,"[-3.07, -1.4]",0.368551,63410.0,0.999387


'YearsInCurrentRole'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-6.847079,366.568825,two-sided,3.18739e-11,"[-2.04, -1.13]",0.442011,581100000.0,1.0


'YearsSinceLastPromotion'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-1.287927,338.491595,two-sided,0.198651,"[-0.73, 0.15]",0.089777,0.179,0.244156


'YearsWithCurrManager'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-6.633399,365.098101,two-sided,1.185022e-10,"[-1.96, -1.07]",0.429743,146400000.0,1.0


## 수치형. df_under. t-test

In [43]:
# Attrition에 따른 수치형 변수간 t-test
# Attrition은 각각의 수치형 변수에 대해 유의미한 차이가 있는가?


try:
    for col in number_cols:
        Attrition_yes = df_under[df_under['Attrition'] == 'Yes'][col]
        Attrition_no = df_under[df_under['Attrition'] == 'No'][col]
        display(col, pg.ttest(Attrition_yes, Attrition_no))
except:
    pass

'Age'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-4.884612,472,two-sided,1e-06,"[-5.96, -2.54]",0.448715,8592.892,0.99822


'DistanceFromHome'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,2.100725,472,two-sided,0.036195,"[0.1, 3.03]",0.192979,0.862,0.554306


'MonthlyIncome'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-4.925403,472,two-sided,1e-06,"[-2601.78, -1117.83]",0.452463,10350.0,0.998439


'NumCompaniesWorked'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,1.079491,472,two-sided,0.28092,"[-0.21, 0.74]",0.099165,0.179,0.189901


'PercentSalaryHike'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.368195,472,two-sided,0.712893,"[-0.78, 0.53]",0.033824,0.109,0.065606


'TotalWorkingYears'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-4.891782,472,two-sided,1e-06,"[-4.63, -1.98]",0.449374,8877.748,0.99826


'TrainingTimesLastYear'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-2.02444,472,two-sided,0.043487,"[-0.49, -0.01]",0.185971,0.741,0.524098


'YearsAtCompany'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-3.438501,472,two-sided,0.000637,"[-2.93, -0.8]",0.315871,29.887,0.929426


'YearsInCurrentRole'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-4.601423,472,two-sided,5e-06,"[-1.93, -0.78]",0.422701,2454.553,0.995757


'YearsSinceLastPromotion'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-0.334286,472,two-sided,0.738313,"[-0.64, 0.45]",0.030708,0.108,0.062844


'YearsWithCurrManager'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-4.160077,472,two-sided,3.8e-05,"[-1.91, -0.69]",0.382157,400.002,0.985797


## 수치형. df_over. t-test

In [63]:
# Attrition에 따른 수치형 변수간 t-test
# Attrition은 각각의 수치형 변수에 대해 유의미한 차이가 있는가?


try:
    for col in number_cols:
        Attrition_yes = df_over[df_over['Attrition'] == 1][col]
        Attrition_no = df_over[df_over['Attrition'] == 0][col]
        display(col, pg.ttest(Attrition_yes, Attrition_no))
except:
    pass

'Age'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-12.991969,2464,two-sided,2.230392e-37,"[-5.25, -3.88]",0.523249,8.596e+33,1.0


'DistanceFromHome'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,5.036517,2464,two-sided,5.083946e-07,"[0.95, 2.15]",0.202845,12720.0,0.998946


'MonthlyIncome'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-12.04069,2464,two-sided,1.739665e-32,"[-2380.11, -1713.44]",0.484936,1.239e+29,1.0


'NumCompaniesWorked'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,0.042119,2464,two-sided,0.966407,"[-0.18, 0.19]",0.001696,0.045,0.050203


'PercentSalaryHike'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-3.53627,2464,two-sided,0.000413,"[-0.76, -0.22]",0.142423,22.327,0.942363


'TotalWorkingYears'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-13.879714,2464,two-sided,3.131773e-42,"[-4.59, -3.46]",0.559003,5.5119999999999995e+38,1.0


'TrainingTimesLastYear'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-9.762178,2464,two-sided,4.0941e-22,"[-0.56, -0.37]",0.39317,7.123e+18,1.0


'YearsAtCompany'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-11.134079,2464,two-sided,3.967392e-28,"[-3.01, -2.11]",0.448423,6.102e+24,1.0


'YearsInCurrentRole'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-14.133474,2464,two-sided,1.145011e-43,"[-2.1, -1.59]",0.569223,1.4639999999999999e+40,1.0


'YearsSinceLastPromotion'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-4.431943,2464,two-sided,1e-05,"[-0.78, -0.3]",0.178496,758.141,0.993249


'YearsWithCurrManager'

Unnamed: 0,T,dof,alternative,p-val,CI95%,cohen-d,BF10,power
T-test,-13.858219,2464,two-sided,4.135209e-42,"[-2.04, -1.53]",0.558137,4.185e+38,1.0


## 범주형. df_raw. chi-square

In [64]:
# Attrition에 따른 범주형(순서형) 변수간 카이제곱 검정
# 귀무가설 : 범주형(순서형) 변수에 따라 퇴사 비율에 차이가 없다.
# 대립가설 : 범주형(순서형) 변수에 따라 퇴사 비율에 차이가 있다.

try:
    for col in cat_ord_list:
        expected, observed, stats = pg.chi2_independence(x='Attrition', y=col, data=df_raw)
        display(df_raw.pivot_table(index=col, columns='Attrition', aggfunc='size')) # 분할표
        display(stats)
except:
    pass

Attrition,No,Yes
Education,Unnamed: 1_level_1,Unnamed: 2_level_1
1,139,31
2,238,44
3,473,99
4,340,58
5,43,5


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,3.073961,4.0,0.545525,0.045729,0.24971
1,cressie-read,0.666667,3.11246,4.0,0.539185,0.046014,0.252603
2,log-likelihood,0.0,3.199999,4.0,0.524931,0.046657,0.259195
3,freeman-tukey,-0.5,3.276024,4.0,0.51274,0.047208,0.264935
4,mod-log-likelihood,-1.0,3.362199,4.0,0.499138,0.047825,0.271459
5,neyman,-2.0,3.570535,4.0,0.467235,0.049284,0.287287


Attrition,No,Yes
EnvironmentSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,212,72
2,244,43
3,391,62
4,386,60


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,22.503881,3.0,5.1e-05,0.123729,0.986916
1,cressie-read,0.666667,21.798382,3.0,7.2e-05,0.121774,0.98427
2,log-likelihood,0.0,20.575349,3.0,0.000129,0.118308,0.978454
3,freeman-tukey,-0.5,19.802549,3.0,0.000187,0.116065,0.973799
4,mod-log-likelihood,-1.0,19.137295,3.0,0.000256,0.114099,0.969058
5,neyman,-2.0,18.081705,3.0,0.000423,0.110908,0.959879


Attrition,No,Yes
JobInvolvement,Unnamed: 1_level_1,Unnamed: 2_level_1
1,55,28
2,304,71
3,743,125
4,131,13


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,28.492021,3.0,3e-06,0.13922,0.997445
1,cressie-read,0.666667,27.348486,3.0,5e-06,0.136398,0.996479
2,log-likelihood,0.0,25.67222,3.0,1.1e-05,0.132152,0.994405
3,freeman-tukey,-0.5,24.869287,3.0,1.6e-05,0.130069,0.993038
4,mod-log-likelihood,-1.0,24.401459,3.0,2.1e-05,0.12884,0.992101
5,neyman,-2.0,24.360116,3.0,2.1e-05,0.12873,0.992012


Attrition,No,Yes
JobLevel,Unnamed: 1_level_1,Unnamed: 2_level_1
1,400,143
2,482,52
3,186,32
4,101,5
5,64,5


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,72.529013,4.0,6.634685e-15,0.222125,1.0
1,cressie-read,0.666667,72.380296,4.0,7.132576e-15,0.221897,1.0
2,log-likelihood,0.0,73.441263,4.0,4.256096e-15,0.223517,1.0
3,freeman-tukey,-0.5,75.620927,4.0,1.472561e-15,0.22681,1.0
4,mod-log-likelihood,-1.0,79.289566,4.0,2.463169e-16,0.232247,1.0
5,neyman,-2.0,93.089319,4.0,2.9041089999999996e-19,0.251647,1.0


Attrition,No,Yes
JobSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,223,66
2,234,46
3,369,73
4,407,52


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,17.505077,3.0,0.000556,0.109125,0.953865
1,cressie-read,0.666667,17.422055,3.0,0.000579,0.108866,0.952934
2,log-likelihood,0.0,17.356477,3.0,0.000597,0.108661,0.952186
3,freeman-tukey,-0.5,17.394341,3.0,0.000586,0.108779,0.952619
4,mod-log-likelihood,-1.0,17.50701,3.0,0.000556,0.109131,0.953886
5,neyman,-2.0,17.962352,3.0,0.000448,0.110541,0.958697


Attrition,No,Yes
PerformanceRating,Unnamed: 1_level_1,Unnamed: 2_level_1
3,1044,200
4,189,37


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,0.000155,1.0,0.990075,0.000324,0.050018
1,cressie-read,0.666667,0.000155,1.0,0.990075,0.000324,0.050018
2,log-likelihood,0.0,0.000155,1.0,0.990076,0.000324,0.050018
3,freeman-tukey,-0.5,0.000155,1.0,0.990077,0.000324,0.050018
4,mod-log-likelihood,-1.0,0.000155,1.0,0.990078,0.000324,0.050018
5,neyman,-2.0,0.000155,1.0,0.99008,0.000324,0.050018


Attrition,No,Yes
RelationshipSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,219,57
2,258,45
3,388,71
4,368,64


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,5.241068,3.0,0.154972,0.059711,0.459687
1,cressie-read,0.666667,5.151907,3.0,0.161006,0.0592,0.452626
2,log-likelihood,0.0,4.986337,3.0,0.1728,0.058241,0.439414
3,freeman-tukey,-0.5,4.872546,3.0,0.181371,0.057573,0.430263
4,mod-log-likelihood,-1.0,4.766982,3.0,0.189677,0.056946,0.421724
5,neyman,-2.0,4.578356,3.0,0.205407,0.055808,0.406358


Attrition,No,Yes
StockOptionLevel,Unnamed: 1_level_1,Unnamed: 2_level_1
0,477,154
1,540,56
2,146,12
3,70,15


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,60.598301,3.0,4.37939e-13,0.203035,1.0
1,cressie-read,0.666667,60.722933,3.0,4.118909e-13,0.203244,1.0
2,log-likelihood,0.0,61.69301,3.0,2.555448e-13,0.204861,1.0
3,freeman-tukey,-0.5,63.102773,3.0,1.276724e-13,0.207188,1.0
4,mod-log-likelihood,-1.0,65.167963,3.0,4.617756e-14,0.210552,1.0
5,neyman,-2.0,71.64999,3.0,1.891863e-15,0.220775,1.0


Attrition,No,Yes
WorkLifeBalance,Unnamed: 1_level_1,Unnamed: 2_level_1
1,55,25
2,286,58
3,766,127
4,126,27


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,16.325097,3.0,0.000973,0.105383,0.938916
1,cressie-read,0.666667,15.47368,3.0,0.001454,0.102598,0.925542
2,log-likelihood,0.0,14.073324,3.0,0.002807,0.097845,0.897793
3,freeman-tukey,-0.5,13.240219,3.0,0.004145,0.094905,0.877285
4,mod-log-likelihood,-1.0,12.557144,3.0,0.005699,0.092424,0.857907
5,neyman,-2.0,11.544252,3.0,0.009119,0.088618,0.824424




Attrition,No,Yes
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1
No,1233.0,
Yes,,237.0


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,1462.614554,1.0,0.0,0.997485,1.0
1,cressie-read,0.666667,1282.801721,1.0,6.175441e-281,0.93416,1.0
2,log-likelihood,0.0,1282.612399,1.0,6.789077e-281,0.934091,1.0
3,freeman-tukey,-0.5,1807.910027,1.0,0.0,1.108995,1.0
4,mod-log-likelihood,-1.0,4257.224764,1.0,0.0,1.701785,1.0
5,neyman,-2.0,157473.527396,1.0,0.0,10.350113,1.0


Attrition,No,Yes
BusinessTravel,Unnamed: 1_level_1,Unnamed: 2_level_1
Non-Travel,138,12
Travel_Frequently,208,69
Travel_Rarely,887,156


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,24.182414,2.0,6e-06,0.12826,0.995516
1,cressie-read,0.666667,23.942197,2.0,6e-06,0.127621,0.99519
2,log-likelihood,0.0,23.760237,2.0,7e-06,0.127135,0.994928
3,freeman-tukey,-0.5,23.89847,2.0,6e-06,0.127505,0.995128
4,mod-log-likelihood,-1.0,24.29439,2.0,5e-06,0.128557,0.99566
5,neyman,-2.0,25.994898,2.0,2e-06,0.13298,0.997375


Attrition,No,Yes
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Human Resources,51,12
Research & Development,828,133
Sales,354,92


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,10.796007,2.0,0.004526,0.085698,0.845592
1,cressie-read,0.666667,10.686054,2.0,0.004781,0.085261,0.841692
2,log-likelihood,0.0,10.490345,2.0,0.005273,0.084477,0.834544
3,freeman-tukey,-0.5,10.363646,2.0,0.005618,0.083965,0.829771
4,mod-log-likelihood,-1.0,10.253226,2.0,0.005937,0.083516,0.825517
5,neyman,-2.0,10.078297,2.0,0.006479,0.082801,0.818594




Attrition,No,Yes
EducationField,Unnamed: 1_level_1,Unnamed: 2_level_1
Human Resources,20,7
Life Sciences,517,89
Marketing,124,35
Medical,401,63
Other,71,11
Technical Degree,100,32


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,16.024674,5.0,0.006774,0.104409,0.890712
1,cressie-read,0.666667,15.617228,5.0,0.008026,0.103073,0.881563
2,log-likelihood,0.0,14.900147,5.0,0.010798,0.100678,0.863876
3,freeman-tukey,-0.5,14.438741,5.0,0.01305,0.099107,0.85136
4,mod-log-likelihood,-1.0,14.035241,5.0,0.015387,0.097713,0.839645
5,neyman,-2.0,13.379343,5.0,0.020072,0.095402,0.818991


Attrition,No,Yes
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,501,87
Male,732,150


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,1.116967,1.0,0.290572,0.027565,0.184514
1,cressie-read,0.666667,1.119705,1.0,0.289982,0.027599,0.184853
2,log-likelihood,0.0,1.125434,1.0,0.288751,0.02767,0.18556
3,freeman-tukey,-0.5,1.129956,1.0,0.287785,0.027725,0.186119
4,mod-log-likelihood,-1.0,1.134673,1.0,0.286781,0.027783,0.186702
5,neyman,-2.0,1.144708,1.0,0.28466,0.027905,0.187942




Attrition,No,Yes
JobRole,Unnamed: 1_level_1,Unnamed: 2_level_1
Healthcare Representative,122,9
Human Resources,40,12
Laboratory Technician,197,62
Manager,97,5
Manufacturing Director,135,10
Research Director,78,2
Research Scientist,245,47
Sales Executive,269,57
Sales Representative,50,33


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,86.190254,8.0,2.752482e-15,0.242142,1.0
1,cressie-read,0.666667,85.838001,8.0,3.243442e-15,0.241647,1.0
2,log-likelihood,0.0,88.908721,8.0,7.743392e-16,0.245931,1.0
3,freeman-tukey,-0.5,95.201061,8.0,4.0708290000000006e-17,0.254485,1.0
4,mod-log-likelihood,-1.0,106.301702,8.0,2.187725e-19,0.268913,1.0
5,neyman,-2.0,155.087408,8.0,1.700565e-29,0.32481,1.0


Attrition,No,Yes
MaritalStatus,Unnamed: 1_level_1,Unnamed: 2_level_1
Divorced,294,33
Married,589,84
Single,350,120


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,46.163677,2.0,9.455511e-11,0.177211,0.999996
1,cressie-read,0.666667,45.30014,2.0,1.456128e-10,0.175546,0.999995
2,log-likelihood,0.0,43.999698,2.0,2.78989e-10,0.173008,0.999992
3,freeman-tukey,-0.5,43.373275,2.0,3.816041e-10,0.171772,0.99999
4,mod-log-likelihood,-1.0,43.029524,2.0,4.531663e-10,0.17109,0.999988
5,neyman,-2.0,43.161501,2.0,4.242278e-10,0.171352,0.999989


Attrition,No,Yes
OverTime,Unnamed: 1_level_1,Unnamed: 2_level_1
No,944,110
Yes,289,127


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,87.564294,1.0,8.158424e-21,0.244065,1.0
1,cressie-read,0.666667,84.576205,1.0,3.6968769999999997e-20,0.239864,1.0
2,log-likelihood,0.0,80.079543,1.0,3.5963659999999997e-19,0.233401,1.0
3,freeman-tukey,-0.5,77.830595,1.0,1.122677e-18,0.2301,1.0
4,mod-log-likelihood,-1.0,76.421211,1.0,2.291765e-18,0.228007,1.0
5,neyman,-2.0,75.828553,1.0,3.093947e-18,0.227121,1.0


## 범주형. df_under. chi-sqaure

In [65]:
# Attrition에 따른 범주형(순서형) 변수간 카이제곱 검정
# 귀무가설 : 범주형(순서형) 변수에 따라 퇴사 비율에 차이가 없다.
# 대립가설 : 범주형(순서형) 변수에 따라 퇴사 비율에 차이가 있다.

try:
    for col in cat_ord_list:
        expected, observed, stats = pg.chi2_independence(x='Attrition', y=col, data=df_under)
        display(df_under.pivot_table(index=col, columns='Attrition', aggfunc='size')) # 분할표
        display(stats)
except:
    pass

Attrition,No,Yes
Education,Unnamed: 1_level_1,Unnamed: 2_level_1
1,22,31
2,51,44
3,82,99
4,75,58
5,7,5


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,6.147042,4.0,0.188433,0.113879,0.480669
1,cressie-read,0.666667,6.150979,4.0,0.188153,0.113916,0.48095
2,log-likelihood,0.0,6.164813,4.0,0.187173,0.114044,0.48194
3,freeman-tukey,-0.5,6.180453,4.0,0.186071,0.114188,0.483057
4,mod-log-likelihood,-1.0,6.200663,4.0,0.184655,0.114375,0.484499
5,neyman,-2.0,6.255061,4.0,0.180893,0.114875,0.488373


Attrition,No,Yes
EnvironmentSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,41,72
2,49,43
3,68,62
4,79,60


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,11.769775,3.0,0.008215,0.157578,0.832399
1,cressie-read,0.666667,11.795847,3.0,0.008116,0.157752,0.833302
2,log-likelihood,0.0,11.888321,3.0,0.007776,0.158369,0.836469
3,freeman-tukey,-0.5,11.994114,3.0,0.007403,0.159072,0.840031
4,mod-log-likelihood,-1.0,12.132518,3.0,0.006943,0.159988,0.844592
5,neyman,-2.0,12.513623,3.0,0.005816,0.162481,0.856588


Attrition,No,Yes
JobInvolvement,Unnamed: 1_level_1,Unnamed: 2_level_1
1,7,28
2,57,71
3,152,125
4,21,13


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,18.645372,3.0,0.000324,0.198334,0.965054
1,cressie-read,0.666667,18.83746,3.0,0.000295,0.199353,0.966671
2,log-likelihood,0.0,19.562521,3.0,0.000209,0.203153,0.972172
3,freeman-tukey,-0.5,20.46121,3.0,0.000136,0.207767,0.977819
4,mod-log-likelihood,-1.0,21.741349,3.0,7.4e-05,0.214168,0.984036
5,neyman,-2.0,25.886977,3.0,1e-05,0.233696,0.994725


Attrition,No,Yes
JobLevel,Unnamed: 1_level_1,Unnamed: 2_level_1
1,82,143
2,87,52
3,41,32
4,18,5
5,9,5


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,34.951,4.0,4.754261e-07,0.271544,0.999266
1,cressie-read,0.666667,35.117393,4.0,4.394429e-07,0.27219,0.9993
2,log-likelihood,0.0,35.726247,4.0,3.294259e-07,0.274539,0.999411
3,freeman-tukey,-0.5,36.451914,4.0,2.335906e-07,0.277313,0.999521
4,mod-log-likelihood,-1.0,37.444468,4.0,1.458791e-07,0.281064,0.99964
5,neyman,-2.0,40.427677,4.0,3.5307e-08,0.292045,0.999849


Attrition,No,Yes
JobSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,43,66
2,53,46
3,65,73
4,76,52


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,10.311929,3.0,0.016093,0.147496,0.775147
1,cressie-read,0.666667,10.326089,3.0,0.015988,0.147597,0.77577
2,log-likelihood,0.0,10.376019,3.0,0.015626,0.147954,0.777958
3,freeman-tukey,-0.5,10.432716,3.0,0.015224,0.148358,0.780421
4,mod-log-likelihood,-1.0,10.506319,3.0,0.014718,0.14888,0.783585
5,neyman,-2.0,10.706105,3.0,0.013426,0.150289,0.791992


Attrition,No,Yes
PerformanceRating,Unnamed: 1_level_1,Unnamed: 2_level_1
3,206,200
4,31,37


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,0.429223,1.0,0.51237,0.030092,0.100438
1,cressie-read,0.666667,0.429297,1.0,0.512334,0.030095,0.100447
2,log-likelihood,0.0,0.429557,1.0,0.512206,0.030104,0.100478
3,freeman-tukey,-0.5,0.429849,1.0,0.512063,0.030114,0.100513
4,mod-log-likelihood,-1.0,0.430226,1.0,0.511878,0.030127,0.100558
5,neyman,-2.0,0.431231,1.0,0.511385,0.030162,0.100679


Attrition,No,Yes
RelationshipSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,44,57
2,44,45
3,68,71
4,81,64


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,3.742355,3.0,0.290663,0.088855,0.336982
1,cressie-read,0.666667,3.744406,3.0,0.290419,0.08888,0.337154
2,log-likelihood,0.0,3.751603,3.0,0.289566,0.088965,0.337757
3,freeman-tukey,-0.5,3.759727,3.0,0.288605,0.089061,0.338438
4,mod-log-likelihood,-1.0,3.770211,3.0,0.28737,0.089185,0.339317
5,neyman,-2.0,3.798353,3.0,0.284078,0.089518,0.341675


Attrition,No,Yes
StockOptionLevel,Unnamed: 1_level_1,Unnamed: 2_level_1
0,91,154
1,108,56
2,28,12
3,10,15


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,40.087805,3.0,1.020809e-08,0.290815,0.999918
1,cressie-read,0.666667,40.23235,3.0,9.512663e-09,0.291339,0.999922
2,log-likelihood,0.0,40.748654,3.0,7.39315e-09,0.293202,0.999933
3,freeman-tukey,-0.5,41.344667,3.0,5.526072e-09,0.295339,0.999945
4,mod-log-likelihood,-1.0,42.131838,3.0,3.761809e-09,0.298137,0.999957
5,neyman,-2.0,44.33843,3.0,1.278927e-09,0.305845,0.999978


Attrition,No,Yes
WorkLifeBalance,Unnamed: 1_level_1,Unnamed: 2_level_1
1,10,25
2,57,58
3,147,127
4,23,27


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,8.217121,3.0,0.041731,0.131665,0.667149
1,cressie-read,0.666667,8.263474,3.0,0.040869,0.132036,0.669887
2,log-likelihood,0.0,8.431623,3.0,0.037885,0.133373,0.679685
3,freeman-tukey,-0.5,8.629572,3.0,0.034644,0.134929,0.690951
4,mod-log-likelihood,-1.0,8.896411,3.0,0.0307,0.136999,0.705678
5,neyman,-2.0,9.673431,3.0,0.021556,0.142857,0.745587




Attrition,No,Yes
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1
No,237.0,
Yes,,237.0


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,470.008439,1.0,3.191041e-104,0.995781,1.0
1,cressie-read,0.666667,496.458725,1.0,5.603839e-110,1.023416,1.0
2,log-likelihood,0.0,642.783224,1.0,8.291463999999999e-142,1.16451,1.0
3,freeman-tukey,-0.5,990.322572,1.0,2.2792929999999997e-217,1.445437,1.0
4,mod-log-likelihood,-1.0,2264.3098,1.0,0.0,2.185641,1.0
5,neyman,-2.0,55813.750529,1.0,0.0,10.851292,1.0


Attrition,No,Yes
BusinessTravel,Unnamed: 1_level_1,Unnamed: 2_level_1
Non-Travel,32,12
Travel_Frequently,37,69
Travel_Rarely,168,156


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,19.195731,2.0,6.8e-05,0.201239,0.981543
1,cressie-read,0.666667,19.302811,2.0,6.4e-05,0.2018,0.982077
2,log-likelihood,0.0,19.690405,2.0,5.3e-05,0.203816,0.983892
3,freeman-tukey,-0.5,20.145521,2.0,4.2e-05,0.206158,0.9858
4,mod-log-likelihood,-1.0,20.757511,2.0,3.1e-05,0.209266,0.98803
5,neyman,-2.0,22.532452,2.0,1.3e-05,0.218029,0.992766


Attrition,No,Yes
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
Human Resources,11,12
Research & Development,147,133
Sales,79,92


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,1.731782,2.0,0.420676,0.060445,0.200079
1,cressie-read,0.666667,1.732062,2.0,0.420618,0.060449,0.200105
2,log-likelihood,0.0,1.733042,2.0,0.420412,0.060467,0.200198
3,freeman-tukey,-0.5,1.734146,2.0,0.42018,0.060486,0.200302
4,mod-log-likelihood,-1.0,1.735567,2.0,0.419881,0.060511,0.200436
5,neyman,-2.0,1.739364,2.0,0.419085,0.060577,0.200794




Attrition,No,Yes
EducationField,Unnamed: 1_level_1,Unnamed: 2_level_1
Human Resources,3,7
Life Sciences,103,89
Marketing,28,35
Medical,65,63
Other,15,11
Technical Degree,23,32


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,5.517973,5.0,0.355979,0.107895,0.399251
1,cressie-read,0.666667,5.530448,5.0,0.354619,0.108017,0.400127
2,log-likelihood,0.0,5.575241,5.0,0.349766,0.108453,0.403271
3,freeman-tukey,-0.5,5.6273,5.0,0.344189,0.108958,0.40692
4,mod-log-likelihood,-1.0,5.696551,5.0,0.336874,0.109627,0.411766
5,neyman,-2.0,5.89336,5.0,0.316734,0.111504,0.425481


Attrition,No,Yes
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,92,87
Male,145,150


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,0.143623,1.0,0.704706,0.017407,0.06661
1,cressie-read,0.666667,0.143625,1.0,0.704704,0.017407,0.06661
2,log-likelihood,0.0,0.143632,1.0,0.704697,0.017407,0.066611
3,freeman-tukey,-0.5,0.14364,1.0,0.704689,0.017408,0.066612
4,mod-log-likelihood,-1.0,0.14365,1.0,0.704679,0.017409,0.066613
5,neyman,-2.0,0.143677,1.0,0.704652,0.01741,0.066616




Attrition,No,Yes
JobRole,Unnamed: 1_level_1,Unnamed: 2_level_1
Healthcare Representative,23,9
Human Resources,9,12
Laboratory Technician,30,62
Manager,14,5
Manufacturing Director,21,10
Research Director,15,2
Research Scientist,50,47
Sales Executive,63,57
Sales Representative,12,33


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,45.98435,8.0,2.393332e-07,0.31147,0.999783
1,cressie-read,0.666667,46.48204,8.0,1.92454e-07,0.313151,0.999809
2,log-likelihood,0.0,48.397737,8.0,8.291972e-08,0.319539,0.999885
3,freeman-tukey,-0.5,50.84736,8.0,2.807848e-08,0.327525,0.999941
4,mod-log-likelihood,-1.0,54.482439,8.0,5.564892e-09,0.339031,0.999978
5,neyman,-2.0,67.50035,8.0,1.542777e-11,0.377367,0.999999


Attrition,No,Yes
MaritalStatus,Unnamed: 1_level_1,Unnamed: 2_level_1
Divorced,48,33
Married,116,84
Single,73,120


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,19.343374,2.0,6.3e-05,0.202012,0.982276
1,cressie-read,0.666667,19.377356,2.0,6.2e-05,0.202189,0.982441
2,log-likelihood,0.0,19.497444,2.0,5.8e-05,0.202815,0.983011
3,freeman-tukey,-0.5,19.634189,2.0,5.5e-05,0.203525,0.98364
4,mod-log-likelihood,-1.0,19.812225,2.0,5e-05,0.204445,0.984425
5,neyman,-2.0,20.298088,2.0,3.9e-05,0.206937,0.98639


Attrition,No,Yes
OverTime,Unnamed: 1_level_1,Unnamed: 2_level_1
No,184,110
Yes,53,127


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,47.731406,1.0,4.887983e-12,0.317331,1.0
1,cressie-read,0.666667,47.962083,1.0,4.345419e-12,0.318097,1.0
2,log-likelihood,0.0,48.792612,1.0,2.845096e-12,0.32084,1.0
3,freeman-tukey,-0.5,49.761107,1.0,1.736523e-12,0.324008,1.0
4,mod-log-likelihood,-1.0,51.053854,1.0,8.986629e-13,0.32819,1.0
5,neyman,-2.0,54.750269,1.0,1.368605e-13,0.339863,1.0


## 범주형. df_over. chi-square

In [66]:
# Attrition에 따른 범주형(순서형) 변수간 카이제곱 검정
# 귀무가설 : 범주형(순서형) 변수에 따라 퇴사 비율에 차이가 없다.
# 대립가설 : 범주형(순서형) 변수에 따라 퇴사 비율에 차이가 있다.

try:
    for col in cat_ord_list:
        expected, observed, stats = pg.chi2_independence(x='Attrition', y=col, data=df_over)
        display(df_over.pivot_table(index=col, columns='Attrition', aggfunc='size')) # 분할표
        display(stats)
except:
    pass

Attrition,0,1
Education,Unnamed: 1_level_1,Unnamed: 2_level_1
1,139,181
2,238,334
3,473,567
4,340,146
5,43,5


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,137.644205,4.0,9.014335e-29,0.236256,1.0
1,cressie-read,0.666667,139.001164,4.0,4.618208e-29,0.237417,1.0
2,log-likelihood,0.0,144.329441,4.0,3.3385799999999996e-30,0.241925,1.0
3,freeman-tukey,-0.5,151.338604,4.0,1.051626e-31,0.24773,1.0
4,mod-log-likelihood,-1.0,162.092617,4.0,5.201093e-34,0.256381,1.0
5,neyman,-2.0,203.468219,4.0,6.747645e-43,0.287245,1.0


Attrition,0,1
EnvironmentSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,212,401
2,244,368
3,391,352
4,386,112


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,236.19874,3.0,6.31686e-51,0.309487,1.0
1,cressie-read,0.666667,238.290635,3.0,2.2292099999999998e-51,0.310854,1.0
2,log-likelihood,0.0,246.046808,3.0,4.686169e-53,0.315873,1.0
3,freeman-tukey,-0.5,255.44134,3.0,4.354052e-55,0.321847,1.0
4,mod-log-likelihood,-1.0,268.499248,3.0,6.518523e-58,0.32997,1.0
5,neyman,-2.0,308.850993,3.0,1.208017e-66,0.353898,1.0


Attrition,0,1
JobInvolvement,Unnamed: 1_level_1,Unnamed: 2_level_1
1,55,187
2,304,551
3,743,477
4,131,18


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,287.050263,3.0,6.3132280000000004e-62,0.341179,1.0
1,cressie-read,0.666667,290.434764,3.0,1.169081e-62,0.343185,1.0
2,log-likelihood,0.0,303.66356,3.0,1.602712e-65,0.350913,1.0
3,freeman-tukey,-0.5,320.897009,3.0,2.982428e-69,0.360733,1.0
4,mod-log-likelihood,-1.0,346.938503,3.0,6.863949e-75,0.375085,1.0
5,neyman,-2.0,442.953964,3.0,1.096138e-95,0.423821,1.0


Attrition,0,1
JobLevel,Unnamed: 1_level_1,Unnamed: 2_level_1
1,400,744
2,482,285
3,186,160
4,101,25
5,64,19


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,226.231612,4.0,8.546245999999999e-48,0.302887,1.0
1,cressie-read,0.666667,227.688256,4.0,4.151759e-48,0.30386,1.0
2,log-likelihood,0.0,233.073057,4.0,2.877409e-49,0.307432,1.0
3,freeman-tukey,-0.5,239.576621,4.0,1.144518e-50,0.311692,1.0
4,mod-log-likelihood,-1.0,248.599352,4.0,1.3040270000000001e-52,0.317507,1.0
5,neyman,-2.0,276.472348,4.0,1.28394e-58,0.334834,1.0


Attrition,0,1
JobSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,223,394
2,234,374
3,369,356
4,407,109


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,251.962941,3.0,2.461866e-54,0.319648,1.0
1,cressie-read,0.666667,254.505267,3.0,6.940144e-55,0.321256,1.0
2,log-likelihood,0.0,264.012901,3.0,6.091375e-57,0.327202,1.0
3,freeman-tukey,-0.5,275.657967,3.0,1.8421520000000001e-59,0.33434,1.0
4,mod-log-likelihood,-1.0,292.038504,3.0,5.25756e-63,0.344131,1.0
5,neyman,-2.0,343.837088,3.0,3.221803e-74,0.373405,1.0


Attrition,0,1
PerformanceRating,Unnamed: 1_level_1,Unnamed: 2_level_1
3,1044,1169
4,189,64


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,67.722738,1.0,1.881808e-16,0.165718,1.0
1,cressie-read,0.666667,68.301749,1.0,1.402965e-16,0.166425,1.0
2,log-likelihood,0.0,70.428829,1.0,4.771747e-17,0.168997,1.0
3,freeman-tukey,-0.5,72.973746,1.0,1.3138600000000002e-17,0.172023,1.0
4,mod-log-likelihood,-1.0,76.463475,1.0,2.243238e-18,0.176088,1.0
5,neyman,-2.0,86.95939,1.0,1.1077249999999999e-20,0.187785,1.0


Attrition,0,1
RelationshipSatisfaction,Unnamed: 1_level_1,Unnamed: 2_level_1
1,219,333
2,258,338
3,388,443
4,368,119


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,165.234041,3.0,1.359811e-35,0.258853,1.0
1,cressie-read,0.666667,166.606111,3.0,6.875618e-36,0.259926,1.0
2,log-likelihood,0.0,171.663882,3.0,5.564797e-37,0.263841,1.0
3,freeman-tukey,-0.5,177.742543,3.0,2.7099179999999996e-38,0.268472,1.0
4,mod-log-likelihood,-1.0,186.119035,3.0,4.2064470000000004e-40,0.274725,1.0
5,neyman,-2.0,211.558306,3.0,1.340884e-45,0.292899,1.0


Attrition,0,1
StockOptionLevel,Unnamed: 1_level_1,Unnamed: 2_level_1
0,477,939
1,540,213
2,146,64
3,70,17


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,357.047676,3.0,4.442185e-77,0.38051,1.0
1,cressie-read,0.666667,359.386776,3.0,1.383814e-77,0.381755,1.0
2,log-likelihood,0.0,367.920089,3.0,1.964085e-79,0.38626,1.0
3,freeman-tukey,-0.5,378.047152,3.0,1.258813e-81,0.39154,1.0
4,mod-log-likelihood,-1.0,391.829885,3.0,1.3026100000000002e-84,0.398614,1.0
5,neyman,-2.0,432.823378,3.0,1.716694e-93,0.418947,1.0


Attrition,0,1
WorkLifeBalance,Unnamed: 1_level_1,Unnamed: 2_level_1
1,55,138
2,286,483
3,766,578
4,126,34


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,165.35876,3.0,1.27808e-35,0.258951,1.0
1,cressie-read,0.666667,166.472435,3.0,7.347962e-36,0.259821,1.0
2,log-likelihood,0.0,170.590012,3.0,9.490525e-37,0.263015,1.0
3,freeman-tukey,-0.5,175.561386,3.0,8.015615e-38,0.26682,1.0
4,mod-log-likelihood,-1.0,182.451411,3.0,2.6064790000000002e-39,0.272005,1.0
5,neyman,-2.0,203.656524,3.0,6.839965e-44,0.287377,1.0




Attrition,0,1
Attrition,Unnamed: 1_level_1,Unnamed: 2_level_1
0,1233.0,
1,,1233.0


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,2462.002,1.0,0.0,0.999189,1.0
1,cressie-read,0.666667,2602.61,1.0,0.0,1.027325,1.0
2,log-likelihood,0.0,3400.982,1.0,0.0,1.174372,1.0
3,freeman-tukey,-0.5,5500.113,1.0,0.0,1.493445,1.0
4,mod-log-likelihood,-1.0,15842.73,1.0,0.0,2.534653,1.0
5,neyman,-2.0,1518440.0,1.0,0.0,24.814312,1.0


Attrition,0,1
BusinessTravel,Unnamed: 1_level_1,Unnamed: 2_level_1
0,138,76
1,208,556
2,887,601


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,231.446136,2.0,5.522175e-51,0.306357,1.0
1,cressie-read,0.666667,232.869071,2.0,2.710964e-51,0.307298,1.0
2,log-likelihood,0.0,238.044228,2.0,2.038697e-52,0.310694,1.0
3,freeman-tukey,-0.5,244.157596,2.0,9.590731000000001e-54,0.314658,1.0
4,mod-log-likelihood,-1.0,252.429661,2.0,1.533188e-55,0.319944,1.0
5,neyman,-2.0,276.698508,2.0,8.235356e-61,0.334971,1.0


Attrition,0,1
Department,Unnamed: 1_level_1,Unnamed: 2_level_1
0,51,88
1,828,867
2,354,278


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,19.885507,2.0,4.8e-05,0.089799,0.984738
1,cressie-read,0.666667,19.916773,2.0,4.7e-05,0.08987,0.984869
2,log-likelihood,0.0,20.027477,2.0,4.5e-05,0.090119,0.985327
3,freeman-tukey,-0.5,20.153849,2.0,4.2e-05,0.090403,0.985833
4,mod-log-likelihood,-1.0,20.318805,2.0,3.9e-05,0.090772,0.986469
5,neyman,-2.0,20.77115,2.0,3.1e-05,0.091777,0.988076


Attrition,0,1
EducationField,Unnamed: 1_level_1,Unnamed: 2_level_1
0,20,45
1,517,485
2,124,305
3,401,255
4,71,105
5,100,38


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,153.920465,5.0,1.9534210000000001e-31,0.249834,1.0
1,cressie-read,0.666667,154.793648,5.0,1.2729880000000002e-31,0.250542,1.0
2,log-likelihood,0.0,157.954556,5.0,2.700493e-32,0.253087,1.0
3,freeman-tukey,-0.5,161.666127,5.0,4.3694610000000004e-33,0.256043,1.0
4,mod-log-likelihood,-1.0,166.656412,5.0,3.770213e-34,0.259965,1.0
5,neyman,-2.0,181.120997,5.0,3.083335e-37,0.271012,1.0


Attrition,0,1
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
0,501,665
1,732,568


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,43.224142,1.0,4.881465e-11,0.132393,0.999998
1,cressie-read,0.666667,43.252668,1.0,4.810814e-11,0.132437,0.999998
2,log-likelihood,0.0,43.352836,1.0,4.570733e-11,0.13259,0.999998
3,freeman-tukey,-0.5,43.46599,1.0,4.313921e-11,0.132763,0.999998
4,mod-log-likelihood,-1.0,43.612094,1.0,4.003572e-11,0.132986,0.999998
5,neyman,-2.0,44.004759,1.0,3.275785e-11,0.133584,0.999999


Attrition,0,1
JobRole,Unnamed: 1_level_1,Unnamed: 2_level_1
0,122,35
1,40,64
2,197,226
3,97,113
4,135,132
5,78,110
6,245,219
7,269,270
8,50,64


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,65.614446,8.0,3.647893e-11,0.163118,0.999999
1,cressie-read,0.666667,66.231709,8.0,2.753116e-11,0.163884,0.999999
2,log-likelihood,0.0,68.531488,8.0,9.62832e-12,0.166705,1.0
3,freeman-tukey,-0.5,71.333933,8.0,2.664971e-12,0.170079,1.0
4,mod-log-likelihood,-1.0,75.253293,8.0,4.388864e-13,0.174689,1.0
5,neyman,-2.0,87.500268,8.0,1.494342e-15,0.188368,1.0


Attrition,0,1
MaritalStatus,Unnamed: 1_level_1,Unnamed: 2_level_1
0,294,207
1,589,646
2,350,380


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,18.97143,2.0,7.6e-05,0.087711,0.980375
1,cressie-read,0.666667,18.988722,2.0,7.5e-05,0.087751,0.980467
2,log-likelihood,0.0,19.049573,2.0,7.3e-05,0.087891,0.980789
3,freeman-tukey,-0.5,19.118502,2.0,7.1e-05,0.08805,0.981148
4,mod-log-likelihood,-1.0,19.207754,2.0,6.7e-05,0.088255,0.981604
5,neyman,-2.0,19.448876,2.0,6e-05,0.088808,0.982783


Attrition,0,1
OverTime,Unnamed: 1_level_1,Unnamed: 2_level_1
0,944,804
1,289,429


Unnamed: 0,test,lambda,chi2,dof,pval,cramer,power
0,pearson,1.0,37.962674,1.0,7.211109e-10,0.124074,0.999987
1,cressie-read,0.666667,38.002989,1.0,7.063635e-10,0.12414,0.999987
2,log-likelihood,0.0,38.145012,1.0,6.567743e-10,0.124372,0.999988
3,freeman-tukey,-0.5,38.306106,1.0,6.047313e-10,0.124634,0.999988
4,mod-log-likelihood,-1.0,38.514995,1.0,5.433461e-10,0.124974,0.999989
5,neyman,-2.0,39.080805,1.0,4.066222e-10,0.125888,0.999991
