# To perform stat tests on `general_data.csv` data set 

## Non-Parametric tests
>1. wilcoxon - sign test
>2. Friedman test
>3. Mannwhitney test
>4. Kruskal-Walli's test
>5. Chi-Suare test

# Parametric tests

>1. One Sample t-test
>2. Two Sample paried t-test
>3. Two Smaple independent t-test

In [2]:
#to import the requried the lib
import numpy as np
import pandas as pd

In [3]:
# to import requried lib to perform the tests
from scipy.stats import wilcoxon
from scipy.stats import friedmanchisquare
from scipy.stats import mannwhitneyu
from scipy.stats import kruskal
from scipy.stats import chi2_contingency
from scipy.stats import ttest_1samp
from scipy.stats import ttest_rel
from scipy.stats import ttest_ind

In [5]:
#to import the dataset into the notebook

ds = pd.read_csv("general_data.csv")
ds.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeID,Gender,...,NumCompaniesWorked,Over18,PercentSalaryHike,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsSinceLastPromotion,YearsWithCurrManager
0,51,No,Travel_Rarely,Sales,6,2,Life Sciences,1,1,Female,...,1.0,Y,11,8,0,1.0,6,1,0,0
1,31,Yes,Travel_Frequently,Research & Development,10,1,Life Sciences,1,2,Female,...,0.0,Y,23,8,1,6.0,3,5,1,4
2,32,No,Travel_Frequently,Research & Development,17,4,Other,1,3,Male,...,1.0,Y,15,8,3,5.0,2,5,0,3
3,38,No,Non-Travel,Research & Development,2,5,Life Sciences,1,4,Male,...,3.0,Y,11,8,3,13.0,5,8,7,5
4,32,No,Travel_Rarely,Research & Development,10,1,Medical,1,5,Male,...,4.0,Y,12,8,2,9.0,2,6,0,4


>The dataset contains several numerical and categorical columns providing various information on employee's personal and employment details.

In [6]:
ds.info()
# we see that there are no null values in the dataset 

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4410 entries, 0 to 4409
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Age                      4410 non-null   int64  
 1   Attrition                4410 non-null   object 
 2   BusinessTravel           4410 non-null   object 
 3   Department               4410 non-null   object 
 4   DistanceFromHome         4410 non-null   int64  
 5   Education                4410 non-null   int64  
 6   EducationField           4410 non-null   object 
 7   EmployeeCount            4410 non-null   int64  
 8   EmployeeID               4410 non-null   int64  
 9   Gender                   4410 non-null   object 
 10  JobLevel                 4410 non-null   int64  
 11  JobRole                  4410 non-null   object 
 12  MaritalStatus            4410 non-null   object 
 13  MonthlyIncome            4410 non-null   int64  
 14  NumCompaniesWorked      

In [7]:
ds.shape

(4410, 24)

In [8]:
# Dataset columns
ds.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'Department', 'DistanceFromHome',
       'Education', 'EducationField', 'EmployeeCount', 'EmployeeID', 'Gender',
       'JobLevel', 'JobRole', 'MaritalStatus', 'MonthlyIncome',
       'NumCompaniesWorked', 'Over18', 'PercentSalaryHike', 'StandardHours',
       'StockOptionLevel', 'TotalWorkingYears', 'TrainingTimesLastYear',
       'YearsAtCompany', 'YearsSinceLastPromotion', 'YearsWithCurrManager'],
      dtype='object')

#### Numerical features overview

# Assignment day 12

In [67]:
# Make a copy of the original sourcefile
ds_copy2 = ds.copy()

In [68]:
ds_copy2.head()

Unnamed: 0,Age,Attrition,BusinessTravel,Department,DistanceFromHome,Education,EducationField,EmployeeCount,EmployeeID,Gender,...,NumCompaniesWorked,Over18,PercentSalaryHike,StandardHours,StockOptionLevel,TotalWorkingYears,TrainingTimesLastYear,YearsAtCompany,YearsSinceLastPromotion,YearsWithCurrManager
0,51,No,Travel_Rarely,Sales,6,2,Life Sciences,1,1,Female,...,1.0,Y,11,8,0,1.0,6,1,0,0
1,31,Yes,Travel_Frequently,Research & Development,10,1,Life Sciences,1,2,Female,...,0.0,Y,23,8,1,6.0,3,5,1,4
2,32,No,Travel_Frequently,Research & Development,17,4,Other,1,3,Male,...,1.0,Y,15,8,3,5.0,2,5,0,3
3,38,No,Non-Travel,Research & Development,2,5,Life Sciences,1,4,Male,...,3.0,Y,11,8,3,13.0,5,8,7,5
4,32,No,Travel_Rarely,Research & Development,10,1,Medical,1,5,Male,...,4.0,Y,12,8,2,9.0,2,6,0,4


In [69]:
# to convert Yes = 1 and No = 0 into a new column Target
ds_copy2['Attrition'] = ds_copy2['Attrition'].apply(lambda x: 0 if x == 'No' else 1)

In [70]:
# Convert catogrical variable type to numeric type
ds_copy2['BusinessTravel'] = ds_copy2['BusinessTravel'].map({'Non-Travel':1, 'Travel_Rarely':2, 'Travel_Frequently':3})

ds_copy2['MaritalStatus'] = ds_copy2['MaritalStatus'].map({'Single':1, 'Married':2, 'Divorced':3})

ds_copy2['Department'] = ds_copy2['Department'].map({'Sales':1, 'Research & Development':2, 'Human Resources':3})

ds_copy2['EducationField'] = ds_copy2['EducationField'].map({'Life Sciences':1, 'Medical':2, 
                                                             'Marketing':3, 'Technical Degree':4, 
                                                             'Human Resources':5, 'Other':6})

ds_copy2['Gender'] = ds_copy2['Gender'].map({'Male':1, 'Female':2})

ds_copy2['JobRole'] = ds_copy2['JobRole'].map({'Healthcare Representative':1, 
                                              'Research Scientist':2, 'Sales Executive':3, 
                                              'Human Resources':4, 'Research Director':5, 
                                              'Manufacturing Director':6 ,'Manager':7,
                                              'Sales Representative':8 ,'Laboratory Technician':9})

In [71]:
# to display the newly modified coluumns 
ds_copy2[['Attrition','BusinessTravel','MaritalStatus','Department','EducationField','Gender','JobRole']].head(10)

Unnamed: 0,Attrition,BusinessTravel,MaritalStatus,Department,EducationField,Gender,JobRole
0,0,2,2,1,1,2,1
1,1,3,1,2,1,2,2
2,0,3,2,2,6,1,3
3,0,1,2,2,1,1,4
4,0,2,1,2,2,1,3
5,0,2,2,2,1,2,5
6,1,2,1,2,2,1,3
7,0,2,2,2,1,1,3
8,0,2,2,2,1,1,9
9,0,1,3,2,2,2,9


In [72]:
# To drop the columns which are not of any signifigance anymore
ds_copy2 = ds_copy2.drop([ 'EmployeeCount', 'EmployeeID', 'StandardHours', 'Over18'], axis=1)

In [73]:
ds_copy2.columns

Index(['Age', 'Attrition', 'BusinessTravel', 'Department', 'DistanceFromHome',
       'Education', 'EducationField', 'Gender', 'JobLevel', 'JobRole',
       'MaritalStatus', 'MonthlyIncome', 'NumCompaniesWorked',
       'PercentSalaryHike', 'StockOptionLevel', 'TotalWorkingYears',
       'TrainingTimesLastYear', 'YearsAtCompany', 'YearsSinceLastPromotion',
       'YearsWithCurrManager'],
      dtype='object')

In [75]:
ds_copy2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4410 entries, 0 to 4409
Data columns (total 20 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Age                      4410 non-null   int64  
 1   Attrition                4410 non-null   int64  
 2   BusinessTravel           4410 non-null   int64  
 3   Department               4410 non-null   int64  
 4   DistanceFromHome         4410 non-null   int64  
 5   Education                4410 non-null   int64  
 6   EducationField           4410 non-null   int64  
 7   Gender                   4410 non-null   int64  
 8   JobLevel                 4410 non-null   int64  
 9   JobRole                  4410 non-null   int64  
 10  MaritalStatus            4410 non-null   int64  
 11  MonthlyIncome            4410 non-null   int64  
 12  NumCompaniesWorked       4391 non-null   float64
 13  PercentSalaryHike        4410 non-null   int64  
 14  StockOptionLevel        

#### now we have all columns as numeric 

# 1. Wilcoxon - sign test

> this is for comparing two paried samples
> since the data set has no 2 paried samples(duplicates) this test cant not be used 

# 2. Friedman test

> this is for comparing more than two paried samples
> since the data set has no 2 or more paried samples(duplicates) this test cant not be used 

# 3. Mannwhitney test

> this is for comparing two independent samples
> since the data set has no 2 independent samples this test cant not be used 

# 4. Kruskal-Walli's test

> this is for comparing more tha two independent samples
> since the data set has no 2 or more independent samples this test cant not be used 

# 5. Chi-Suare test

> this is for checking dependency for categorrical variables 
we can use this test for all the columns with Attrition as refrence 

In [130]:
chitable = pd.crosstab(ds_copy2.Attrition,ds_copy2.BusinessTravel)
print(chitable)
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

BusinessTravel    1     2    3
Attrition                     
0               414  2661  624
1                36   468  207
72.5472410569655 1.7642769729832015e-16


In [131]:
chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.Age)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.Department)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.DistanceFromHome)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.Education)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.EducationField)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.Gender)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.JobLevel)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.JobRole)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.MaritalStatus)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.MonthlyIncome)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.NumCompaniesWorked)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.PercentSalaryHike)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.StockOptionLevel)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.TotalWorkingYears)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.TrainingTimesLastYear)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.YearsAtCompany)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.YearsSinceLastPromotion)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 

chitable = pd.crosstab(ds_copy2.Attrition, ds_copy2.YearsWithCurrManager)
print(chitable) 
stats,p,dof,expeted = chi2_contingency(chitable)
print(stats,p) 


Age        18  19  20  21  22  23  24  25  26   27  ...  51  52  53  54  55  \
Attrition                                           ...                       
0          12   9  15  21  33  30  57  60  81  135  ...  51  45  51  54  57   
1          12  18  18  18  15  12  21  18  36    9  ...   6   9   6   0   9   

Age        56  57  58  59  60  
Attrition                      
0          33  12  27  30  15  
1           9   0  15   0   0  

[2 rows x 43 columns]
357.52484648609584 1.1886524349105674e-51
Department     1     2    3
Attrition                  
0           1137  2430  132
1            201   453   57
29.090274924488266 4.820888218170406e-07
DistanceFromHome   1    2    3    4    5    6    7    8    9    10  ...  20  \
Attrition                                                           ...       
0                 528  519  222  159  165  156  204  207  219  198  ...  66   
1                  96  114   30   33   30   21   48   33   36   60  ...   9   

DistanceFromHome  21

# Parametric tests
## 1. one sample t test
> our column of interest is Attrition only

In [133]:
stats,p = ttest_1samp(ds_copy2.Attrition,0)
print(stats,p) 

29.111372682591252 1.420878878017739e-170


## 2. two sample t test 
> no column/rows are paried samples

## 3. two sample independent t test 
> no column/rows are dependent and contiunes  samples

# proofs for above improper tests 

### kruskal

In [107]:
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.Age)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.BusinessTravel)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.Department)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.DistanceFromHome)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.Education)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.EducationField)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.Gender)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.JobLevel)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.JobRole)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.MaritalStatus)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.MonthlyIncome)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.NumCompaniesWorked)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.PercentSalaryHike)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.StockOptionLevel)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.TotalWorkingYears)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.TrainingTimesLastYear)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.YearsAtCompany)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.YearsSinceLastPromotion)
print(stats,p) 
stats,p = kruskal(ds_copy2.Attrition, ds_copy2.YearsWithCurrManager)
print(stats,p) 

7146.223848980246 0.0
7283.590756342403 0.0
6808.423337203369 0.0
6848.431941577725 0.0
6978.038250048392 0.0
6415.707613339291 0.0
6252.46425142523 0.0
6491.935423717814 0.0
6976.427798120716 0.0
6618.688220118341 0.0
7145.04395599951 0.0
nan nan
7156.59239983065 0.0
1742.343584875534 0.0
nan nan
6599.184376163454 0.0
6477.796111996682 0.0
2201.844645126309 0.0
4699.79602612524 0.0


### mannwhitneyu

In [134]:
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.Age)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.BusinessTravel)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.Department)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.DistanceFromHome)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.Education)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.EducationField)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.Gender)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.JobLevel)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.JobRole)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.MaritalStatus)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.MonthlyIncome)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.NumCompaniesWorked)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.PercentSalaryHike)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.StockOptionLevel)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.TotalWorkingYears)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.TrainingTimesLastYear)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.YearsAtCompany)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.YearsSinceLastPromotion)
print(stats,p) 
stats,p = mannwhitneyu(ds_copy2.Attrition, ds_copy2.YearsWithCurrManager)
print(stats,p) 

0.0 0.0
159975.0 0.0
475659.0 0.0
221832.0 0.0
181305.0 0.0
646299.0 0.0
940653.0 0.0
579109.5 0.0
139711.5 0.0
501255.0 0.0
0.0 0.0
2054322.0 0.0
0.0 0.0
5482660.5 0.0
170527.5 0.0
490522.5 0.0
520357.5 0.0
4843692.0 0.0
2101288.5 0.0


### wilcoxon

In [135]:
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.Age)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.BusinessTravel)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.Department)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.DistanceFromHome)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.Education)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.EducationField)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.Gender)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.JobLevel)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.JobRole)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.MaritalStatus)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.MonthlyIncome)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.NumCompaniesWorked)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.PercentSalaryHike)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.StockOptionLevel)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.TotalWorkingYears)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.TrainingTimesLastYear)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.YearsAtCompany)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.YearsSinceLastPromotion)
print(stats,p) 
stats,p = wilcoxon(ds_copy2.Attrition, ds_copy2.YearsWithCurrManager)
print(stats,p) 

0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
0.0 0.0
47713.5 0.0
0.0 0.0
305439.0 0.0
1245.0 0.0
7440.0 0.0
11184.0 0.0
220440.0 0.0
76627.5 0.0


  r_plus = np.sum((d > 0) * r, axis=0)
  r_minus = np.sum((d < 0) * r, axis=0)


### friedmanchisquare

In [105]:

stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.Age)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.BusinessTravel)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.Department)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.DistanceFromHome)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.Education)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.EducationField)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.Gender)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.JobLevel)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.JobRole)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.MaritalStatus)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.MonthlyIncome)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.NumCompaniesWorked)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.PercentSalaryHike)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.StockOptionLevel)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.TotalWorkingYears)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.TrainingTimesLastYear)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.YearsAtCompany)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.YearsSinceLastPromotion)
print(stats,p) 
stats,p = friedmanchisquare(ds_copy2.Attrition, ds_copy2.YearsWithCurrManager)
print(stats,p) 

ValueError: Less than 3 levels.  Friedman test not appropriate.

### 2 sample independent test

In [128]:
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.Age)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.BusinessTravel)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.Department)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.DistanceFromHome)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.Education)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.EducationField)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.Gender)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.JobLevel)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.JobRole)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.MaritalStatus)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.MonthlyIncome)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.NumCompaniesWorked)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.PercentSalaryHike)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.StockOptionLevel)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.TotalWorkingYears)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.TrainingTimesLastYear)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.YearsAtCompany)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.YearsSinceLastPromotion)
print(stats,p) 
stats,p = ttest_ind(ds_copy2.Attrition, ds_copy2.YearsWithCurrManager)
print(stats,p) 

-267.08268402194506 0.0
-197.66266258570005 0.0
-162.9472709882222 0.0
-73.92105563691779 0.0
-167.95761114128968 0.0
-92.42726075591335 0.0
-134.2805823822782 0.0
-108.34814220120998 0.0
-106.94530117922253 0.0
-141.48890355184625 0.0
-91.74733118564392 0.0
nan nan
-271.7370409532765 0.0
-45.278457422325424 0.0
nan nan
-130.69794899443974 0.0
-74.10006092710509 0.0
-41.50265814554052 0.0
-73.36426551326637 0.0


### 2 sample paried test

In [129]:
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.Age)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.BusinessTravel)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.Department)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.DistanceFromHome)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.Education)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.EducationField)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.Gender)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.JobLevel)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.JobRole)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.MaritalStatus)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.MonthlyIncome)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.NumCompaniesWorked)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.PercentSalaryHike)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.StockOptionLevel)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.TotalWorkingYears)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.TrainingTimesLastYear)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.YearsAtCompany)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.YearsSinceLastPromotion)
print(stats,p) 
stats,p = ttest_rel(ds_copy2.Attrition, ds_copy2.YearsWithCurrManager)
print(stats,p) 

-265.38946409068467 0.0
-210.56726646532707 0.0
-166.76195680636746 0.0
-73.88850643856405 0.0
-167.1559384969801 0.0
-92.13903993178118 0.0
-133.1270918535975 0.0
-108.01602487088296 0.0
-106.68542008956325 0.0
-133.08683897558444 0.0
-91.74730883596561 0.0
nan nan
-272.62099503261675 0.0
-45.1661939355849 0.0
nan nan
-129.0260139200504 0.0
-73.51135021011144 0.0
-41.34909014847404 0.0
-72.22247856599837 0.0
