# HYPOTHESIS TESTING USING CRITICAL VALUE

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm, t

In [2]:
# Load the raw data, then discard duplicate rows and rows with missing values.
df = (
    pd.read_csv('general_data.csv')
    .drop_duplicates()
    .dropna()
)

# Keep only the rows where Attrition == 'Yes', re-indexed from 0.
dfattrition = df.loc[df['Attrition'] == 'Yes'].reset_index(drop=True).copy()

In [3]:
# Summary statistics of the attrition subset, one row per numeric column.
dfattrition.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
Age,705.0,33.628369,9.678836,18.0,28.0,32.0,39.0,58.0
DistanceFromHome,705.0,9.024113,7.755184,1.0,2.0,7.0,15.0,29.0
Education,705.0,2.87234,1.014463,1.0,2.0,3.0,4.0,5.0
EmployeeCount,705.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0
EmployeeID,705.0,2194.387234,1270.408987,2.0,1075.0,2165.0,3343.0,4403.0
JobLevel,705.0,2.032624,1.048707,1.0,1.0,2.0,2.0,5.0
MonthlyIncome,705.0,61814.950355,44890.457122,10090.0,28440.0,49080.0,71040.0,198590.0
NumCompaniesWorked,705.0,2.937589,2.681283,0.0,1.0,1.0,5.0,9.0
PercentSalaryHike,705.0,15.487943,3.785842,11.0,12.0,14.0,18.0,25.0
StandardHours,705.0,8.0,0.0,8.0,8.0,8.0,8.0,8.0


In [4]:
## Critical-value hypothesis test for the mean of one column
def _critical_value(ppf, alpha, tail):
    """Return the critical value for `tail`, using the quantile function `ppf`."""
    if tail == 'left_tail':
        return - ppf(1-alpha)
    if tail == 'right_tail':
        return ppf(1-alpha)
    # Any other value of `tail` is treated as a two-tailed test.
    return ppf(1-(alpha/2))


def _decision(statistic, critical_value, tail):
    """Compare the test statistic with the critical value and return the verdict text."""
    if tail == 'left_tail':
        keep_null = statistic > critical_value
    elif tail == 'right_tail':
        keep_null = statistic < critical_value
    else:
        keep_null = -critical_value < statistic < critical_value

    if keep_null:
        return 'Fail to Reject Ho, there is not enough evidence'
    return 'Reject Ho'


def checking_hypotesis(sample, alpha, column_name, df, tail, random):
    """Test a hypothesis about the mean of `df[column_name]` with the critical-value method.

    A random sample of `sample` rows is drawn from the column, and its mean is
    compared with the mean of the whole column (treated as the population).

    * ``sample >= 30``: Z statistic, using the standard deviation of the whole
      column and critical values from the standard normal distribution.
    * ``sample < 30``: T statistic, using the standard deviation of the sample
      and critical values from Student's t with ``sample - 1`` degrees of freedom.

    Both standard deviations come from pandas' ``Series.std()``, which is the
    same value ``DataFrame.describe()`` reports.

    Parameters
    ----------
    sample : int
        Number of rows to draw.
    alpha : float
        Significance level, strictly between 0 and 1.
    column_name : str
        Column of `df` to test.
    df : pandas.DataFrame
        Data treated as the population.
    tail : str
        'left_tail' or 'right_tail' for a one-tailed test. Any other value
        (the notebook uses 'two_tail') runs a two-tailed test.
    random : int
        Seed passed to ``Series.sample(random_state=...)``.

    Returns
    -------
    str
        'Reject Ho' or 'Fail to Reject Ho, there is not enough evidence'.

    Raises
    ------
    ValueError
        If `alpha` is not strictly between 0 and 1, or if the standard
        deviation used by the test is zero or undefined. In both situations the
        statistic or critical value would be NaN, every comparison would be
        False, and the function would otherwise report 'Reject Ho' by accident.
    """
    if not 0 < alpha < 1:
        raise ValueError('alpha must be strictly between 0 and 1, got {!r}'.format(alpha))

    population = df[column_name]
    sample_df = population.sample(sample, random_state = random)

    # Read the statistics from the column directly instead of describing the
    # whole frame twice.
    mean_population = population.mean()
    stddev_population = population.std()

    mean_sample = sample_df.mean()
    stddev_sample = sample_df.std()

    use_z_test = sample >= 30
    spread = stddev_population if use_z_test else stddev_sample
    # `not spread > 0` is also True for NaN (e.g. a one-row sample).
    if not spread > 0:
        raise ValueError(
            'standard deviation of {!r} is zero or undefined; '
            'the test statistic cannot be computed'.format(column_name)
        )

    print( 'Mean of Sample :', mean_sample, ', Mean of Population :', mean_population)
    statistic = (mean_sample - mean_population)/(spread/sample**(0.5))

    # The critical-value label differs between branches; the wording is kept
    # as-is so that previously recorded output still matches.
    if use_z_test:
        print('\n*** Using Z test, sample >= 30 ***')
        print('Value of Z test :', statistic)
        ppf = norm.ppf
        long_label = tail not in ('left_tail', 'right_tail')
    else:
        print('\n*** Using T test ***, sample < 30')
        print('Value of T test :', statistic)
        ppf = lambda q: t.ppf(q, sample-1)
        long_label = tail == 'right_tail'

    critical_value = _critical_value(ppf, alpha, tail)
    if long_label:
        print('Value of Critical Value :', critical_value )
    else:
        print('Value of Critical Val :', critical_value )

    return _decision(statistic, critical_value, tail)

## Hypothesis 1 (DistanceFromHome)

The average DistanceFromHome of employees who left is 9, with a standard deviation of 7.75. <br>
A researcher believes that the average distance is lower. <br>
A sample of 30 people has a mean of 7.9. <br>

So: <br>
Ho: mean >= 9 <br>
Ha: mean < 9 <br>
alpha = 5% <br>

In [5]:
# Left-tailed test on DistanceFromHome: 30 sampled rows, 5% significance, seed 10.
sample, alpha, tail = 30, 0.05, 'left_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='DistanceFromHome',
    df=dfattrition,
    tail=tail,
    random=10,
)
print('\nThe result is We', result)

Mean of Sample : 7.933333333333334 , Mean of Population : 9.024113475177305

*** Using Z test, sample >= 30 ***
Value of Z test : -0.7703812899550473
Value of Critical Val : -1.6448536269514722

The result is We Fail to Reject Ho, there is not enough evidence


## Hypothesis 2 (Age)

The average age of employees who left is 33.6, with a standard deviation of 9.67. <br>
A researcher believes that the average age is higher. <br>
A sample of 35 people has a mean of 37. <br>

So: <br>
Ho: mean <= 33.6 <br>
Ha: mean > 33.6 <br>
alpha = 5% <br>

In [6]:
# Right-tailed test on Age: 35 sampled rows, 5% significance, seed 10.
sample, alpha, tail = 35, 0.05, 'right_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='Age',
    df=dfattrition,
    tail=tail,
    random=10,
)
print('\nThe result is We', result)

Mean of Sample : 37.05714285714286 , Mean of Population : 33.62836879432624

*** Using Z test, sample >= 30 ***
Value of Z test : 2.0957995235742177
Value of Critical Val : 1.6448536269514722

The result is We Reject Ho


## Hypothesis 3 (Education)

The average Education level of employees who left is 2.87, with a standard deviation of 1. <br>
A researcher believes that the average Education level is higher. <br>
A sample of 35 people has a mean of 3. <br>

So: <br>
Ho: mean <= 2.87 <br>
Ha: mean > 2.87 <br>
alpha = 5% <br>

In [7]:
# Right-tailed test on Education: 35 sampled rows, 5% significance, seed 30.
sample, alpha, tail = 35, 0.05, 'right_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='Education',
    df=dfattrition,
    tail=tail,
    random=30,
)
print('\nThe Result is We', result)

Mean of Sample : 3.085714285714286 , Mean of Population : 2.872340425531915

*** Using Z test, sample >= 30 ***
Value of Z test : 1.2443403453939608
Value of Critical Val : 1.6448536269514722

The Result is We Fail to Reject Ho, there is not enough evidence


## Hypothesis 4 (YearsAtCompany)

The average YearsAtCompany of employees who left is 5.15, with a standard deviation of 5.96. <br>
A researcher believes that the average number of years at the company is lower. <br>
A sample of 35 people has a mean of 4.7. <br>

So: <br>
Ho: mean >= 5.15 <br>
Ha: mean < 5.15 <br>
alpha = 5% <br>

In [8]:
# Left-tailed test on YearsAtCompany: 35 sampled rows, 5% significance, seed 40.
sample, alpha, tail = 35, 0.05, 'left_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='YearsAtCompany',
    df=dfattrition,
    tail=tail,
    random=40,
)
print('\nThe Result is We', result)

Mean of Sample : 4.714285714285714 , Mean of Population : 5.148936170212766

*** Using Z test, sample >= 30 ***
Value of Z test : -0.43137711127029127
Value of Critical Val : -1.6448536269514722

The Result is We Fail to Reject Ho, there is not enough evidence


## Hypothesis 5 (MonthlyIncome)

The average MonthlyIncome of employees who left is 61815, with a standard deviation of 44890. <br>
A researcher believes that the average monthly income is higher. <br>
A sample of 35 people has a mean of 74723. <br>

So: <br>
Ho: mean <= 61815 <br>
Ha: mean > 61815 <br>
alpha = 5% <br>

In [9]:
# Right-tailed test on MonthlyIncome: 35 sampled rows, 5% significance, seed 20.
sample, alpha, tail = 35, 0.05, 'right_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='MonthlyIncome',
    df=dfattrition,
    tail=tail,
    random=20,
)
print('\nThe Result is We', result)

Mean of Sample : 74723.14285714286 , Mean of Population : 61814.95035460993

*** Using Z test, sample >= 30 ***
Value of Z test : 1.7011610394692747
Value of Critical Val : 1.6448536269514722

The Result is We Reject Ho


## Hypothesis 6 (TotalWorkingYears)

The average TotalWorkingYears of employees who left is 8.2, with a standard deviation of 7.1. <br>
A researcher believes that the average total working years is higher. <br>
A sample of 35 people has a mean of 8.8. <br>

So: <br>
Ho: mean <= 8.2 <br>
Ha: mean > 8.2 <br>
alpha = 5% <br>

In [10]:
# Right-tailed test on TotalWorkingYears: 35 sampled rows, 5% significance, seed 20.
sample, alpha, tail = 35, 0.05, 'right_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='TotalWorkingYears',
    df=dfattrition,
    tail=tail,
    random=20,
)
print('\nThe Result is We', result)

Mean of Sample : 8.885714285714286 , Mean of Population : 8.273758865248228

*** Using Z test, sample >= 30 ***
Value of Z test : 0.5044582772983476
Value of Critical Val : 1.6448536269514722

The Result is We Fail to Reject Ho, there is not enough evidence


## Hypothesis 7 (JobLevel)

The average JobLevel of employees who left is 2, with a standard deviation of 1. <br>
A researcher believes that the average JobLevel is lower. <br>
A sample of 35 people has a mean of 1.85. <br>

So: <br>
Ho: mean >= 2 <br>
Ha: mean < 2 <br>
alpha = 5% <br>

In [11]:
# Left-tailed test on JobLevel: 35 sampled rows, 5% significance, seed 9.
sample, alpha, tail = 35, 0.05, 'left_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='JobLevel',
    df=dfattrition,
    tail=tail,
    random=9,
)
print('\nThe Result is We', result)

Mean of Sample : 1.8571428571428572 , Mean of Population : 2.0326241134751775

*** Using Z test, sample >= 30 ***
Value of Z test : -0.9899438893283818
Value of Critical Val : -1.6448536269514722

The Result is We Fail to Reject Ho, there is not enough evidence


## Hypothesis 8 (PercentSalaryHike)

The average PercentSalaryHike of employees who left is 15.4, with a standard deviation of 3.78. <br>
A researcher believes that the average PercentSalaryHike is not equal to that value. <br>
A sample of 35 people has a mean of 15.9. <br>

So: <br>
Ho: mean = 15.4 <br>
Ha: mean != 15.4 <br>
alpha = 5% <br>

In [12]:
# Two-tailed test on PercentSalaryHike: 35 sampled rows, 5% significance, seed 10.
sample, alpha, tail = 35, 0.05, 'two_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='PercentSalaryHike',
    df=dfattrition,
    tail=tail,
    random=10,
)
print('\nThe Result is We', result)

Mean of Sample : 15.942857142857143 , Mean of Population : 15.487943262411347

*** Using Z test, sample >= 30 ***
Value of Z test : 0.7108873435783161
Value of Critical Value : 1.959963984540054

The Result is We Fail to Reject Ho, there is not enough evidence


## Hypothesis 9 (TrainingTimesLastYear)

The average TrainingTimesLastYear of employees who left is 2.65, with a standard deviation of 1.15. <br>
A researcher believes that the average TrainingTimesLastYear is not equal to that value. <br>
A sample of 35 people has a mean of 3.31. <br>

So: <br>
Ho: mean = 2.65 <br>
Ha: mean != 2.65 <br>
alpha = 5% <br>

In [13]:
# Two-tailed test on TrainingTimesLastYear: 35 sampled rows, 5% significance, seed 30.
sample, alpha, tail = 35, 0.05, 'two_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='TrainingTimesLastYear',
    df=dfattrition,
    tail=tail,
    random=30,
)
print('\nThe Result is We', result)

Mean of Sample : 3.3142857142857145 , Mean of Population : 2.6581560283687944

*** Using Z test, sample >= 30 ***
Value of Z test : 3.3581798383565724
Value of Critical Value : 1.959963984540054

The Result is We Reject Ho


## Hypothesis 10 (YearsWithCurrManager)

The average YearsWithCurrManager of employees who left is 2.86, with a standard deviation of 3.15. <br>
A researcher believes that the average YearsWithCurrManager is not equal to that value. <br>
A sample of 35 people has a mean of 2.71. <br>

So: <br>
Ho: mean = 2.86 <br>
Ha: mean != 2.86 <br>
alpha = 5% <br>

In [17]:
# Two-tailed test on YearsWithCurrManager: 35 sampled rows, 5% significance, seed 30.
sample, alpha, tail = 35, 0.05, 'two_tail'

result = checking_hypotesis(
    sample=sample,
    alpha=alpha,
    column_name='YearsWithCurrManager',
    df=dfattrition,
    tail=tail,
    random=30,
)
print('\nThe Result is We', result)

Mean of Sample : 2.7142857142857144 , Mean of Population : 2.8652482269503547

*** Using Z test, sample >= 30 ***
Value of Z test : -0.2838356169973338
Value of Critical Value : 1.959963984540054

The Result is We Fail to Reject Ho, there is not enough evidence
