In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats as stats
from scipy.stats import f_oneway
from scipy.stats import chi2
from scipy.stats import chisquare
from scipy.stats import chi2_contingency
import statsmodels.api as sm
from statsmodels.stats import weightstats as stests
import random
import statistics
import warnings
warnings.filterwarnings("ignore")

In [3]:
df = pd.read_csv("garments_worker_productivity.csv")

In [4]:
df.head()

Unnamed: 0,date,quarter,department,day,team,targeted_productivity,smv,wip,over_time,incentive,idle_time,idle_men,no_of_style_change,no_of_workers,actual_productivity
0,01/01/15,Quarter1,sewing,Thursday,8,0.8,26.16,1108.0,7080,98,0.0,0,0,59.0,0.940725
1,01/01/15,Quarter1,finishing,Thursday,1,0.75,3.94,,960,0,0.0,0,0,8.0,0.8865
2,01/01/15,Quarter1,sewing,Thursday,11,0.8,11.41,968.0,3660,50,0.0,0,0,30.5,0.80057
3,01/01/15,Quarter1,sewing,Thursday,12,0.8,11.41,968.0,3660,50,0.0,0,0,30.5,0.80057
4,01/01/15,Quarter1,sewing,Thursday,6,0.8,25.9,1170.0,1920,50,0.0,0,0,56.0,0.800382


In [18]:
# check whether the finishing department has achieved obtained higher incentive than sewing department 
alpha=0.05

#sigma not known so to apply two sample t-test 

print("Hypothesis:")
print("H0: Finishing department average incentive <= Sewing department average incentive")
print("H1: Finishing department average incentive > Sewing department average incentive (claim)")
print("It is a two sample right tailed t test")

#Prepare samples
sew_inc=df[df['department']=='sewing']['incentive'].tail(30)
fin_inc=df[df['department']=='finishing']['incentive'].tail(30)

n=30

#Check normality of samples via Shapiro test
sew_tval,sew_pval=stats.shapiro(sew_inc)
fin_tval,fin_pval=stats.shapiro(sew_inc)

print("Values of Shapiro Test")
print(sew_pval,fin_pval)
print("As both the pvalues are > alpha , therefore sample are normally distributed")

#Check for Equality Variances by Levene Test
levene_tval,levene_pval=stats.levene(sew_inc,fin_inc)
print("Values of Levene Test")
print(levene_pval)
print("As the pval < alpha, therefore the samples have unequal variances")

cv=stats.t.isf(alpha,df=n-1)
print("Critical Value:",cv)

t_val,p_val=stats.ttest_ind(sew_inc,fin_inc, alternative="greater", equal_var='False')
print(p_val)

print("As pval > alpha, therefore we cannot reject H0")

print("We don't have enough to support the claim H1")



Hypothesis:
H0: Finishing department average incentive <= Sewing department average incentive
H1: Finishing department average incentive > Sewing department average incentive (claim)
It is a two sample right tailed t test
Values of Shapiro Test
0.10378154367208481 0.10378154367208481
As both the pvalues are > alpha , therefore sample are normally distributed
Values of Levene Test
0.02350554135451634
As the pval < alpha, therefore the samples have unequal variances
Critical Value: 1.6991270265334977
0.9812812664694907
As pval > alpha, therefore we cannot reject H0
We don't have enough to support the claim H1


In [22]:
#15
#Conduct an ANOVA for comparing productivity for Quarter1, Quarter2, Quarter3, Quarter 4 and accordingly conclude.

q1=df[df['quarter']=='Quarter1']['actual_productivity']
q2=df[df['quarter']=='Quarter2']['actual_productivity']
q3=df[df['quarter']=='Quarter3']['actual_productivity']
q4=df[df['quarter']=='Quarter4']['actual_productivity']

#H0: All quarters have same actual productivity
#H0: Atleast one quarter has different productivity
k=4
N=len(df)
dfN=N-1
dfD=k-1

cv=stats.f.isf(alpha,dfN,dfD)
print("Critical Value:",cv)

#One way ANOVA
t_val,p_val=stats.f_oneway(q1,q2,q3,q4)

print("Oneway ANOVA:",p_val)

print("As pval of oneway ANOVA < alpha, we can reject H0")
print("there is enough evidence to support the claim")

Critical Value: 8.528759019312771
Oneway ANOVA: 0.0012927759459059219
As pval of oneway ANOVA < alpha, we can reject H0
there is enough evidence to support the claim


In [27]:
#16

#The manager has asked you to classifiy low productivity as less than .5, medium between .5 and less than .75 and higher as greater than or equal to .75. 
#He believes that the percentage of actual producvity in sewing departemnt for person classified as low is 30% (0.3), for medium is 40% (0.4) and for high is 30% (0.3). 

#The manager claims that Actual Productivity of Sewing Department is lower than other Departments

#H0: Actual Productivity is independent of Department
#H1:Actual Productivity is dependent on Department (claim)

#Create cross table
observed=pd.crosstab(df['department'],df['actual_productivity'])
observed


actual_productivity,0.233705,0.235795,0.238042,0.246250,0.247316,0.249417,0.251399,0.256500,0.258000,0.259375,...,1.033156,1.033570,1.050281,1.050667,1.057963,1.059621,1.096633,1.100484,1.108125,1.120437
department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
finishing,0,1,1,1,1,0,0,1,1,1,...,1,1,0,1,1,1,1,0,1,1
sewing,1,0,0,0,0,1,1,0,0,0,...,0,0,1,0,0,0,0,1,0,0


In [28]:
r=2
c=879

dof=(r-1)*(c-1)

observed_values=observed.values
observed_values

array([[0, 1, 1, ..., 0, 1, 1],
       [1, 0, 0, ..., 1, 0, 0]])

In [29]:
cv=chi2.isf(alpha,dof)
print("Critical Value:",cv)

Critical Value: 948.0451836927294


In [31]:
tval,pval,df,expected=chi2_contingency(observed_values, correction=False)

print("Pval:",pval)

print("As pval < alpha, reject H0")
print("We have enough evidence to support the claim h1")

Pval: 3.379397973696696e-12
As pval < alpha, reject H0
We have enough evidence to support the claim h1


In [None]:
# check whether the finishing department has achieved obtained higher incentive than sewing department 

