# Hypothesis Testing Fundamentals

## Imports

In [10]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [3]:
# Read the shipments table from the feather file next to the notebook,
# then preview five randomly drawn rows (unseeded, so the rows vary per run).
late_shipments = pd.read_feather(path='late_shipments.feather')
late_shipments.sample(n=5)

Unnamed: 0,id,country,managed_by,fulfill_via,vendor_inco_term,shipment_mode,late_delivery,late,product_group,sub_classification,...,line_item_quantity,line_item_value,pack_price,unit_price,manufacturing_site,first_line_designation,weight_kilograms,freight_cost_usd,freight_cost_groups,line_item_insurance_usd
2,69871.0,Vietnam,PMO - US,Direct Drop,EXW,Air,0.0,No,ARV,Adult,...,22925.0,110040.0,4.8,0.08,Hetero Unit III Hyderabad IN,Yes,3723.0,19056.13,expensive,181.57
936,47301.0,Zimbabwe,PMO - US,Direct Drop,EXW,Air,0.0,No,ARV,Pediatric,...,814.0,0.01,0.0,0.0,Mylan (formerly Matrix) Nashik,Yes,60.0,1344.94,reasonable,0.0
172,18980.0,Tanzania,PMO - US,Direct Drop,EXW,Air,0.0,No,HRDT,HIV test,...,336.0,26880.0,80.0,0.8,"Alere Medical Co., Ltd.",Yes,139.0,2234.96,reasonable,27.63
21,75706.0,Ghana,PMO - US,Direct Drop,FCA,Air,0.0,No,ARV,Pediatric,...,934.0,5547.96,5.94,0.1,ABBVIE Ludwigshafen Germany,Yes,110.0,2060.79,reasonable,5.7
145,40620.0,Mozambique,PMO - US,Direct Drop,EXW,Air,1.0,Yes,HRDT,HIV test,...,2591.0,201320.7,77.7,0.78,Inverness Japan,Yes,994.0,12838.85,expensive,332.18


In [8]:
# NOTE(review): star import from a project-local `bootstrap` module. Later cells use
# `late_shipments_boot_distn`, which is not defined anywhere in this notebook, so it
# presumably comes from here — confirm, and prefer importing the needed names explicitly.
from bootstrap import *

## Hypothesis tests and z-scores

### Calculating the sample mean

In [6]:
# Show the late_shipments table (pandas truncates the printed frame)
print(late_shipments)

# Flag each shipment whose 'late' value is 'Yes'; the mean of those
# boolean flags is the sample proportion of late shipments.
late_prop_samp = late_shipments['late'].eq('Yes').mean()

# Show the sample proportion
print(late_prop_samp)

          id       country managed_by  fulfill_via vendor_inco_term  \
0    36203.0       Nigeria   PMO - US  Direct Drop              EXW   
1    30998.0      Botswana   PMO - US  Direct Drop              EXW   
2    69871.0       Vietnam   PMO - US  Direct Drop              EXW   
3    17648.0  South Africa   PMO - US  Direct Drop              DDP   
4     5647.0        Uganda   PMO - US  Direct Drop              EXW   
..       ...           ...        ...          ...              ...   
995  13608.0        Uganda   PMO - US  Direct Drop              DDP   
996  80394.0    Congo, DRC   PMO - US  Direct Drop              EXW   
997  61675.0        Zambia   PMO - US  Direct Drop              EXW   
998  39182.0  South Africa   PMO - US  Direct Drop              DDP   
999   5645.0      Botswana   PMO - US  Direct Drop              EXW   

    shipment_mode  late_delivery late product_group    sub_classification  \
0             Air            1.0  Yes          HRDT              HIV t

### Calculating a z-score

In [9]:
# Hypothesized proportion of late shipments: 6%
late_prop_hyp = 0.06

# Standard error: sample standard deviation (ddof=1) of the bootstrap distribution
std_error = np.std(late_shipments_boot_distn, ddof=1)

# Standardize the gap between the observed and hypothesized proportions
prop_gap = late_prop_samp - late_prop_hyp
z_score = prop_gap / std_error

# Show the z-score
print(z_score)

0.13353771933071554


## p-values

### Calculating p-values

In [11]:
# Standardize the sample proportion against the hypothesized proportion
z_score = (late_prop_samp - late_prop_hyp) / std_error

# Upper-tail p-value under the standard normal: P(Z > z_score).
# norm.sf is the survival function (1 - cdf); calling it directly avoids the
# cancellation in `1 - norm.cdf(...)`, which rounds to exactly 0.0 for large z.
p_value = norm.sf(z_score, loc=0, scale=1)

# Print the p-value
print(p_value)

0.4468840678346485


## Statistical significance

### Calculating a confidence interval

In [12]:
# 95% confidence interval via the quantile method: take the 2.5% and 97.5%
# quantiles of the bootstrap distribution in a single vectorised call.
lower, upper = np.quantile(late_shipments_boot_distn, [0.025, 0.975])

# Show the interval as a (lower, upper) tuple
print((lower, upper))

(0.047, 0.076)
