In [2]:
# Project: use different statistical testing to validate hypotheses 
# FetchMaker’s mission is to match up prospective dog owners with their perfect pet. 

In [38]:
import numpy as np
import pandas as pd
from scipy.stats import ttest_1samp, ttest_ind, binom_test, f_oneway, chi2_contingency
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [28]:
dogs = pd.read_csv("Micro project in Python- Hypothesis Testing SciPy_Dataset.csv")
#print(dogs)

In [29]:
# Define the function to retrieve dog attributes 
def get_attribute(breed, attribute):
  if breed in dogs.breed.unique():
    if attribute in dogs.columns:
        return dogs[dogs["breed"] == breed][attribute]
    else:
      raise NameError('Attribute {} does not exist.'.format(attribute))
  else:
    raise NameError('Breed {} does not exist.'.format(breed))

In [30]:
# Define the sub-functions to retrieve different attributes 
def get_weight(breed):
  return get_attribute(breed, 'weight')
  
def get_tail_length(breed):
  return get_attribute(breed, 'tail_length')

def get_color(breed):
    return get_attribute(breed, 'color')

def get_age(breed):
    return get_attribute(breed, 'age')

def get_is_rescue(breed):
    return get_attribute(breed, 'is_rescue')

def get_likes_children(breed):
    return get_attribute(breed, 'likes_children')

def get_is_hypoallergenic(breed):
    return get_attribute(breed, "is_hypoallergenic")

def get_name(breed):
    return get_attribute(breed, "name")

In [31]:
# 1 Sample T-Testing: compares a sample mean to a hypothetical population mean. 
# It answers the question “What is the probability that the sample came from a distribution with the desired mean?”

# we know the average tail length for rottweiler bread is 4cm. We want to compares our sample mean to a hypothetical population mean (4cm). 
# The null hypothesis states that there is no significant difference: “The set of samples belongs to a population with the target mean”. 
# The alternative hypothesis assumes that some difference exists between two means.

# Interpretation: Statistical significance is determined by looking at the p-value. 
# The p-value gives the probability of observing the test results under the null hypothesis. 
# The lower the p-value, the lower the probability of obtaining a result like the one that was observed if the null hypothesis was true. 
# Thus, a low p-value indicates decreased support for the null hypothesis. However, the possibility that the null hypothesis is true and that we simply obtained a very rare result can never be ruled out completely. 
# The cutoff value for determining statistical significance is usually a value of .05 or less is chosen. 
# This corresponds to a 5% (or less) chance of obtaining a result like the one that was observed if the null hypothesis was true.


In [37]:
# 1 Sample T-Testing
rottweiler_tl = get_tail_length("rottweiler")
#print rottweiler_tl
r_m = np.mean(rottweiler_tl)
#print r_m
tval, pval = ttest_1samp(rottweiler_tl, 4)
print(pval)
#print(np.std(rottweiler_tl))
#print("P-value")
print("1 Sample T-Testing:") 
print("P-value is {}. Therefore, the null hypothesis is likely true. In other words, there is no significant difference: “The set of samples belongs to a population with the target mean.” ".format(round(pval,2)))

0.2579725931822169
1 Sample T-Testing:
P-value is 0.26. Therefore, the null hypothesis is likely true. In other words, there is no significant difference: “The set of samples belongs to a population with the target mean.” 


In [39]:
# 2 Sample T-Testing: compare whether the average difference between two groups is really significant or if it is due instead to random chance. 
# The null hypothesis, in this case, is that the two distributions have the same mean.

# We think the average weight of whippet and pitbull are same. 
# We wan to see is there any significant difference (reject null hypothesis or there is only random difference (meaning if we had bigger samples, there would be no difference). 
# We use with 2 Sample T-Testing. 


In [52]:
# 2 Sample T-Testing
w_w = get_weight("whippet")
w_p = get_weight("pitbull")
#print(np.mean(w_w))
#print(np.mean(w_p))
tval, pval = ttest_ind(w_w, w_p)
print(pval)
print("2 Sample T-Testing:") 
print("P-value is {}. Therefore, we can reject the null hypothesis. In other words, there is a significant difference between average weights of the two breeds” ".format(round(pval,2)))

0.03742529840191976
2 Sample T-Testing:
P-value is 0.04. Therefore, we can reject the null hypothesis. In other words, there is a significant difference between average weights of the two breeds” 


In [53]:
# ANOVA (Analysis of Variance) tests the null hypothesis that all of the datasets have the same mean. 
# If we reject the null hypothesis with ANOVA, were saying that at least one of the sets has a different mean; 
# however, it does not tell us which datasets are different.

# We want to see of there is a significant difference in the average weights of the three dog breeds? 
# we can use ANOVA method 

In [55]:
# ANOVA
w_t = get_weight("terrier")
#print(np.mean(w_w))
#print(np.mean(w_t))
#print(np.mean(w_p))
tval, pval = f_oneway(w_w,w_t,w_p)
#print pval
print("ANOVA:") 
print("P-value is {}. Therefore, we can reject the null hypothesis and say that at least one of the breads has different average weight. In other words, the result is statisctically significant".format(pval)) 

ANOVA
P-value is 3.276415588274815e-17. Therefore, we can reject the null hypothesis and say that at least one of the breads has different average weight. In other words, the result is statisctically significant


In [56]:
# Tukeys Range: we have performed ANOVA to compare three sets of data and we found that there is some significant difference between datasets. 
# We can perform a Tukey’s Range Test to determine the difference between datasets

# Which of the pairs of these dog breeds differ from each other. 
# We can perform a Tukeys Range Test to determine the difference between datasets.


In [61]:
# Tukeys Range
w = np.concatenate([w_w,w_t,w_p])
labels = ['whippet'] * len(w_w) + ['terrier'] * len(w_t) + ['pitbull'] * len(w_p)
tukey_results = pairwise_tukeyhsd(w, labels, 0.05)
print(tukey_results)
print("Tukey_results")
print("Terrier breed has different average weight")

Multiple Comparison of Means - Tukey HSD,FWER=0.05
 group1  group2 meandiff  lower  upper  reject
----------------------------------------------
pitbull terrier  -13.24  -16.728 -9.752  True 
pitbull whippet  -3.34    -6.828 0.148  False 
terrier whippet   9.9     6.412  13.388  True 
----------------------------------------------
Tukey_results
Terrier breed has different average weight


In [62]:
# Binomial Test: We want to know if whippets are significantly more or less likely to be a rescue. 
# To analyze a dataset like this, with two different possibilities for entries, we can use a Binomial Test. 
# A Binomial Test compares a categorical dataset to some expectation. In this case our expected percentage is 8%.

In [65]:
# Binomial Test
whippet_rescue =get_is_rescue("whippet")
#print whippet_rescue
num_whippet_rescues = np.count_nonzero(whippet_rescue)
num_whippets = np.size(whippet_rescue)
#print(float(num_whippet_rescues)/num_whippets)
pval=binom_test(num_whippet_rescues, p=0.08, n=float(num_whippets))
print ("P-value is {}. Therefore, we can't reject the null hypothesis. In other words, the result is not statisctically significant and we can say that expected percentage os of 8% can be true if we have a bigger sample data".format(round(pval,2)))

P-value is 0.58. Therefore, we can't reject the null hypothesis. In other words, the result is not statisctically significant and we can say that expected percentage os of 8% can be true if we have a bigger sample data


In [66]:
# Chi Square test: If we have two or more categorical datasets that we want to compare, 
# we should use a Chi Square test.

# We want to see if "poodle"s and "shihtzu"s have significantly different color breakdowns.

In [69]:
poodle_colors = get_color("poodle")
shihtzu_colors = get_color("shihtzu")
color_table = [[np.count_nonzero(poodle_colors == "black"), np.count_nonzero(shihtzu_colors == "black")],[np.count_nonzero(poodle_colors == "brown"), np.count_nonzero(shihtzu_colors == "brown")],[np.count_nonzero(poodle_colors == "Gold"), np.count_nonzero(shihtzu_colors == "Gold")],[np.count_nonzero(poodle_colors == "Grey"), np.count_nonzero(shihtzu_colors == "Grey")],[np.count_nonzero(poodle_colors == "white"), np.count_nonzero(shihtzu_colors == "white")]]
# trap the error 
for i in range(0,5):
  for j in range(0,2):
    if color_table[i][j] == 0:
      color_table[i][j] = color_table[i][j]+0.01

      print(color_table) 
chi2, pval, dof, expected = chi2_contingency(color_table)
#print pval
print ("P-value is {}. Therefore, we can reject the null hypothesis and and state that the two breeds have significantly different color breakdowns.".format(pval)) 

[[17, 10], [13, 36], [0.01, 0], [0, 0], [10, 7]]
[[17, 10], [13, 36], [0.01, 0.01], [0, 0], [10, 7]]
[[17, 10], [13, 36], [0.01, 0.01], [0.01, 0], [10, 7]]
[[17, 10], [13, 36], [0.01, 0.01], [0.01, 0.01], [10, 7]]
P-value is 0.021038135506335327. Therefore, we can reject the null hypothesis and and state that the two breeds have significantly different color breakdowns.
