### Importing Libraries

In [1]:
import numpy as np
import pandas as pd

### Reading the CSV file

In [16]:
# Load the dog dataset into a DataFrame and preview its first five rows
dogs = pd.read_csv(filepath_or_buffer='dog_data.csv')
dogs.head(n=5)


Unnamed: 0,is_rescue,weight,tail_length,age,color,likes_children,is_hypoallergenic,name,breed
0,0,6,2.25,2,black,1,0,Huey,chihuahua
1,0,4,5.36,4,black,0,0,Cherish,chihuahua
2,0,7,3.63,3,black,0,1,Becka,chihuahua
3,0,5,0.19,2,black,0,0,Addie,chihuahua
4,0,5,0.37,1,black,1,1,Beverlee,chihuahua


### Filtering dogs dataframe for specific breeds

In [3]:
# Rows whose breed is whippet, terrier, or pitbull
dogs_wtp = dogs.loc[dogs['breed'].isin(['whippet', 'terrier', 'pitbull'])]

# Rows whose breed is poodle or shihtzu
dogs_ps = dogs.loc[dogs['breed'].isin(['poodle', 'shihtzu'])]

### `is_rescue` values for the Whippet breed

In [20]:
# Select the is_rescue column for whippet rows in a single .loc lookup
whippet_rescue = dogs.loc[dogs['breed'] == 'whippet', 'is_rescue']
whippet_rescue.head(n=5)

700    0
701    0
702    0
703    0
704    0
Name: is_rescue, dtype: int64

### Number of rescued whippets

In [5]:
# Summing the is_rescue values counts the rescued whippets
# (the preview of this column shows integer 0/1-style flags)
num_whippet_rescues = whippet_rescue.sum()
num_whippet_rescues

6


### Total number of whippets

In [21]:
# Number of whippet rows, i.e. the sample size for the binomial test
num_whippets = whippet_rescue.shape[0]
num_whippets

100

### Hypothesis test: is the proportion of rescued whippets equal to 8%?

In [22]:
# Exact two-sided binomial test of H0: the true rescue rate among whippets is 8%.
# `scipy.stats.binom_test` was deprecated in SciPy 1.10 and removed in 1.12;
# `binomtest` (available since SciPy 1.7) is its replacement. It returns a
# result object, so the p-value is read from its `.pvalue` attribute.
from scipy.stats import binomtest

# binomtest requires integer counts; int() normalises NumPy integer scalars.
pval = binomtest(int(num_whippet_rescues), n=int(num_whippets), p=0.08).pvalue
pval

0.5811780106238098

### Weight values of Whippet, Terrier and Pitbull breeds

In [11]:
# Weight column for each of the three breeds compared in the ANOVA below
wt_whippets = dogs.loc[dogs['breed'] == 'whippet', 'weight']
wt_terriers = dogs.loc[dogs['breed'] == 'terrier', 'weight']
wt_pitbulls = dogs.loc[dogs['breed'] == 'pitbull', 'weight']

### Hypothesis to check if there is a significant difference in the average weights of these three dog breeds

In [23]:
# One-way ANOVA across the three breeds' weights; only the p-value is displayed
from scipy.stats import f_oneway
anova_result = f_oneway(wt_pitbulls, wt_terriers, wt_whippets)
fstat, pval = anova_result.statistic, anova_result.pvalue
pval

3.276415588274815e-17

In [24]:
# Tukey's range test (HSD): compares every pair of breeds and reports which
# pairs have significantly different average weights
from statsmodels.stats.multicomp import pairwise_tukeyhsd
result = pairwise_tukeyhsd(
    endog=dogs_wtp['weight'],
    groups=dogs_wtp['breed'],
)
# print() is used so the summary table is rendered
print(result)

 Multiple Comparison of Means - Tukey HSD, FWER=0.05 
 group1  group2 meandiff p-adj   lower  upper  reject
-----------------------------------------------------
pitbull terrier   -13.24  0.001 -16.728 -9.752   True
pitbull whippet    -3.34 0.0639  -6.828  0.148  False
terrier whippet      9.9  0.001   6.412 13.388   True
-----------------------------------------------------


### Contingency table of dog colors by breed (poodle vs. shihtzu)

In [25]:
# chi2_contingency is imported here for the chi-square test run on this table
from scipy.stats import chi2_contingency

# Cross-tabulate breed (rows) against color (columns) for poodles and shihtzus
Xtab = pd.crosstab(index=dogs_ps['breed'], columns=dogs_ps['color'])
Xtab

color,black,brown,gold,grey,white
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
poodle,17,13,8,52,10
shihtzu,10,36,6,41,7


### Hypothesis to check if there is an association between breed (poodle vs. shihtzu) and color.

In [15]:
# Chi-square test on the breed-by-color table; the call's four return values
# are unpacked and only the p-value is displayed
chi2_output = chi2_contingency(Xtab)
chi2, pval, dof, expected = chi2_output
pval

0.005302408293244597
