In [1]:
from math import sqrt
from scipy import stats

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from pydataset import data

# Ace Realty wants to determine whether the average time it takes to sell homes is different for its two offices. A sample of 40 sales from office #1 revealed a mean of 90 days and a standard deviation of 15 days. A sample of 50 sales from office #2 revealed a mean of 100 days and a standard deviation of 20 days. Use a .05 level of significance.

In [2]:
# Significance level for the test
alpha = 0.05

# Statement reported when the test is significant (a difference exists).
# NOTE(review): by convention "H0" denotes the null hypothesis; here the names are
# inverted, and the decision cell below prints H0 on rejection, so any rename must
# change both cells together.
H0 = "There is a difference in the time it takes to sell homes between office 1 and office 2"

# Statement reported when the test is not significant (the null: no difference)
HA = "There is not a difference in the time it takes to sell homes between office 1 and office 2"

In [3]:
# Two-sample, two-tailed t-test computed from summary statistics only.
# Sample 1 is office #2 (mean 100, sd 20, n 50); sample 2 is office #1
# (mean 90, sd 15, n 40), so a positive t means office #2 takes longer to sell.
t, p = stats.ttest_ind_from_stats(
    mean1=100, std1=20, nobs1=50,
    mean2=90, std2=15, nobs2=40,
)
t, p

(2.6252287036468456, 0.01020985244923939)

In [4]:
# Two-tailed decision: report the H0 text when p is below alpha, otherwise the HA text.
print(
    'We reject the null hypothesis. ' if p < alpha else 'We fail to reject the null hypothesis, ',
    H0 if p < alpha else HA,
)

We reject the null hypothesis.  There is a difference in the time it takes to sell homes between office 1 and office 2


# Load the mpg dataset and use it to answer the following questions:

In [5]:
# Load the mpg dataset via pydataset and preview its first three rows
mpg = data('mpg')
mpg.head(n=3)

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class
1,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact
2,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact
3,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact


- Is there a difference in fuel-efficiency in cars from 2008 vs 1999?

In [6]:
# Significance level for the test
alpha = 0.05

# Statement reported when the test is significant (a difference exists).
# NOTE(review): the names are inverted relative to convention (H0 normally denotes
# the null); the decision cell prints H0 on rejection, so rename both together.
H0 = "There is a difference in the fuel efficiency in cars from 2008 and cars from 1999"

# Statement reported when the test is not significant (the null: no difference)
HA = "There is no difference in the fuel efficiency in cars from 2008 and cars from 1999"

In [7]:
# Add an overall fuel-efficiency column: the row-wise harmonic mean of the
# city (cty) and highway (hwy) mileage figures.
mpg['avg_fe'] = stats.hmean(mpg.loc[:, ['cty', 'hwy']], axis=1)
mpg.head(5)

Unnamed: 0,manufacturer,model,displ,year,cyl,trans,drv,cty,hwy,fl,class,avg_fe
1,audi,a4,1.8,1999,4,auto(l5),f,18,29,p,compact,22.212766
2,audi,a4,1.8,1999,4,manual(m5),f,21,29,p,compact,24.36
3,audi,a4,2.0,2008,4,manual(m6),f,20,31,p,compact,24.313725
4,audi,a4,2.0,2008,4,auto(av),f,21,30,p,compact,24.705882
5,audi,a4,2.8,1999,6,auto(l5),f,16,26,p,compact,19.809524


In [8]:
# Fuel efficiency (avg_fe) of the cars whose model year is 2008
fe_2008 = mpg.loc[mpg['year'] == 2008, 'avg_fe']

fe_2008.head(5)

3     24.313725
4     24.705882
7     21.600000
10    23.333333
11    22.304348
Name: avg_fe, dtype: float64

In [9]:
# Fuel efficiency (avg_fe) of the cars whose model year is 1999
fe_1999 = mpg.loc[mpg['year'] == 1999, 'avg_fe']

fe_1999.head(5)

1    22.212766
2    24.360000
5    19.809524
6    21.272727
8    21.272727
Name: avg_fe, dtype: float64

In [16]:
# Independent two-sample, two-tailed t-test: 1999 cars versus 2008 cars
t, p = stats.ttest_ind(a=fe_1999, b=fe_2008)
t, p

(0.3011962975077886, 0.7635345888327115)

In [17]:
# Two-tailed decision: report the H0 text when p is below alpha, otherwise the HA text.
print(
    'We reject the null hypothesis. ' if p < alpha else 'We fail to reject the null hypothesis, ',
    H0 if p < alpha else HA,
)

We fail to reject the null hypothesis,  There is no difference in the fuel efficiency in cars from 2008 and cars from 1999


- Are compact cars more fuel-efficient than the average car?

In [12]:
# Significance level for the test
alpha = 0.05

# Statement reported when the test is significant (compact cars are better).
# NOTE(review): the names are inverted relative to convention (H0 normally denotes
# the null); the decision cell prints H0 on rejection, so rename both together.
H0 = "Compact cars are more fuel efficient than the average car"

# Statement reported when the test is not significant (the null)
HA = "Compact cars are not more fuel efficient than the average car"

In [14]:
# Fuel efficiency (avg_fe) of the cars whose class is 'compact'
compact = mpg.loc[mpg['class'] == 'compact', 'avg_fe']

compact.head(5)

1    22.212766
2    24.360000
3    24.313725
4    24.705882
5    19.809524
Name: avg_fe, dtype: float64

In [15]:
# Reference value for the one-sample test: mean avg_fe over every car in mpg
μ = mpg['avg_fe'].mean()
μ

19.585525317633177

In [19]:
# One-sample t-test of compact-car fuel efficiency against the overall mean μ.
# NOTE: called without an `alternative` argument, ttest_1samp returns a two-sided
# p-value; a one-tailed conclusion needs p / 2 together with the sign of t.
t, p = stats.ttest_1samp(a=compact, popmean=μ)
t, p

(7.512360093161354, 1.5617666348807727e-09)

In [20]:
# One-tailed decision ("compact cars are MORE efficient than average").
# The p-value computed above is two-sided, so halve it, and additionally require
# t > 0 so that only a difference in the hypothesised direction counts as
# significant (t is positive when the compact mean exceeds μ).
# H0 holds the statement printed on rejection and HA the one printed otherwise,
# as assigned in the hypothesis cell for this question.
if (p / 2) < alpha and t > 0:
    print('We reject the null hypothesis. ', H0)
else:
    print('We fail to reject the null hypothesis, ', HA)

We reject the null hypothesis.  Compact cars are more fuel efficient than the average car


- Do manual cars get better gas mileage than automatic cars?

In [21]:
# Significance level for the test
alpha = 0.05

# Statement reported when the test is significant (manual cars are better).
# NOTE(review): the names are inverted relative to convention (H0 normally denotes
# the null); the decision cell prints H0 on rejection, so rename both together.
H0 = "Manual cars have better gas mileage than automatic cars"

# Statement reported when the test is not significant (the null)
HA = "manual cars do not have better gas mileage than automatic cars"

In [22]:
# Fuel efficiency (avg_fe) of cars whose transmission label contains 'auto'
auto = mpg.loc[mpg['trans'].str.contains('auto'), 'avg_fe']

auto.head(5)

1    22.212766
4    24.705882
5    19.809524
7    21.600000
9    19.512195
Name: avg_fe, dtype: float64

In [23]:
# Fuel efficiency (avg_fe) of cars whose transmission label contains 'manu'
manu = mpg.loc[mpg['trans'].str.contains('manu'), 'avg_fe']

manu.head(5)

2     24.360000
3     24.313725
6     21.272727
8     21.272727
10    23.333333
Name: avg_fe, dtype: float64

In [24]:
# Independent two-sample t-test: manual cars versus automatic cars.
# NOTE: called without an `alternative` argument, ttest_ind returns a two-sided
# p-value; a one-tailed conclusion needs p / 2 together with the sign of t
# (t is positive when the manual mean is the larger one).
t, p = stats.ttest_ind(a=manu, b=auto)
t, p

(4.652577547151351, 5.510464610044005e-06)

In [25]:
# One-tailed decision ("manual cars get BETTER mileage than automatics").
# The p-value computed above is two-sided, so halve it, and additionally require
# t > 0 so that only a difference in the hypothesised direction counts as
# significant (manual is the first sample, so t > 0 means the manual mean is higher).
# H0 holds the statement printed on rejection and HA the one printed otherwise,
# as assigned in the hypothesis cell for this question.
if (p / 2) < alpha and t > 0:
    print('We reject the null hypothesis. ', H0)
else:
    print('We fail to reject the null hypothesis, ', HA)

We reject the null hypothesis.  Manual cars have better gas mileage than automatic cars
