In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import seaborn as sns
from env import get_db_url, user, password, host
from pydataset import data

#### For each of the following questions, formulate a null and alternative hypothesis (be as specific as you can be), then give an example of what a true positive, true negative, type I and type II errors would look like. Note that some of the questions are intentionally phrased in a vague way. It is your job to reword these as more precise questions that could be tested.

#### Has the network latency gone up since we switched internet service providers?

H0: There has been no increase in network latency since we switched internet service providers.

H1: Network latency has gone up since we switched internet service providers.

#### Is the website redesign any good?

H0: There has been no increase in the time customers spend on the website since the redesign.

H1: Customers spend more time on the website since the redesign than they did before the redesign. 

#### Is our television ad driving more sales?

H0: There has been no increase in sales since the premiere of the television ad.

H1: Sales have gone up since the premiere of the television ad.

# T-Test

#### Ace Realty wants to determine whether the average time it takes to sell homes is different for its two offices. A sample of 40 sales from office #1 revealed a mean of 90 days and a standard deviation of 15 days. A sample of 50 sales from office #2 revealed a mean of 100 days and a standard deviation of 20 days. Use a .05 level of significance.


In [2]:
# Summary statistics from the problem statement for each office's sample:
# mean days to sell, standard deviation in days, and number of sales.
μ1, σ1, n1 = 90, 15, 40     # Office 1
μ2, σ2, n2 = 100, 20, 50    # Office 2

# Significance level given in the problem statement.
α = .05

# Independent two-sample t-test computed directly from the summary
# statistics; equal_var=False means equal variances are not assumed.
t, p = stats.ttest_ind_from_stats(
    mean1=μ1, std1=σ1, nobs1=n1,
    mean2=μ2, std2=σ2, nobs2=n2,
    equal_var=False,
)

In [3]:
# H0: Mean sales time is the same for the two offices.
# H1: Mean sales time differs between the two offices.

# Two-tailed decision: reject H0 only when the p-value is below α.
print('We reject H0.' if p < α else 'We fail to reject H0.')

We reject H0.


#### Load the mpg dataset and use it to answer the following questions:

In [4]:
# Load the mpg dataset and append avg_mpg: the arithmetic mean of each
# car's city (cty) and highway (hwy) mileage.
cars = data('mpg').assign(avg_mpg=lambda df: (df['cty'] + df['hwy']) / 2)

#### Is there a difference in fuel-efficiency in cars from 2008 vs 1999?

In [5]:
# Split the cars by model year.
cars_08 = cars.loc[cars['year'] == 2008]
cars_99 = cars.loc[cars['year'] == 1999]

# Significance level for the test.
α = .05

# Independent two-sample t-test on avg_mpg (2008 vs 1999);
# equal_var=False means equal variances are not assumed.
t, p = stats.ttest_ind(
    a=cars_08['avg_mpg'],
    b=cars_99['avg_mpg'],
    equal_var=False,
)

In [6]:
# H0: Mean avg_mpg for 2008 cars equals mean avg_mpg for 1999 cars.
# H1: Mean avg_mpg for 2008 cars differs from mean avg_mpg for 1999 cars.

# Two-tailed decision: reject H0 only when the p-value is below α.
print('We reject H0.' if p < α else 'We fail to reject H0.')

We fail to reject H0.


#### Are compact cars more fuel-efficient than the average car?

In [7]:
# Subset of cars whose class is 'compact'.
compacts = cars.loc[cars['class'] == 'compact']

# Reference value: mean avg_mpg across every car in the dataset.
μ = cars['avg_mpg'].mean()

# Significance level for the test.
α = .05

# One-sample t-test of the compact cars' avg_mpg against the overall mean.
t, p = stats.ttest_1samp(compacts['avg_mpg'], popmean=μ)

In [8]:
# H0: Mean avg_mpg for compact cars equals the mean avg_mpg for all cars
#     in the dataset.
# H1: Mean avg_mpg for compact cars is higher than the mean avg_mpg for
#     all cars in the dataset.

# One-tailed ("greater than") decision: halve the p-value and require a
# positive t statistic.
reject_h0 = p / 2 < α and t > 0
print('We reject H0.' if reject_h0 else 'We fail to reject H0.')

We reject H0.


#### Do manual cars get better gas mileage than automatic cars?

In [9]:
# Partition cars by transmission type, matching on the text of `trans`.
manuals = cars.loc[cars['trans'].str.contains('manual')]
autos = cars.loc[cars['trans'].str.contains('auto')]

# Significance level for the test.
α = .05

# Independent two-sample t-test on avg_mpg (manual vs automatic);
# equal_var=False means equal variances are not assumed.
t, p = stats.ttest_ind(
    a=manuals['avg_mpg'],
    b=autos['avg_mpg'],
    equal_var=False,
)

In [10]:
# H0: Mean avg_mpg for manual-transmission cars is not higher than the
#     mean avg_mpg for automatic-transmission cars.
# H1: Mean avg_mpg for manual-transmission cars is higher than the
#     mean avg_mpg for automatic-transmission cars.

# One-tailed ("greater than") decision: halve the p-value and require a
# positive t statistic.
reject_h0 = p / 2 < α and t > 0
print('We reject H0.' if reject_h0 else 'We fail to reject H0.')

We reject H0.
