In [1]:
# Dependencies and Setup
import os
import csv
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
from scipy.stats import linregress
import seaborn as sns

# Path to the 2015 county-level CSV (a "cleaned" export, per its file name)
county_2015_path = "../Output/2015_cleaned_all_county.csv"

# Load the 2015 county data into a DataFrame (only this one file is read here)
county_2015 = pd.read_csv(filepath_or_buffer=county_2015_path)

In [2]:
# Preview the first five rows of the loaded county data
county_2015.head(n=5)

Unnamed: 0.1,Unnamed: 0,State,County,TotalPop,Men,Women,Hispanic,White,Black,Native,...,OtherTrans,WorkatHome,MeanCommute,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Poverty,ChildPoverty,Unemployment
0,0,Alabama,Autauga,55221,26745,28476,1435,41857,10215,220,...,311,431,26,17653,5013,1319,0,7123,10271,4196
1,1,Alabama,Baldwin,195121,95314,99807,8780,162145,18536,1170,...,1203,3352,26,70051,10572,4985,343,26146,37463,14634
2,2,Alabama,Barbour,26932,14497,12435,1238,12442,12577,53,...,128,137,24,6172,1788,627,8,7190,12200,4740
3,3,Alabama,Bibb,22604,12073,10531,497,16839,4837,90,...,124,58,28,6369,1335,555,33,3797,6306,1876
4,4,Alabama,Blount,57710,28512,29198,4963,50727,865,173,...,88,510,34,18194,2995,931,88,9637,15697,4443


In [3]:
# Replace missing values with 0 in the three columns used by the t-tests below.
# The filled result is assigned back to the frame instead of calling
# `fillna(inplace=True)` on an attribute-accessed column: that chained in-place
# call raises a FutureWarning on recent pandas and, under Copy-on-Write, only
# modifies a temporary Series, leaving `county_2015` unchanged.
# NOTE(review): zero-filling pulls down the per-state means compared later if
# any of these values are actually missing — confirm 0 is the intended
# placeholder (dropping the rows may be more appropriate for a t-test).
county_2015 = county_2015.fillna(value={"Income": 0, "MeanCommute": 0, "Poverty": 0})

In [4]:
# Write the zero-filled frame to CSV without the row index.
# `to_csv` returns None when given a path, so `county_15_output` is always None.
county_15_output = county_2015.to_csv(path_or_buf="fillnacounties15.csv", index=False)

In [5]:
# Reload the zero-filled county data from the CSV exported in the previous cell
fillnacounty_2015_path = "fillnacounties15.csv"
fillnacounty_2015 = pd.read_csv(filepath_or_buffer=fillnacounty_2015_path)

# Preview the first five rows of the reloaded frame
fillnacounty_2015.head(n=5)

Unnamed: 0.1,Unnamed: 0,State,County,TotalPop,Men,Women,Hispanic,White,Black,Native,...,OtherTrans,WorkatHome,MeanCommute,PrivateWork,PublicWork,SelfEmployed,FamilyWork,Poverty,ChildPoverty,Unemployment
0,0,Alabama,Autauga,55221,26745,28476,1435,41857,10215,220,...,311,431,26,17653,5013,1319,0,7123,10271,4196
1,1,Alabama,Baldwin,195121,95314,99807,8780,162145,18536,1170,...,1203,3352,26,70051,10572,4985,343,26146,37463,14634
2,2,Alabama,Barbour,26932,14497,12435,1238,12442,12577,53,...,128,137,24,6172,1788,627,8,7190,12200,4740
3,3,Alabama,Bibb,22604,12073,10531,497,16839,4837,90,...,124,58,28,6369,1335,555,33,3797,6306,1876
4,4,Alabama,Blount,57710,28512,29198,4963,50727,865,173,...,88,510,34,18194,2995,931,88,9637,15697,4443


In [6]:
# Keep only the identifying columns and the three measures used in the t-tests
ttest_2015_df = fillnacounty_2015.loc[
    :, ['State', 'County', 'Income', 'MeanCommute', 'Poverty']
]
ttest_2015_df

Unnamed: 0,State,County,Income,MeanCommute,Poverty
0,Alabama,Autauga,51281,26,7123
1,Alabama,Baldwin,50254,26,26146
2,Alabama,Barbour,32964,24,7190
3,Alabama,Bibb,38678,28,3797
4,Alabama,Blount,45813,34,9637
...,...,...,...,...,...
3215,Puerto Rico,Vega Baja,16948,32,27576
3216,Puerto Rico,Vieques,18104,14,3569
3217,Puerto Rico,Villalba,17818,26,13329
3218,Puerto Rico,Yabucoa,15627,29,18973


## CT vs CA 2015 TTest

In [7]:
# Rows for California only
CA_2015 = ttest_2015_df.loc[ttest_2015_df["State"].eq("California")]

In [8]:
# Rows for Connecticut only
CT_2015 = ttest_2015_df.loc[ttest_2015_df["State"].eq("Connecticut")]

In [9]:
# Mean of the Income column across California rows
CA_2015["Income"].mean()

56013.15517241379

In [10]:
# Mean of the Income column across Connecticut rows
CT_2015["Income"].mean()

71184.125

In [11]:
# Two-sample t-test on Income, Connecticut vs California.
# equal_var=False selects Welch's test, which does not assume equal variances.
stats.ttest_ind(CT_2015["Income"], CA_2015["Income"], equal_var=False)

Ttest_indResult(statistic=3.978041861723879, pvalue=0.0016057153850185287)

#### Income 2015: p-value = 0.0016 < 0.05, so we reject the null hypothesis.  There is a statistically significant difference between Connecticut's and California's average income (the mean of the county-level `Income` values).

In [12]:
# Mean of the MeanCommute column across California rows
CA_2015["MeanCommute"].mean()

24.586206896551722

In [13]:
# Mean of the MeanCommute column across Connecticut rows
CT_2015["MeanCommute"].mean()

25.0

In [14]:
# Two-sample t-test on MeanCommute, Connecticut vs California.
# equal_var=False selects Welch's test, which does not assume equal variances.
stats.ttest_ind(CT_2015["MeanCommute"], CA_2015["MeanCommute"], equal_var=False)

Ttest_indResult(statistic=0.39395028729397447, pvalue=0.6983352714049234)

#### Mean Commute 2015: p-value = 0.6983 > 0.05, so we fail to reject the null hypothesis.  There is no statistically significant difference between Connecticut's and California's mean commute (the mean of the county-level `MeanCommute` values).