# Testing statistical significance of ACS changes
 
 
I wrote the `acs_testing` module using the math described here:
 
* https://www2.census.gov/programs-surveys/acs/tech_docs/statistical_testing/2017StatisticalTesting5year.pdf

to perform only the basic estimate difference calculation.

I generated a sheet of results using the test tool here:

* https://www.census.gov/programs-surveys/acs/guidance/statistical-testing-tool.html

and I'm going to test it below.

In [1]:
from acs_testing.significance import Estimate, Difference

In [2]:
import pandas as pd

# Load the vacancy table and key it by town name.
df = pd.read_csv("./data/vacant.csv").set_index("town")

# The remaining four columns are renamed positionally: an estimate followed by
# what is presumably its margin of error, for each of the two series compared.
df.columns = ["vacant1", "moe1", "vacant2", "moe2"]

In [3]:
# Preview the first five towns to confirm the renamed columns line up.
df.head(n=5)

Unnamed: 0_level_0,vacant1,moe1,vacant2,moe2
town,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Bethel,3.6,1.5,5.8,2.3
Bridgeport,13.1,0.9,13.4,0.9
Brookfield,8.6,3.1,6.9,3.0
Danbury,9.3,1.2,9.1,1.1
Darien,5.6,2.0,4.9,1.7


In [4]:
def _is_significant_change(row):
    """Return whether the row's two estimates differ significantly.

    Wraps each (estimate, MOE) pair in an Estimate and asks the resulting
    Difference whether it is significant.
    """
    first = Estimate(row["vacant1"], row["moe1"])
    second = Estimate(row["vacant2"], row["moe2"])
    return Difference(first, second).is_significant()


# One significance flag per town.
df["sig_diff"] = df.apply(_is_significant_change, axis=1)

In [6]:
def _difference_z(row):
    """Return the Z attribute of the Difference between the row's two estimates."""
    first = Estimate(row["vacant1"], row["moe1"])
    second = Estimate(row["vacant2"], row["moe2"])
    return Difference(first, second).Z


# One Z value per town; note it is signed (first estimate vs. second).
df["z"] = df.apply(_difference_z, axis=1)

In [7]:
# Show the first few towns whose change was flagged as significant.
df.loc[df["sig_diff"]].head()

Unnamed: 0_level_0,vacant1,moe1,vacant2,moe2,sig_diff,z
town,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Monroe,1.7,1.4,6.1,2.3,True,-2.688126
New Fairfield,13.2,2.7,17.4,2.5,True,-1.877612
Stamford,6.6,0.9,8.7,1.0,True,-2.56771
Trumbull,2.2,1.2,4.0,1.2,True,-1.744786
Avon,2.5,1.5,5.3,2.2,True,-1.72982


In [8]:
# Cross-check our significance flags against the spreadsheet produced by
# the Census statistical testing tool.

check = pd.read_csv("data/results.tsv", sep="\t").set_index("Label")

# The tool reports significance as "Yes"/other text; convert it to a boolean.
check["sig_diff_check"] = check["Statistically Different?"].eq("Yes")

# Join on the town index so each tool row sits next to our computed values.
combined = check.join(df)

# Count the towns where our flag disagrees with the tool's.
mismatches = combined["sig_diff_check"].ne(combined["sig_diff"])
print(len(combined[mismatches]))

0


In [9]:
# Inspect the Z-scores as reported by the Census tool.
combined.loc[:, "Z-score"]

Label
Bethel           1.32
Bridgeport       0.39
Brookfield       0.65
Danbury          0.20
Darien           0.44
Easton           0.43
Fairfield        0.66
Greenwich        1.21
Monroe           2.69
New Canaan       0.47
New Fairfield    1.88
Newtown          1.03
Norwalk          0.12
Redding          0.24
Ridgefield       1.00
Shelton          1.23
Sherman          0.06
Stamford         2.57
Stratford        1.34
Trumbull         1.74
Weston           0.95
Westport         0.46
Wilton           1.45
Avon             1.73
Berlin           0.25
Bloomfield       0.57
Bristol          1.55
Burlington       2.29
Canton           0.35
East Granby      0.96
                 ... 
Voluntown        1.19
Waterford        0.06
Andover          2.26
Bolton           1.07
Columbia         0.05
Coventry         0.96
Ellington        1.66
Hebron           0.39
Mansfield        0.00
Somers           0.14
Stafford         1.18
Tolland          3.42
Union            2.26
Vernon           1.87
Will

In [10]:
# Compare our Z statistic with the Z-score reported by the Census tool.
#
# The previous version subtracted max(z, Z-score) from itself, which is always
# zero, so the check could never fail. Our "z" is signed, while the tool's
# "Z-score" appears to be a non-negative value rounded to two decimals, so we
# compare magnitudes and allow for that rounding.
Z_ROUNDING_TOLERANCE = 0.005 + 1e-9  # half of the last reported digit, plus float slack

combined["zdiff"] = (combined["z"].abs() - combined["Z-score"].abs()).abs()

# Number of towns whose Z magnitude disagrees with the tool beyond rounding.
print (len(combined[combined["zdiff"] > Z_ROUNDING_TOLERANCE]))

0


In [11]:
def _standard_error(row, estimate_col, moe_col):
    """Return the `se` of the Estimate built from the row's given columns."""
    return Estimate(row[estimate_col], row[moe_col]).se


# Standard errors for the first and second estimates, one per town.
combined["SEA"] = combined.apply(
    _standard_error, axis=1, args=("vacant1", "moe1")
)
combined["SEB"] = combined.apply(
    _standard_error, axis=1, args=("vacant2", "moe2")
)

# Put our second standard error next to the tool's for a visual comparison.
combined[["SEB", "Second SE"]].head()

Unnamed: 0_level_0,SEB,Second SE
Label,Unnamed: 1_level_1,Unnamed: 2_level_1
Bethel,1.398176,1.4
Bridgeport,0.547112,0.55
Brookfield,1.823708,1.82
Danbury,0.668693,0.67
Darien,1.033435,1.03


In [12]:
# SEB agrees with the tool's "Second SE" once rounded to two decimal places

In [13]:
# Put our first standard error next to the tool's for the last few towns.
combined.loc[:, ["SEA", "First SE"]].tail()

Unnamed: 0_level_0,SEA,First SE
Label,Unnamed: 1_level_1,Unnamed: 2_level_1
Scotland,2.492401,2.49
Sterling,3.525836,3.53
Thompson,2.066869,2.07
Windham,1.458967,1.46
Woodstock,2.431611,2.43


In [14]:
# SEA agrees with the tool's "First SE" once rounded to two decimal places, too