In [10]:
# Import libraries
import pandas as pd
import numpy as np

In [5]:
# Load the outputs of the two interpolation methods being compared

# Population-weighted interpolation output; only GEOID and the three
# estimate columns are read from the file
pw_interpolated = pd.read_csv(
    "interpolated.csv",
    usecols=["GEOID", "pop", "housing", "white"],
)

# Smooth-surface interpolation output; every column in the file is loaded
ss_interpolated = pd.read_csv("estimates50.csv")

In [9]:
# Join the smooth-surface columns onto the population-weighted rows by GEOID.
# A left join keeps every population-weighted row, even when the
# smooth-surface file has no row with the same GEOID (those columns become NaN).
interpolations = pd.merge(
    pw_interpolated,
    ss_interpolated,
    on="GEOID",
    how="left",
)
interpolations

Unnamed: 0,GEOID,pop,housing,white,P001001,H001001,P001003,P1_001N,H1_001N,P1_003N,state,county,tract
0,10001040100,6541.000000,2469.000000,5702.000000,6544.629384,2470.397083,5705.400048,7315.0,2740.0,5980.0,10.0,1.0,40100.0
1,10001040201,5041.000000,2023.000000,3665.000000,5043.341307,2023.650121,3666.474518,5446.0,2123.0,3424.0,10.0,1.0,40201.0
2,10001040203,5008.301676,2018.494317,3117.585436,5005.945996,2017.406765,3116.138075,5182.0,2157.0,2808.0,10.0,1.0,40203.0
3,10001040204,4383.204483,1631.638682,2975.825969,4655.971419,1733.185423,3160.993697,6451.0,2269.0,3613.0,10.0,1.0,40204.0
4,10001040205,3195.371557,1189.738115,2168.896783,2878.966888,1071.802060,1954.369020,4699.0,1985.0,2430.0,10.0,1.0,40205.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,10005051801,4840.451064,2097.991489,3820.208511,4900.095310,2123.542025,3863.847523,5359.0,2154.0,3636.0,10.0,5.0,51801.0
258,10005051802,4231.548936,1773.008511,2532.791489,4163.679323,1744.174047,2486.972903,4354.0,1740.0,2256.0,10.0,5.0,51802.0
259,10005051900,4565.000000,1833.000000,3632.000000,4553.284160,1828.295699,3622.678657,4760.0,1949.0,3566.0,10.0,5.0,51900.0
260,10005980000,0.810506,0.704298,0.723018,3.065934,2.664176,2.734989,1.0,2.0,0.0,10.0,5.0,980000.0


In [24]:
# Calculate the absolute distance of both estimates from the ground truth.
# Each entry maps an output-column prefix to
# (ground truth column, population-weighted estimate column, smooth-surface estimate column).
abs_error_spec = {
    "pop": ("P1_001N", "pop", "P001001"),
    "house": ("H1_001N", "housing", "H001001"),
    "white": ("P1_003N", "white", "P001003"),
}

# Series.abs() is the vectorized built-in; it replaces the per-column .apply(np.abs).
# The pw column is written before the ss column so the resulting column order is
# pop_diff_pw, pop_diff_ss, house_diff_pw, house_diff_ss, white_diff_pw, white_diff_ss.
for prefix, (truth_col, pw_col, ss_col) in abs_error_spec.items():
    interpolations[f"{prefix}_diff_pw"] = (interpolations[truth_col] - interpolations[pw_col]).abs()
    interpolations[f"{prefix}_diff_ss"] = (interpolations[truth_col] - interpolations[ss_col]).abs()

# Narrow view holding only the six absolute-difference columns
differences = interpolations[["pop_diff_pw", "pop_diff_ss", "house_diff_pw", "house_diff_ss", "white_diff_pw", "white_diff_ss"]]

In [25]:
# Display summary statistics (count, mean, std, min, quartiles, max) of the
# absolute differences between each interpolation and the ground truth data
differences.describe()

Unnamed: 0,pop_diff_pw,pop_diff_ss,house_diff_pw,house_diff_ss,white_diff_pw,white_diff_ss
count,259.0,259.0,259.0,259.0,259.0,259.0
mean,446.512786,453.756627,190.339182,194.987662,368.028166,371.373722
std,561.973011,597.461044,263.243786,275.621261,337.095965,350.74376
min,0.0,0.018568,0.0,0.004025,0.0,0.003685
25%,118.506525,98.023558,27.046888,25.081972,134.0,124.525592
50%,246.0,233.876203,98.0,85.769141,273.0,261.886267
75%,562.329706,604.818625,242.0,243.60604,508.554457,523.267726
max,4306.768408,5070.460674,1735.456357,1739.85382,1962.489406,2275.58672


In [26]:
# Keep only census tracts whose ground-truth values are all present and non-zero,
# so they can serve as denominators in the relative-error analysis.
# Missing values must be excluded explicitly: `NaN != 0` evaluates to True, so a
# bare `!= 0` test lets tracts with no ground truth (rows the left merge could
# not match) slip through the filter.
ground_truth = interpolations[["P1_001N", "H1_001N", "P1_003N"]]
has_usable_truth = (ground_truth.notna() & ground_truth.ne(0)).all(axis=1)

# .copy() so that later column assignments do not touch `interpolations`
interpolations2 = interpolations.loc[has_usable_truth].copy()
interpolations2

Unnamed: 0,GEOID,pop,housing,white,P001001,H001001,P001003,P1_001N,H1_001N,P1_003N,state,county,tract,pop_diff_pw,pop_diff_ss,house_diff_pw,house_diff_ss,white_diff_pw,white_diff_ss
0,10001040100,6541.000000,2469.000000,5702.000000,6544.629384,2470.397083,5705.400048,7315.0,2740.0,5980.0,10.0,1.0,40100.0,774.000000,770.370616,271.000000,269.602917,278.000000,274.599952
1,10001040201,5041.000000,2023.000000,3665.000000,5043.341307,2023.650121,3666.474518,5446.0,2123.0,3424.0,10.0,1.0,40201.0,405.000000,402.658693,100.000000,99.349879,241.000000,242.474518
2,10001040203,5008.301676,2018.494317,3117.585436,5005.945996,2017.406765,3116.138075,5182.0,2157.0,2808.0,10.0,1.0,40203.0,173.698324,176.054004,138.505683,139.593235,309.585436,308.138075
3,10001040204,4383.204483,1631.638682,2975.825969,4655.971419,1733.185423,3160.993697,6451.0,2269.0,3613.0,10.0,1.0,40204.0,2067.795517,1795.028581,637.361318,535.814577,637.174031,452.006303
4,10001040205,3195.371557,1189.738115,2168.896783,2878.966888,1071.802060,1954.369020,4699.0,1985.0,2430.0,10.0,1.0,40205.0,1503.628443,1820.033112,795.261885,913.197940,261.103217,475.630980
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
256,10005051702,5613.000000,2303.000000,5013.000000,5618.467267,2305.259650,5017.160632,6577.0,2590.0,5286.0,10.0,5.0,51702.0,964.000000,958.532733,287.000000,284.740350,273.000000,268.839368
257,10005051801,4840.451064,2097.991489,3820.208511,4900.095310,2123.542025,3863.847523,5359.0,2154.0,3636.0,10.0,5.0,51801.0,518.548936,458.904690,56.008511,30.457975,184.208511,227.847523
258,10005051802,4231.548936,1773.008511,2532.791489,4163.679323,1744.174047,2486.972903,4354.0,1740.0,2256.0,10.0,5.0,51802.0,122.451064,190.320677,33.008511,4.174047,276.791489,230.972903
259,10005051900,4565.000000,1833.000000,3632.000000,4553.284160,1828.295699,3622.678657,4760.0,1949.0,3566.0,10.0,5.0,51900.0,195.000000,206.715840,116.000000,120.704301,66.000000,56.678657


In [27]:
# Calculate the distance of both estimates from the ground truth relative to the
# ground truth itself: |truth - estimate| / truth. The results are fractions
# (0.10 means 10%), not values on a 0-100 percentage scale.
# NOTE: these assignments reuse the *_diff_pw / *_diff_ss column names, so within
# interpolations2 those columns hold relative errors after this cell runs.
# Each entry maps an output-column prefix to
# (ground truth column, population-weighted estimate column, smooth-surface estimate column).
rel_error_spec = {
    "pop": ("P1_001N", "pop", "P001001"),
    "house": ("H1_001N", "housing", "H001001"),
    "white": ("P1_003N", "white", "P001003"),
}

# Series.abs() is the vectorized built-in; it replaces the per-column .apply(np.abs).
for prefix, (truth_col, pw_col, ss_col) in rel_error_spec.items():
    truth = interpolations2[truth_col]
    interpolations2[f"{prefix}_diff_pw"] = ((truth - interpolations2[pw_col]) / truth).abs()
    interpolations2[f"{prefix}_diff_ss"] = ((truth - interpolations2[ss_col]) / truth).abs()

# Narrow view holding only the six relative-difference columns
differences2 = interpolations2[["pop_diff_pw", "pop_diff_ss", "house_diff_pw", "house_diff_ss", "white_diff_pw", "white_diff_ss"]]

In [28]:
# Display summary statistics on the relative (fraction of ground truth) differences
# between interpolated data and ground truth data
differences2.describe()

Unnamed: 0,pop_diff_pw,pop_diff_ss,house_diff_pw,house_diff_ss,white_diff_pw,white_diff_ss
count,257.0,257.0,257.0,257.0,257.0,257.0
mean,0.114947,0.119478,0.103114,0.111083,0.18476,0.190317
std,0.111399,0.152145,0.122743,0.19662,0.185065,0.217784
min,0.000338,0.001065,0.0,0.000968,0.0,0.000368
25%,0.034712,0.031087,0.017155,0.016719,0.073084,0.074591
50%,0.070088,0.071103,0.05812,0.056145,0.152057,0.136529
75%,0.162591,0.154544,0.143094,0.143592,0.256426,0.244918
max,0.548563,1.703125,0.693452,2.607926,2.056604,2.07478


In [29]:
# Determine, tract by tract, whether pw interpolation or ss interpolation is
# closer to the ground truth.

# Keep only census tracts whose ground-truth values are all present and non-zero,
# since they are used as denominators below. Missing values must be excluded
# explicitly: `NaN != 0` evaluates to True, so a bare `!= 0` test lets tracts
# with no ground truth slip through the filter.
ground_truth = interpolations[["P1_001N", "H1_001N", "P1_003N"]]
has_usable_truth = (ground_truth.notna() & ground_truth.ne(0)).all(axis=1)
interpolations3 = interpolations.loc[has_usable_truth].copy()

# Calculate how much closer one interpolation method is to the ground truth, as a
# fraction of the ground truth: (pw error - ss error) / truth.
# Negative values mean pw was closer; positive values mean ss was closer.
# The *_diff_pw / *_diff_ss columns read here are the absolute errors stored on
# `interpolations`, so the cell that computes them must have been run first.
interpolations3["pop_diff"] = (
    interpolations3["pop_diff_pw"] - interpolations3["pop_diff_ss"]
) / interpolations3["P1_001N"]
interpolations3["house_diff"] = (
    interpolations3["house_diff_pw"] - interpolations3["house_diff_ss"]
) / interpolations3["H1_001N"]
interpolations3["white_diff"] = (
    interpolations3["white_diff_pw"] - interpolations3["white_diff_ss"]
) / interpolations3["P1_003N"]

# Narrow view holding only the three comparison columns
differences3 = interpolations3[["pop_diff", "house_diff", "white_diff"]]

In [30]:
# Display summary statistics of the per-tract pw-vs-ss comparison columns
differences3.describe()

Unnamed: 0,pop_diff,house_diff,white_diff
count,257.0,257.0,257.0
mean,-0.004531,-0.007968,-0.005557
std,0.099659,0.128085,0.108235
min,-1.42493,-1.914474,-1.536551
25%,-0.006741,-0.008411,-0.007429
50%,0.000143,-0.000581,0.000343
75%,0.005022,0.003106,0.006367
max,0.261075,0.274571,0.278198
