In [None]:
import data
import resulthelpers
import matplotlib.pyplot as plt
import numpy
import scipy.stats

# Load Main Results

In [None]:
# Load the two inputs in the same order as before (basic results first, then
# the batch simulation results) and hand them to join_results with ks_col="ks_pl".
basic_results = resulthelpers.load_basic_results("results/*.tsv")
simulation_results = resulthelpers.load_simulation_results("batch/all-results.tsv")
all_results = resulthelpers.join_results(basic_results, simulation_results, ks_col="ks_pl")

# Load Lognormal Results

In [None]:
# Same pattern as the main results, using the lognormal result files and the
# lognormal-specific simulation loader. No ks_col is passed here, so
# join_results uses whatever default it defines.
ln_basic_results = resulthelpers.load_basic_results("./results/lognormal-results.tsv")
ln_simulation_results = resulthelpers.load_lognormal_simulation_results(
    "./batch/all-results-lognormal.tsv"
)
all_ln_results = resulthelpers.join_results(ln_basic_results, ln_simulation_results)

# Power Law Confidence Intervals

In [None]:
# Datasets to keep; this drops prior-year results (e.g., "CBSA, 2000").
good_datasets = [
    "Census CBSA",
    "Census Place",
    "Census UA/UC",
    "CCA Tract",
    "CCA Tract (3+)",
    "CCA Street Network",
    "CCA Raster",
    "CCA Block",
]

# Just the tail results.
# (Since we only ran the bootstrap on the tail results, the "fitpart" filter
# isn't really necessary.)
is_tail = all_results["fitpart"] == "tail"
is_good_dataset = all_results["dataset"].isin(good_datasets)

# .copy() makes tail_results an independent frame, so the column assignments
# below do not write into a selection of all_results (this is what triggers
# pandas' SettingWithCopyWarning).
tail_results = all_results[is_tail & is_good_dataset].copy()

# The 97.5% quantile of a standard normal random var, i.e.,
# the multiplier for sigma that gives us an approximate 95% confidence interval.
std_norm_975 = scipy.stats.norm.ppf(.975)

# Approximate 95% interval bounds for alpha: alpha -/+ std_norm_975 * plsigma.
alpha_margin = std_norm_975 * tail_results["plsigma"]
tail_results["alpha_lower"] = tail_results["alpha"] - alpha_margin
tail_results["alpha_upper"] = tail_results["alpha"] + alpha_margin

# Rows with bootstrap_p >= 0.1, ordered from largest to smallest alpha.
significant_pls = (
    tail_results[tail_results["bootstrap_p"] >= 0.1]
    .sort_values(["alpha"], ascending=[False])
)

In [None]:
# Horizontal error-bar chart: one row per entry of significant_pls, with the
# alpha estimate as the marker and +/- std_norm_975 * plsigma as the bar
# (the same interval as alpha_lower / alpha_upper).
fig = plt.figure()
ax = fig.add_subplot(111)

row_positions = range(len(significant_pls))
ax.errorbar(
    y=row_positions,
    x=significant_pls["alpha"],
    xerr=significant_pls["plsigma"] * std_norm_975,
    fmt="o",
    color="#774499",
    markersize=2,
)

# Dotted reference line at alpha = 2.
ax.axvline(2, color="#000000a0", linewidth=1, linestyle="dotted")

# Label each row as "<column>: <dataset>".
ax.set_yticks(row_positions)
ax.set_yticklabels(significant_pls["column"].str.cat(significant_pls["dataset"], sep=": "))

# Greek small letter alpha, written as an escape so the label cannot be
# re-garbled by a wrong file encoding (it previously appeared as mojibake).
ax.set_xlabel("\u03b1")
ax.set_xlim([1, 4])

plt.tight_layout()
fig.savefig("illustrations/significant-alphas-95.pdf")
fig.savefig("illustrations/significant-alphas-95.png", dpi=300)

In [None]:
# Build the display table as a NEW frame. A plain `disp_tbl = tail_results`
# would only alias the frame, so adding "alpha_range" would also modify
# tail_results and make this cell's effect leak into other cells.
alpha_lower_text = tail_results["alpha_lower"].map("{0:.3f}".format)
alpha_upper_text = tail_results["alpha_upper"].map("{0:.3f}".format)
disp_tbl = tail_results.assign(
    # "lower, upper" with three decimals each
    alpha_range=alpha_lower_text.str.cat(alpha_upper_text, sep=", ")
)

# Last expression of the cell: rendered as the cell's output.
disp_tbl[
    ["column", "dataset", "n_tail", "x_min", "alpha", "plsigma", "alpha_range", "ks_pl", "bootstrap_p"]
].sort_values("column", ascending=False)

# Kernel Density Estimates

In [None]:
def generate_combined_kde():
    """Plot stacked log-population kernel density estimates and save them as a PNG.

    For every entry of the local ``kde_datasets`` table this loads the data by
    calling its ``get_data`` callable with ``params``, keeps the strictly
    positive ``population`` values, fits a Gaussian KDE (``bw_method=.02``) to
    their natural logarithm, and draws the curve on its own row of a figure
    whose rows share the x axis. A dotted vertical line marks
    ``ln(population_cutoff)`` on each row.

    Takes no arguments and returns nothing; the figure is written to
    ``illustrations/combined-pop-density.png``.
    """
    # NOTE(review): the per-dataset "color" entries are not read anywhere
    # below; every curve is drawn in "#004454". Confirm whether that is intended.
    kde_datasets = {
        "CCA Block": {
            "get_data": data.get_cca_block_data,
            "params": [1000, 1000],
            "population_cutoff": 50,
            "color": "#ff9900"
        },
        "Census Place": {
            "get_data": data.get_census_place_data,
            "params": [],
            "population_cutoff": 5000,
            "color": "#cc0033"
        },
        "CCA Raster": {
            "get_data": data.get_cca_raster_data,
            "params": [1000, 5],
            "population_cutoff": 4000,
            "color": "#006699"
        }
    }

    # matplotlib.pyplot is imported in this notebook as `plt`; the bare name
    # `pyplot` is never imported and raised NameError on a fresh kernel.
    fig, axes = plt.subplots(len(kde_datasets), 1, sharex=True, figsize=(10, 6))

    # Common evaluation grid (in ln(population) units) for all rows.
    x = numpy.linspace(-2, 17, 5000)

    for ax, (name, dataset) in zip(axes, kde_datasets.items()):
        df = dataset["get_data"](*dataset["params"])

        # log() is only defined for positive values, so drop population <= 0.
        population = df["population"][df["population"] > 0]
        kde = scipy.stats.gaussian_kde(numpy.log(population), bw_method=.02)

        ax.set_title(name + " Log Population Density")
        ax.plot(x, kde(x), color="#004454")
        ax.axvline(numpy.log(dataset["population_cutoff"]), color="#00000080", linestyle="dotted")

    # The x axis is shared, so only the bottom row gets the axis label.
    axes[-1].set_xlabel("ln(population)")

    plt.tight_layout()
    fig.savefig("illustrations/combined-pop-density.png", dpi=300)


generate_combined_kde()

In [None]:
# Side-by-side KDEs of the lower tail of the CCA Raster data: for each listed
# column, only values at or below "cutoff" are used and the curve is drawn on
# the range [0, cutoff].
df = data.get_cca_raster_data(1000, 5)
plots = [
    {"col": "area", "cutoff": 8, "label": "area (km$^2$)"},
    {"col": "population", "cutoff": 8000, "label": "population"}
]

# matplotlib.pyplot is imported in this notebook as `plt`; the bare name
# `pyplot` is never imported and raised NameError on a fresh kernel.
fig = plt.figure(figsize=(6, 3))

for i, p in enumerate(plots, start=1):
    ax = fig.add_subplot(1, len(plots), i)
    if i == 1:
        # Only the first panel carries the title.
        ax.set_title("CCA Raster lower tail density")
    ax.set_xlabel(p["label"])
    ax.set_ylabel("density")

    x = numpy.linspace(0, p["cutoff"], 5000)
    lower_tail = df[p["col"]][df[p["col"]] <= p["cutoff"]]
    kde = scipy.stats.gaussian_kde(lower_tail, bw_method=.04)
    ax.plot(x, kde(x), color="#004454")

plt.tight_layout()
fig.savefig("illustrations/cca-raster-lower-tail.pdf")
fig.savefig("illustrations/cca-raster-lower-tail.png", dpi=300)