In [1]:
import json, math, copy
from geosnap.data import store_ltdb
from geosnap.data import Community
from geosnap.data import store_census
from geosnap.data import data_store
import pandas as pd
import shapely.wkt
import shapely.geometry
from datetime import datetime
from datetime import timedelta
from pathlib import Path
from INCS import linc
import urllib.parse
import webbrowser
import os
import pprint
from geosnap.data import convert_gdf
import geopandas as gpd

In [2]:
# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
	"""
	Call in a loop to create terminal progress bar
	@params:
		iteration   - Required  : current iteration (Int)
		total       - Required  : total iterations (Int)
		prefix      - Optional  : prefix string (Str)
		suffix      - Optional  : suffix string (Str)
		decimals    - Optional  : positive number of decimals in percent complete (Int)
		length      - Optional  : character length of bar (Int)
		fill        - Optional  : bar fill character (Str)
		printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)
	"""
	percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
	filledLength = int(length * iteration // total)
	bar = fill * filledLength + '-' * (length - filledLength)
	print('\r%s |%s| %s%% %s' % (prefix, bar, percent, suffix), end = printEnd)
	# Print New Line on Complete
	if iteration == total: 
		print()

In [3]:
def _fips_filter_test(
    state_fips=None, county_fips=None, msa_fips=None, fips=None, data=None
):

    if isinstance(state_fips, (str,)):
        state_fips = [state_fips]
    if isinstance(county_fips, (str,)):
        county_fips = [county_fips]
    if isinstance(fips, (str,)):
        fips = [fips]

    # if counties already present in states, ignore them
    if county_fips:
        for i in county_fips:
            if state_fips and i[:2] in county_fips:
                county_fips.remove(i)
    # if any fips present in state or counties, ignore them too
    if fips:
        for i in fips:
            if state_fips and i[:2] in state_fips:
                fips.remove(i)
            if county_fips and i[:5] in county_fips:
                fips.remove(i)

    fips_list = []
    if fips:
        fips_list += fips
    if county_fips:
        fips_list += county_fips
    if state_fips:
        fips_list += state_fips

    if msa_fips:
        fips_list += data_store.msa_definitions[
           data_store.msa_definitions["CBSA Code"] == msa_fips
        ]["stcofips"].tolist()
    #fips_list = ['40123']
    #print(data)
    dfs = []
    for index in fips_list:
        dfs.append(data[data.geoid.str.startswith(index)])
        #dfs.append(data[data.geoid.startswith(index)])
        #dfs.append(data.loc['40123089598':'40123089600'])
        #dfs.append(data.loc[index])

    return pd.concat(dfs)

In [4]:
def _from_db_test(
    data, state_fips=None, county_fips=None, msa_fips=None, fips=None, years=None
):
    data = data[data.year.isin(years)]
    data = data.reset_index()
    #data = data.iloc[0:100000]
    #data.insert(0, "countyid", data['geoid'].str[:5])
    #data = data.set_index("countyid")
    #print(data)
    #return
    df = _fips_filter_test(
        state_fips=state_fips,
        county_fips=county_fips,
        msa_fips=msa_fips,
        fips=fips,
        data=data,
    )

    # we know we're using 2010, need to drop the year column so no conficts
    tracts = data_store.tracts_2010(convert=False)
    tracts = tracts[["geoid", "wkb"]]
    tracts = tracts[tracts.geoid.isin(df.geoid)]
    tracts = convert_gdf(tracts)

    gdf = df.merge(tracts, on="geoid", how="left").set_index("geoid")
    gdf = gpd.GeoDataFrame(gdf)
    return gdf

In [27]:
def from_ltdb_test(
        #cls,
        state_fips=None,
        county_fips=None,
        msa_fips=None,
        fips=None,
        boundary=None,
        years=[1970, 1980, 1990, 2000, 2010],
):
    gdf = _from_db_test(
        data=data_store.ltdb,
        state_fips=state_fips,
        county_fips=county_fips,
        msa_fips=msa_fips,
        fips=fips,
        years=years,
    )
    return gdf.reset_index()

In [32]:
def write_ALL_METROS_VARIABLES_js(metros, param):
    geoid       = metros.columns[0]
    method      = param['method']
    nClusters   = param['nClusters']
    years       = param['years']
    variables   = param['variables']
    seqClusters = 5
    distType    = 'tran'
    
    if ('Sequence' in param and type(param['Sequence']) is dict):
        if ('seq_clusters' in param['Sequence']): seqClusters = param['Sequence']['seq_clusters']
        if ('dist_type' in param['Sequence']): distType = param['Sequence']['dist_type']
    
    #msas = data_store.msa_definitions
    #for column in msas.columns:
    #	print(column)
    #print(msas)
    
    community = Community.from_ltdb(years=years, msa_fips="10220")
    #community = Community.from_ltdb(years=years)
    #community.gdf = community.gdf[['geoid', 'year']]
    #print(community.gdf)
    #print(variables)
    #print(variables.append(['geoid', 'year']))
    #print(variables)
    #return
    
    # Initial call to print 0% progress
    printProgressBar(0, len(metros.index), prefix = 'Progress:', suffix = 'Complete', length = 50)
    
    readCount = 0
    outList = []
    for index, metro in metros.iterrows():
        #if (index > 10): break
        #if (index != 3): continue
        #print(index, metro['geoid'], metro['name'])
        metroid = metro['geoid']
        p = metro['name'].rfind(', ')
        #if (p < 0): print(index, metro['geoid'], metro['name'], p)
        metroname = metro['name'][:p]
        stateabbr = metro['name'][p+2:]
        #print(index, metroid, stateabbr, metroname)
        
        try:
            #community = Community.from_ltdb(years=years, msa_fips=metroid)
            community.gdf = from_ltdb_test(years=years, msa_fips=metroid)
            #rint(community.gdf)
        except ValueError:
            continue
        #printProgressBar(index, len(metros.index), prefix = 'Progress:', suffix = 'Complete', length = 50)
        #continue
        
        if (len(community.gdf.index) <= 0): continue
        #print(community.gdf.columns)
        #for column in community.gdf.columns:
        #	print(column)
        #print(community)
        #print(community.gdf)
        
        # clustering by method, nClusters with filtering by variables
        try:
            clusters = community.cluster(columns=variables, method=method, n_clusters=nClusters)
        except KeyError:
            continue
        #print(clusters.gdf)
        #print(clusters.gdf[['year', 'geoid', 'kmeans']])
        
        # get pivot from clusters
        df_pivot = clusters.gdf.pivot(index='geoid', columns='year', values='kmeans')
        #print(df_pivot)
        #print(len(df_pivot.index))
        #print(df_pivot.columns)
        
        if (len(df_pivot.columns) > 1):										# 1980, 1990, 2000, 2010
            # convert df_pivot to list for INCS.linc
            yearList = []
            for year in df_pivot.columns:
                aYearList = df_pivot[year].values.tolist()
                aYearList = list(map(float, aYearList)) 
                yearList.append(aYearList)
            #print(yearList)
            # calculate INC
            incs = linc(yearList)
            #print(incs)
            ave = sum(incs) / len(incs) if (len(incs) != 0) else -9999
            #print("ave:", ave)
            #print(index, metroid, ave, stateabbr, metroname)
            readCount += len(incs)
            outList.append([metroid, ave])
            printProgressBar(index, len(metros.index), prefix = 'Progress:', suffix = 'Complete', length = 50)
    printProgressBar(len(metros.index), len(metros.index), prefix = 'Progress:', suffix = 'Complete', length = 50)
    print(outList)
    return
    print("readCount:", readCount)
    
    # write df_pivot to GEO_VARIABLES.js
    filename_GEO_VARIABLES = "NAM_" + param['filename_suffix'] + "/data/GEO_VARIABLES_"+param['filename_suffix']+".js"
    ofile = open(filename_GEO_VARIABLES, 'w')
    ofile.write('var GEO_VARIABLES =\n')
    ofile.write('[\n')
    heading = [geoid, 'INC']
    ofile.write('  '+json.dumps(heading)+',\n')
    wCount = 0
    for i, row in enumerate(outList):
        wCount += 1
        ofile.write('  '+json.dumps(row)+',\n')
    #print("GEO_VARIABLES.js write count:", wCount)
    ofile.write(']\n')
    ofile.close()

In [20]:
param = {
    'title': "Neighborhood Analysis: Kmeans, All metros",
    'filename_suffix': "All",
    'allMetros': True,
    'years': [1980, 1990, 2000, 2010],
    'method': "kmeans",
    'nClusters': 8,
    'variables': [
                  "p_nonhisp_white_persons", 
                  "p_nonhisp_black_persons", 
                  "p_hispanic_persons", 
                  "p_native_persons", 
                  "p_asian_persons",
                 ],
}

In [21]:
metros = data_store.msas()

In [33]:
started_datetime = datetime.now()

write_ALL_METROS_VARIABLES_js(metros, param)

ended_datetime = datetime.now()
elapsed = ended_datetime - started_datetime
total_seconds = int(elapsed.total_seconds())
hours, remainder = divmod(total_seconds,60*60)
minutes, seconds = divmod(remainder,60)	
print('Elapsed %02d:%02d:%02d' % (hours, minutes, seconds))

Progress: |██████████████████████████████████████████████████| 100.0% Complete
[['10100', 0.9333333333333332], ['10140', 0.7941176470588235], ['10180', 0.9187297519027968], ['10220', 1.0], ['10300', 0.8812839334578463], ['10420', 0.9340286138656331], ['10460', 0.8636363636363636], ['10500', 0.9918699186991871], ['10540', 0.9682539682539684], ['10580', 0.9418286649466009], ['10620', 1.0], ['10660', 0.82], ['10700', 1.0], ['10740', 0.8826186265876109], ['10760', 0.9230769230769231], ['10780', 0.828501542317332], ['10820', 1.0], ['10860', 1.0], ['10900', 0.9017033557453047], ['10940', 0.8333333333333334], ['10980', 0.8], ['11020', 0.9852463712757832], ['11060', 0.5], ['11100', 0.8880632375685984], ['11140', 0.9], ['11180', 0.9649122807017543], ['11220', 0.9375], ['11260', 0.8014561968653438], ['11380', 0.0], ['11420', 0.8888888888888888], ['11460', 0.8383310851669056], ['11500', 0.9393948977440438], ['11540', 0.9056448174095227], ['11580', 0.8888888888888888], ['11620', 0.9000000000000001