In [1]:
import pandas as pd
import numpy as np
from urllib.request import urlopen
import json
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import os

# **<font color = 'DarkRed'> State Level </font>**

* <font color='royalblue'> _Get_State_Code returns the state's two-letter abbreviation and its FIPS state code._ </font>

In [2]:
def Get_State_Code(State_Name):
    """Look up a state's postal abbreviation and FIPS code from its full name.

    Parameters
    ----------
    State_Name : str
        State name as it appears in the STATE_NAME column of the Census
        reference file (e.g. 'Virginia').

    Returns
    -------
    tuple of (str, str)
        The two-letter abbreviation (STUSAB column) and the two-digit,
        zero-padded state code (STATE column), e.g. ('CA', '06').

    Raises
    ------
    KeyError
        If State_Name is not present in the reference file.
    """
    # Read STATE as text: parsing it as an integer drops the leading zero
    # ('06' -> 6), and the rest of the notebook compares the code against
    # zero-padded strings such as the geojson 'STATEFP' property.
    State_Codes = pd.read_csv('https://www2.census.gov/geo/docs/reference/state.txt',
                              sep = "|", dtype = {'STATE': str})
    State_Codes = State_Codes.set_index('STATE_NAME')

    State = State_Codes.loc[State_Name, 'STUSAB']
    # zfill is a safety net in case the source ever ships unpadded codes.
    State_Code = str(State_Codes.loc[State_Name, 'STATE']).zfill(2)

    return State, State_Code

# **<font color = 'DarkRed'> Congressional District Level </font>**

## **<font color='Black'> Obtain congressional districts geometrical information (coordinates, land area, water area), population, and density </font>**

* <font color='red'> _Get_geojson_CD116 returns the geometric coordinates of each of the 116th congress congressional districts.<br> If a state code is passed as an argument then only the geometric coordinates of the congressional districts of the specified state will be returned.<br> The geojson dictionary also has other info such as land and water area._ </font>
* <font color='darkorange'> _Get_Districts_Pop19 returns the population of each of the 116th congress congressional districts in the given state, based on the 2019 survey._ </font>
* <font color='khaki'> _Get_Districts_List returns the list of congressional districts in the state._ </font>
* <font color='green'> _Get_District_County_Pop19 returns a dataframe with the population of each (county, district) pair, and the ratio of each county population in shared districts._ </font>
* <font color='royalblue'> _Congressional_Districts_Info returns the population, land area (in $km^{2}$), water area (in $km^{2}$), and population density (population per $km^{2}$) of each district in the given state. The population density is taken as the population per land area._ </font>
* <font color='purple'> _Congressional_Districts_Map displays the map of the districts of a given state colored by the data in the dataframe, df, in column data_col.<br> The dataframe, df, should have the congressional districts as index.<br> Log_Scale defaults to False. If it is set to True, then log(data_col) values are displayed instead._ </font>

In [3]:
def Load_geojson_CD116(State_Code = 'all', savefile = False):
    """Download the 2019 congressional-district GeoJSON, optionally filtered by state.

    Parameters
    ----------
    State_Code : str, default 'all'
        'all' keeps every feature of the downloaded collection. Any other
        value keeps only the features whose 'STATEFP' property equals it.
    savefile : bool, default False
        When True, the returned dictionary is also written to
        'data/<State_Code>_geojson_CD116.json'.

    Returns
    -------
    dict
        The downloaded dictionary when State_Code is 'all'; otherwise a new
        {'type': 'FeatureCollection', 'features': [...]} dictionary holding
        the matching features.
    """
    with urlopen('https://raw.githubusercontent.com/loganpowell/census-geojson/master/GeoJSON/500k/2019/congressional-district.json') as response:
        districts_geoDict = json.load(response)

    if State_Code != 'all':
        state_districts_geoList = [district for district in districts_geoDict['features']
                                   if district['properties']['STATEFP'] == State_Code]
        districts_geoDict = {'type': 'FeatureCollection', 'features': state_districts_geoList}

    if savefile:
        # Create the cache directory on first use; open(..., 'w') does not.
        os.makedirs('data', exist_ok = True)
        filename = 'data/' + State_Code + '_geojson_CD116.json'
        with open(filename, 'w') as f:
            json.dump(districts_geoDict, f)

    return districts_geoDict
    
def Get_geojson_CD116(State_Code = 'all'):
    """Return the congressional-district GeoJSON for a state, using the local cache.

    Reads 'data/<State_Code>_geojson_CD116.json' when that file exists;
    otherwise calls Load_geojson_CD116 with savefile = True and returns
    its result.
    """
    cache_path = 'data/' + State_Code + '_geojson_CD116.json'

    # Cache miss: the loader both fetches the data and writes the cache file.
    if not os.path.isfile(cache_path):
        return Load_geojson_CD116(State_Code, savefile = True)

    with open(cache_path, 'r') as cached:
        return json.load(cached)

In [4]:
def Load_Districts_Pop19(State_Code):
    """Fetch the population of every congressional district of a state.

    Queries the 2019 ACS 5-year endpoint for variable B01001_001E and
    returns a DataFrame indexed by 'CDistrict' (sorted) with a single
    numeric 'Population' column. Values that cannot be parsed as numbers
    become NaN.
    """
    IP = 'https://api.census.gov/data/2019/acs/acs5?get=NAME,B01001_001E&for=congressional%20district:*&in=state:{}'
    response_table = pd.read_json(IP.format(State_Code))

    # Row 0 is the header row of the response; columns 0 and 2 are dropped,
    # leaving the population and district columns.
    trimmed = response_table.drop(index = 0).drop(columns = [0, 2])
    trimmed.columns = ['Population', 'CDistrict']

    by_district = trimmed.set_index('CDistrict').sort_index()
    by_district['Population'] = pd.to_numeric(by_district['Population'], errors='coerce')

    return by_district

def Get_Districts_Pop19(State_Code):
    """Return district populations for a state, using the local cache.

    Reads 'data/<State_Code>_Districts_Pop19.json' (table orient) when it
    exists; otherwise calls Load_Districts_Pop19, writes the result to that
    file and returns it.
    """
    filename = 'data/' + State_Code + '_Districts_Pop19.json'
    if os.path.isfile(filename):
        Districts_Pop = pd.read_json(filename, orient = 'table')
    else:
        Districts_Pop = Load_Districts_Pop19(State_Code)
        # to_json does not create missing directories, so make sure the
        # cache folder exists before the first write.
        os.makedirs('data', exist_ok = True)
        Districts_Pop.to_json(filename, orient = 'table')

    return Districts_Pop

In [5]:
def Load_Districts_List(State_Code):
    """Fetch the sorted list of congressional district codes of a state.

    Queries the 2020 decennial 'pl' endpoint and returns the values of the
    third response column (the district identifiers) in ascending order.
    """
    IP = 'https://api.census.gov/data/2020/dec/pl?get=NAME&for=congressional%20district:*&in=state:{}'
    response_table = pd.read_json(IP.format(State_Code))

    # Row 0 holds the column headers, not a district.
    district_codes = response_table.drop(index = 0)[2].to_list()

    return sorted(district_codes)

def Get_Districts_List(State_Code):
    """Return the list of congressional districts of a state, using the local cache.

    Reads 'data/<State_Code>_Districts_List.txt' (JSON content) when it
    exists; otherwise calls Load_Districts_List, stores the result in that
    file and returns it.
    """
    filename = 'data/' + State_Code + '_Districts_List.txt'
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            District_list = json.load(f)
    else:
        District_list = Load_Districts_List(State_Code)
        # open(..., 'w') fails if the cache folder is missing, so create it.
        os.makedirs('data', exist_ok = True)
        with open(filename, 'w') as f:
            json.dump(District_list, f)

    return District_list

In [6]:
def Load_District_County_Pop19(State_Code):
    """Fetch the population of every (county, congressional district) pair of a state.

    For each district returned by Get_Districts_List, queries the 2019 ACS
    5-year endpoint (variable B01003_001E) at the 'county (or part)' level.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'County Code' (sorted by county code, then district),
        with columns 'County Name', 'CDistrict', 'Population' and
        'Ratio in District'. The ratio is the row's population divided by
        the summed population of all rows sharing its county code.
    """
    IP = 'https://api.census.gov/data/2019/acs/acs5?get=NAME,B01003_001E&for=county%20(or%20part):*&in=state:{}%20congressional%20district:{}'
    columns_list = ['County Name', 'Population', 'CDistrict', 'County Code']

    # Collect one frame per district and concatenate once at the end,
    # rather than growing a DataFrame inside the loop.
    frames = []
    for District in Get_Districts_List(State_Code):
        df = pd.read_json(IP.format(State_Code, District))
        # Row 0 is the header row; column 2 is dropped so that four columns remain.
        df = df.drop(index = 0).drop(columns = [2])
        df.columns = columns_list
        # Keep the text before the first comma, then strip a trailing
        # parenthesised qualifier such as ' (part)'.
        df['County Name'] = df['County Name'].str.rsplit(',').str[0].str.split(r"\s+\(").str[0]
        frames.append(df)

    if frames:
        DC_Pop = pd.concat(frames, ignore_index = True)
    else:
        # No districts: return an empty, correctly-shaped frame.
        DC_Pop = pd.DataFrame(columns = columns_list)

    DC_Pop = DC_Pop.sort_values(['County Code', 'CDistrict']).set_index('County Code')
    DC_Pop['Population'] = pd.to_numeric(DC_Pop['Population'], errors='coerce')

    # transform keeps the (possibly duplicated) county index of DC_Pop, so the
    # division is row-for-row and does not depend on index alignment.
    County_Total = DC_Pop.groupby(level = 0)['Population'].transform('sum')
    DC_Pop['Ratio in District'] = DC_Pop['Population'] / County_Total

    return DC_Pop[['County Name', 'CDistrict', 'Population', 'Ratio in District']]

def Get_District_County_Pop19(State_Code):
    """Return the (county, district) population table of a state, using the local cache.

    Reads 'data/<State_Code>_District_County_Pop19.json' (table orient)
    when it exists; otherwise calls Load_District_County_Pop19, writes the
    result to that file and returns it.
    """
    filename = 'data/' + State_Code + '_District_County_Pop19.json'
    if os.path.isfile(filename):
        District_County_Pop = pd.read_json(filename, orient = 'table')
    else:
        District_County_Pop = Load_District_County_Pop19(State_Code)
        # Make sure the cache folder exists; to_json will not create it.
        os.makedirs('data', exist_ok = True)
        District_County_Pop.to_json(filename, orient = 'table')

    return District_County_Pop

In [7]:
def Congressional_Districts_Info(State_Code):
    """Combine district populations with land/water areas and population density.

    Starts from the table returned by Get_Districts_Pop19 and, for every
    feature returned by Get_geojson_CD116, writes the feature's 'ALAND' and
    'AWATER' values (scaled by 10**-6) into the row labelled by its
    'CD116FP'. Rows without a matching feature keep an area of 0.0.
    'Population Density' is population divided by land area.
    """
    info = Get_Districts_Pop19(State_Code)
    features = Get_geojson_CD116(State_Code)['features']

    info['Land Area'] = 0.0
    info['Water Area'] = 0.0

    scale = 10**-6
    for feature in features:
        props = feature['properties']
        district_id = props['CD116FP']
        info.loc[district_id, 'Land Area'] = props['ALAND'] * scale
        info.loc[district_id, 'Water Area'] = props['AWATER'] * scale

    info['Population Density'] = info['Population'] / info['Land Area']

    return info

In [8]:
def Congressional_Districts_Map(State_Code, df, data_col, Log_Scale = False):
    """Show a choropleth of a state's congressional districts coloured by df[data_col].

    Parameters
    ----------
    State_Code : str
        Passed to Get_geojson_CD116 to obtain the district shapes.
    df : pandas.DataFrame
        Its first index level is matched against the 'CD116FP' property of
        the GeoJSON features.
    data_col : str
        Column of df used for colouring.
    Log_Scale : bool, default False
        When True, np.log(df[data_col]) is plotted and the colour bar is
        labelled 'Log <data_col>'.

    Returns
    -------
    None
        The figure is displayed with fig.show().
    """
    district_ids = df.index.get_level_values(0)

    # Arguments that are the same for the linear and the log variant.
    shared_args = dict(geojson = Get_geojson_CD116(State_Code),
                       locations = district_ids,
                       hover_name = district_ids,
                       featureidkey = 'properties.CD116FP',
                       color_continuous_scale = 'Portland',
                       projection = 'natural earth')

    if not Log_Scale:
        fig = px.choropleth(df, color = data_col, **shared_args)
    else:
        fig = px.choropleth(df, color = np.log(df[data_col]),
                            labels={'color' : 'Log ' + data_col},
                            **shared_args)

    fig.update_geos(fitbounds = "locations", visible = False)
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()

    return

# **<font color = 'DarkRed'> County Level </font>**

## **<font color='Black'> Obtain counties geometrical information (coordinates, land area, water area), population, and density </font>**

* <font color='red'> _Get_geojson_Counties returns the geometric coordinates of each US county.<br> If a state code is passed as an argument then only the geometric coordinates of the counties of the specified state will be returned.<br> The geojson dictionary also has other info such as land and water area._ </font>
* <font color='darkorange'> _Get_Counties_Pop19 returns the population of each county in the given state, based on the 2019 survey._ </font>
* <font color='khaki'> _Get_CountyNames_Dict returns a dictionary with the county codes of the given state as keys and their corresponding names as values._ </font>
* <font color='green'> _Counties_Info returns the population, land area (in $km^{2}$), water area (in $km^{2}$), and population density (population per $km^{2}$) of each county in the given state. The population density is taken as the population per land area._ </font>

In [9]:
def Load_geojson_Counties(State_Code = 'all'):
    """Download the 2019 county GeoJSON, optionally filtered by state.

    With State_Code == 'all' the downloaded dictionary is returned as is.
    Otherwise a new FeatureCollection dictionary is returned that contains
    only the features whose 'STATEFP' property equals State_Code.
    """
    with urlopen('https://raw.githubusercontent.com/loganpowell/census-geojson/master/GeoJSON/500k/2019/county.json') as response:
        national = json.load(response)

    if State_Code == 'all':
        return national

    in_state = [feature for feature in national['features']
                if feature['properties']['STATEFP'] == State_Code]
    return {'type': 'FeatureCollection', 'features': in_state}
    
def Get_geojson_Counties(State_Code = 'all'):
    """Return the county GeoJSON for a state, using the local cache.

    Parameters
    ----------
    State_Code : str, default 'all'
        Value forwarded to Load_geojson_Counties on a cache miss and used
        in the cache filename. The default mirrors Get_geojson_CD116 and
        Load_geojson_Counties.

    Returns
    -------
    dict
        Content of 'data/<State_Code>_geojson_Counties.json' when it
        exists; otherwise the freshly loaded dictionary, which is also
        written to that file.
    """
    filename = 'data/' + State_Code + '_geojson_Counties.json'
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            counties_geoDict = json.load(f)
    else:
        counties_geoDict = Load_geojson_Counties(State_Code)
        # Create the cache folder if needed; open(..., 'w') does not.
        os.makedirs('data', exist_ok = True)
        with open(filename, 'w') as f:
            json.dump(counties_geoDict, f)

    return counties_geoDict

In [10]:
def Load_Counties_Pop19(State_Code):
    """Fetch the population of every county of a state.

    Queries the 2019 ACS 5-year endpoint for variable B01003_001E and
    returns a DataFrame indexed by 'County Code' (sorted) with columns
    'County Name' (the text before the first comma of the returned name)
    and numeric 'Population'. Unparseable populations become NaN.
    """
    IP = 'https://api.census.gov/data/2019/acs/acs5?get=NAME,B01003_001E&for=county:*&in=state:{}'
    response_table = pd.read_json(IP.format(State_Code))

    # Row 0 is the header row; column 2 is dropped so that name, population
    # and county code remain.
    trimmed = response_table.drop(index = 0).drop(columns = [2])
    trimmed.columns = ['County Name', 'Population', 'County Code']
    trimmed['County Name'] = trimmed['County Name'].str.rsplit(',').str[0]

    by_county = trimmed.set_index('County Code').sort_index()
    by_county['Population'] = pd.to_numeric(by_county['Population'], errors='coerce')

    return by_county

def Get_Counties_Pop19(State_Code):
    """Return county populations for a state, using the local cache.

    Reads 'data/<State_Code>_Counties_Pop19.json' (table orient) when it
    exists; otherwise calls Load_Counties_Pop19, writes the result to that
    file and returns it.
    """
    filename = 'data/' + State_Code + '_Counties_Pop19.json'
    if os.path.isfile(filename):
        Counties_Pop = pd.read_json(filename, orient = 'table')
    else:
        Counties_Pop = Load_Counties_Pop19(State_Code)
        # to_json does not create missing directories.
        os.makedirs('data', exist_ok = True)
        Counties_Pop.to_json(filename, orient = 'table')

    return Counties_Pop

In [11]:
def Load_CountyNames_Dict(State_Code):
    """Fetch a {county code: county name} dictionary for a state.

    Queries the 2020 decennial 'pl' endpoint. The name is the text before
    the first comma of the returned NAME value; the dictionary is ordered
    by county code.
    """
    IP = 'https://api.census.gov/data/2020/dec/pl?get=NAME&for=county:*&in=state:{}'
    Counties = pd.read_json(IP.format(State_Code))

    # Row 0 is the header row; column 1 is dropped so that name and county
    # code remain.
    Counties = Counties.drop(index = 0).drop(columns = [1])
    Counties.columns = ['County Name', 'County Code']
    Counties['County Name'] = Counties['County Name'].str.rsplit(',').str[0]
    Counties = Counties.set_index('County Code').sort_index()

    # Select the column explicitly instead of squeeze(): squeeze() turns a
    # single-county table into a bare scalar, which has no to_dict().
    return Counties['County Name'].to_dict()

def Get_CountyNames_Dict(State_Code):
    """Return the {county code: county name} dictionary of a state, using the local cache.

    Reads 'data/<State_Code>_CountyNames.json' when it exists; otherwise
    calls Load_CountyNames_Dict, stores the result in that file and
    returns it.
    """
    filename = 'data/' + State_Code + '_CountyNames.json'
    if os.path.isfile(filename):
        with open(filename, 'r') as f:
            CountyNames_Dict = json.load(f)
    else:
        CountyNames_Dict = Load_CountyNames_Dict(State_Code)
        # open(..., 'w') fails if the cache folder is missing, so create it.
        os.makedirs('data', exist_ok = True)
        with open(filename, 'w') as f:
            json.dump(CountyNames_Dict, f)

    return CountyNames_Dict

In [12]:
def Counties_Info(State_Code):
    """Combine county populations with land/water areas and population density.

    Starts from the table returned by Get_Counties_Pop19 and, for every
    feature returned by Get_geojson_Counties, writes the feature's 'ALAND'
    and 'AWATER' values (scaled by 10**-6) into the row labelled by its
    'COUNTYFP'. Rows without a matching feature keep an area of 0.0.
    'Population Density' is population divided by land area.
    """
    info = Get_Counties_Pop19(State_Code)
    features = Get_geojson_Counties(State_Code)['features']

    info['Land Area'] = 0.0
    info['Water Area'] = 0.0

    scale = 10**-6
    for feature in features:
        props = feature['properties']
        county_id = props['COUNTYFP']
        info.loc[county_id, 'Land Area'] = props['ALAND'] * scale
        info.loc[county_id, 'Water Area'] = props['AWATER'] * scale

    info['Population Density'] = info['Population'] / info['Land Area']

    return info

# **<font color = 'DarkRed'> County and District Levels Plots </font>**

* <font color='royalblue'> _Map_State displays the map of the congressional districts (if level = 'Congressional District') or counties (if level = 'County') of a given state, colored by the data in the dataframe, df, in the data column given by data_col. The level 'Health District' is accepted but not implemented yet: the function returns without drawing a map.<br> If level = 'Congressional District', then the dataframe, df, should have the congressional districts as index.<br> If level = 'County', then the dataframe, df, should have the county codes as index.<br> If level = 'Health District', then the dataframe, df, should have the health district names as index.<br> Log_Scale defaults to False. If it is set to True, then log(data_col) values are displayed instead._ </font>

In [13]:
def Map_State(State_Code, level, df, data_col, Log_Scale = False):
    """Show a choropleth of a state's districts or counties coloured by df[data_col].

    Parameters
    ----------
    State_Code : str
        Passed to the GeoJSON / county-name getters.
    level : str
        'Congressional District' or 'County'. 'Health District' is accepted
        but not implemented: a message is printed and nothing is drawn.
        Any other value prints the list of valid options.
    df : pandas.DataFrame
        Its first index level holds the district codes (matched against
        'CD116FP') or county codes (matched against 'COUNTYFP').
    data_col : str
        Column of df used for colouring.
    Log_Scale : bool, default False
        When True, np.log(df[data_col] + 1e-10) is plotted and the colour
        bar is labelled 'Log <data_col>'.

    Returns
    -------
    None
        The figure is displayed with fig.show().
    """
    if level == 'Congressional District':
        geoDict = Get_geojson_CD116(State_Code)
        featureID = 'properties.CD116FP'
        hover_info = df.index.get_level_values(0)
    elif level == 'County':
        geoDict = Get_geojson_Counties(State_Code)
        featureID = 'properties.COUNTYFP'
        # Look the name up per row of df so the hover labels stay matched to
        # the plotted counties even when df is a subset of the state's
        # counties or is ordered differently. Unknown codes fall back to the
        # code itself.
        county_names = Get_CountyNames_Dict(State_Code)
        hover_info = [county_names.get(code, code) for code in df.index.get_level_values(0)]
    elif level == 'Health District':
        print("Health District maps are not implemented yet")
        return
    else:
        print("The options for level should be: Congressional District, County, or Health District")
        return

    # Arguments shared by the linear and the log variant.
    plot_args = dict(geojson = geoDict,
                     locations = df.index.get_level_values(0),
                     hover_name = hover_info,
                     featureidkey = featureID,
                     color_continuous_scale = 'Portland',
                     projection = 'natural earth')

    if Log_Scale:
        # Small offset keeps log() finite for zero-valued entries.
        eps = 1e-10
        plot_args['color'] = np.log(df[data_col] + eps)
        plot_args['labels'] = {'color' : 'Log ' + data_col}
    else:
        plot_args['color'] = data_col

    fig = px.choropleth(df, **plot_args)

    fig.update_geos(fitbounds = "locations", visible = False)
    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.show()

    return