### Data used in this notebook.
1. [Age groups by county level.](https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-detail.html)
    * The first table lists the states; for each state there is a county-level breakdown of the median age.
    
    
2. [Race ratio by county level.](https://www.census.gov/data/tables/time-series/demo/popest/2010s-counties-detail.html)
    * The second table lists the states; for each state there is a county-level breakdown of the races living in that area.
    
    
3. [Personal income county level.](https://apps.bea.gov/itable/iTable.cfm?ReqID=70&step=1&acrdn=5)
    * In the dropdown, select "Personal Income and Employment by County and Metropolitan Area".
    * Select "Personal Income and Employment by Major Component".
    * Select "County" under Major Area.
    * Select "All Counties in the US" under States, or go state by state.
    * Select "All Statistics in tables." under Area and Statistics.
    * Download table.


In [1]:
import pandas as pd
import plotly.express as px
import json

## Age median per county.

1 = 4/1/2010 Census population\
2 = 4/1/2010 population estimates base\
3 = 7/1/2010 population estimate\
4 = 7/1/2011 population estimate\
5 = 7/1/2012 population estimate\
6 = 7/1/2013 population estimate\
7 = 7/1/2014 population estimate\
8 = 7/1/2015 population estimate\
9 = 7/1/2016 population estimate\
10 = 7/1/2017 population estimate\
11 = 7/1/2018 population estimate\
12 = 7/1/2019 population estimate

In [None]:
def get_age_median(file, attr=('MEDIAN_AGE_TOT',), year=12):
    """
    Given a file from first bullet point "age groups by county level" formats it into dataframe.
    
    Arguments:
        - file: str (filename, or anything else accepted by pandas.read_csv).
        - attr: sequence of str (column names to keep, default being the age median).
        - year: int (year id, refer to the list of ids above; default 12).
        
    Return:
        Pandas DataFrame with columns STNAME, CTYNAME and the requested
        attributes, one row per input row of the selected year, re-indexed from 0.
    """
    
    raw_age_data = pd.read_csv(file)
    # Filter on the requested year id (this used to be hard-coded to 12,
    # which silently ignored the `year` argument).
    selected_year = raw_age_data[raw_age_data['YEAR'] == year]
    age_median = selected_year[["STNAME", "CTYNAME", *attr]]
    age_median = age_median.reset_index(drop=True)
    
    return age_median

## Demographics per county.

In [None]:
def get_white_ratio(file, attr=('WA_MALE', 'WA_FEMALE'), year=12):
    """
    Given a file from second bullet point "Race ratio by county level" formats it into dataframe.
    
    The RATIO column is the sum of the `attr` columns divided by TOT_POP,
    computed on the rows where AGEGRP is 0 and YEAR equals `year`.
    
    Arguments:
        - file: str (filename, or anything else accepted by pandas.read_csv).
        - attr: sequence of str (column names to add up, default being total white male and female).
        - year: int (year id, refer to the list of ids above; default 12).
        
    Return:
        Pandas DataFrame with columns STNAME, CTYNAME and RATIO, re-indexed from 0.
    """
    
    raw_all_data = pd.read_csv(file)
    wanted_rows = (raw_all_data['AGEGRP'] == 0) & (raw_all_data['YEAR'] == year)
    rows = raw_all_data.loc[wanted_rows]

    # Accumulate in a standalone Series instead of writing scratch columns
    # into the filtered frame, which triggers pandas' SettingWithCopyWarning.
    columns = list(attr)
    selected = rows[columns[0]]
    for col in columns[1:]:
        selected = selected + rows[col]

    result = pd.DataFrame({
        'STNAME': rows['STNAME'],
        'CTYNAME': rows['CTYNAME'],
        'RATIO': selected / rows['TOT_POP'],
    })
    result = result.reset_index(drop=True)
    
    return result

## Income per county.

1 - Personal income (thousands of dollars)\
2 - Population (persons)\
3 - Per capita personal income (dollars)

In [None]:
def get_income_per_person(file, year='2019'):
    """
    Given a file from third bullet point "Personal income county level" formats it into dataframe.
    
    Only rows with LineCode 3 (per capita personal income, see the list of
    line codes above) are kept, and the first of those rows is dropped.
    
    Arguments:
        - file: str (filename, or anything else accepted by pandas.read_csv).
        - year: str or int (name of the year column to extract, default '2019').
        
    Return:
        Pandas DataFrame with columns GeoName and the requested year, re-indexed from 0.
    """
    
    year_column = str(year)
    raw_all_data = pd.read_csv(file)
    per_capita = raw_all_data[raw_all_data['LineCode'] == 3]
    result = per_capita[['GeoName', year_column]]
    # Skip the first matching row.
    # NOTE(review): presumably this is the state-level summary row that
    # precedes the counties in the export - confirm against the file.
    result = result.iloc[1:, :]
    result = result.reset_index(drop=True)
    
    return result

## Visualizing data.

In [None]:
# Load the three county-level tables used by the plots below.
age_data = get_age_median('./cc-est2019-agesex-36.csv')
white_ratio = get_white_ratio(
    './cc-est2019-alldata-36.csv',
    attr=['WA_MALE', 'WA_FEMALE'],
    year=12,
)
income_per_capita = get_income_per_person('./CAINC1_Personal_Income_Summary.csv')

In [None]:
# Median age per county.
fig = px.histogram(
    age_data,
    x='CTYNAME',
    y='MEDIAN_AGE_TOT',
    labels={
        'CTYNAME': 'County',
        'MEDIAN_AGE_TOT': 'Median Age',
    },
)
fig.show()

In [None]:
# Share of the selected population group in the total population, per county.
fig = px.histogram(
    white_ratio,
    x='CTYNAME',
    y='RATIO',
    # Axis label typo fixed: "Ration" -> "Ratio".
    labels={'CTYNAME': 'County', 'RATIO': "Ratio white/total"},
)
fig.show()

In [None]:
# Per capita personal income (2019 column) per county.
fig = px.histogram(
    income_per_capita,
    x='GeoName',
    y='2019',
    labels={
        'GeoName': 'County',
        '2019': 'income_per_capita',
    },
)
# Draw the x grid and place the (longer) ticks on the category boundaries.
fig.update_xaxes(showgrid=True, tickson="boundaries", ticklen=20)
fig.show()

## Radviz Data

In [None]:
def create_radviz_data(base_file, age_file, ratio_file, income_file):
    """
    Combine the age, race-ratio and income tables into one JSON string of records.
    
    Arguments:
        - base_file: str (directory that contains the three files).
        - age_file: str (filename passed to get_age_median).
        - ratio_file: str (filename passed to get_white_ratio).
        - income_file: str (filename passed to get_income_per_person).
        
    Return:
        str (JSON, orient='records', with keys county_name, age_median,
        white_ratio and income_per_capita).
    """
    ages = get_age_median(f"{base_file}/{age_file}")
    ratios = get_white_ratio(f"{base_file}/{ratio_file}", ['WA_MALE', 'WA_FEMALE'], 12)
    incomes = get_income_per_person(f"{base_file}/{income_file}")

    # The county names fix the row index; the remaining columns are aligned
    # to it by row position (index label), not by county name.
    combined = pd.DataFrame({'county_name': ages['CTYNAME']}).assign(
        age_median=ages['MEDIAN_AGE_TOT'],
        white_ratio=ratios['RATIO'],
        income_per_capita=incomes['2019'],
    )

    return combined.to_json(orient='records')

In [None]:
# Input file names, looked up inside the ./NJ directory.
AGE_FILE = 'cc-est2019-agesex-34.csv'
RATIO_FILE = 'cc-est2019-alldata-34.csv'
INCOME_FILE = 'income.csv'

radviz_data = create_radviz_data(
    base_file='./NJ',
    age_file=AGE_FILE,
    ratio_file=RATIO_FILE,
    income_file=INCOME_FILE,
)

## GeoJson Data

In [43]:
def reformat_geojson(file, out_file, possibleNames=('namelsad', 'NAMELSAD20')):
    """
    Copy a GeoJSON file, adding a uniform 'county_name' property to its features.
    
    For every feature, the first property key (in the feature's own key order)
    that appears in `possibleNames` is copied to 'county_name'. Features with
    no matching key are left unchanged.
    
    Arguments:
        - file: str (path of the GeoJSON file to read).
        - out_file: str (path of the file to create; must not exist yet).
        - possibleNames: sequence of str (property keys that may hold the county name).
        
    Return:
        None
        
    Raises:
        FileExistsError if out_file already exists (it is opened in 'x' mode).
    """
    
    # Context managers make sure the handles are closed even if parsing or
    # writing fails.
    with open(file, encoding='utf-8') as src:
        geojson = json.load(src)
    
    for feature in geojson['features']:
        properties = feature['properties']
        for key in properties:
            if key in possibleNames:
                properties['county_name'] = properties[key]
                # Stop right away: the dict was just modified, so iteration
                # must not continue.
                break

    with open(out_file, 'x', encoding='utf-8') as dst:
        dst.write(json.dumps(geojson))

In [44]:
# reformat_geojson('/Users/gzakhar/Desktop/Research/Mueller/testing/public/nyGeo.json', '/Users/gzakhar/Desktop/Research/Mueller/testing/public/nyGeoCOPY.json')