# Who Are Our Future Zip Codes According To Census Data?
---
### Summary
    1. Import Modules
    2. Define Census Variables/Tables
    3. Request JSON Through Census API, Convert To Pandas DF
    4. Clean Data
        A. Drop Useless Columns
        B. Rename Columns
        C. Datatypes Format
    5. Data Exploration
        A. Import CSV With Zip Frequency And Monetary Values
        B. What Are The Demographics Of Our Top Zips?
    6. Create Function That Compares Differences
    7. Save Data

## 1. Import Modules

In [1]:
import pandas as pd
import requests

# See all data
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', -1)

  pd.set_option('display.max_colwidth', -1)


## 2. Define Census Variables/Tables

In [2]:
# Demographics we will use:

# -total population B01001_001E *
# -median age B01002_001E *
# -home age median B25035_001E *
# -median home value B25107_001E *
# -median household income last 12 mo B19013_001E *

api_variables = "B01001_001E,B01002_001E,B25035_001E,B25107_001E,B19013_001E"


## 3. Request JSON Through Census API, Convert To Pandas DF

In [3]:
url = "https://api.census.gov/data/2019/acs/acs5?get={}&for=zip%20code%20tabulation%20area:*&in=state:08".format(api_variables)

payload={}
headers = {
  'Cookie': 'TS010383f0=011ba694f273cea421f27bf0ab2cac56fcf022cae435637d6ff04b48f22db0da4930c5fdccf2e3ecfe07edeb712ddebd5998d53170'
}

response = requests.request("GET", url, headers=headers, data=payload)


In [4]:
def json_to_dataframe(response):
    """
    Convert response to dataframe
    """
    return pd.DataFrame(response.json()[1:], columns=response.json()[0])

In [5]:
df = json_to_dataframe(response)

df.head(1)

Unnamed: 0,B01001_001E,B01002_001E,B25035_001E,B25107_001E,B19013_001E,state,zip code tabulation area
0,42,59.0,0,-666666666,27000,8,80434


## 4. Clean Data

### A. Drop Useless Columns

In [6]:
df = df.drop(columns=["state"])

df.head(1)

Unnamed: 0,B01001_001E,B01002_001E,B25035_001E,B25107_001E,B19013_001E,zip code tabulation area
0,42,59.0,0,-666666666,27000,80434


### B. Rename Columns

In [7]:
column_update = ["population", #B01001_001E
                 "median_age", #B01002_001E
                 "median_home_age", #B25035_001E
                 "median_home_value", #B25107_001E
                 "median_income", #B19013_001E
                 "zip"]

df.columns = column_update

In [8]:
df.head(1)

Unnamed: 0,population,median_age,median_home_age,median_home_value,median_income,zip
0,42,59.0,0,-666666666,27000,80434


### C. Datatypes Format

In [9]:
df.dtypes

population           object
median_age           object
median_home_age      object
median_home_value    object
median_income        object
zip                  object
dtype: object

In [10]:
df = df.apply(pd.to_numeric, errors='coerce')

In [11]:
df.dtypes

population           int64  
median_age           float64
median_home_age      int64  
median_home_value    int64  
median_income        int64  
zip                  int64  
dtype: object

## 5. Data Exploration

### A. Import CSV With Zip Frequency And Monetary Values

In [12]:
df_f_m = pd.read_csv("zip_f_m.csv")

### B. What Are The Demographics Of Our Top Zips?

In [13]:
# Top zips by total monetary value spent
df_f_m[df_f_m['label'] == 'Top'].sort_values('monetary_sum', ascending=False).head(5)

Unnamed: 0,location_zip,frequency_count,monetary_sum,f_score,m_score,score_total,label
0,80919,1653,489861.74,1,1,2,Top
1,80920,1641,466744.75,1,1,2,Top
4,80906,1236,449015.5,1,1,2,Top
2,80921,1309,347282.33,1,1,2,Top
3,80132,1268,311673.35,1,1,2,Top


In [14]:
# Top zips by total appointments 
df_f_m[df_f_m['label'] == 'Top'].sort_values('frequency_count', ascending=False).head(5)

Unnamed: 0,location_zip,frequency_count,monetary_sum,f_score,m_score,score_total,label
0,80919,1653,489861.74,1,1,2,Top
1,80920,1641,466744.75,1,1,2,Top
2,80921,1309,347282.33,1,1,2,Top
3,80132,1268,311673.35,1,1,2,Top
4,80906,1236,449015.5,1,1,2,Top


In [15]:
# Create array of selected zips
df_list = df_f_m[df_f_m['label'] == 'Top'].sort_values('monetary_sum', ascending=False).head(5)
zip_array = df_list['location_zip'].tolist()
zip_array

[80919, 80920, 80906, 80921, 80132]

In [16]:
# Demographics of top zips
top_zip_demos = df[df['zip'].isin(zip_array)]
top_zip_demos

Unnamed: 0,population,median_age,median_home_age,median_home_value,median_income,zip
33,40016,35.6,1992,332000,96284,80920
89,28039,43.6,1988,373500,95320,80919
311,37608,39.1,1984,357800,68701,80906
312,24087,41.5,2004,444900,124085,80921
389,21286,43.9,1996,474200,129009,80132


## 6. Create Function That Compares Differences

In [17]:
def create_df(target_zip):
    """
    Loop through all CO zips finding the 
    differences between target zip
    demographics
    """
    arr = [] 
    for index, row in df.iterrows(): 
        for column in df.columns:   
            if column != 'zip':
                target_zip_data = df[df['zip'] == target_zip]['{}'.format(column)].iat[0]
                other_zip_data = row['{}'.format(column)]

                case = {"selected_zip": int(target_zip), "comparing_zip": int(row['zip']), "category": column, "selected_zip_data": target_zip_data, 
                                "comparing_zip_data": other_zip_data, "difference": abs(target_zip_data-other_zip_data)}
                arr.append(case)
    created_df = pd.DataFrame(arr)
    return created_df

In [18]:
df_80920 = create_df(80920)
df_80906 = create_df(80906)
df_80921 = create_df(80921)
df_80132 = create_df(80132)
df_80919 = create_df(80919)
df_80919.head(10)

Unnamed: 0,selected_zip,comparing_zip,category,selected_zip_data,comparing_zip_data,difference
0,80919,80434,population,28039.0,42.0,27997.0
1,80919,80434,median_age,43.6,59.0,15.4
2,80919,80434,median_home_age,1988.0,0.0,1988.0
3,80919,80434,median_home_value,373500.0,-666666666.0,667040166.0
4,80919,80434,median_income,95320.0,27000.0,68320.0
5,80919,80446,population,28039.0,4257.0,23782.0
6,80919,80446,median_age,43.6,48.7,5.1
7,80919,80446,median_home_age,1988.0,1995.0,7.0
8,80919,80446,median_home_value,373500.0,276800.0,96700.0
9,80919,80446,median_income,95320.0,68272.0,27048.0


In [19]:
def create_piv(df, zip_string):
    """
    Pivot the df, aggregate all values,
    top 10 lowest values sorted saved 
    as CSV
    """
    piv = df.pivot(index='comparing_zip',
              columns='category',
              values='difference')
    piv['agg_diffs']= piv.sum(axis=1)
    piv['selected_zip'] = zip_string
    piv = piv.sort_values('agg_diffs').head(10)
    return piv

In [67]:
# Concat all dfs to final pivot df
final_piv = pd.concat([create_piv(df_80920, "80920"), create_piv(df_80906, "80906"), 
                      create_piv(df_80921, "80921"), create_piv(df_80132, "80132"),
                      create_piv(df_80919, "80919")])

In [21]:
# Final data
final_piv = final_piv.reset_index()

category,comparing_zip,median_age,median_home_age,median_home_value,median_income,population,agg_diffs,selected_zip
0,80920,0.0,0.0,0.0,0.0,0.0,0.0,80920
1,80241,1.1,2.0,7300.0,5726.0,6378.0,19407.1,80920
2,80831,2.7,10.0,3800.0,1641.0,14491.0,19944.7,80920
3,80537,5.6,7.0,1100.0,24164.0,3567.0,28843.6,80920
4,80031,2.5,7.0,16900.0,14013.0,4275.0,35197.5,80920
5,80603,0.5,10.0,1400.0,8794.0,26081.0,36285.5,80920
6,80601,2.2,7.0,18300.0,20609.0,750.0,39668.2,80920
7,81507,15.2,7.0,4400.0,11277.0,25063.0,40762.2,80920
8,80549,0.1,8.0,7700.0,5828.0,27881.0,41417.1,80920
9,80538,6.1,2.0,9100.0,26829.0,9242.0,45179.1,80920


## 7. Save Data

In [724]:
# Save final csv files

final_piv.to_csv("final_piv.csv", encoding='utf-8', index=True)

top_zip_demos.to_csv("top_zip_demos.csv", encoding='utf-8', index=True)