# Who?
---
### Summary
    1. Import Modules
    2. Define Census Variables/Tables
    3. Request JSON Through Census API, Convert To Pandas DF
    4. Clean Data
        A. Drop Useless Columns
        B. Rename Columns
        C. Datatypes Format
    5. Data Exploration
        A. Import CSV With Zip Frequency And Monetary Values
        B. What Are The Demographics Of Our Top Zips?
        C. Create Function That Compares Differences
    6. Save Data

## 1. Import Modules

In [441]:
import pandas as pd
import requests

# See all data
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_colwidth', -1)

  pd.set_option('display.max_colwidth', -1)


## 2. Define Census Variables/Tables

In [442]:
# Demographics we will use:

# -total population B01001_001E *
# -median age B01002_001E *
# -home age median B25035_001E *
# -median home value B25107_001E *
# -median household income last 12 mo B19013_001E *

# -education level (percent with a bachelors?) B06009_005E
# -home owner lived in unit B07013_002E
# -householder lived in renter-occupied residence B07013_003E



api_variables = "B01001_001E,B01002_001E,B25035_001E,B25107_001E,B19013_001E"


## 3. Request JSON Through Census API, Convert To Pandas DF

In [443]:
url = "https://api.census.gov/data/2019/acs/acs5?get={}&for=zip%20code%20tabulation%20area:*&in=state:08".format(api_variables)

payload={}
headers = {
  'Cookie': 'TS010383f0=011ba694f273cea421f27bf0ab2cac56fcf022cae435637d6ff04b48f22db0da4930c5fdccf2e3ecfe07edeb712ddebd5998d53170'
}

response = requests.request("GET", url, headers=headers, data=payload)


In [444]:
def json_to_dataframe(response):
    """
    Convert response to dataframe
    """
    return pd.DataFrame(response.json()[1:], columns=response.json()[0])

In [445]:
df = json_to_dataframe(response)

df.head(1)

Unnamed: 0,B01001_001E,B01002_001E,B25035_001E,B25107_001E,B19013_001E,state,zip code tabulation area
0,42,59.0,0,-666666666,27000,8,80434


## 4. Clean Data

### A. Drop Useless Columns

In [446]:
df = df.drop(columns=["state"])

df.head(1)

Unnamed: 0,B01001_001E,B01002_001E,B25035_001E,B25107_001E,B19013_001E,zip code tabulation area
0,42,59.0,0,-666666666,27000,80434


### B. Rename Columns

In [447]:
column_update = ["population", #B01001_001E
                 "median_age", #B01002_001E
                 "median_home_age", #B25035_001E
                 "median_home_value", #B25107_001E
                 "median_income", #B19013_001E
                 "zip"]

df.columns = column_update

In [448]:
df.head(1)

Unnamed: 0,population,median_age,median_home_age,median_home_value,median_income,zip
0,42,59.0,0,-666666666,27000,80434


### C. Datatypes Format

In [449]:
df.dtypes

population           object
median_age           object
median_home_age      object
median_home_value    object
median_income        object
zip                  object
dtype: object

In [450]:
df = df.apply(pd.to_numeric, errors='coerce')

In [451]:
df.dtypes

population           int64  
median_age           float64
median_home_age      int64  
median_home_value    int64  
median_income        int64  
zip                  int64  
dtype: object

## 5. Data Exploration

### A. Import CSV With Zip Frequency And Monetary Values

In [454]:
df_f_m = pd.read_csv("zip_f_m.csv")

### B. What Are The Demographics Of Our Top Zips?

In [455]:
# Top zips by total monetary value spent
df_f_m[df_f_m['label'] == 'Top'].sort_values('monetary_sum', ascending=False).head(5)

Unnamed: 0,location_zip,frequency_count,monetary_sum,f_score,m_score,score_total,label
0,80919,1653,489861.74,1,1,2,Top
1,80920,1641,466744.75,1,1,2,Top
4,80906,1236,449015.5,1,1,2,Top
2,80921,1309,347282.33,1,1,2,Top
3,80132,1268,311673.35,1,1,2,Top


In [456]:
# Top zips by total appointments 
df_f_m[df_f_m['label'] == 'Top'].sort_values('frequency_count', ascending=False).head(5)

Unnamed: 0,location_zip,frequency_count,monetary_sum,f_score,m_score,score_total,label
0,80919,1653,489861.74,1,1,2,Top
1,80920,1641,466744.75,1,1,2,Top
2,80921,1309,347282.33,1,1,2,Top
3,80132,1268,311673.35,1,1,2,Top
4,80906,1236,449015.5,1,1,2,Top


In [457]:
# Create array of selected zips
df_list = df_f_m[df_f_m['label'] == 'Top'].sort_values('monetary_sum', ascending=False).head(5)
zip_array = df_list['location_zip'].tolist()
zip_array

[80919, 80920, 80906, 80921, 80132]

In [458]:
# Demographics of top zips
df[df['zip'].isin(zip_array)]

Unnamed: 0,population,median_age,median_home_age,median_home_value,median_income,zip
33,40016,35.6,1992,332000,96284,80920
89,28039,43.6,1988,373500,95320,80919
311,37608,39.1,1984,357800,68701,80906
312,24087,41.5,2004,444900,124085,80921
389,21286,43.9,1996,474200,129009,80132


### C. Create Function That Compares Differences

In [482]:
def create_df(target_zip):
    """
    This function finds the difference from target_zip 
    demographics to all other zip demographics in df.
    """
    arr = [] 
    for index, row in df.iterrows(): 
        for column in df.columns:   
            if column != 'zip':
                target_zip_data = df[df['zip'] == target_zip]['{}'.format(column)].iat[0]
                other_zip_data = row['{}'.format(column)]

                case = {"selected_zip": int(target_zip), "comparing_zip": int(row['zip']), "category": column, "selected_zip_data": target_zip_data, 
                                "comparing_zip_data": other_zip_data, "difference": abs(target_zip_data-other_zip_data)}
                arr.append(case)
    created_df = pd.DataFrame(arr)
    return created_df

In [484]:
df_80919 = create_df(80919)
df_80919.head()

Unnamed: 0,selected_zip,comparing_zip,category,selected_zip_data,comparing_zip_data,difference
0,80919,80434,population,28039.0,42.0,27997.0
1,80919,80434,median_age,43.6,59.0,15.4
2,80919,80434,median_home_age,1988.0,0.0,1988.0
3,80919,80434,median_home_value,373500.0,-666666666.0,667040166.0
4,80919,80434,median_income,95320.0,27000.0,68320.0


In [488]:
df_80919[df_80919['comparing_zip'] != 80919].sort_values('difference', ascending=True)

Unnamed: 0,selected_zip,comparing_zip,category,selected_zip_data,comparing_zip_data,difference
1922,80919,80020,median_home_age,1988.0,1988.0,0.0
367,80919,81504,median_home_age,1988.0,1988.0,0.0
1792,80919,80498,median_home_age,1988.0,1988.0,0.0
682,80919,81244,median_home_age,1988.0,1988.0,0.0
1427,80919,81630,median_home_age,1988.0,1988.0,0.0
202,80919,80424,median_home_age,1988.0,1988.0,0.0
1172,80919,81327,median_home_age,1988.0,1988.0,0.0
2007,80919,80467,median_home_age,1988.0,1988.0,0.0
342,80919,81328,median_home_age,1988.0,1988.0,0.0
1342,80919,81210,median_home_age,1988.0,1988.0,0.0
