In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

# default cleaning method until proven otherwise
def clean_census_frame(csv_path, head=False, reset=True, set_index=False):
    """
    Load a census CSV and promote its first data row to the column names.

    The file is expected to carry two header lines: the CSV header row with
    coded names (e.g. ``HC01_VC03``) and, directly below it, a row with the
    readable names (e.g. ``Estimate; SEX AND AGE - Total population``).
    The readable row replaces the coded header and is removed from the data.

    Parameters
    ----------
    csv_path
        Path of the csv (handed straight to ``pd.read_csv``).
    head
        default=False. If truthy, an integer: only the first ``head`` rows
        are returned (via ``.head()``). ``head=0`` is falsy and therefore
        returns the whole frame.
    reset
        default=True. If truthy, ``reset_index()`` is applied after the label
        row is removed; the previous row labels are kept as a new leading
        column (pandas names it ``index``). If False, the rows keep their
        original labels, which then start at 1 because row 0 was the labels.
    set_index
        default=False. If truthy, it is passed to ``DataFrame.set_index`` to
        choose the index column(s) of the result.

    Returns
    -------
    pandas.DataFrame
        The frame with readable column names and without the label row.

    Notes
    -----
    The label row is parsed as ordinary data, so pandas will typically leave
    the value columns as strings; convert downstream if numbers are needed.
    An input with no data rows raises ``IndexError`` from ``.iloc[0]``.
    """
    # low_memory=False parses the file in one pass instead of in chunks,
    # so every column gets a single, consistently inferred dtype
    frame = pd.read_csv(csv_path, low_memory=False)

    # No defensive copy: `frame` is local and never used again in its raw
    # form, so copying would only double the memory held for a large file.
    # Row 0 holds the readable names -> make them the header ...
    frame.columns = frame.iloc[0]
    # ... and keep everything below that row as the actual data
    clean_df = frame[1:]

    if reset:
        # default drop=False: old row labels survive as a column
        clean_df = clean_df.reset_index()

    if set_index:
        clean_df = clean_df.set_index(set_index)

    if head:
        # only the first {head} rows were requested
        return clean_df.head(head)
    return clean_df
    

'''load data'''
# One ACS 5-year DP05 extract per year, 2011-2017.
# reset=False: keep the row labels as loaded; the index is rebuilt once,
# after the columns have been trimmed (see the last section).
# 2011
y2k11 = clean_census_frame('../data/acs/aff_download/ACS_11_5YR_DP05_with_ann.csv', reset=False)
# 2012
y2k12 = clean_census_frame('../data/acs/aff_download/ACS_12_5YR_DP05_with_ann.csv', reset=False)
# 2013
y2k13 = clean_census_frame('../data/acs/aff_download/ACS_13_5YR_DP05_with_ann.csv', reset=False)
# 2014
y2k14 = clean_census_frame('../data/acs/aff_download/ACS_14_5YR_DP05_with_ann.csv', reset=False)
# 2015
y2k15 = clean_census_frame('../data/acs/aff_download/ACS_15_5YR_DP05_with_ann.csv', reset=False)
# 2016
y2k16 = clean_census_frame('../data/acs/aff_download/ACS_16_5YR_DP05_with_ann.csv', reset=False)
# 2017
y2k17 = clean_census_frame('../data/acs/aff_download/ACS_17_5YR_DP05_with_ann.csv', reset=False)

'''copy data for editing and extracting info'''
# the y2kNN frames stay untouched as loaded; all editing happens on yNN
# 2011
y11 = y2k11.copy()
# 2012
y12 = y2k12.copy()
# 2013
y13 = y2k13.copy()
# 2014
y14 = y2k14.copy()
# 2015
y15 = y2k15.copy()
# 2016
y16 = y2k16.copy()
# 2017
y17 = y2k17.copy()

'''identify columns'''
# 2011
tags11 = y11.columns
# 2012
tags12 = y12.columns
# 2013
tags13 = y13.columns
# 2014
tags14 = y14.columns
# 2015
tags15 = y15.columns
# 2016
tags16 = y16.columns
# 2017
tags17 = y17.columns

'''identify common columns'''
# Column labels shared by the 2011-2016 frames, computed once.
# NOTE: `Index & Index` used to mean set intersection, but that operator form
# was deprecated in pandas 1.x and is an element-wise logical operation in
# pandas 2.x, so the intersection is requested explicitly.
shared_tags = (
    tags11
    .intersection(tags12)
    .intersection(tags13)
    .intersection(tags14)
    .intersection(tags15)
    .intersection(tags16)
)
# collection of columns appearing in all 7 dataframes 2011-2017
# (kept in the 2017 column order)
common_tags = [tag for tag in tags17 if tag in shared_tags]

'''identify non common columns for specific frames'''
# 2011
uncommon_11 = [tag for tag in y11.columns if tag not in common_tags]
# 2012
uncommon_12 = [tag for tag in y12.columns if tag not in common_tags]
# 2013
uncommon_13 = [tag for tag in y13.columns if tag not in common_tags]
# 2014
uncommon_14 = [tag for tag in y14.columns if tag not in common_tags]
# 2015
uncommon_15 = [tag for tag in y15.columns if tag not in common_tags]
# 2016
uncommon_16 = [tag for tag in y16.columns if tag not in common_tags]
# 2017
uncommon_17 = [tag for tag in y17.columns if tag not in common_tags]

"""drop each frame's uncommon columns, reset index"""
# reset_index() is called with its default drop=False, so the previous row
# labels are kept as an extra leading column in every a_tNN frame
# 2011
a_t11 = y11.drop(uncommon_11, axis=1).reset_index()
# 2012
a_t12 = y12.drop(uncommon_12, axis=1).reset_index()
# 2013
a_t13 = y13.drop(uncommon_13, axis=1).reset_index()
# 2014
a_t14 = y14.drop(uncommon_14, axis=1).reset_index()
# 2015
a_t15 = y15.drop(uncommon_15, axis=1).reset_index()
# 2016
a_t16 = y16.drop(uncommon_16, axis=1).reset_index()
# 2017
a_t17 = y17.drop(uncommon_17, axis=1).reset_index()