# Ingest Census Data: Part 2

In [1]:
import os
import pandas as pd
import sqlite3

In [2]:
def _split_census_line(line):
    """Strip the bracket/quote punctuation from one raw line and split it on commas."""
    # The order of the replacements matters: '],' must go before '[' and ']]'.
    for token in ('],', '[', '"', ']]', '\n'):
        line = line.replace(token, '')
    return line.split(',')


def importCensusData(directory, headers=None):
    """
    Import Census data and return a dataframe.

    The file is read line by line; each line is expected to hold one
    bracketed, comma-separated list of quoted values (presumably a raw
    Census API response -- confirm against the data source). The first
    line supplies the column labels and every following line becomes a row.

    Parameters
    ----------
    directory : str
        Path of the text file to read (despite the name, this is a file
        path: it is passed straight to ``open``).
    headers : list of str, optional
        Column names to use instead of the labels found on the first line
        of the file. When ``None`` the file's own labels are used.

    Returns
    -------
    pandas.DataFrame
        One row per data line. All values are kept as strings.

    Notes
    -----
    Fields are split on every comma, so a value that itself contains a
    comma would be broken into several fields.
    """
    with open(directory, 'r') as infile:
        parsed = [_split_census_line(line) for line in infile]

    # First parsed line is the header row; an empty file yields no columns/rows.
    if parsed:
        file_columns, data = parsed[0], parsed[1:]
    else:
        file_columns, data = None, []

    # Caller-supplied headers win over the labels embedded in the file.
    column_names = file_columns if headers is None else headers

    return pd.DataFrame(data=data, columns=column_names)

In [3]:
def compileCensusData(directory, years, headers):
    """
    Compile all years of Census data, return dataframe.

    For every entry in ``years`` the file ``<directory>/<year>.txt`` is
    loaded with ``importCensusData`` using ``headers`` as column names, and
    tagged with an integer ``Year_census`` column. The per-year frames are
    then stacked in the order given and the row index is renumbered from 0.

    Parameters
    ----------
    directory : str
        Folder that holds the ``<year>.txt`` files.
    years : list of str
        Year labels; each must be a string that ``int()`` can convert.
    headers : list of str
        Column names forwarded to ``importCensusData``.

    Returns
    -------
    pandas.DataFrame
        All years concatenated, with a fresh 0..n-1 index.
    """
    def load_year(year):
        # One file per year, named after the year itself.
        year_file = os.path.join(directory, year + ".txt")
        frame = importCensusData(directory=year_file, headers=headers)
        frame['Year_census'] = int(year)
        return frame

    yearly_frames = [load_year(year) for year in years]

    return pd.concat(yearly_frames).reset_index(drop=True)

In [4]:
# Years to ingest. compileCensusData uses each entry both as the file stem
# ("<year>.txt") and, converted to int, as the Year_census value.
bg_years = [
    '2009',
    '2013',
    '2014',
    '2015',
    '2016',
    '2017',
]

# Header names for the Block Group database table. These are passed as
# `headers`, so they replace the labels on the first line of each raw file.
# NOTE(review): 'PerCapitalIncomeMargin' looks like a typo for
# 'PerCapitaIncomeMargin'; left unchanged because later steps may rely on it.
column_names = [
    'TotalPop', 'TotalPopMargin', 'UnWgtSampleCtPop',
    'PerCapitaIncome', 'PerCapitalIncomeMargin',
    'MedianHouseholdInc', 'MedianHouseholdIncMargin',
    'MedianAge', 'MedianAgeMargin',
    'HousingUnits', 'HousingUnitsMargin', 'UnweightedSampleHousingUnits',
    'State', 'County', 'Tract', 'BlockGroup',
]

In [5]:
# Folder holding the per-year Block Group files; dirname() drops the
# trailing slash from the literal path.
folder_path = os.path.dirname('../data/census-data/BlockGroup/')

# Load every year listed in bg_years and stack them into a single
# Pandas dataframe with the Block Group column names.
census_df = compileCensusData(
    directory=folder_path,
    years=bg_years,
    headers=column_names,
)