In [1]:
import glob
import os

import pandas as pd

Data source: [U.S. Census Bureau, table DP05 — ACS Demographic and Housing Estimates (data.census.gov)](https://data.census.gov/cedsci/table?q=DP05%3A%20ACS%20DEMOGRAPHIC%20AND%20HOUSING%20ESTIMATES&g=0500000US06001,06037,06067,06073,41051,53033_1600000US0644000,0653000,0664000,0666000,4159000,5363000&tid=ACSDP5Y2010.DP05&moe=false)

In [2]:
# glob.glob returns matches in arbitrary, filesystem-dependent order; sorting
# keeps the row order of the concatenated outputs stable from run to run.
files = sorted(glob.glob('../01_inputs/USCB/DP05/*.csv'))

In [3]:
def reshape(df):
    """Turn a wide slice of a DP05 table into one row per geography and census label.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'census' column. Every other column name is parsed as
        '<geography>, <state>!!<measure>': the text before the first '!!' is
        the place, the text after it is the measure, and the place is split
        on its first ', ' into geography and state.

    Returns
    -------
    pandas.DataFrame
        Columns 'geography', 'state' and 'census', followed by one column per
        distinct measure holding the corresponding cell values.

    Raises
    ------
    ValueError
        Raised by ``DataFrame.pivot`` when a (geography, state, census,
        measure) combination occurs more than once.
    """
    long_df = df.melt(id_vars=['census'])

    # Parse each melted column header once: part 0 is the place, part 1 the measure.
    header_parts = long_df['variable'].str.split('!!', n=1, expand=True)
    long_df['geography'] = header_parts[0]

    # `n` has to be passed by keyword: since pandas 2.0 every argument of
    # str.split other than `pat` is keyword-only, so the positional form
    # raises TypeError.
    long_df[['geography', 'state']] = long_df['geography'].str.split(
        ', ', n=1, expand=True)
    long_df['measure'] = header_parts[1]
    long_df = long_df.drop(columns=['variable'])

    index_cols = ['geography', 'state', 'census']
    wide_df = long_df.pivot(
        index=index_cols, columns='measure', values='value').reset_index()
    # Drop the leftover 'measure' label from the columns axis.
    return wide_df.rename_axis(None, axis=1)

In [4]:
def read_dp05(file):
    """Load one DP05 CSV export and return its race and ethnicity rows, reshaped and merged.

    Parameters
    ----------
    file : str
        Path to a DP05 CSV. Characters 7-10 of the file's base name are taken
        as the census year (presumably names start with 'ACSDP5Y<year>' --
        confirm against the input folder).

    Returns
    -------
    pandas.DataFrame
        Outer merge on 'geography' and 'state' of the reshaped race rows and
        the reshaped ethnicity rows. Columns present in both sides are
        suffixed ' race ACS5Y' / ' ethnicity ACS5Y', and a 'census year'
        column carries the year string.
    """
    year = os.path.basename(file)[7:11]
    table = pd.read_csv(file).rename(columns={'Label (Grouping)': 'census'})

    # The rows of interest are picked by position. In the 2020 file they sit
    # five rows further down than in the other years.
    # NOTE(review): positions are hard-coded; verify them whenever a new
    # vintage is added to the inputs.
    shift = 5 if year == '2020' else 0

    ethnicity_rows = pd.concat([
        table.iloc[69 + shift:70 + shift],
        table.iloc[74 + shift:75 + shift],
    ])
    race_rows = pd.concat([
        table.iloc[60 + shift:67 + shift],
        table.iloc[75 + shift:76 + shift],
    ])

    race_wide = reshape(race_rows)
    ethnicity_wide = reshape(ethnicity_rows)

    combined = race_wide.merge(
        ethnicity_wide,
        how='outer',
        on=['geography', 'state'],
        suffixes=(' race ACS5Y', ' ethnicity ACS5Y'),
    )
    combined['census year'] = year
    return combined

In [5]:
# Stack the per-file frames (one per input CSV) into a single table.
dp05_df = pd.concat(map(read_dp05, files))

In [6]:
# County-level rows only. index=False matches the city and combined exports;
# without it the CSV gains an unnamed column of repeated per-file row labels.
dp05_df[dp05_df['geography'].str.contains('County')].to_csv(
    '../04_outputs/USCB-ACS5Y-DP05-County.csv', index=False)

In [7]:
# Everything whose geography name does not mention 'County' goes to the city file.
dp05_df.loc[
    ~dp05_df['geography'].str.contains('County')
].to_csv('../04_outputs/USCB-ACS5Y-DP05-City.csv', index=False)

In [8]:
# Full table (counties and cities together), written without the row index.
dp05_df.to_csv(
    '../04_outputs/USCB-ACS5Y-DP05.csv',
    index=False,
)