In [114]:
import pandas as pd
import os
import csv

In [115]:
# Directory holding the 1990 NHGIS block-level CSV extracts.
# The default points at the large-file USB drive as mounted on a Mac. To
# redirect it on another machine, set the NHGIS_1990_DATA_DIR environment
# variable instead of editing this cell.
# os.path.join(..., '') guarantees a trailing separator, because later cells
# build file paths by plain string concatenation with this value.
absolute_filepath = os.path.join(
    os.environ.get('NHGIS_1990_DATA_DIR') or '/Volumes/Lexar/1990 data/', '')

In [116]:
# Context columns that are removed from both the housing and the race extract
# as soon as each file is loaded
drop_cols = [
    'ANRCA', 'RES_ONLYA', 'TRUSTA', 'AIANHHA', 'RES_TRSTA',
    'CDA', 'CTY_SUBA', 'DIVISIONA', 'PLACEA', 'PMSAA',
    'REGIONA', 'URBRURALA', 'URB_AREAA', 'CD103A', 'ANPSADPI',
    'COUNTYA', 'STATEA',
]

In [117]:
# Housing columns to drop: every numbered code of five table prefixes.
# Each pair below is (table prefix, number of columns); a code is the prefix
# followed by a zero-padded three-digit counter, e.g. 'ETQ001'.
# ETQ, ESR and ETZ are labeled in unused_col_dict; ES4 and ESW are not
# labeled anywhere in this notebook.
housing_drop_cols = [
    '{}{:03d}'.format(prefix, number)
    for prefix, count in (('ETQ', 6), ('ESR', 20), ('ES4', 17), ('ESW', 5), ('ETZ', 5))
    for number in range(1, count + 1)
]

In [118]:
# Readable names for the coded columns of the race dataframe
# NOTE(review): 'EST001' is also mapped in housing_col_dict (there as
# 'Median'); confirm whether the race extract really carries that column.
race_col_dict = {
    'EUY001': 'White',
    'EUY002': 'Black',
    'EUY003': 'American Indian, Eskimo, or Aleut',
    'EUY004': 'Asian or Pacific Islander',
    'EUY005': 'Other race',
    'EST001': 'Median Value',
}

In [119]:
# Readable names for the coded columns that are kept in the housing dataframe
housing_col_dict = {
    'ESA001': 'Total Units',
    'ESN001': 'Total Occupied Units',
    'ESN002': 'Vacant',
    'ES1001': 'Owner occupied',
    'ES1002': 'Renter occupied',
    'EST001': 'Median',
}

In [120]:
# Labels for columns that get dropped (see housing_drop_cols); kept for
# reference only, likely unneeded.
# Labels are listed per table prefix in code order, so the n-th label of a
# list belongs to the code made of the prefix plus n zero-padded to 3 digits.
unused_col_dict = {
    '{}{:03d}'.format(prefix, position): label
    for prefix, labels in (
        ('ETQ', [
            'For rent',
            'For sale only',
            'Rented or sold, not occupied',
            'For seasonal, recreational, or occasional use',
            'For migrant workers',
            'Other vacant',
        ]),
        ('ETZ', [
            'White',
            'Black',
            'American Indian, Eskimo, or Aleut',
            'Asian or Pacific Islander',
            'Other race',
        ]),
        ('ESR', [
            'Less than $15,000',
            '$15,000 to $19,999',
            '$20,000 to $24,999',
            '$25,000 to $29,999',
            '$30,000 to $34,999',
            '$35,000 to $39,999',
            '$40,000 to $44,999',
            '$45,000 to $49,999',
            '$50,000 to $59,999',
            '$60,000 to $74,999',
            '$75,000 to $99,999',
            '$100,000 to $124,999',
            '$125,000 to $149,999',
            '$150,000 to $174,999',
            '$175,000 to $199,999',
            '$200,000 to $249,999',
            '$250,000 to $299,999',
            '$300,000 to $399,999',
            '$400,000 to $499,999',
            '$500,000 or more',
        ]),
    )
    for position, label in enumerate(labels, start=1)
}

In [121]:
# Chicago's MSA code; rows whose 'MSA_CMSAA' column equals this value are the
# ones kept when the race and housing dataframes are filtered down to Chicago
chicago_msa_code = 1602

In [122]:
# Load housing data into df, using the file's first column as the index and
# discarding the context columns listed in drop_cols.
# The path is built once with os.path.join so it is correct whether or not
# absolute_filepath ends with a separator.
housing_csv_path = os.path.join(absolute_filepath, 'nhgis0004_ds120_1990_block.csv')
if not os.path.exists(housing_csv_path):
    # Fail early and name the missing path so the user knows what to redirect
    raise IOError('This file does not exist in this location: ' + housing_csv_path)
housing_df = pd.read_csv(housing_csv_path, index_col=0).drop(drop_cols, axis=1)

In [123]:
# Load race data into df, using the file's first column as the index and
# discarding the context columns listed in drop_cols.
# The path is built once with os.path.join so it is correct whether or not
# absolute_filepath ends with a separator.
race_csv_path = os.path.join(absolute_filepath, 'nhgis0005_ds120_1990_block.csv')
if not os.path.exists(race_csv_path):
    # Fail early and name the missing path so the user knows what to redirect
    raise IOError('This file does not exist in this location: ' + race_csv_path)
race_df = pd.read_csv(race_csv_path, index_col=0).drop(drop_cols, axis=1)

In [124]:
# Race dataframe data manipulations:
#   1. give the coded columns readable names,
#   2. fold the three smaller categories into a single 'Other races' column,
#   3. add 'Total Pop' as the row-wise sum of the three remaining race columns,
#   4. keep only the rows whose MSA code is Chicago's.
folded_race_cols = ['American Indian, Eskimo, or Aleut', 'Asian or Pacific Islander', 'Other race']
race_df = race_df.rename(columns=race_col_dict)
race_df = race_df.assign(**{'Other races': race_df[folded_race_cols].sum(axis=1)})
race_df = race_df.drop(folded_race_cols, axis=1)
race_df = race_df.assign(**{'Total Pop': race_df[['White', 'Black', 'Other races']].sum(axis=1)})
chi_race_df = race_df.loc[race_df['MSA_CMSAA'] == chicago_msa_code]

In [125]:
# Express each race count as a share of the row's total population, then drop
# the raw counts. The shares are plain ratios (count / total), not 0-100 values.
# NOTE(review): rows with a 'Total Pop' of 0 presumably come out as NaN here;
# confirm that is acceptable downstream.
total_pop = chi_race_df['Total Pop']
Pct_white = chi_race_df['White'] / total_pop
Pct_black = chi_race_df['Black'] / total_pop
Pct_other = chi_race_df['Other races'] / total_pop
# One assign per column keeps the new columns in this exact order
chi_race_df = (
    chi_race_df
    .assign(Pct_White=Pct_white)
    .assign(Pct_Black=Pct_black)
    .assign(Pct_Other=Pct_other)
    .drop(['White', 'Black', 'Other races'], axis=1)
)

In [126]:
# Keep only the Chicago MSA rows, discard the unneeded housing columns and
# give the remaining coded columns readable names
is_chicago = housing_df['MSA_CMSAA'] == chicago_msa_code
chi_housing = (
    housing_df.loc[is_chicago]
    .drop(housing_drop_cols, axis=1)
    .rename(columns=housing_col_dict)
)

In [127]:
# Turn the raw unit counts into shares (plain ratios, not 0-100 values).
# Occupied and vacant are shares of all units; owner and renter are shares of
# occupied units only.
# Open question from the original author: does renter/owner need to be over
# occupied units rather than total units? (The code below uses occupied units.)
all_units = chi_housing['Total Units']
occupied_units = chi_housing['Total Occupied Units']
chi_housing['% Occupied'] = occupied_units / all_units
chi_housing['% Vacant'] = chi_housing['Vacant'] / all_units
chi_housing['% Owner Occupied'] = chi_housing['Owner occupied'] / occupied_units
chi_housing['% Renter'] = chi_housing['Renter occupied'] / occupied_units
# The raw counts are no longer needed once the shares exist
chi_housing = chi_housing.drop(['Vacant', 'Owner occupied', 'Renter occupied'], axis=1)

In [128]:
# Merge race and housing into one dataframe, matching rows on the index and
# keeping only index values present in both (inner join).
# The columns below are removed from the housing side before merging --
# presumably because the race dataframe already carries the same identifier
# columns (it is filtered on 'MSA_CMSAA' too); TODO confirm for the others.
housing_cols_to_skip = ['YEAR', 'BLOCKA', 'BLCK_GRPA', 'TRACTA', 'C_CITYA', 'COUNTY',
                        'MSA_CMSAA', 'STATE', 'Total Occupied Units']
full_df = chi_race_df.merge(
    chi_housing.drop(housing_cols_to_skip, axis=1),
    how='inner', left_index=True, right_index=True)

In [129]:
# Write the merged dataframe out as a pipe-delimited CSV, one directory above
# the current working directory
output_csv_path = "../1990_census_data.csv"
full_df.to_csv(output_csv_path, sep="|")