In [2]:
import os 
import csv
import requests
import json
import pandas as pd
import geopandas as gpd
from census import Census
from us import states
import matplotlib.pyplot as plt

In [None]:
# Base URL for the ACS data
base_url = "https://api.census.gov/data/{year}/acs/acs5"
subject_url = "https://api.census.gov/data/{year}/acs/acs5/subject"
profile_url = "https://api.census.gov/data/{year}/acs/acs5/profile"

# Location filters
location = "for=tract:*&in=state:17+county:031"

In [10]:
import json

# Load the JSON data from file
with open('variables.json', 'r') as file:
    data = json.load(file)

main_agg_rent = data['variables']['Housing']['HouseRent']['main_agg_rent']
print("Main Aggregate Rent Variable:", main_agg_rent)


Main Aggregate Rent Variable: NAME,B25060_001E


In [27]:
# Helper Function-1

def extract_info_from_filename(filename, ind_type):
    # Assuming filename is something like 'main_2017.csv'
    parts = filename.split('_')
    if len(parts) > 1:
        year_part = parts[-1]  # This would be '2017.csv'
        year = year_part.split('.')[0]  # This splits '2017.csv' into '2017' and 'csv' and takes the first part
        if ind_type == 'main':
            indicator = filename[5:-9]
        else:
            indicator = filename[4:-9]
        if year.isdigit():  # Check if 'year' is all digits
            return (indicator, int(year))
    return None, None

all_dataframes = []

In [30]:
for name, variable in variables.items():    

    ind_type = name.split('_')[0]

    for year in range(2015, 2020):
        # Construct the URL for the current year
        if variable[5:][0] == 'B':    
            url = f"{base_url.format(year=year)}?get={variable}&{location}"
        elif variable[5:][0] == 'S':
            url = f"{subject_url.format(year=year)}?get={variable}&{location}"
            print(url)
        else:
            url = f"{profile_url.format(year=year)}?get={variable}&{location}"
            
        # Make the request
        response = requests.get(url)

        data = response.json()

        current_dir = os.getcwd()

        # relative path specifically for Contract Rent indicator
        relative_path = os.path.join('data_downloaded', 'Economics', 'MedianIncome')

        # Combine the current directory with the relative path
        full_base_path = os.path.join(current_dir, relative_path)

        # Specify the path to save the CSV file, one for each year
        file_path = os.path.join(full_base_path, f'{ind_type}/{name}_{year}.csv')
        
        # Open a CSV file for writing for each year
        with open(file_path, mode='w', newline='') as file:
            writer = csv.writer(file)

            # Write the data into the CSV file
            writer.writerows(data)

        print(f"Data for {year} for {name} has been written to {file_path}")

https://api.census.gov/data/2015/acs/acs1/subject?get=NAME,S1903_C03_001E&for=tract:*&in=state:17+county:031


JSONDecodeError: [Errno Expecting value] error: unknown/unsupported geography heirarchy: 0

In [29]:
for ind_type in ['main', 'sub']:
    
    short_dir = os.path.join(full_base_path, f'{ind_type}')

    if ind_type == 'main':    

        id_counter = 1

        for filename in os.listdir(short_dir):

            if filename.endswith('.csv'):

                filepath = os.path.join(short_dir, filename)
            
                indicator_name, year = extract_info_from_filename(filename, ind_type)

                if indicator_name and year:
                    df = pd.read_csv(filepath)
                    # Create a new DataFrame with the required columns
                    new_df = pd.DataFrame({
                        'id': range(id_counter, id_counter + len(df)),
                        'indicator_id': [int(2)] * len(df),
                        'census_tract_id': df['tract'].astype(int),
                        'indicator_name': indicator_name,
                        'year': int(year),
                        'value': df.iloc[:, 1].fillna(0).astype(int)
                    })
                    
                    all_dataframes.append(new_df)

        # Concatenate all DataFrames
        final_dataframe = pd.concat(all_dataframes)

        # Save to a new CSV file
        final_dataframe.to_csv(os.path.join(short_dir, 'Main_Agg.csv'), index = False)
        print('aggregate table saved')

    else:

        all_sub_dataframes = []  # List to store each file's DataFrame
        id_counter = 1  # Initialize counter for the id column

        for filename in os.listdir(short_dir):
            if filename.startswith('sub') and filename.endswith('.csv'):
                filepath = os.path.join(short_dir, filename)
                sub_indicator_name, year = extract_info_from_filename(filename, ind_type)
        
                if sub_indicator_name and year:
                    df = pd.read_csv(filepath)
                    df['tract'] = pd.to_numeric(df['tract'], errors='coerce').fillna(0).astype(int)
                    df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
            
            # Create a new DataFrame with the required columns
                    new_sub_df = pd.DataFrame({
                        'id': range(id_counter, id_counter + len(df)),
                        'indicator_id': [int(2)] * len(df),
                        'census_tract_id': df['tract'].astype(int),
                        'sub_group_indicator_name': sub_indicator_name,
                        'year': int(year),
                        'value': df.iloc[:, 1].fillna(0).astype(int)
            })
                    id_counter += len(df)  # Increment the id counter
                    
                    # Append this new DataFrame to the list
                    all_sub_dataframes.append(new_sub_df)

# Concatenate all DataFrames for 'sub' into one
final_sub_dataframe = pd.concat(all_sub_dataframes, ignore_index=True)

# Save to a new CSV file
final_sub_dataframe.to_csv(os.path.join(short_dir, 'Sub_Agg.csv'), index=False)

print('sub_aggregate table saved')

aggregate table saved
sub_aggregate table saved
