In [119]:
import requests
import csv
import pandas as pd
import os
import re

# Base URL for the ACS data
base_url = "https://api.census.gov/data/{year}/acs/acs5"
subject_url = "https://api.census.gov/data/{year}/acs/acs5/subject"
profile_url = "https://api.census.gov/data/{year}/acs/acs5/profile"

# Places to change
# (1) variables (2) relative path

# Variables to fetch

# Economics #

# Median Income
#variables = {'main_median_income': "NAME,S1903_C03_001E", 'sub_median_white': "NAME,S1903_C03_002E",
             #'sub_median_black': "NAME,S1903_C03_003E", 'sub_median_ind_ala': "NAME,S1903_C03_004E",
             #'sub_median_asia': "NAME,S1903_C03_005E", 'sub_median_hawai': "NAME,S1903_C03_006E",
             #'sub_median_other': "NAME,S1903_C03_007E"  }

# Mean Income
#variables = {'main_mean_income': "NAME,S1902_C03_019E", 'sub_mean_white': "NAME,S1902_C03_020E",
             #'sub_mean_black': "NAME,S1902_C03_021E", 'sub_mean_ind_ala': "NAME,S1902_C03_022E",
             #'sub_mean_asia': "NAME,S1902_C03_023E", 'sub_mean_hawai': "NAME,S1902_C03_024E",
             #'sub_mean_other': "NAME,S1902_C03_025E"}

# Housing #

# HouseRent
# variables = {'main_agg_rent': "NAME,B25060_001E", 'sub_median_rent': "NAME,B25058_001E", 
             #'sub_lower_rent': "NAME,B25057_001E", 'sub_upper_rent': "NAME,B25059_001E"}
# HouseholdType
#variables = {'main_household_total': "NAME,B11001_001E", 'sub_household_family': "NAME,B11001_002EA", 
             #'sub_household_nonfamily': "NAME,B11001_007E"}


# Education
# median earning
#variables = {'main_median_earning': "NAME,S1501_C01_059E", 'sub_less_high': "NAME,S1501_C01_060E",
             #'sub_high': "NAME,S1501_C01_061E", 'sub_college': "NAME,S1501_C01_062E",
             #'sub_bachelor': "NAME,S1501_C01_063E", 'sub_grad': "NAME,S1501_C01_064E"}

# enrollment
#variables = {'main_enroll': "NAME,S1401_C01_001E", 'sub_nursery': "NAME,S1401_C01_002E",
             #'sub_kind_12': "NAME,S1401_C01_003E", 'sub_college': "NAME,S1401_C01_008E",
             #'sub_grad': "NAME,S1401_C01_009E"}

# Health #
# Disability
#variables = {'main_disability': "NAME,S1810_C02_001E", 'sub_hearing': "NAME,S1810_C02_019E",
             #'sub_vision': "NAME,S1810_C02_029E", 'sub_cognitive': "NAME,S1810_C02_039E",
             #'sub_ambulatory': "NAME,S1810_C02_047E", 'sub_self_care': "NAME,S1810_C02_055E",
             #'sub_ind_living': "NAME,S1810_C02_063E"}

# Insurance
#variables = {'main_population': "NAME,S2701_C01_001E", 'sub_insured': "NAME,S2701_C02_001E",
             #'sub_uninsured': "NAME,S2701_C04_001E"}

# Population #
# Races
#variables = {'main_population': "NAME,S2701_C01_001E", 'sub_insured': "NAME,S2701_C02_001E",
             #'sub_uninsured': "NAME,S2701_C04_001E"}

#variables = {'main_population': "NAME,DP05_0033E", 'sub_pop_white': "NAME,DP05_0037E",
             #'sub_pop_black': "NAME,DP05_0038E", 'sub_pop_ind_ala': "NAME,DP05_0039E",
             #'sub_pop_asia': "NAME,DP05_0044E", 'sub_pop_hawai': "NAME,DP05_0052E",
             #'sub_pop_other': "NAME,DP05_0057E", 'sub_pop_two': "NAME,DP05_0058E"}

variables = {'main_median_age': "NAME,B01002_001E", 'sub_male_age': "NAME,B01002_002E",
             'sub_female_age': "NAME,B01002_003E"}

# Location filters
location = "for=tract:*&in=state:17+county:031"

In [120]:
# Helper Function-1

def extract_info_from_filename(filename, ind_type):
    # Assuming filename is something like 'main_2017.csv'
    parts = filename.split('_')
    if len(parts) > 1:
        year_part = parts[-1]  # This would be '2017.csv'
        year = year_part.split('.')[0]  # This splits '2017.csv' into '2017' and 'csv' and takes the first part
        if ind_type == 'main':
            indicator = filename[5:-9]
        else:
            indicator = filename[4:-9]
        if year.isdigit():  # Check if 'year' is all digits
            return (indicator, int(year))
    return None, None

all_dataframes = []

In [121]:
for name, variable in variables.items():    

    ind_type = name.split('_')[0]

    for year in range(2017, 2023):
        # Construct the URL for the current year
        if variable[5:][0] == 'B':    
            url = f"{base_url.format(year=year)}?get={variable}&{location}"
        elif variable[5:][0] == 'S':
            url = f"{subject_url.format(year=year)}?get={variable}&{location}"
        else:
            url = f"{profile_url.format(year=year)}?get={variable}&{location}"
            
        # Make the request
        response = requests.get(url)

        data = response.json()

        current_dir = os.getcwd()

        # relative path specifically for Contract Rent indicator
        relative_path = os.path.join('data_downloaded', 'Population', 'MedianAge')

        # Combine the current directory with the relative path
        full_base_path = os.path.join(current_dir, relative_path)

        # Specify the path to save the CSV file, one for each year
        file_path = os.path.join(full_base_path, f'{ind_type}/{name}_{year}.csv')
        
        # Open a CSV file for writing for each year
        with open(file_path, mode='w', newline='') as file:
            writer = csv.writer(file)

            # Write the data into the CSV file
            writer.writerows(data)

        print(f"Data for {year} for {name} has been written to {file_path}")

Data for 2017 for main_median_age has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Population/MedianAge/main/main_median_age_2017.csv
Data for 2018 for main_median_age has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Population/MedianAge/main/main_median_age_2018.csv
Data for 2019 for main_median_age has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Population/MedianAge/main/main_median_age_2019.csv
Data for 2020 for main_median_age has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Population/MedianAge/main/main_median_age_2020.csv
Data for 2021 for main_median_age has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Population/MedianAge/main/main_median_age_2021.csv
Data for 2022 for main_median_age has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Population/Me

In [122]:
for ind_type in ['main', 'sub']:
    
    short_dir = os.path.join(full_base_path, f'{ind_type}')

    if ind_type == 'main':    

        id_counter = 1

        for filename in os.listdir(short_dir):

            if filename.endswith('.csv'):

                filepath = os.path.join(short_dir, filename)
            
                indicator_name, year = extract_info_from_filename(filename, ind_type)

                if indicator_name and year:
                    df = pd.read_csv(filepath)
                    # Create a new DataFrame with the required columns
                    new_df = pd.DataFrame({
                        'id': range(id_counter, id_counter + len(df)),
                        'indicator_id': [int(2)] * len(df),
                        'census_tract_id': df['tract'].astype(int),
                        'indicator_name': indicator_name,
                        'year': int(year),
                        'value': df.iloc[:, 1].fillna(0).astype(int)
                    })
                    
                    all_dataframes.append(new_df)

        # Concatenate all DataFrames
        final_dataframe = pd.concat(all_dataframes)

        # Save to a new CSV file
        final_dataframe.to_csv(os.path.join(short_dir, 'Main_Agg.csv'), index = False)
        print('aggregate table saved')

    else:

        all_sub_dataframes = []  # List to store each file's DataFrame
        id_counter = 1  # Initialize counter for the id column

        for filename in os.listdir(short_dir):
            if filename.startswith('sub') and filename.endswith('.csv'):
                filepath = os.path.join(short_dir, filename)
                sub_indicator_name, year = extract_info_from_filename(filename, ind_type)
        
                if sub_indicator_name and year:
                    df = pd.read_csv(filepath)
                    df['tract'] = pd.to_numeric(df['tract'], errors='coerce').fillna(0).astype(int)
                    df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
            
            # Create a new DataFrame with the required columns
                    new_sub_df = pd.DataFrame({
                        'id': range(id_counter, id_counter + len(df)),
                        'indicator_id': [int(2)] * len(df),
                        'census_tract_id': df['tract'].astype(int),
                        'sub_group_indicator_name': sub_indicator_name,
                        'year': int(year),
                        'value': df.iloc[:, 1].fillna(0).astype(int)
            })
                    id_counter += len(df)  # Increment the id counter
                    
                    # Append this new DataFrame to the list
                    all_sub_dataframes.append(new_sub_df)

# Concatenate all DataFrames for 'sub' into one
final_sub_dataframe = pd.concat(all_sub_dataframes, ignore_index=True)

# Save to a new CSV file
final_sub_dataframe.to_csv(os.path.join(short_dir, 'Sub_Agg.csv'), index=False)

print('sub_aggregate table saved')

aggregate table saved
sub_aggregate table saved


  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors=