In [21]:
import requests
import csv
import pandas as pd
import os
import re

# Base URL for the ACS data
base_url = "https://api.census.gov/data/{year}/acs/acs5"

# Variables to fetch

#Houserent
variables = {'main_agg_rent': "NAME,B25060_001E", 'sub_median_rent': "NAME,B25058_001E", 
             'sub_lower_rent': "NAME,B25057_001E", 'sub_upper_rent': "NAME,B25059_001E"}

# Location filters
location = "for=tract:*&in=state:17+county:031"

In [42]:
def extract_info_from_filename(filename, ind_type):
    # Assuming filename is something like 'main_2017.csv'
    parts = filename.split('_')
    if len(parts) > 1:
        year_part = parts[-1]  # This would be '2017.csv'
        year = year_part.split('.')[0]  # This splits '2017.csv' into '2017' and 'csv' and takes the first part
        if ind_type == 'main':
            indicator = filename[5:-9]
        else:
            indicator = filename[4:-9]
        if year.isdigit():  # Check if 'year' is all digits
            return (indicator, int(year))
    return None, None


all_dataframes = []

In [38]:
name = 'main_agg_rent_2015'
print(extract_info_from_filename(name, 'main'))

('agg_rent', 2015)


In [43]:
# Process each year from 2015 to 2019
for name, variable in variables.items():    

    ind_type = name.split('_')[0]

    for year in range(2015, 2023):
        # Construct the URL for the current year
        url = f"{base_url.format(year=year)}?get={variable}&{location}"

        # Make the request
        response = requests.get(url)
        data = response.json()

        # Specify the path to save the CSV file, one for each year
        file_path = f'/home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/{ind_type}/{name}_{year}.csv'
        
        # Open a CSV file for writing for each year
        with open(file_path, mode='w', newline='') as file:
            writer = csv.writer(file)

            # Write the data into the CSV file
            writer.writerows(data)

        print(f"Data for {year} for {name} has been written to {file_path}")

Data for 2015 for main_agg_rent has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/main/main_agg_rent_2015.csv
Data for 2016 for main_agg_rent has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/main/main_agg_rent_2016.csv
Data for 2017 for main_agg_rent has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/main/main_agg_rent_2017.csv
Data for 2018 for main_agg_rent has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/main/main_agg_rent_2018.csv
Data for 2019 for main_agg_rent has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/main/main_agg_rent_2019.csv
Data for 2020 for main_agg_rent has been written to /home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/main/main_agg_

In [45]:
for ind_type in ['main', 'sub']:
    
    short_dir = short_dir = f'/home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/{ind_type}'

    if ind_type == 'main':    
        for filename in os.listdir(short_dir):
            if filename.endswith('.csv'):
                filepath = os.path.join(short_dir, filename)
            
                indicator_name, year = extract_info_from_filename(filename, ind_type)

                if indicator_name and year:
                    df = pd.read_csv(filepath)
                    # Create a new DataFrame with the required columns
                    new_df = pd.DataFrame({
                        'census_tract_id': df['tract'].astype(int),
                        'indicator_name': indicator_name,
                        'value': df.iloc[:, 1].fillna(0).astype(int),  # Assuming value is in the second column
                        'year': int(year)
                    })
                    all_dataframes.append(new_df)

        # Concatenate all DataFrames
        final_dataframe = pd.concat(all_dataframes)

        # Save to a new CSV file
        final_dataframe.to_csv('/home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/Agg_HouseRent.csv', index=False)
        print('aggregate table saved')

    else:
        all_sub_dataframes = []  # List to store each file's DataFrame
        id_counter = 1  # Initialize counter for the id column

        for filename in os.listdir(short_dir):
            if filename.startswith('sub') and filename.endswith('.csv'):
                filepath = os.path.join(short_dir, filename)
                sub_indicator_name, year = extract_info_from_filename(filename, ind_type)
        
                if sub_indicator_name and year:
                    df = pd.read_csv(filepath)
                    df['tract'] = pd.to_numeric(df['tract'], errors='coerce').fillna(0).astype(int)
                    df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
            
            # Create a new DataFrame with the required columns
                    new_sub_df = pd.DataFrame({
                        'id': range(id_counter, id_counter + len(df)),
                        'census_tract_id': df['tract'].astype(int),
                        'sub_group_indicator_name': sub_indicator_name,
                        'year': int(year),
                        'value': df.iloc[:, 1].fillna(0).astype(int)
            })
                    id_counter += len(df)  # Increment the id counter
                    
                    # Append this new DataFrame to the list
                    all_sub_dataframes.append(new_sub_df)

# Concatenate all DataFrames for 'sub' into one
final_sub_dataframe = pd.concat(all_sub_dataframes, ignore_index=True)

# Save to a new CSV file
final_sub_dataframe.to_csv('/home/yujie0706/DataForGood-chicago/dfg_chi/backend/data_downloaded/Housing/ContractRent/Sub_Agg.csv', index=False)

print('sub_aggregate table saved')

        


    

aggregate table saved
sub_aggregate table saved


  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
  df.iloc[:, 1] = pd.to_numeric(df.iloc[:, 1], errors='coerce').fillna(0).astype(int)
