In [25]:
import pandas as pd
import requests

# Function to fetch data from the Census API
def fetch_census_data(url, params):
    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        return pd.DataFrame(data[1:], columns=data[0])
    else:
        print(f"Error: {response.status_code} - {response.text}")
        return pd.DataFrame()

# Define endpoints
url_decennial = 'https://api.census.gov/data/2020/dec/pl'
url_acs = 'https://api.census.gov/data/2020/acs/acs5'

# Parameters for Decennial Census
params_decennial = {
    'get': 'NAME,P1_001N,P1_002N,P1_026N',
    'for': 'county:*',
    'in': 'state:*'
}

# Parameters for ACS (income data and median earnings)
params_acs = {
    'get': 'NAME,B19013_001E,B19013_001M,B20002_001E',
    'for': 'county:*',
    'in': 'state:*'
}

# Fetch data from both APIs
df_decennial = fetch_census_data(url_decennial, params_decennial)
df_acs = fetch_census_data(url_acs, params_acs)

# Debug: Print column names
print("Decennial Census Data Columns:", df_decennial.columns)
print("ACS Data Columns:", df_acs.columns)

if not df_decennial.empty and not df_acs.empty:
    # Rename columns based on actual data
    df_decennial.rename(columns={
        'NAME': 'County_State',
        'P1_001N': 'Total_Population',
        'P1_002N': 'Male_Population',
        'P1_026N': 'Female_Population',
        'state': 'State_Code',
        'county': 'County_Code'
    }, inplace=True)

    df_acs.rename(columns={
        'NAME': 'County_State',
        'B19013_001E': 'Median_Income',
        'B19013_001M': 'Margin_Error_Income',
        'B20002_001E': 'Median_Earnings',
        'state': 'State_Code',
        'county': 'County_Code'
    }, inplace=True)
# Convert numeric columns with validation
numeric_columns_decennial = ['Total_Population', 'Male_Population', 'Female_Population']
numeric_columns_acs = ['Median_Income', 'Margin_Error_Income', 'Median_Earnings']

for col in numeric_columns_decennial:
    if col in df_decennial.columns:
        df_decennial[col] = pd.to_numeric(df_decennial[col], errors='coerce')

for col in numeric_columns_acs:
    if col in df_acs.columns:
        df_acs[col] = pd.to_numeric(df_acs[col], errors='coerce')

# Merge datasets on common keys with validation
if all(key in df_decennial.columns for key in ['County_State', 'State_Code', 'County_Code']) and \
   all(key in df_acs.columns for key in ['County_State', 'State_Code', 'County_Code', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']):
    df = pd.merge(
        df_decennial,
        df_acs[['County_State', 'State_Code', 'County_Code', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']],
        on=['County_State', 'State_Code', 'County_Code'],
        how='inner'
    )

    # Calculate percentages for male and female population
    df['Percent_Male'] = (df['Male_Population'] / df['Total_Population'] * 100).round(2)
    df['Percent_Female'] = (df['Female_Population'] / df['Total_Population'] * 100).round(2)

    # Handle division by zero for percentages
    df.loc[df['Total_Population'] == 0, ['Percent_Male', 'Percent_Female']] = 0

    print("Dataset successfully processed and merged.")
else:
    print("Missing required columns for merging.")

    

SyntaxError: invalid syntax (1414979543.py, line 116)

In [37]:
# Convert numeric columns with validation
numeric_columns_decennial = ['Total_Population', 'Male_Population', 'Female_Population']
numeric_columns_acs = ['Median_Income', 'Margin_Error_Income', 'Median_Earnings']

# Debug: Print column names
print("Decennial Census Data Columns:", df_decennial.columns)
print("ACS Data Columns:", df_acs.columns)

if not df_decennial.empty and not df_acs.empty:
    # Rename columns based on actual data
    df_decennial.rename(columns={
        'NAME': 'County_State',
        'P1_001N': 'Total_Population',
        'P1_002N': 'Male_Population',
        'P1_026N': 'Female_Population',
        'state': 'State_Code',
        'county': 'County_Code'
    }, inplace=True)

    df_acs.rename(columns={
        'NAME': 'County_State',
        'B19013_001E': 'Median_Income',
        'B19013_001M': 'Margin_Error_Income',
        'state': 'State_Code',
        'county': 'County_Code'
    }, inplace=True)

for col in numeric_columns_decennial:
    if col in df_decennial.columns:
        df_decennial[col] = pd.to_numeric(df_decennial[col], errors='coerce')

for col in numeric_columns_acs:
    if col in df_acs.columns:
        df_acs[col] = pd.to_numeric(df_acs[col], errors='coerce')

# Merge datasets on common keys with validation
if all(key in df_decennial.columns for key in ['County_State', 'State_Code', 'County_Code']) and \
   all(key in df_acs.columns for key in ['County_State', 'State_Code', 'County_Code', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']):
    df = pd.merge(
        df_decennial,
        df_acs[['County_State', 'State_Code', 'County_Code', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']],
        on=['County_State', 'State_Code', 'County_Code'],
        how='inner'
    )

    # Calculate percentages for male and female population
    df['Percent_Male'] = (df['Male_Population'] / df['Total_Population'] * 100).round(2)
    df['Percent_Female'] = (df['Female_Population'] / df['Total_Population'] * 100).round(2)

    # Handle division by zero for percentages
    df.loc[df['Total_Population'] == 0, ['Percent_Male', 'Percent_Female']] = 0

    print("Dataset successfully processed and merged.")
else:
    print("Missing required columns for merging.")


Decennial Census Data Columns: Index(['County_State', 'Total_Population', 'Male_Population',
       'Female_Population', 'State_Code', 'County_Code'],
      dtype='object')
ACS Data Columns: Index(['County_State', 'Median_Income', 'Margin_Error_Income',
       'Median_Earnings', 'State_Code', 'County_Code'],
      dtype='object')
Dataset successfully processed and merged.


In [31]:
# Map state codes to state names
    state_mapping = {
        '01': 'Alabama', '02': 'Alaska', '04': 'Arizona', '05': 'Arkansas', '06': 'California',
        '08': 'Colorado', '09': 'Connecticut', '10': 'Delaware', '11': 'District of Columbia',
        '12': 'Florida', '13': 'Georgia', '15': 'Hawaii', '16': 'Idaho', '17': 'Illinois',
        '18': 'Indiana', '19': 'Iowa', '20': 'Kansas', '21': 'Kentucky', '22': 'Louisiana',
        '23': 'Maine', '24': 'Maryland', '25': 'Massachusetts', '26': 'Michigan', '27': 'Minnesota',
        '28': 'Mississippi', '29': 'Missouri', '30': 'Montana', '31': 'Nebraska', '32': 'Nevada',
        '33': 'New Hampshire', '34': 'New Jersey', '35': 'New Mexico', '36': 'New York',
        '37': 'North Carolina', '38': 'North Dakota', '39': 'Ohio', '40': 'Oklahoma', '41': 'Oregon',
        '42': 'Pennsylvania', '44': 'Rhode Island', '45': 'South Carolina', '46': 'South Dakota',
        '47': 'Tennessee', '48': 'Texas', '49': 'Utah', '50': 'Vermont', '51': 'Virginia',
        '53': 'Washington', '54': 'West Virginia', '55': 'Wisconsin', '56': 'Wyoming'
    }
    df['State_Name'] = df['State_Code'].map(state_mapping)

    # Reorder columns for better readability
    df = df[['County_State', 'State_Name', 'County_Code', 'Total_Population', 'Male_Population', 
             'Female_Population', 'Percent_Male', 'Percent_Female', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']]

    # Save the final dataset to a CSV file
    csv_filename = 'census_data_income_earnings.csv'
    df.to_csv(csv_filename, index=False)
    print(f"Data successfully saved to {csv_filename}")
else:
    print("Error in fetching data from the APIs.")

IndentationError: unexpected indent (1714072285.py, line 2)

In [17]:
# Convert numeric columns
    for col in ['Total_Population', 'Male_Population', 'Female_Population', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']:
        df_decennial[col] = pd.to_numeric(df_decennial[col], errors='coerce')
        df_acs[col] = pd.to_numeric(df_acs[col], errors='coerce')

# Merge datasets on common keys
    df = pd.merge(df_decennial, df_acs[['County_State', 'State_Code', 'County_Code', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']],
                  on=['County_State', 'State_Code', 'County_Code'], how='inner')

# Calculate percentages for male and female population
    df['Percent_Male'] = (df['Male_Population'] / df['Total_Population'] * 100).round(2)
    df['Percent_Female'] = (df['Female_Population'] / df['Total_Population'] * 100).round(2)

IndentationError: unexpected indent (4154683020.py, line 2)

In [33]:

    # Map state codes to state names
    state_mapping = {
        '01': 'Alabama', '02': 'Alaska', '04': 'Arizona', '05': 'Arkansas', '06': 'California',
        '08': 'Colorado', '09': 'Connecticut', '10': 'Delaware', '11': 'District of Columbia',
        '12': 'Florida', '13': 'Georgia', '15': 'Hawaii', '16': 'Idaho', '17': 'Illinois',
        '18': 'Indiana', '19': 'Iowa', '20': 'Kansas', '21': 'Kentucky', '22': 'Louisiana',
        '23': 'Maine', '24': 'Maryland', '25': 'Massachusetts', '26': 'Michigan', '27': 'Minnesota',
        '28': 'Mississippi', '29': 'Missouri', '30': 'Montana', '31': 'Nebraska', '32': 'Nevada',
        '33': 'New Hampshire', '34': 'New Jersey', '35': 'New Mexico', '36': 'New York',
        '37': 'North Carolina', '38': 'North Dakota', '39': 'Ohio', '40': 'Oklahoma', '41': 'Oregon',
        '42': 'Pennsylvania', '44': 'Rhode Island', '45': 'South Carolina', '46': 'South Dakota',
        '47': 'Tennessee', '48': 'Texas', '49': 'Utah', '50': 'Vermont', '51': 'Virginia',
        '53': 'Washington', '54': 'West Virginia', '55': 'Wisconsin', '56': 'Wyoming'
    }
    df['State_Name'] = df['State_Code'].map(state_mapping)

    # Reorder columns for better readability
    df = df[['County_State', 'State_Name', 'County_Code', 'Total_Population', 'Male_Population', 
             'Female_Population', 'Percent_Male', 'Percent_Female', 'Median_Income', 'Margin_Error_Income', 'Median_Earnings']]

    # Save the final dataset to a CSV file
    csv_filename = 'census_data_income_earnings.csv'
    df.to_csv(csv_filename, index=False)
    print(f"Data successfully saved to {csv_filename}")
else:
    print("Error in fetching data from the APIs.")


SyntaxError: invalid syntax (1591783968.py, line 25)