In [2]:
import requests
import pandas as pd

In [3]:
# Census api link
api_url = 'https://api.census.gov/data/2023/acs/acsse?get=group(K200802)&ucgid=0100000US'

try:  
    #make a request to the API
    response = requests.get(api_url)

    #raise an error for bad responses
    response.raise_for_status()

    #parse the JSON respons
    data = response.json()

    if data and len(data) > 1:
        column_names = data[0]
        data_rows = data[1:]

        df = pd.DataFrame(data_rows, columns=column_names)
        
        print("Succesffully retrieved data:")
          # Display the first few rows of the DataFrame
        df.info()  # Display DataFrame information
    else:
        print("No data found or the response is empty.")
except requests.exceptions.RequestException as e:
    print(f"An error occurred while fetching data from the API: {e}")
except ValueError as e:
    print(f"Error parsing JSON response: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Succesffully retrieved data:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 23 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   GEO_ID         1 non-null      object
 1   NAME           1 non-null      object
 2   K200802_001E   1 non-null      object
 3   K200802_001EA  0 non-null      object
 4   K200802_001M   1 non-null      object
 5   K200802_001MA  0 non-null      object
 6   K200802_002E   1 non-null      object
 7   K200802_002EA  0 non-null      object
 8   K200802_002M   1 non-null      object
 9   K200802_002MA  0 non-null      object
 10  K200802_003E   1 non-null      object
 11  K200802_003EA  0 non-null      object
 12  K200802_003M   1 non-null      object
 13  K200802_003MA  0 non-null      object
 14  K200802_004E   1 non-null      object
 15  K200802_004EA  0 non-null      object
 16  K200802_004M   1 non-null      object
 17  K200802_004MA  0 non-null      object
 18  K2008

In [4]:
df.head()


Unnamed: 0,GEO_ID,NAME,K200802_001E,K200802_001EA,K200802_001M,K200802_001MA,K200802_002E,K200802_002EA,K200802_002M,K200802_002MA,...,K200802_003MA,K200802_004E,K200802_004EA,K200802_004M,K200802_004MA,K200802_005E,K200802_005EA,K200802_005M,K200802_005MA,ucgid
0,0100000US,United States,139948165,,145766,,17483569,,77779,,...,,40641128,,134999,,12503861,,75748,,0100000US


In [5]:
# Rename columns for clarity
rename_mapping = {
    'GEO_ID': 'Geographic Identifier',
    'NAME': 'Location Name',
    'K200802_001E': 'Travel Time to Work - Total Estimate',
    'K200802_001EA': 'Travel Time to Work - Total Estimate Annotation',
    'K200802_001M': 'Travel Time to Work - Total Margin of Error',
    'K200802_001MA': 'Travel Time to Work - Total MOE Annotation',
    'K200802_002E': 'Travel Time to Work - Less than 10 Min Estimate',
    'K200802_002EA': 'Travel Time to Work - Less than 10 Min Annotation',
    'K200802_002M': 'Travel Time to Work - Less than 10 Min MOE',
    'K200802_002MA': 'Travel Time to Work - Less than 10 Min MOE Annotation',
    'K200802_003E': 'Travel Time to Work - 10 to 29 Min Estimate',
    'K200802_003EA': 'Travel Time to Work - 10 to 29 Min Annotation',
    'K200802_003M': 'Travel Time to Work - 10 to 29 Min MOE',
    'K200802_003MA': 'Travel Time to Work - 10 to 29 Min MOE Annotation',
    'K200802_004E': 'Travel Time to Work - 30 to 59 Min Estimate',
    'K200802_004EA': 'Travel Time to Work - 30 to 59 Min Annotation',
    'K200802_004M': 'Travel Time to Work - 30 to 59 Min MOE',
    'K200802_004MA': 'Travel Time to Work - 30 to 59 Min MOE Annotation',
    'K200802_005E': 'Travel Time to Work - 60+ Min Estimate',
    'K200802_005EA': 'Travel Time to Work - 60+ Min Annotation',
    'K200802_005M': 'Travel Time to Work - 60+ Min MOE',
    'K200802_005MA': 'Travel Time to Work - 60+ Min MOE Annotation',
    'ucgid': 'Universal Geographic ID' # Or you could drop this if 'GEO_ID' is sufficient
}

df = df.rename(columns=rename_mapping)

print(df.columns.tolist())
df.head()

['Geographic Identifier', 'Location Name', 'Travel Time to Work - Total Estimate', 'Travel Time to Work - Total Estimate Annotation', 'Travel Time to Work - Total Margin of Error', 'Travel Time to Work - Total MOE Annotation', 'Travel Time to Work - Less than 10 Min Estimate', 'Travel Time to Work - Less than 10 Min Annotation', 'Travel Time to Work - Less than 10 Min MOE', 'Travel Time to Work - Less than 10 Min MOE Annotation', 'Travel Time to Work - 10 to 29 Min Estimate', 'Travel Time to Work - 10 to 29 Min Annotation', 'Travel Time to Work - 10 to 29 Min MOE', 'Travel Time to Work - 10 to 29 Min MOE Annotation', 'Travel Time to Work - 30 to 59 Min Estimate', 'Travel Time to Work - 30 to 59 Min Annotation', 'Travel Time to Work - 30 to 59 Min MOE', 'Travel Time to Work - 30 to 59 Min MOE Annotation', 'Travel Time to Work - 60+ Min Estimate', 'Travel Time to Work - 60+ Min Annotation', 'Travel Time to Work - 60+ Min MOE', 'Travel Time to Work - 60+ Min MOE Annotation', 'Universal Ge

Unnamed: 0,Geographic Identifier,Location Name,Travel Time to Work - Total Estimate,Travel Time to Work - Total Estimate Annotation,Travel Time to Work - Total Margin of Error,Travel Time to Work - Total MOE Annotation,Travel Time to Work - Less than 10 Min Estimate,Travel Time to Work - Less than 10 Min Annotation,Travel Time to Work - Less than 10 Min MOE,Travel Time to Work - Less than 10 Min MOE Annotation,...,Travel Time to Work - 10 to 29 Min MOE Annotation,Travel Time to Work - 30 to 59 Min Estimate,Travel Time to Work - 30 to 59 Min Annotation,Travel Time to Work - 30 to 59 Min MOE,Travel Time to Work - 30 to 59 Min MOE Annotation,Travel Time to Work - 60+ Min Estimate,Travel Time to Work - 60+ Min Annotation,Travel Time to Work - 60+ Min MOE,Travel Time to Work - 60+ Min MOE Annotation,Universal Geographic ID
0,0100000US,United States,139948165,,145766,,17483569,,77779,,...,,40641128,,134999,,12503861,,75748,,0100000US


In [6]:
#drop columns that are not needed
df.drop(df.columns[df.columns.str.contains('Universal')], axis=1, inplace=True)
df.head()

Unnamed: 0,Geographic Identifier,Location Name,Travel Time to Work - Total Estimate,Travel Time to Work - Total Estimate Annotation,Travel Time to Work - Total Margin of Error,Travel Time to Work - Total MOE Annotation,Travel Time to Work - Less than 10 Min Estimate,Travel Time to Work - Less than 10 Min Annotation,Travel Time to Work - Less than 10 Min MOE,Travel Time to Work - Less than 10 Min MOE Annotation,...,Travel Time to Work - 10 to 29 Min MOE,Travel Time to Work - 10 to 29 Min MOE Annotation,Travel Time to Work - 30 to 59 Min Estimate,Travel Time to Work - 30 to 59 Min Annotation,Travel Time to Work - 30 to 59 Min MOE,Travel Time to Work - 30 to 59 Min MOE Annotation,Travel Time to Work - 60+ Min Estimate,Travel Time to Work - 60+ Min Annotation,Travel Time to Work - 60+ Min MOE,Travel Time to Work - 60+ Min MOE Annotation
0,0100000US,United States,139948165,,145766,,17483569,,77779,,...,156877,,40641128,,134999,,12503861,,75748,


In [7]:
#pivot dataframe for easier analysis
df_pivot = df.melt(

    id_vars=['Location Name'],
    value_vars=[
        'Travel Time to Work - Total Estimate',
        'Travel Time to Work - Less than 10 Min Estimate',
        'Travel Time to Work - 10 to 29 Min Estimate',
        'Travel Time to Work - 30 to 59 Min Estimate',
        'Travel Time to Work - 60+ Min Estimate'
    ],
    var_name='Travel Time',
    value_name='Amount'

)

In [8]:

df_pivot.drop(df.columns[df.columns.str.contains('Location')], axis=1, inplace=True)

df_pivot.head()

Unnamed: 0,Travel Time,Amount
0,Travel Time to Work - Total Estimate,139948165
1,Travel Time to Work - Less than 10 Min Estimate,17483569
2,Travel Time to Work - 10 to 29 Min Estimate,69319607
3,Travel Time to Work - 30 to 59 Min Estimate,40641128
4,Travel Time to Work - 60+ Min Estimate,12503861


In [9]:
df_pivot = df_pivot.rename(columns={
    'Travel Time': 'US Travel Times to Work',
    'Amount': 'Total Workers'
})

In [10]:
#rename rows in the 'US Travel Times to Work' column
df_pivot['US Travel Times to Work'] = df_pivot['US Travel Times to Work'].replace({
    'Travel Time to Work - Total Estimate': 'Total Travel Time',
    'Travel Time to Work - Less than 10 Min Estimate': 'Less than 10 Minutes',
    'Travel Time to Work - 10 to 29 Min Estimate': '10 to 29 Minutes',
    'Travel Time to Work - 30 to 59 Min Estimate': '30 to 59 Minutes',
    'Travel Time to Work - 60+ Min Estimate': '60+ Minutes'
})

In [11]:
#remove total estimate row      
df_pivot = df_pivot.drop(index=[0])

In [12]:
df_pivot.head()
# Save the cleaned and pivoted DataFrame to a CSV file
df_pivot.to_csv('travel_times_to_work.csv', index=False)

In [13]:
df.drop(df.columns[df.columns.str.contains('MOE')], axis=1, inplace=True)
df.head()

Unnamed: 0,Geographic Identifier,Location Name,Travel Time to Work - Total Estimate,Travel Time to Work - Total Estimate Annotation,Travel Time to Work - Total Margin of Error,Travel Time to Work - Less than 10 Min Estimate,Travel Time to Work - Less than 10 Min Annotation,Travel Time to Work - 10 to 29 Min Estimate,Travel Time to Work - 10 to 29 Min Annotation,Travel Time to Work - 30 to 59 Min Estimate,Travel Time to Work - 30 to 59 Min Annotation,Travel Time to Work - 60+ Min Estimate,Travel Time to Work - 60+ Min Annotation
0,0100000US,United States,139948165,,145766,17483569,,69319607,,40641128,,12503861,


Means of Transportation to Work by Workers' Earnings in the Past 12 Months (in 2023 Inflation-Adjusted Dollars)

In [14]:
# Census api link
api_url = 'https://api.census.gov/data/2023/acs/acs1?get=group(C08119)&ucgid=0100000US'

try:  
    #make a request to the API
    response = requests.get(api_url)

    #raise an error for bad responses
    response.raise_for_status()

    #parse the JSON respons
    data2 = response.json()

    if data2 and len(data2) > 1:
        column_names = data2[0]
        data_rows = data2[1:]

        df_earnings = pd.DataFrame(data_rows, columns=column_names)
        
        print("Succesffully retrieved data:")
          # Display the first few rows of the DataFrame
        df_earnings.info()  # Display DataFrame information
    else:
        print("No data found or the response is empty.")
except requests.exceptions.RequestException as e:
    print(f"An error occurred while fetching data from the API: {e}")
except ValueError as e:
    print(f"Error parsing JSON response: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Succesffully retrieved data:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Columns: 219 entries, C08119_001E to ucgid
dtypes: object(219)
memory usage: 1.8+ KB


In [15]:
df_earnings.head()

Unnamed: 0,C08119_001E,C08119_001EA,C08119_001M,C08119_001MA,C08119_002E,C08119_002EA,C08119_002M,C08119_002MA,C08119_003E,C08119_003EA,...,C08119_053EA,C08119_053M,C08119_053MA,C08119_054E,C08119_054EA,C08119_054M,C08119_054MA,GEO_ID,NAME,ucgid
0,162417601,,156567,,14774933,,75361,,7738093,,...,,23817,,10502520,,62911,,0100000US,United States,0100000US
