In [None]:
import pandas as pd
from sodapy import Socrata
from typing import Final
import os


In [None]:
# Query configuration for the Socrata requests made in this notebook.

# Identifier of the dataset that is queried.
DATASET_IDENTIFIER: Final = "keg4-3bc2"

# SoQL filter applied both to the row-count query and to the data fetch.
WHERE_CLAUSE: Final = "border = 'US-Canada Border' AND date >= '2017-01-01'"

# App token read from the environment; None when SOCRATA_APP_TOKEN is unset.
APP_TOKEN: Final = os.environ.get("SOCRATA_APP_TOKEN")

In [None]:
# Report whether an app token was found in the environment.
# The status is printed so that a missing token is visible in the notebook
# output; previously the message was stored but never shown.
app_token_status = (
    "Successfully loaded app token."
    if APP_TOKEN
    else "Warning: APP_TOKEN not found."
)
print(app_token_status)

In [None]:
# Create the Socrata client for the data.bts.gov domain.
# APP_TOKEN may be None when the environment variable is not set.
client = Socrata(domain="data.bts.gov", app_token=APP_TOKEN)


In [None]:
# Ask the API how many rows match WHERE_CLAUSE before downloading them;
# the count is used later as the `limit` of the data request.
count_query = f"SELECT count(*) WHERE {WHERE_CLAUSE}"
count_response = client.get(DATASET_IDENTIFIER, query=count_query)

# The response is indexed as a list whose first item holds the "count" field.
row_count = count_response[0]["count"]
print("row_count", row_count)

In [None]:
# Download every row matching WHERE_CLAUSE in a single request, using the
# previously fetched count as the limit.
row_count = int(row_count)  # normalise to int before using it as the limit
try:
    results = client.get(DATASET_IDENTIFIER, limit=row_count, where=WHERE_CLAUSE)
finally:
    # Always release the client, even when the request raises; otherwise the
    # client would stay open after a failed fetch.
    client.close()
print("length", len(results))


In [None]:
# Load the fetched records into a pandas DataFrame for cleaning and analysis.
results_df = pd.DataFrame.from_records(data=results)

Data cleaning

In [None]:
# Remove columns that are not needed.
# The frame is rebound instead of mutated in place, and errors="ignore" keeps
# the cell safe to re-run (the column is already gone the second time) and
# tolerant of a response that carries no "point" column at all.
results_df = results_df.drop(columns=["point"], errors="ignore")

In [None]:
# Show every row that has at least one missing value.
has_missing = results_df.isna().any(axis="columns")
nulls_df = results_df[has_missing]
print("Null values")
print(nulls_df)

# As of 8/17/2025, the "state" field of the new port of entry
# "Chief Mountain Mt Poe" hasn't been populated yet.
# Every record with port_code == "3315" therefore gets "state" set to "MT".
# port_code is cast to str first so the comparison against "3315" is a
# string comparison.
results_df["port_code"] = results_df["port_code"].astype(str)
is_port_3315 = results_df["port_code"] == "3315"
results_df.loc[is_port_3315, "state"] = "MT"

# Repeat the missing-value check on the updated frame.
print("Check null values again")
still_missing = results_df.isna().any(axis="columns")
print(results_df[still_missing])

# Show the rows whose state was just set.
print("Check rows with port code 3315")
print(results_df[is_port_3315])

In [None]:
# Sanity check: leave the cleaned frame as the cell's last expression so the
# notebook renders it with its rich display instead of the plain-text
# output produced by print().
results_df