In [6]:
import pandas as pd
import datawrapper as dw
import requests

In [7]:
# Using Treasury's Fiscal Data API to get pieces of the Daily Treasury Statement
# https://fiscal.treasury.gov/reports-statements/dts/
# Fetch the latest WITHDRAWAL data by category/department for the current fiscal year, FY2025.
# To cover later fiscal years, raise the upper bound of the range() in the year loop below.

# Define the base URL
url = "https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/dts/deposits_withdrawals_operating_cash"

# Rows requested per page (the API defaults to 100)
PAGE_SIZE = 10000

# Seconds to wait on the server; without a timeout a stalled connection would hang the notebook forever
REQUEST_TIMEOUT_SECONDS = 60

# Initialize an empty list to store the data
latest_data = []

# Loop through FY2025 and its quarters
for year in range(2025, 2026):  # FY2025 only (the upper bound is exclusive)
    for quarter in range(1, 5):  # Quarters 1 to 4
        print(f"Fetching data for FY {year}, Quarter {quarter}...")

        # Walk every page of the result so a quarter larger than PAGE_SIZE rows is not silently truncated
        page_number = 1
        while True:
            # Define the parameters dynamically for each year, quarter and page
            params = {
                "filter": f"transaction_type:eq:Withdrawals,record_fiscal_year:eq:{year},record_fiscal_quarter:eq:{quarter}",
                "page[size]": PAGE_SIZE,
                "page[number]": page_number,
            }

            # Make the GET request (connection errors and timeouts propagate as exceptions)
            response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)

            # A non-200 answer is reported and the rest of this quarter is skipped
            if response.status_code != 200:
                print(f"Request failed for FY {year}, Quarter {quarter} with status code {response.status_code}: {response.text}")
                break

            data = response.json()  # Parse the JSON response

            # Extract the 'data' field and append it to the list
            if "data" not in data:
                print(f"No data found for FY {year}, Quarter {quarter}.")
                break
            page_rows = data["data"]
            latest_data.extend(page_rows)

            # Stop on an empty page or once the last page reported by the API has been read;
            # a missing page count is treated as a single page
            total_pages = (data.get("meta") or {}).get("total-pages") or 1
            if not page_rows or page_number >= int(total_pages):
                break
            page_number += 1

# Convert the list of data to a DataFrame
withdrawals_latest = pd.DataFrame(latest_data)

# Convert record_date to datetime (skipped when nothing was fetched, because the column would not exist)
if not withdrawals_latest.empty:
    withdrawals_latest["record_date"] = pd.to_datetime(withdrawals_latest["record_date"])

# Display DataFrame info
withdrawals_latest.info()

# Print the range of dates in the data
if not withdrawals_latest.empty:
    min_date_latest, max_date_latest = withdrawals_latest["record_date"].agg(["min", "max"])
    print(f"Date range in latest data: {min_date_latest} to {max_date_latest}")

# Display the first few rows
withdrawals_latest.head()

Fetching data for FY 2025, Quarter 1...
Fetching data for FY 2025, Quarter 2...
Fetching data for FY 2025, Quarter 3...
Fetching data for FY 2025, Quarter 4...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14214 entries, 0 to 14213
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   record_date              14214 non-null  datetime64[ns]
 1   account_type             14214 non-null  object        
 2   transaction_type         14214 non-null  object        
 3   transaction_catg         14214 non-null  object        
 4   transaction_catg_desc    14214 non-null  object        
 5   transaction_today_amt    14214 non-null  object        
 6   transaction_mtd_amt      14214 non-null  object        
 7   transaction_fytd_amt     14214 non-null  object        
 8   table_nbr                14214 non-null  object        
 9   table_nm                 14214 non-null  object        

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2024-10-01,Treasury General Account (TGA),Withdrawals,Corporation for Public Broadcasting,,0,0,0,II,Deposits and Withdrawals of Operating Cash,82,2025,1,2024,4,10,1
1,2024-10-01,Treasury General Account (TGA),Withdrawals,Dept of Agriculture (USDA) - misc,,330,330,330,II,Deposits and Withdrawals of Operating Cash,83,2025,1,2024,4,10,1
2,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Child Nutrition,,48,48,48,II,Deposits and Withdrawals of Operating Cash,84,2025,1,2024,4,10,1
3,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Commodity Credit Corporation,,33,33,33,II,Deposits and Withdrawals of Operating Cash,85,2025,1,2024,4,10,1
4,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Federal Crop Insurance Corp Fund,,94,94,94,II,Deposits and Withdrawals of Operating Cash,86,2025,1,2024,4,10,1


In [8]:
# `withdrawals_historic` holds the archived data (FY20??–FY2024), produced by a prior script
# and saved as a parquet file in the data directory.
# `withdrawals_latest` holds the latest data (FY2025 forward) as drawn from the Fiscal Data API.

# Read the historical withdrawals data from the parquet file at the set path
withdrawals_historic_file_path = "data/withdrawals_historic.parquet"
withdrawals_historic = pd.read_parquet(withdrawals_historic_file_path)

# Combine DataFrames (historic and latest)
withdrawals = pd.concat([withdrawals_historic, withdrawals_latest], ignore_index=True)

# Normalize types BEFORE de-duplicating: the API delivers amounts as strings, so a row stored
# as text and the same row stored as a number would otherwise not compare equal.
# Ensure the record_date column is in datetime format (should be already; but as a precaution)
withdrawals["record_date"] = pd.to_datetime(withdrawals["record_date"])

# Convert amounts to numeric; non-numeric values/blanks become NaN instead of raising
withdrawals["transaction_today_amt"] = pd.to_numeric(withdrawals["transaction_today_amt"], errors='coerce')

# Drop duplicate rows if any
# Repeated tests of method show there are none, and should be impossible, but as a safety measure
# NOTE(review): the remaining object columns are compared as-is; confirm they share one type
# across the historic and latest frames if this safety net is relied upon.
withdrawals = withdrawals.drop_duplicates()

# Sort the combined DataFrame by record_date
withdrawals = withdrawals.sort_values(by="record_date").reset_index(drop=True)

# Display the combined DataFrame info
withdrawals.info()

# Print the range of dates in the combined data
# (the *_latest names are reused from the fetch cell and now describe the combined frame)
if not withdrawals.empty:
    min_date_latest, max_date_latest = withdrawals["record_date"].agg(["min", "max"])
    print(f"Date range in data: {min_date_latest} to {max_date_latest}")
    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187482 entries, 0 to 187481
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              187482 non-null  datetime64[ns]
 1   account_type             187482 non-null  object        
 2   transaction_type         187482 non-null  object        
 3   transaction_catg         187482 non-null  object        
 4   transaction_catg_desc    187482 non-null  object        
 5   transaction_today_amt    187482 non-null  int64         
 6   transaction_mtd_amt      187482 non-null  object        
 7   transaction_fytd_amt     187482 non-null  object        
 8   table_nbr                187482 non-null  object        
 9   table_nm                 187482 non-null  object        
 10  src_line_nbr             187482 non-null  object        
 11  record_fiscal_year       187482 non-null  object        
 12  record_fiscal_qu

In [9]:
# Drop rows whose transaction_catg is one of the two "Public Debt Cash Redemp." labels
# (both spellings of the Table III-B suffix) and rows whose account_type is the
# "Treasury General Account Total Withdrawals" label; keep everything else
excluded_categories = [
    "Public Debt Cash Redemp. (Table IIIB)",
    "Public Debt Cash Redemp. (Table III-B)",
]
is_excluded_category = withdrawals["transaction_catg"].isin(excluded_categories)
is_total_row = withdrawals["account_type"].eq("Treasury General Account Total Withdrawals")
withdrawals = withdrawals.loc[~(is_excluded_category | is_total_row)]

# Summarize the filtered frame, then preview its first rows
withdrawals.info()
withdrawals.head()

<class 'pandas.core.frame.DataFrame'>
Index: 183576 entries, 0 to 187480
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              183576 non-null  datetime64[ns]
 1   account_type             183576 non-null  object        
 2   transaction_type         183576 non-null  object        
 3   transaction_catg         183576 non-null  object        
 4   transaction_catg_desc    183576 non-null  object        
 5   transaction_today_amt    183576 non-null  int64         
 6   transaction_mtd_amt      183576 non-null  object        
 7   transaction_fytd_amt     183576 non-null  object        
 8   table_nbr                183576 non-null  object        
 9   table_nm                 183576 non-null  object        
 10  src_line_nbr             183576 non-null  object        
 11  record_fiscal_year       183576 non-null  object        
 12  record_fiscal_quarter

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2012-10-01,Federal Reserve Account,Withdrawals,Commodity Credit Corporation programs,,39,39,39,II,Deposits and Withdrawals of Operating Cash,33,2013,1,2012,4,10,1
1,2012-10-01,Federal Reserve Account,Withdrawals,Temporary Assistance for Needy Families (HHS),,31,31,31,II,Deposits and Withdrawals of Operating Cash,56,2013,1,2012,4,10,1
2,2012-10-01,Federal Reserve Account,Withdrawals,Transportation Security Admin. (DHS),,0,0,0,II,Deposits and Withdrawals of Operating Cash,57,2013,1,2012,4,10,1
3,2012-10-01,Federal Reserve Account,Withdrawals,Unemployment Insurance Benefits,,210,210,210,II,Deposits and Withdrawals of Operating Cash,58,2013,1,2012,4,10,1
4,2012-10-01,Federal Reserve Account,Withdrawals,Veterans Affairs programs,,491,491,491,II,Deposits and Withdrawals of Operating Cash,59,2013,1,2012,4,10,1


In [10]:
# Persist the combined/latest withdrawals in two formats:
# - CSV, as a plain-text backup in case of API changes or data loss/removal by government
# - parquet, for faster reloading by the pipeline, analysis and visualization steps
csv_backup_path = "output/withdrawals.csv"
parquet_path = "data/withdrawals.parquet"

withdrawals.to_csv(csv_backup_path, index=False)
withdrawals.to_parquet(parquet_path)
