In [1]:
from pathlib import Path

import datawrapper as dw
import pandas as pd
import requests

In [2]:
# Using Treasury's Fiscal Data API to get pieces of the Daily Treasury Statement
# https://fiscal.treasury.gov/reports-statements/dts/
# Fetch the latest DEPOSITS data by category/department for the current fiscal year, FY2025
# (widen the fiscal-year range in the loop below to cover later years)

# Define the base URL
url = "https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/dts/deposits_withdrawals_operating_cash"

# Rows requested per page (the API defaults to 100) and seconds to wait on each request
# before giving up, so a stalled connection raises instead of hanging the notebook
page_size = 10000
request_timeout_seconds = 60

# Initialize an empty list to store the data
latest_deposits_data = []

# (fiscal year, quarter) pairs whose download failed; reported once the loop is done
failed_quarters = []

# Loop through the fiscal years and their quarters
for year in range(2025, 2026):  # FY2025 only -- the end of the range is exclusive
    for quarter in range(1, 5):  # Quarters 1 to 4
        print(f"Fetching data for FY {year}, Quarter {quarter}...")

        # Walk every page of the quarter so rows beyond the first page are not dropped
        page_number = 1
        while True:
            # Define the parameters dynamically for each year, quarter and page
            params = {
                "filter": f"transaction_type:eq:Deposits,record_fiscal_year:eq:{year},record_fiscal_quarter:eq:{quarter}",
                "page[number]": page_number,
                "page[size]": page_size,
            }

            # Make the GET request
            response = requests.get(url, params=params, timeout=request_timeout_seconds)

            # Check the response status; keep going with the next quarter on failure
            if response.status_code != 200:
                print(f"Request failed for FY {year}, Quarter {quarter} with status code {response.status_code}: {response.text}")
                failed_quarters.append((year, quarter))
                break

            data = response.json()  # Parse the JSON response

            # Extract the 'data' field and append it to the list
            if "data" not in data:
                print(f"No data found for FY {year}, Quarter {quarter}.")
                break
            latest_deposits_data.extend(data["data"])

            # The response metadata reports how many pages the query spans;
            # treat missing metadata as a single page
            total_pages = (data.get("meta") or {}).get("total-pages") or 1
            if page_number >= int(total_pages):
                break
            page_number += 1

# Make an incomplete download hard to miss before the data is used further on
if failed_quarters:
    print(f"WARNING: download incomplete; failed (fiscal year, quarter) requests: {failed_quarters}")

# Convert the list of data to a DataFrame
deposits_latest = pd.DataFrame(latest_deposits_data)

# Convert record_date to datetime (the column only exists when rows were returned)
if not deposits_latest.empty:
    deposits_latest["record_date"] = pd.to_datetime(deposits_latest["record_date"])

# Display DataFrame info
deposits_latest.info()

# Print the range of dates in the data
if not deposits_latest.empty:
    min_date_latest, max_date_latest = deposits_latest["record_date"].agg(["min", "max"])
    print(f"Date range in latest data: {min_date_latest} to {max_date_latest}")

# Display the first few rows
deposits_latest.head()

Fetching data for FY 2025, Quarter 1...
Fetching data for FY 2025, Quarter 2...
Fetching data for FY 2025, Quarter 3...
Fetching data for FY 2025, Quarter 4...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 11760 entries, 0 to 11759
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   record_date              11760 non-null  datetime64[ns]
 1   account_type             11760 non-null  object        
 2   transaction_type         11760 non-null  object        
 3   transaction_catg         11760 non-null  object        
 4   transaction_catg_desc    11760 non-null  object        
 5   transaction_today_amt    11760 non-null  object        
 6   transaction_mtd_amt      11760 non-null  object        
 7   transaction_fytd_amt     11760 non-null  object        
 8   table_nbr                11760 non-null  object        
 9   table_nm                 11760 non-null  object        

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2024-10-01,Treasury General Account (TGA),Deposits,Dept of Agriculture (USDA) - misc,,8,8,8,II,Deposits and Withdrawals of Operating Cash,1,2025,1,2024,4,10,1
1,2024-10-01,Treasury General Account (TGA),Deposits,USDA - Commodity Credit Corporation,,37,37,37,II,Deposits and Withdrawals of Operating Cash,2,2025,1,2024,4,10,1
2,2024-10-01,Treasury General Account (TGA),Deposits,USDA - Federal Crop Insurance Corp Fund,,0,0,0,II,Deposits and Withdrawals of Operating Cash,3,2025,1,2024,4,10,1
3,2024-10-01,Treasury General Account (TGA),Deposits,USDA - Loan Repayments,,39,39,39,II,Deposits and Withdrawals of Operating Cash,4,2025,1,2024,4,10,1
4,2024-10-01,Treasury General Account (TGA),Deposits,Dept of Commerce (DOC),,15,15,15,II,Deposits and Withdrawals of Operating Cash,5,2025,1,2024,4,10,1


In [3]:
# Combine the archived historical deposits (FY20??-FY2024, produced by a prior script and
# saved as a parquet file in the data directory) with `deposits_latest`, the FY2025-forward
# rows drawn from the Fiscal Data API in the cell above.

# Read the historical deposits data from parquet file; with set path
deposits_historic_file_path = "data/deposits_historic.parquet"
deposits_historic = pd.read_parquet(deposits_historic_file_path)

# Combine DataFrames (historic and latest)
deposits = pd.concat([deposits_historic, deposits_latest], ignore_index=True)

# Ensure the record_date column is in datetime format (should be already; but as a precaution)
deposits["record_date"] = pd.to_datetime(deposits["record_date"])

# Convert amounts to numeric; non-numeric values/blanks become NaN
deposits["transaction_today_amt"] = pd.to_numeric(deposits["transaction_today_amt"], errors="coerce")

# Drop duplicate rows as a safety measure (repeated tests of the method show there are none).
# This runs after the conversions above so that a row stored with different dtypes in the
# two sources (e.g. "8" vs 8) is still recognised as the same row.
deposits = deposits.drop_duplicates()

# Sort the combined DataFrame by record_date. mergesort is a stable sort, so rows sharing
# a date keep the order they had in the sources instead of being shuffled arbitrarily.
deposits = deposits.sort_values(by="record_date", kind="mergesort").reset_index(drop=True)

# Display the combined DataFrame info
deposits.info()

# Print the range of dates in the data
if not deposits.empty:
    min_date_latest, max_date_latest = deposits["record_date"].agg(["min", "max"])
    print(f"Date range in data: {min_date_latest} to {max_date_latest}")
    

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 130800 entries, 0 to 130799
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              130800 non-null  datetime64[ns]
 1   account_type             130800 non-null  object        
 2   transaction_type         130800 non-null  object        
 3   transaction_catg         130800 non-null  object        
 4   transaction_catg_desc    130800 non-null  object        
 5   transaction_today_amt    130800 non-null  int64         
 6   transaction_mtd_amt      130800 non-null  object        
 7   transaction_fytd_amt     130800 non-null  object        
 8   table_nbr                130800 non-null  object        
 9   table_nm                 130800 non-null  object        
 10  src_line_nbr             130800 non-null  object        
 11  record_fiscal_year       130800 non-null  object        
 12  record_fiscal_qu

In [4]:
# Remove the roll-up records from `deposits`, keeping only the individual line items.
# Why: the sub-total / total deposits records repeat amounts already present in the
# line items, so leaving them in would double the totals in later calculations.
# See more in the DTS documentation at the Fiscal Data API.
#
# Not excluded at present (kept here as candidates for exclusion):
#   "Public Debt Cash Redemp. (Table IIIB)"
#   "Public Debt Cash Redemp. (Table III-B)"
is_subtotal_row = deposits["transaction_catg"].eq("Sub-Total Deposits")
is_account_total_row = deposits["account_type"].eq("Treasury General Account Total Deposits")

# Keep a row only when it is neither kind of roll-up record
deposits = deposits.loc[~(is_subtotal_row | is_account_total_row)]

# Summarise the filtered frame and preview its first rows
deposits.info()
deposits.head()

<class 'pandas.core.frame.DataFrame'>
Index: 129672 entries, 0 to 130798
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              129672 non-null  datetime64[ns]
 1   account_type             129672 non-null  object        
 2   transaction_type         129672 non-null  object        
 3   transaction_catg         129672 non-null  object        
 4   transaction_catg_desc    129672 non-null  object        
 5   transaction_today_amt    129672 non-null  int64         
 6   transaction_mtd_amt      129672 non-null  object        
 7   transaction_fytd_amt     129672 non-null  object        
 8   table_nbr                129672 non-null  object        
 9   table_nm                 129672 non-null  object        
 10  src_line_nbr             129672 non-null  object        
 11  record_fiscal_year       129672 non-null  object        
 12  record_fiscal_quarter

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2012-10-01,Federal Reserve Account,Deposits,Agriculture Loan Repayments (misc),,77,77,77,II,Deposits and Withdrawals of Operating Cash,1,2013,1,2012,4,10,1
1,2012-10-01,Short-Term Cash Investments,Deposits,Transfers from Federal Reserve Account (Table V),,0,0,0,II,Deposits and Withdrawals of Operating Cash,31,2013,1,2012,4,10,1
2,2012-10-01,Federal Reserve Account,Deposits,Transfers from Depositaries,,0,0,0,II,Deposits and Withdrawals of Operating Cash,29,2013,1,2012,4,10,1
3,2012-10-01,Federal Reserve Account,Deposits,Change in Balance of Uncollected Funds,,0,0,0,II,Deposits and Withdrawals of Operating Cash,28,2013,1,2012,4,10,1
4,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,TARP,73,73,73,II,Deposits and Withdrawals of Operating Cash,26,2013,1,2012,4,10,1


In [5]:
# Save the combined/latest deposits DataFrame in two formats:
#   - a CSV file for backup in case of API changes or data loss/removal by government
#   - a parquet file for faster reloading in the pipeline, analysis and visualization
deposits_csv_path = Path("output/deposits.csv")
deposits_parquet_path = Path("data/deposits.parquet")

# Neither writer creates missing folders, so make sure they exist (e.g. on a fresh checkout)
for target_path in (deposits_csv_path, deposits_parquet_path):
    target_path.parent.mkdir(parents=True, exist_ok=True)

deposits.to_csv(deposits_csv_path, index=False)

# index=False keeps the parquet file consistent with the CSV: the row index carries no
# data of its own, so it is not written out as an extra column
deposits.to_parquet(deposits_parquet_path, index=False)
