In [16]:
import pandas as pd
import datawrapper as dw
import requests

In [None]:
# Using Treasury's Fiscal Data API to get pieces of the Daily Treasury Statement
# https://fiscal.treasury.gov/reports-statements/dts/
# Fetch the latest WITHDRAWAL data by category/department for the current fiscal year, FY2025.
# Each fiscal quarter is requested separately and every page of each quarter is
# downloaded, so results larger than one page are not silently truncated.

# Define the base URL
url = "https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/dts/deposits_withdrawals_operating_cash"

# Rows requested per page (the API defaults to 100)
PAGE_SIZE = 10000
# Seconds to wait on a request before giving up, so a stalled connection cannot hang the notebook
REQUEST_TIMEOUT_SECONDS = 60

# Accumulates the raw row dicts returned by the API across all quarters and pages
latest_data = []

# Only FY2025 is fetched; raise the upper bound of the range to include later fiscal years
for year in range(2025, 2026):
    for quarter in range(1, 5):  # Quarters 1 to 4
        print(f"Fetching data for FY {year}, Quarter {quarter}...")

        page_number = 1
        while True:
            # Define the parameters dynamically for each year, quarter and page
            params = {
                "filter": f"transaction_type:eq:Withdrawals,record_fiscal_year:eq:{year},record_fiscal_quarter:eq:{quarter}",
                "page[size]": PAGE_SIZE,
                "page[number]": page_number,
            }

            # Make the GET request; network-level failures are reported the same
            # best-effort way as HTTP errors instead of raising out of the cell
            try:
                response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
            except requests.RequestException as exc:
                print(f"Request failed for FY {year}, Quarter {quarter}: {exc}")
                break

            # Check the response status
            if response.status_code != 200:
                print(f"Request failed for FY {year}, Quarter {quarter} with status code {response.status_code}: {response.text}")
                break

            data = response.json()  # Parse the JSON response

            # Extract the 'data' field and append it to the list
            if "data" not in data:
                print(f"No data found for FY {year}, Quarter {quarter}.")
                break
            latest_data.extend(data["data"])

            # Stop once the last page reported by the API has been read; if the
            # response carries no page count, treat it as a single page
            total_pages = (data.get("meta") or {}).get("total-pages") or 1
            if page_number >= total_pages:
                break
            page_number += 1

# Convert the list of data to a DataFrame
withdrawals_latest = pd.DataFrame(latest_data)

# Convert record_date to datetime (skipped when nothing was fetched, since the column would not exist)
if not withdrawals_latest.empty:
    withdrawals_latest["record_date"] = pd.to_datetime(withdrawals_latest["record_date"])

# Display DataFrame info
withdrawals_latest.info()

# Print the range of dates in the data
if not withdrawals_latest.empty:
    min_date_latest, max_date_latest = withdrawals_latest["record_date"].agg(["min", "max"])
    print(f"Date range in latest data: {min_date_latest} to {max_date_latest}")

# Display the first few rows
withdrawals_latest.head()

Fetching data for FY 2025, Quarter 1...
Fetching data for FY 2025, Quarter 2...
Fetching data for FY 2025, Quarter 3...
Fetching data for FY 2025, Quarter 4...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14214 entries, 0 to 14213
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   record_date              14214 non-null  datetime64[ns]
 1   account_type             14214 non-null  object        
 2   transaction_type         14214 non-null  object        
 3   transaction_catg         14214 non-null  object        
 4   transaction_catg_desc    14214 non-null  object        
 5   transaction_today_amt    14214 non-null  object        
 6   transaction_mtd_amt      14214 non-null  object        
 7   transaction_fytd_amt     14214 non-null  object        
 8   table_nbr                14214 non-null  object        
 9   table_nm                 14214 non-null  object        

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2024-10-01,Treasury General Account (TGA),Withdrawals,Corporation for Public Broadcasting,,0,0,0,II,Deposits and Withdrawals of Operating Cash,82,2025,1,2024,4,10,1
1,2024-10-01,Treasury General Account (TGA),Withdrawals,Dept of Agriculture (USDA) - misc,,330,330,330,II,Deposits and Withdrawals of Operating Cash,83,2025,1,2024,4,10,1
2,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Child Nutrition,,48,48,48,II,Deposits and Withdrawals of Operating Cash,84,2025,1,2024,4,10,1
3,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Commodity Credit Corporation,,33,33,33,II,Deposits and Withdrawals of Operating Cash,85,2025,1,2024,4,10,1
4,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Federal Crop Insurance Corp Fund,,94,94,94,II,Deposits and Withdrawals of Operating Cash,86,2025,1,2024,4,10,1
5,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Loan Payments,,46,46,46,II,Deposits and Withdrawals of Operating Cash,87,2025,1,2024,4,10,1
6,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Other Farm Service,,22,22,22,II,Deposits and Withdrawals of Operating Cash,88,2025,1,2024,4,10,1
7,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Supp Nutrition Assist Prog (SNAP),,203,203,203,II,Deposits and Withdrawals of Operating Cash,89,2025,1,2024,4,10,1
8,2024-10-01,Treasury General Account (TGA),Withdrawals,USDA - Supp Nutrition Assist Prog (WIC),,26,26,26,II,Deposits and Withdrawals of Operating Cash,90,2025,1,2024,4,10,1
9,2024-10-01,Treasury General Account (TGA),Withdrawals,Dept of Commerce (DOC),,5,5,5,II,Deposits and Withdrawals of Operating Cash,91,2025,1,2024,4,10,1


In [None]:
# Combine the historical archive with the rows fetched from the Fiscal Data API.
# - The historical data (FY20??–FY2024) was produced by a prior script and is
#   read below from a parquet file in the data directory.
# - `withdrawals_latest` holds the latest data (FY2025 forward) built in the fetch cell.

# Read the historical withdrawals data from parquet file; with set path
withdrawals_historic_file_path = "data/withdrawals_historic.parquet"
withdrawals_historic = pd.read_parquet(withdrawals_historic_file_path)

# Combine DataFrames (historic and latest)
withdrawals = pd.concat([withdrawals_historic, withdrawals_latest], ignore_index=True)

# Drop duplicate rows if any (optional, based on your data)
# Repeated tests of method show there are none, and should be impossible, but as a safety measure
withdrawals = withdrawals.drop_duplicates()

# Ensure the record_date column is in datetime format (should be already; but as a precaution)
withdrawals["record_date"] = pd.to_datetime(withdrawals["record_date"])

# Convert amounts to numeric. Non-numeric values/blanks become NaN; report how
# many were coerced so that values dropped from later sums do not go unnoticed.
parsed_today_amt = pd.to_numeric(withdrawals["transaction_today_amt"], errors='coerce')
coerced_count = int((parsed_today_amt.isna() & withdrawals["transaction_today_amt"].notna()).sum())
if coerced_count:
    print(f"Warning: {coerced_count} transaction_today_amt values were not numeric and were set to NaN")
withdrawals["transaction_today_amt"] = parsed_today_amt

# Sort the combined DataFrame by record_date
withdrawals = withdrawals.sort_values(by="record_date").reset_index(drop=True)

# Display the combined DataFrame info
withdrawals.info()

# Print the range of dates in the combined data
# (the *_latest variable names are reused from the fetch cell but now describe the combined frame)
if not withdrawals.empty:
    min_date_latest, max_date_latest = withdrawals["record_date"].agg(["min", "max"])
    print(f"Date range in data: {min_date_latest} to {max_date_latest}")

# Save all withdrawals data to a new csv
withdrawals_file_path = "data/withdrawals_combined.csv"
withdrawals.to_csv(withdrawals_file_path, index=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 187482 entries, 0 to 187481
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              187482 non-null  datetime64[ns]
 1   account_type             187482 non-null  object        
 2   transaction_type         187482 non-null  object        
 3   transaction_catg         187482 non-null  object        
 4   transaction_catg_desc    187482 non-null  object        
 5   transaction_today_amt    187482 non-null  int64         
 6   transaction_mtd_amt      187482 non-null  object        
 7   transaction_fytd_amt     187482 non-null  object        
 8   table_nbr                187482 non-null  object        
 9   table_nm                 187482 non-null  object        
 10  src_line_nbr             187482 non-null  object        
 11  record_fiscal_year       187482 non-null  object        
 12  record_fiscal_qu

In [19]:
# Remove rows that should not count toward the spending totals:
#   * transaction_catg equal to either spelling of the public debt cash redemption label
#   * account_type equal to "Treasury General Account Total Withdrawals"
excluded_categories = [
    "Public Debt Cash Redemp. (Table IIIB)",
    "Public Debt Cash Redemp. (Table III-B)",
]
is_debt_redemption = withdrawals["transaction_catg"].isin(excluded_categories)
is_tga_total_account = withdrawals["account_type"] == "Treasury General Account Total Withdrawals"

# Keep a row only when neither exclusion applies
withdrawals = withdrawals.loc[~(is_debt_redemption | is_tga_total_account)]

# Summarise the filtered frame, then render its first rows as the cell output
withdrawals.info()
withdrawals.head()

<class 'pandas.core.frame.DataFrame'>
Index: 183576 entries, 0 to 187480
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              183576 non-null  datetime64[ns]
 1   account_type             183576 non-null  object        
 2   transaction_type         183576 non-null  object        
 3   transaction_catg         183576 non-null  object        
 4   transaction_catg_desc    183576 non-null  object        
 5   transaction_today_amt    183576 non-null  int64         
 6   transaction_mtd_amt      183576 non-null  object        
 7   transaction_fytd_amt     183576 non-null  object        
 8   table_nbr                183576 non-null  object        
 9   table_nm                 183576 non-null  object        
 10  src_line_nbr             183576 non-null  object        
 11  record_fiscal_year       183576 non-null  object        
 12  record_fiscal_quarter

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2012-10-01,Federal Reserve Account,Withdrawals,Commodity Credit Corporation programs,,39,39,39,II,Deposits and Withdrawals of Operating Cash,33,2013,1,2012,4,10,1
1,2012-10-01,Federal Reserve Account,Withdrawals,Temporary Assistance for Needy Families (HHS),,31,31,31,II,Deposits and Withdrawals of Operating Cash,56,2013,1,2012,4,10,1
2,2012-10-01,Federal Reserve Account,Withdrawals,Transportation Security Admin. (DHS),,0,0,0,II,Deposits and Withdrawals of Operating Cash,57,2013,1,2012,4,10,1
3,2012-10-01,Federal Reserve Account,Withdrawals,Unemployment Insurance Benefits,,210,210,210,II,Deposits and Withdrawals of Operating Cash,58,2013,1,2012,4,10,1
4,2012-10-01,Federal Reserve Account,Withdrawals,Veterans Affairs programs,,491,491,491,II,Deposits and Withdrawals of Operating Cash,59,2013,1,2012,4,10,1


In [29]:
# Total withdrawals per category from Jan. 20, 2025 through the moment the cell runs.
# The upper bound uses the current clock time, so the result depends on when
# the cell is executed and on how recent the loaded data is.
filtered_withdrawals = withdrawals[
    (withdrawals["record_date"] >= "2025-01-20") & 
    (withdrawals["record_date"] <= pd.Timestamp.now())
]

# Group by transaction_catg and sum the transaction_today_amt for each category
category_spending = (
    filtered_withdrawals
    .groupby("transaction_catg", as_index=False)
    .agg({"transaction_today_amt": "sum"})
    .rename(columns={"transaction_today_amt": "total_spending"})
)

# Sort by total_spending in descending order
category_spending = category_spending.sort_values(by="total_spending", ascending=False).reset_index(drop=True)

# Save the category spending DataFrame to a CSV file for further analysis
category_spending.to_csv("category_spending.csv", index=False)

# Display the info of the category spending DataFrame
category_spending.info()

# Grand total across all categories for the same window.
# NOTE(review): the label prints the raw sum as dollars; the Daily Treasury
# Statement is understood to report amounts in millions of dollars — confirm
# the units before quoting this figure.
total_spending_2025 = category_spending["total_spending"].sum()
print(f"Total spending in 2025: ${total_spending_2025:,.2f}")

# Keep the preview as the last expression: a bare expression in the middle of
# a cell is evaluated and discarded, so it would never be rendered.
category_spending.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 103 entries, 0 to 102
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   transaction_catg  103 non-null    object
 1   total_spending    103 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 1.7+ KB
Total spending in 2025: $19,232,995.00


In [22]:
# Build a category-by-year table of withdrawals, where each year's window runs
# from Jan. 20 through Dec. 31 of that calendar year (2012 through 2025).
yearly_spending_list = []

for year in range(2012, 2026):
    # Window boundaries for this calendar year
    start_date = pd.Timestamp(year=year, month=1, day=20)
    end_date = pd.Timestamp(year=year, month=12, day=31)

    # Rows whose record_date falls inside the window (both ends inclusive)
    in_window = withdrawals["record_date"].between(start_date, end_date)
    filtered_withdrawals = withdrawals.loc[in_window]

    # One row per category with that year's summed transaction_today_amt
    yearly_spending = (
        filtered_withdrawals
        .groupby("transaction_catg")["transaction_today_amt"]
        .sum()
        .rename(f"total_spending_{year}")
        .reset_index()
    )

    yearly_spending_list.append(yearly_spending)

# Outer-join the per-year frames on category, one year at a time, so every
# category that appears in any year gets a row
remaining_years = iter(yearly_spending_list)
pivot_table = next(remaining_years)
for yearly_spending in remaining_years:
    pivot_table = pivot_table.merge(yearly_spending, on="transaction_catg", how="outer")

# Categories absent from a given year get 0 instead of NaN
pivot_table = pivot_table.fillna(0)

# Largest 2025 totals first, with a fresh 0..n-1 index
pivot_table = pivot_table.sort_values("total_spending_2025", ascending=False, ignore_index=True)

# Save the pivot table to a CSV file for further analysis
# NOTE(review): a later cell in this notebook writes a different table to this same path
pivot_table.to_csv("category_spending_pivot.csv", index=False)

# Show the frame summary, then render the top 25 rows as the cell output
pivot_table.info()
pivot_table.head(25)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   transaction_catg     157 non-null    object 
 1   total_spending_2012  157 non-null    float64
 2   total_spending_2013  157 non-null    float64
 3   total_spending_2014  157 non-null    float64
 4   total_spending_2015  157 non-null    float64
 5   total_spending_2016  157 non-null    float64
 6   total_spending_2017  157 non-null    float64
 7   total_spending_2018  157 non-null    float64
 8   total_spending_2019  157 non-null    float64
 9   total_spending_2020  157 non-null    float64
 10  total_spending_2021  157 non-null    float64
 11  total_spending_2022  157 non-null    float64
 12  total_spending_2023  157 non-null    float64
 13  total_spending_2024  157 non-null    float64
 14  total_spending_2025  157 non-null    float64
dtypes: float64(14), object(1)
memory usage: 

Unnamed: 0,transaction_catg,total_spending_2012,total_spending_2013,total_spending_2014,total_spending_2015,total_spending_2016,total_spending_2017,total_spending_2018,total_spending_2019,total_spending_2020,total_spending_2021,total_spending_2022,total_spending_2023,total_spending_2024,total_spending_2025
0,SSA - Benefits Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,464452.0,1024744.0,1158301.0,1233920.0,360049.0
1,Taxes - Individual Tax Refunds (EFT),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,282344.0,296430.0,225230.0
2,HHS - Grants to States for Medicaid,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,396657.0,501606.0,563776.0,579785.0,596494.0,164800.0
3,Interest on Treasury Securities,57517.0,212524.0,221156.0,230775.0,232898.0,236778.0,264597.0,302195.0,306527.0,288081.0,299124.0,392357.0,515039.0,147430.0
4,HHS - Federal Supple Med Insr Trust Fund,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,336829.0,376198.0,405316.0,466724.0,504822.0,136861.0
5,Dept of Defense (DoD) - misc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121119.0,440078.0,118309.0
6,HHS - Federal Hospital Insr Trust Fund,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,329452.0,304924.0,317147.0,373366.0,391453.0,107710.0
7,Unclassified,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1351.0,0.0,0.0,62355.0,225886.0,72633.0
8,Federal Salaries (EFT),43881.0,157918.0,158915.0,160909.0,164287.0,167595.0,178789.0,185190.0,194990.0,195629.0,203147.0,218451.0,229686.0,61395.0
9,Dept of Education (ED),0.0,0.0,0.0,0.0,0.0,0.0,0.0,14967.0,167212.0,221905.0,238337.0,227943.0,225893.0,59940.0


In [23]:
# Compare spending per category over the same number of days after Jan. 20 in
# every year, using the most recent date in the data to set the window length.

# Most recent date in the data, its calendar year, and the number of days
# between Jan. 20 of that year and that date
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days
# NOTE(review): days_elapsed is negative when the latest record falls before
# Jan. 20 of its year, which leaves every window empty — confirm this cannot occur.

# Create an empty list to store yearly spending DataFrames
yearly_spending_list = []

# Loop through each calendar year up to and including the year of the latest
# record, so the newest year is not dropped when the data rolls past 2025
for year in range(2012, current_year + 1):
    # Window starts on Jan. 20 and spans the same number of days in every year
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)
    
    # Filter withdrawals for the given year's window (both ends inclusive)
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]
    
    # Group by transaction_catg and sum the transaction_today_amt for each category
    yearly_spending = (
        filtered_withdrawals
        .groupby("transaction_catg", as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"total_spending_{year}"})
    )
    
    # Append the yearly spending DataFrame to the list
    yearly_spending_list.append(yearly_spending)

# Merge all yearly spending DataFrames into a single pivot table
pivot_table = yearly_spending_list[0]
for yearly_spending in yearly_spending_list[1:]:
    pivot_table = pivot_table.merge(yearly_spending, on="transaction_catg", how="outer")

# Fill NaN values with 0 for categories that don't appear in some years
pivot_table = pivot_table.fillna(0)

# Add a column for the change in spending from the prior year to the latest year
pivot_table["change_1yr"] = (
    pivot_table[f"total_spending_{current_year}"]
    - pivot_table[f"total_spending_{current_year - 1}"]
)

# Sort the pivot table by the one-year change (largest increase first)
pivot_table = pivot_table.sort_values(by="change_1yr", ascending=False).reset_index(drop=True)

# Save the pivot table to a CSV file for further analysis
# NOTE(review): this is the same path the earlier full-period pivot cell writes to, so that file is overwritten here
pivot_table.to_csv("category_spending_pivot.csv", index=False)

# Display the info and first few rows of the pivot table
pivot_table.info()
# Print days elapsed since Jan. 20 of the latest year in the data
print(f"Days elapsed since January 20, {current_year}: {days_elapsed}")
pivot_table.head(25)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   transaction_catg     149 non-null    object 
 1   total_spending_2012  149 non-null    float64
 2   total_spending_2013  149 non-null    float64
 3   total_spending_2014  149 non-null    float64
 4   total_spending_2015  149 non-null    float64
 5   total_spending_2016  149 non-null    float64
 6   total_spending_2017  149 non-null    float64
 7   total_spending_2018  149 non-null    float64
 8   total_spending_2019  149 non-null    float64
 9   total_spending_2020  149 non-null    float64
 10  total_spending_2021  149 non-null    float64
 11  total_spending_2022  149 non-null    float64
 12  total_spending_2023  149 non-null    float64
 13  total_spending_2024  149 non-null    float64
 14  total_spending_2025  149 non-null    float64
 15  change_1yr           149 non-null    flo

Unnamed: 0,transaction_catg,total_spending_2012,total_spending_2013,total_spending_2014,total_spending_2015,total_spending_2016,total_spending_2017,total_spending_2018,total_spending_2019,total_spending_2020,total_spending_2021,total_spending_2022,total_spending_2023,total_spending_2024,total_spending_2025,change_1yr
0,SSA - Benefits Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,270687.0,305889.0,324972.0,360049.0,35077.0
1,Interest on Treasury Securities,0.0,54392.0,55602.0,58888.0,60654.0,61321.0,64984.0,74881.0,79783.0,74049.0,72756.0,94063.0,119036.0,147430.0,28394.0
2,HHS - Marketplace Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5819.0,13112.0,17779.0,31834.0,29845.0,46501.0,16656.0
3,Unclassified,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,56788.0,72633.0,15845.0
4,HHS - Federal Supple Med Insr Trust Fund,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,68687.0,97458.0,101756.0,118456.0,124312.0,136861.0,12549.0
5,Taxes - Individual Tax Refunds (EFT),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,192035.0,213975.0,225230.0,11255.0
6,HHS - Medicare Prescription Drugs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8231.0,22870.0,25879.0,27045.0,29251.0,40297.0,11046.0
7,Dept of Defense (DoD) - misc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108512.0,118309.0,9797.0
8,Dept of Veterans Affairs (VA),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15017.0,16126.0,18558.0,21173.0,21785.0,30111.0,8326.0
9,DoD - Health,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6863.0,6863.0


In [24]:
# Repeat this to get the grand totals across all of government for each year during this same time frame

# Most recent date in the data, its calendar year, and the number of days
# between Jan. 20 of that year and that date
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# Create a list to store total spending for each year
yearly_totals = []

# Loop through each calendar year from 2020 up to and including the year of
# the latest record, so the year the window length is based on is always present
for year in range(2020, current_year + 1):
    # Window starts on Jan. 20 and spans the same number of days in every year
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)
    
    # Filter withdrawals for the given year's window (both ends inclusive)
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]
    
    # Sum transaction_today_amt over every row in the window
    total_spending = filtered_withdrawals["transaction_today_amt"].sum()
    
    # Append the total spending to the list
    yearly_totals.append({"year": year, "total_spending": total_spending})

# Convert the yearly totals to a DataFrame
yearly_totals_df = pd.DataFrame(yearly_totals)

# Save the yearly totals DataFrame to a CSV file for further analysis
yearly_totals_df.to_csv("yearly_total_spending.csv", index=False)

# Display the info and first few rows of the yearly totals DataFrame
yearly_totals_df.info()
yearly_totals_df.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   year            6 non-null      int64
 1   total_spending  6 non-null      int64
dtypes: int64(2)
memory usage: 228.0 bytes


Unnamed: 0,year,total_spending
0,2020,1865892
1,2021,2282141
2,2022,1930719
3,2023,3873922
4,2024,1882985
5,2025,2081489
