In [18]:
import pandas as pd
import datawrapper as dw
import requests

In [19]:
# Load the latest clean withdrawals data following update
withdrawals_combined_file_path = "data/withdrawals.parquet"
withdrawals = pd.read_parquet(withdrawals_combined_file_path)

# Quick sanity check: schema, first rows, and the date range covered by record_date.
# DataFrame.info() writes its report straight to stdout and returns None, so it is
# called on its own -- wrapping it in print() appends a stray "None" line.
withdrawals.info()
print(withdrawals.head())
print(withdrawals['record_date'].min(), withdrawals['record_date'].max())

<class 'pandas.core.frame.DataFrame'>
Index: 183576 entries, 0 to 187480
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              183576 non-null  datetime64[ns]
 1   account_type             183576 non-null  object        
 2   transaction_type         183576 non-null  object        
 3   transaction_catg         183576 non-null  object        
 4   transaction_catg_desc    183576 non-null  object        
 5   transaction_today_amt    183576 non-null  int64         
 6   transaction_mtd_amt      183576 non-null  object        
 7   transaction_fytd_amt     183576 non-null  object        
 8   table_nbr                183576 non-null  object        
 9   table_nm                 183576 non-null  object        
 10  src_line_nbr             183576 non-null  object        
 11  record_fiscal_year       183576 non-null  object        
 12  record_fiscal_quarter

In [20]:
# Total withdrawals per category from Jan. 20, 2025 through today.

# Keep records dated on/after Jan. 20, 2025; the upper bound drops any record
# dated later than the moment this cell runs.
filtered_withdrawals = withdrawals[
    (withdrawals["record_date"] >= "2025-01-20") &
    (withdrawals["record_date"] <= pd.Timestamp.now())
]

# Group by transaction_catg and sum the transaction_today_amt for each category
category_spending = (
    filtered_withdrawals
    .groupby("transaction_catg", as_index=False)
    .agg({"transaction_today_amt": "sum"})
    .rename(columns={"transaction_today_amt": "total_spending"})
)

# Sort by total_spending in descending order
category_spending = category_spending.sort_values(by="total_spending", ascending=False).reset_index(drop=True)

# Display the info and first few rows of the category spending DataFrame.
# info() prints its own report and returns None, so it is not wrapped in print()
# (doing so adds a stray "None" line to the output).
category_spending.info()
print(category_spending.head())

# Grand total across all categories for the period.
# NOTE(review): the magnitudes here only make sense as millions of dollars, which is
# how the Daily Treasury Statement reports amounts -- confirm against the data source.
# The label states the unit so the figure is not read as plain dollars.
total_spending_2025 = category_spending["total_spending"].sum()
print(f"Total spending in Trump Administration since Jan. 20: ${total_spending_2025:,.0f} million")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   transaction_catg  101 non-null    object
 1   total_spending    101 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 1.7+ KB
None
                           transaction_catg  total_spending
0                   SSA - Benefits Payments          360049
1      Taxes - Individual Tax Refunds (EFT)          225230
2       HHS - Grants to States for Medicaid          164800
3           Interest on Treasury Securities          147430
4  HHS - Federal Supple Med Insr Trust Fund          136861
Total spending in Trump Administration since Jan. 20: $2,081,489.00


In [21]:
# Build a category-by-year table of withdrawals, one column per CALENDAR year.
# Each year's per-category totals are kept in yearly_spending_list and also
# outer-merged onto the running table as soon as they are computed.
yearly_spending_list = []
total_annual_spending = None

for year in range(2013, 2026):
    # Inclusive calendar-year bounds: Jan. 1 through Dec. 31
    start_date = pd.to_datetime(f"{year}-01-01")
    end_date = pd.to_datetime(f"{year}-12-31")

    # Rows whose record_date falls inside this calendar year
    in_year = withdrawals["record_date"].between(start_date, end_date)
    filtered_withdrawals = withdrawals[in_year]

    # Sum transaction_today_amt per category, naming the result column after the year
    yearly_spending = (
        filtered_withdrawals
        .groupby("transaction_catg", as_index=False)
        .agg(**{f"total_spending_{year}": ("transaction_today_amt", "sum")})
    )
    yearly_spending_list.append(yearly_spending)

    # The first year seeds the table; later years are outer-merged so that a
    # category missing from either side is still kept
    if total_annual_spending is None:
        total_annual_spending = yearly_spending
    else:
        total_annual_spending = total_annual_spending.merge(
            yearly_spending, on="transaction_catg", how="outer"
        )

# Categories absent in a given year come out of the merge as NaN; treat them as 0,
# then rank by 2024 (the most recent full year), largest first
total_annual_spending = (
    total_annual_spending
    .fillna(0)
    .sort_values(by="total_spending_2024", ascending=False)
    .reset_index(drop=True)
)

# Persist the table for further analysis
total_annual_spending.to_csv("output/total_annual_spending.csv", index=False)

# Show the schema and the top rows
total_annual_spending.info()
total_annual_spending.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   transaction_catg     157 non-null    object 
 1   total_spending_2013  157 non-null    float64
 2   total_spending_2014  157 non-null    float64
 3   total_spending_2015  157 non-null    float64
 4   total_spending_2016  157 non-null    float64
 5   total_spending_2017  157 non-null    float64
 6   total_spending_2018  157 non-null    float64
 7   total_spending_2019  157 non-null    float64
 8   total_spending_2020  157 non-null    float64
 9   total_spending_2021  157 non-null    float64
 10  total_spending_2022  157 non-null    float64
 11  total_spending_2023  157 non-null    float64
 12  total_spending_2024  157 non-null    float64
 13  total_spending_2025  157 non-null    float64
dtypes: float64(13), object(1)
memory usage: 17.3+ KB


Unnamed: 0,transaction_catg,total_spending_2013,total_spending_2014,total_spending_2015,total_spending_2016,total_spending_2017,total_spending_2018,total_spending_2019,total_spending_2020,total_spending_2021,total_spending_2022,total_spending_2023,total_spending_2024,total_spending_2025
0,SSA - Benefits Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,464452.0,1092176.0,1234004.0,1314180.0,444470.0
1,HHS - Grants to States for Medicaid,0.0,0.0,0.0,0.0,0.0,0.0,0.0,396657.0,528138.0,594407.0,612060.0,627333.0,197832.0
2,HHS - Federal Supple Med Insr Trust Fund,0.0,0.0,0.0,0.0,0.0,0.0,0.0,336829.0,399965.0,432463.0,497846.0,542064.0,171738.0
3,Interest on Treasury Securities,218238.0,226243.0,235495.0,237461.0,248086.0,276158.0,306855.0,311758.0,292967.0,303270.0,406904.0,534805.0,157178.0
4,Dept of Defense (DoD) - misc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121119.0,459096.0,137217.0


In [22]:
# Compare spending by category over the same number of days after Jan. 20 in each year.

# The latest record date defines the "current" year and how many days have
# elapsed since Jan. 20 of that year.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# If the data has not yet reached Jan. 20 of the current year the window is negative:
# every per-year filter below would be empty and the saved table would be all zeros.
# Fail loudly rather than overwrite the CSV with meaningless output.
if days_elapsed < 0:
    raise ValueError(
        f"Latest record_date {max_date} is before January 20, {current_year}; "
        "cannot build a post-Jan. 20 comparison window."
    )

# Create an empty list to store yearly spending DataFrames
yearly_spending_list = []

# Loop through each year up to and including the year the window was measured in.
# The upper bound follows current_year (rather than a hard-coded year) so the loop,
# the change column and the printed message stay in step when the data rolls over.
# NOTE(review): the displayed rows show 0.0 for every category in 2012, and the
# grand-totals cell starts at 2013 -- confirm whether 2012 is needed here.
for year in range(2012, current_year + 1):
    # Window: Jan. 20 of this year plus the same number of elapsed days (inclusive).
    # It is a fixed day count, so once it extends past February its closing
    # calendar date falls one day earlier in leap years.
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)

    # Filter withdrawals for the given year's window
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]

    # Group by transaction_catg and sum the transaction_today_amt for each category
    yearly_spending = (
        filtered_withdrawals
        .groupby("transaction_catg", as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"total_spending_{year}"})
    )

    # Append the yearly spending DataFrame to the list
    yearly_spending_list.append(yearly_spending)

# Merge all yearly spending DataFrames into a single pivot table
first_days_spending = yearly_spending_list[0]
for yearly_spending in yearly_spending_list[1:]:
    first_days_spending = first_days_spending.merge(yearly_spending, on="transaction_catg", how="outer")

# Fill NaN values with 0 for categories that don't appear in some years
first_days_spending = first_days_spending.fillna(0)

# Add a column for the change in spending from the previous year to the current year
first_days_spending["change_1yr"] = (
    first_days_spending[f"total_spending_{current_year}"]
    - first_days_spending[f"total_spending_{current_year - 1}"]
)

# Sort by the year-over-year change, largest increase first
first_days_spending = first_days_spending.sort_values(by="change_1yr", ascending=False).reset_index(drop=True)

# Save the pivot table to a CSV file for further analysis
first_days_spending.to_csv("output/yearly_first_days_bycategory.csv", index=False)

# Display the info and first few rows of the pivot table
first_days_spending.info()
# Print days elapsed since Jan. 20 of the current year
print(f"Days elapsed since January 20, {current_year}: {days_elapsed}")
first_days_spending.head(25)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 149 entries, 0 to 148
Data columns (total 16 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   transaction_catg     149 non-null    object 
 1   total_spending_2012  149 non-null    float64
 2   total_spending_2013  149 non-null    float64
 3   total_spending_2014  149 non-null    float64
 4   total_spending_2015  149 non-null    float64
 5   total_spending_2016  149 non-null    float64
 6   total_spending_2017  149 non-null    float64
 7   total_spending_2018  149 non-null    float64
 8   total_spending_2019  149 non-null    float64
 9   total_spending_2020  149 non-null    float64
 10  total_spending_2021  149 non-null    float64
 11  total_spending_2022  149 non-null    float64
 12  total_spending_2023  149 non-null    float64
 13  total_spending_2024  149 non-null    float64
 14  total_spending_2025  149 non-null    float64
 15  change_1yr           149 non-null    flo

Unnamed: 0,transaction_catg,total_spending_2012,total_spending_2013,total_spending_2014,total_spending_2015,total_spending_2016,total_spending_2017,total_spending_2018,total_spending_2019,total_spending_2020,total_spending_2021,total_spending_2022,total_spending_2023,total_spending_2024,total_spending_2025,change_1yr
0,SSA - Benefits Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,270687.0,305889.0,324972.0,360049.0,35077.0
1,Interest on Treasury Securities,0.0,54392.0,55602.0,58888.0,60654.0,61321.0,64984.0,74881.0,79783.0,74049.0,72756.0,94063.0,119036.0,147430.0,28394.0
2,HHS - Marketplace Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5819.0,13112.0,17779.0,31834.0,29845.0,46501.0,16656.0
3,Unclassified,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,56788.0,72633.0,15845.0
4,HHS - Federal Supple Med Insr Trust Fund,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,68687.0,97458.0,101756.0,118456.0,124312.0,136861.0,12549.0
5,Taxes - Individual Tax Refunds (EFT),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,192035.0,213975.0,225230.0,11255.0
6,HHS - Medicare Prescription Drugs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8231.0,22870.0,25879.0,27045.0,29251.0,40297.0,11046.0
7,Dept of Defense (DoD) - misc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108512.0,118309.0,9797.0
8,Dept of Veterans Affairs (VA),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15017.0,16126.0,18558.0,21173.0,21785.0,30111.0,8326.0
9,DoD - Health,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6863.0,6863.0


In [23]:
# Repeat this to get the grand totals across all of government for each year during this same time frame

# The latest record date defines the "current" year and how many days have
# elapsed since Jan. 20 of that year.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# A negative window (data not yet at Jan. 20 of the current year) would make every
# per-year filter empty and every total zero; stop instead of saving that.
if days_elapsed < 0:
    raise ValueError(
        f"Latest record_date {max_date} is before January 20, {current_year}; "
        "cannot build a post-Jan. 20 comparison window."
    )

# Create a list to store total spending for each year
yearly_totals = []

# Loop through each year up to and including the year the window was measured in.
# The upper bound follows current_year (rather than a hard-coded year) so the most
# recent year is always included once the data rolls over.
for year in range(2013, current_year + 1):
    # Window: Jan. 20 of this year plus the same number of elapsed days (inclusive)
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)

    # Filter withdrawals for the given year's window
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]

    # Calculate the total spending for the year across all categories
    total_spending = filtered_withdrawals["transaction_today_amt"].sum()

    # Append the total spending to the list
    yearly_totals.append({"year": year, "total_spending": total_spending})

# Convert the yearly totals to a DataFrame (one row per year)
yearly_first_days_totals = pd.DataFrame(yearly_totals)

# Save the yearly totals DataFrame to a CSV file for further analysis
yearly_first_days_totals.to_csv("output/yearly_first_days_totals.csv", index=False)

# Display the info and first few rows of the yearly totals DataFrame
yearly_first_days_totals.info()
yearly_first_days_totals.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   year            13 non-null     int64
 1   total_spending  13 non-null     int64
dtypes: int64(2)
memory usage: 340.0 bytes


Unnamed: 0,year,total_spending
0,2013,1193542
1,2014,1210070
2,2015,1282409
3,2016,1311238
4,2017,1308815
5,2018,1357376
6,2019,1430332
7,2020,1865892
8,2021,2282141
9,2022,1930719
