In [1]:
import pandas as pd
import datawrapper as dw
import requests

In [2]:
# Load the latest clean withdrawals data following update
withdrawals_combined_file_path = "data/withdrawals.parquet"
withdrawals = pd.read_parquet(withdrawals_combined_file_path)

# Quick look at the schema, the first rows, and the date range of record_date.
# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" line to the output.
withdrawals.info()
print(withdrawals.head())
print(withdrawals['record_date'].min(), withdrawals['record_date'].max())

<class 'pandas.core.frame.DataFrame'>
Index: 183312 entries, 0 to 187583
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              183312 non-null  datetime64[ns]
 1   account_type             183312 non-null  object        
 2   transaction_type         183312 non-null  object        
 3   transaction_catg         183312 non-null  object        
 4   transaction_catg_desc    183312 non-null  object        
 5   transaction_today_amt    183312 non-null  int64         
 6   transaction_mtd_amt      183312 non-null  object        
 7   transaction_fytd_amt     183312 non-null  object        
 8   table_nbr                183312 non-null  object        
 9   table_nm                 183312 non-null  object        
 10  src_line_nbr             183312 non-null  object        
 11  record_fiscal_year       183312 non-null  object        
 12  record_fiscal_quarter

In [3]:
# TODO: Insert the category crosswalk here and recategorize the data to smooth out inconsistencies in category names across years.


In [4]:
# Quick report on this year so far.
# Keep withdrawals dated from Jan. 20, 2025 through the moment this cell runs.
filtered_withdrawals = withdrawals[
    (withdrawals["record_date"] >= "2025-01-20") &
    (withdrawals["record_date"] <= pd.Timestamp.now())
]

# Sum transaction_today_amt per transaction_catg, then order the categories
# from the largest total to the smallest with a fresh 0..n-1 index.
category_spending = (
    filtered_withdrawals
    .groupby("transaction_catg", as_index=False)
    .agg({"transaction_today_amt": "sum"})
    .rename(columns={"transaction_today_amt": "total_spending"})
    .sort_values(by="total_spending", ascending=False)
    .reset_index(drop=True)
)

# DataFrame.info() prints its own report and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" line to the output.
category_spending.info()
print(category_spending.head(10))

# Grand total across every category in the filtered window.
# NOTE(review): the figure is printed as plain dollars with two decimals, but
# transaction_today_amt is an integer column; confirm its unit (TODO confirm)
# before quoting this number.
total_spending_2025 = category_spending["total_spending"].sum()
print(f"Total spending in Trump Administration since Jan. 20: ${total_spending_2025:,.2f}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 101 entries, 0 to 100
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   transaction_catg  101 non-null    object
 1   total_spending    101 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 1.7+ KB
None
                           transaction_catg  total_spending
0                   SSA - Benefits Payments          360369
1      Taxes - Individual Tax Refunds (EFT)          227647
2       HHS - Grants to States for Medicaid          169611
3           Interest on Treasury Securities          147433
4  HHS - Federal Supple Med Insr Trust Fund          137922
5              Dept of Defense (DoD) - misc          120887
6    HHS - Federal Hospital Insr Trust Fund          108957
7                              Unclassified           74466
8                    Federal Salaries (EFT)           61474
9                    Dept of Education (ED)           603

In [5]:
# Integrity / sanity check: per-category totals by CALENDAR year (not fiscal year).
category_keys = ["transaction_catg", "transaction_catg_desc"]

# Build one frame per calendar year holding that year's summed
# transaction_today_amt for every (category, description) pair.
yearly_spending_list = []
for year in range(2013, 2026):
    # record_calendar_year is matched against the year rendered as a string
    in_calendar_year = withdrawals["record_calendar_year"] == str(year)
    filtered_withdrawals = withdrawals.loc[in_calendar_year]

    yearly_spending = (
        filtered_withdrawals
        .groupby(category_keys, as_index=False)["transaction_today_amt"]
        .sum()
        .rename(columns={"transaction_today_amt": f"total_{year}"})
    )
    yearly_spending_list.append(yearly_spending)

# Stitch the yearly frames side by side; the outer join keeps categories
# that only show up in some of the years.
total_annual_spending = yearly_spending_list[0]
for yearly_spending in yearly_spending_list[1:]:
    total_annual_spending = pd.merge(
        total_annual_spending,
        yearly_spending,
        on=category_keys,
        how="outer",
    )

# Years in which a category is absent become 0, then rows are ordered by the
# 2024 total (the most recent full year), largest first.
total_annual_spending = (
    total_annual_spending
    .fillna(0)
    .sort_values(by="total_2024", ascending=False)
    .reset_index(drop=True)
)

# Persist the table for further analysis
total_annual_spending.to_csv("output/total_annual_spending.csv", index=False)

# Show the schema and the top rows
total_annual_spending.info()
total_annual_spending.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 213 entries, 0 to 212
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   transaction_catg       213 non-null    object 
 1   transaction_catg_desc  213 non-null    object 
 2   total_2013             213 non-null    float64
 3   total_2014             213 non-null    float64
 4   total_2015             213 non-null    float64
 5   total_2016             213 non-null    float64
 6   total_2017             213 non-null    float64
 7   total_2018             213 non-null    float64
 8   total_2019             213 non-null    float64
 9   total_2020             213 non-null    float64
 10  total_2021             213 non-null    float64
 11  total_2022             213 non-null    float64
 12  total_2023             213 non-null    float64
 13  total_2024             213 non-null    float64
 14  total_2025             213 non-null    float64
dtypes: flo

Unnamed: 0,transaction_catg,transaction_catg_desc,total_2013,total_2014,total_2015,total_2016,total_2017,total_2018,total_2019,total_2020,total_2021,total_2022,total_2023,total_2024,total_2025
0,SSA - Benefits Payments,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,464452.0,1092176.0,1234004.0,1314180.0,444790.0
1,HHS - Grants to States for Medicaid,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,396657.0,528138.0,594407.0,612060.0,627333.0,202643.0
2,HHS - Federal Supple Med Insr Trust Fund,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,336829.0,399965.0,432463.0,497846.0,542064.0,172799.0
3,Interest on Treasury Securities,,218238.0,226243.0,235495.0,237461.0,248086.0,276158.0,306855.0,311758.0,292967.0,303270.0,406904.0,534805.0,157181.0
4,Dept of Defense (DoD) - misc,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,121119.0,459096.0,139795.0


In [6]:
# Per-category spending over the same number of days after Jan. 20 in every year.
# The window length comes from the data itself: the days between Jan. 20 of the
# latest year on record and the latest record_date.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# A negative length means the data has not reached Jan. 20 of its latest year yet;
# every yearly window would then be empty and the saved table silently blank.
if days_elapsed < 0:
    raise ValueError(
        f"Latest record_date ({max_date.date()}) is before January 20, {current_year}; "
        "no post-Jan. 20 window is available to compare."
    )

# Create an empty list to store yearly spending DataFrames
yearly_spending_list = []

# Loop from 2013 through the latest year in the data. The end of the range follows
# current_year so the window length and the years being compared cannot drift apart
# when the data rolls into a new year.
for year in range(2013, current_year + 1):
    # Window for this year: Jan. 20 plus days_elapsed days, inclusive on both ends
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)

    # Filter withdrawals to this year's window
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]

    # Group by transaction_catg and sum the transaction_today_amt for each category
    yearly_spending = (
        filtered_withdrawals
        .groupby("transaction_catg", as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"total_spending_{year}"})
    )

    # Append the yearly spending DataFrame to the list
    yearly_spending_list.append(yearly_spending)

# Merge all yearly spending DataFrames into a single wide table, one column per year
first_days_spending = yearly_spending_list[0]
for yearly_spending in yearly_spending_list[1:]:
    first_days_spending = first_days_spending.merge(yearly_spending, on="transaction_catg", how="outer")

# Fill NaN values with 0 for categories that don't appear in some years
first_days_spending = first_days_spending.fillna(0)

# Change in spending from the prior year's window to the latest year's window
first_days_spending["change_1yr"] = (
    first_days_spending[f"total_spending_{current_year}"]
    - first_days_spending[f"total_spending_{current_year - 1}"]
)

# Sort by the one-year change, largest increase first
first_days_spending = first_days_spending.sort_values(by="change_1yr", ascending=False).reset_index(drop=True)

# Save the table to a CSV file for further analysis
first_days_spending.to_csv("output/yearly_first_days_bycategory.csv", index=False)

# Display the info and first few rows of the table
first_days_spending.info()
# Report the window length, labelled with the year it was measured from
print(f"Days elapsed since January 20, {current_year}: {days_elapsed}")
first_days_spending.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 148 entries, 0 to 147
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   transaction_catg     148 non-null    object 
 1   total_spending_2013  148 non-null    float64
 2   total_spending_2014  148 non-null    float64
 3   total_spending_2015  148 non-null    float64
 4   total_spending_2016  148 non-null    float64
 5   total_spending_2017  148 non-null    float64
 6   total_spending_2018  148 non-null    float64
 7   total_spending_2019  148 non-null    float64
 8   total_spending_2020  148 non-null    float64
 9   total_spending_2021  148 non-null    float64
 10  total_spending_2022  148 non-null    float64
 11  total_spending_2023  148 non-null    float64
 12  total_spending_2024  148 non-null    float64
 13  total_spending_2025  148 non-null    float64
 14  change_1yr           148 non-null    float64
dtypes: float64(14), object(1)
memory usage: 

Unnamed: 0,transaction_catg,total_spending_2013,total_spending_2014,total_spending_2015,total_spending_2016,total_spending_2017,total_spending_2018,total_spending_2019,total_spending_2020,total_spending_2021,total_spending_2022,total_spending_2023,total_spending_2024,total_spending_2025,change_1yr
0,SSA - Benefits Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,270752.0,305889.0,324972.0,360369.0,35397.0
1,Interest on Treasury Securities,54393.0,55602.0,58888.0,60654.0,61321.0,64984.0,74881.0,79783.0,74049.0,72757.0,94063.0,119036.0,147433.0,28397.0
2,Unclassified,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,56788.0,74466.0,17678.0
3,HHS - Marketplace Payments,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5819.0,18313.0,24229.0,31834.0,29845.0,46501.0,16656.0
4,Taxes - Individual Tax Refunds (EFT),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,192035.0,213975.0,227647.0,13672.0
5,HHS - Federal Supple Med Insr Trust Fund,0.0,0.0,0.0,0.0,0.0,0.0,0.0,72267.0,97997.0,102508.0,118456.0,124312.0,137922.0,13610.0
6,Dept of Defense (DoD) - misc,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,108512.0,120887.0,12375.0
7,HHS - Medicare Prescription Drugs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8231.0,22870.0,25887.0,27045.0,29251.0,40297.0,11046.0
8,Dept of Veterans Affairs (VA),0.0,0.0,0.0,0.0,0.0,0.0,0.0,15280.0,16424.0,18781.0,21173.0,21785.0,30562.0,8777.0
9,HHS - Grants to States for Medicaid,0.0,0.0,0.0,0.0,0.0,0.0,0.0,58838.0,131971.0,153938.0,165958.0,161582.0,169611.0,8029.0


In [7]:
# Grand totals across all categories for each year, over the same number of days
# after Jan. 20 that the latest year's data covers.

# Window length: days between Jan. 20 of the latest year on record and the latest record_date
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# A negative length means the data has not reached Jan. 20 of its latest year yet;
# every yearly window would then be empty and each total silently 0.
if days_elapsed < 0:
    raise ValueError(
        f"Latest record_date ({max_date.date()}) is before January 20, {current_year}; "
        "no post-Jan. 20 window is available to compare."
    )

# Create a list to store total spending for each year
yearly_totals = []

# Loop from 2013 through the latest year in the data. The end of the range follows
# current_year so the window length and the years being totalled cannot drift apart
# when the data rolls into a new year.
for year in range(2013, current_year + 1):
    # Window for this year: Jan. 20 plus days_elapsed days, inclusive on both ends
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)

    # Filter withdrawals to this year's window
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]

    # Sum transaction_today_amt over every row in the window
    total_spending = filtered_withdrawals["transaction_today_amt"].sum()

    # Append the total spending to the list
    yearly_totals.append({"year": year, "total_spending": total_spending})

# Convert the yearly totals to a DataFrame (one row per year)
yearly_first_days_totals = pd.DataFrame(yearly_totals)

# Save the yearly totals DataFrame to a CSV file for further analysis
yearly_first_days_totals.to_csv("output/yearly_first_days_totals.csv", index=False)

# Display the info and first few rows of the yearly totals DataFrame
yearly_first_days_totals.info()
yearly_first_days_totals.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   year            13 non-null     int64
 1   total_spending  13 non-null     int64
dtypes: int64(2)
memory usage: 340.0 bytes


Unnamed: 0,year,total_spending
0,2013,1203918
1,2014,1222337
2,2015,1310908
3,2016,1326053
4,2017,1308815
5,2018,1357376
6,2019,1449014
7,2020,1902640
8,2021,2305574
9,2022,1852300
