In [1]:
import pandas as pd
import datawrapper as dw
import requests

In [2]:
# Read the cleaned Treasury withdrawals table written by the update step.
withdrawals_combined_file_path = "data/withdrawals.parquet"
withdrawals = pd.read_parquet(withdrawals_combined_file_path)

# Sanity check: report the span of record_date, then the schema and a sample.
record_dates = withdrawals["record_date"]
earliest_record = record_dates.min()
latest_record = record_dates.max()
print(f"Date range of this data: from {earliest_record} to {latest_record}")
withdrawals.info()
withdrawals.head(10)


Date range of this data: from 2012-10-01 00:00:00 to 2025-05-02 00:00:00
<class 'pandas.core.frame.DataFrame'>
Index: 184120 entries, 0 to 188407
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              184120 non-null  datetime64[ns]
 1   account_type             184120 non-null  object        
 2   transaction_type         184120 non-null  object        
 3   transaction_catg         184120 non-null  object        
 4   transaction_catg_desc    184120 non-null  object        
 5   transaction_today_amt    184120 non-null  int64         
 6   transaction_mtd_amt      184120 non-null  object        
 7   transaction_fytd_amt     184120 non-null  object        
 8   table_nbr                184120 non-null  object        
 9   table_nm                 184120 non-null  object        
 10  src_line_nbr             184120 non-null  object        
 11  record_fis

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2012-10-01,Federal Reserve Account,Withdrawals,Commodity Credit Corporation programs,,39,39,39,II,Deposits and Withdrawals of Operating Cash,33,2013,1,2012,4,10,1
1,2012-10-01,Federal Reserve Account,Withdrawals,Temporary Assistance for Needy Families (HHS),,31,31,31,II,Deposits and Withdrawals of Operating Cash,56,2013,1,2012,4,10,1
2,2012-10-01,Federal Reserve Account,Withdrawals,Transportation Security Admin. (DHS),,0,0,0,II,Deposits and Withdrawals of Operating Cash,57,2013,1,2012,4,10,1
3,2012-10-01,Federal Reserve Account,Withdrawals,Unemployment Insurance Benefits,,210,210,210,II,Deposits and Withdrawals of Operating Cash,58,2013,1,2012,4,10,1
4,2012-10-01,Federal Reserve Account,Withdrawals,Veterans Affairs programs,,491,491,491,II,Deposits and Withdrawals of Operating Cash,59,2013,1,2012,4,10,1
5,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Civil Service Retirement (EFT),4892,4892,4892,II,Deposits and Withdrawals of Operating Cash,60,2013,1,2012,4,10,1
6,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Emergency Prep & Response (DHS),63,63,63,II,Deposits and Withdrawals of Operating Cash,61,2013,1,2012,4,10,1
7,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Federal Financing Bank,113,113,113,II,Deposits and Withdrawals of Operating Cash,63,2013,1,2012,4,10,1
8,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,International Monetary Fund,100,100,100,II,Deposits and Withdrawals of Operating Cash,64,2013,1,2012,4,10,1
9,2012-10-01,Federal Reserve Account,Withdrawals,Supple. Nutrition Assist. Program (SNAP),,24,24,24,II,Deposits and Withdrawals of Operating Cash,55,2013,1,2012,4,10,1


In [3]:
# Load the crosswalk that maps raw (transaction_catg, transaction_catg_desc)
# pairs to a standardized category and agency.
category_crosswalk_file_path = "data/category_crosswalk.csv"
category_crosswalk = pd.read_csv(
    category_crosswalk_file_path,
    # Keep blank cells as empty strings rather than NaN, so blank
    # transaction_catg_desc values stay comparable as plain strings.
    keep_default_na=False,
)

# Schema and a sample of the crosswalk.
category_crosswalk.info()
category_crosswalk.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   transaction_catg       214 non-null    object
 1   transaction_catg_desc  214 non-null    object
 2   category               214 non-null    object
 3   agency                 214 non-null    object
 4   cbs_notes              214 non-null    object
dtypes: object(5)
memory usage: 8.5+ KB


Unnamed: 0,transaction_catg,transaction_catg_desc,category,agency,cbs_notes
0,District of Columbia,,District of Columbia,DC,
1,Other Withdrawals,District of Columbia,District of Columbia,DC,
2,DHS - Customs & Border Protection (CBP),,Customs & Border Protection,DHS,
3,Other Withdrawals,Customs and Borders Protection (DHS),Customs & Border Protection,DHS,
4,Dept of Homeland Security (DHS) - misc,,Department of Homeland Security,DHS,
5,DHS - Fed Emergency Mgmt Agency (FEMA),,Federal Emergency Management Agency,DHS,
6,Other Withdrawals,Emergency Prep & Response (DHS),Federal Emergency Management Agency,DHS,
7,Unemployment Assist - FEMA Lost Wage Pmt,,FEMA Lost Wages Payments,DHS,
8,DHS - Transportation Security Admn (TSA),,Transportation Security Administration,DHS,
9,Transportation Security Admin. (DHS),,Transportation Security Administration,DHS,


In [4]:
# Attach the standardized columns from the crosswalk (category, agency,
# cbs_notes) to every withdrawal row, matching on the raw category pair.
crosswalk_keys = ['transaction_catg', 'transaction_catg_desc']
crosswalk_value_columns = [
    column for column in category_crosswalk.columns
    if column not in crosswalk_keys
]
rows_before_merge = len(withdrawals)

withdrawals = (
    withdrawals
    # This cell overwrites `withdrawals`, so on a re-run the crosswalk columns
    # are already present. Drop them first; otherwise the merge would emit
    # category_x / category_y and later cells that read "category" would break.
    .drop(columns=crosswalk_value_columns, errors='ignore')
    .merge(category_crosswalk, how='left', on=crosswalk_keys)
)

# A left merge only preserves the row count when each key pair appears at most
# once in the crosswalk; duplicate keys would silently double-count spending.
assert len(withdrawals) == rows_before_merge, (
    "category_crosswalk has duplicate (transaction_catg, transaction_catg_desc) "
    "keys: the merge changed the number of withdrawal rows"
)

# Display the info and first few rows of the updated DataFrame
withdrawals.info()
withdrawals.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 184120 entries, 0 to 184119
Data columns (total 20 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              184120 non-null  datetime64[ns]
 1   account_type             184120 non-null  object        
 2   transaction_type         184120 non-null  object        
 3   transaction_catg         184120 non-null  object        
 4   transaction_catg_desc    184120 non-null  object        
 5   transaction_today_amt    184120 non-null  int64         
 6   transaction_mtd_amt      184120 non-null  object        
 7   transaction_fytd_amt     184120 non-null  object        
 8   table_nbr                184120 non-null  object        
 9   table_nm                 184120 non-null  object        
 10  src_line_nbr             184120 non-null  object        
 11  record_fiscal_year       184120 non-null  object        
 12  record_fiscal_qu

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day,category,agency,cbs_notes
0,2012-10-01,Federal Reserve Account,Withdrawals,Commodity Credit Corporation programs,,39,39,39,II,Deposits and Withdrawals of Operating Cash,33,2013,1,2012,4,10,1,USDA Commodity Credit Corporation,USDA,
1,2012-10-01,Federal Reserve Account,Withdrawals,Temporary Assistance for Needy Families (HHS),,31,31,31,II,Deposits and Withdrawals of Operating Cash,56,2013,1,2012,4,10,1,Temporary Assistance for Needy Families,HHS,
2,2012-10-01,Federal Reserve Account,Withdrawals,Transportation Security Admin. (DHS),,0,0,0,II,Deposits and Withdrawals of Operating Cash,57,2013,1,2012,4,10,1,Transportation Security Administration,DHS,
3,2012-10-01,Federal Reserve Account,Withdrawals,Unemployment Insurance Benefits,,210,210,210,II,Deposits and Withdrawals of Operating Cash,58,2013,1,2012,4,10,1,Unemployment Insurance Benefits,DOL,
4,2012-10-01,Federal Reserve Account,Withdrawals,Veterans Affairs programs,,491,491,491,II,Deposits and Withdrawals of Operating Cash,59,2013,1,2012,4,10,1,Department of Veterans Affairs,VA,
5,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Civil Service Retirement (EFT),4892,4892,4892,II,Deposits and Withdrawals of Operating Cash,60,2013,1,2012,4,10,1,Civil Service Retirement & Disability,OPM,
6,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Emergency Prep & Response (DHS),63,63,63,II,Deposits and Withdrawals of Operating Cash,61,2013,1,2012,4,10,1,Federal Emergency Management Agency,DHS,
7,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Federal Financing Bank,113,113,113,II,Deposits and Withdrawals of Operating Cash,63,2013,1,2012,4,10,1,Federal Financing Bank,Treasury,
8,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,International Monetary Fund,100,100,100,II,Deposits and Withdrawals of Operating Cash,64,2013,1,2012,4,10,1,International Monetary Fund,IMF,
9,2012-10-01,Federal Reserve Account,Withdrawals,Supple. Nutrition Assist. Program (SNAP),,24,24,24,II,Deposits and Withdrawals of Operating Cash,55,2013,1,2012,4,10,1,Supplemental Nutrition Assistance Program (SNAP),USDA,


In [5]:
# Count how many withdrawal rows carry each standardized category.
category_counts = (
    withdrawals['category']
    .value_counts()
    .rename_axis('category')
    .reset_index(name='count')
)

# Schema and the ten most frequent categories.
category_counts.info()
category_counts.head(10)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 123 entries, 0 to 122
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   category  123 non-null    object
 1   count     123 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.1+ KB


Unnamed: 0,category,count
0,Medicare,8412
1,Excluded Transfers Should Be Empty,4796
2,USDA Commodity Credit Corporation,3161
3,Interest on Treasury Securities,3161
4,Federal Employees Insurance Payments,3161
5,Federal Highway Administration,3161
6,Federal Employee Salaries,3161
7,Food and Nutrition Service / Child Nutrition,3161
8,General Services Administration,3161
9,Department of Education,3161


In [6]:
# Quick report on this year so far.
# Keep withdrawals dated from Jan. 20, 2025 through the current moment.
filtered_withdrawals = withdrawals[
    (withdrawals["record_date"] >= "2025-01-20") &
    (withdrawals["record_date"] <= pd.Timestamp.now())
]

# Sum transaction_today_amt per standardized category (the crosswalk column,
# not the raw transaction_catg), largest first.
category_spending = (
    filtered_withdrawals
    .groupby("category", as_index=False)
    .agg({"transaction_today_amt": "sum"})
    .rename(columns={"transaction_today_amt": "total_spending"})
    .sort_values(by="total_spending", ascending=False)
    .reset_index(drop=True)
)

# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that emitted a stray "None" line).
category_spending.info()
print(category_spending.head(10))

# Amounts in this dataset are in millions of dollars (the notebook treats a
# value of 100 as $100 million), so label the unit instead of printing cents.
total_spending_2025 = category_spending["total_spending"].sum()
print(f"Total spending in Trump Administration since Jan. 20: ${total_spending_2025:,.0f} million")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 96 entries, 0 to 95
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   category        96 non-null     object
 1   total_spending  96 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1.6+ KB
None
                           category  total_spending
0  Social Security Benefit Payments          416174
1                          Medicare          373271
2            Individual Tax Refunds          253079
3                          Medicaid          189358
4   Interest on Treasury Securities          175154
5             Department of Defense          133359
6                      Unclassified           87171
7         Federal Employee Salaries           71308
8         Veterans Affairs Benefits           68001
9           Department of Education           64469
Total spending in Trump Administration since Jan. 20: $2,430,436.00


In [7]:
# CALENDAR-YEAR totals per (category, agency), kept for integrity and sanity
# checks against the other aggregations in this notebook.

cy_group_keys = ["category", "agency"]

# One frame per calendar year 2013..2025. record_calendar_year is stored as
# text, hence the str(year) comparison. Each frame has the group keys plus a
# cy_total_<year> column holding the summed transaction_today_amt.
yearly_spending_list = [
    withdrawals.loc[withdrawals["record_calendar_year"].eq(str(cy))]
    .groupby(cy_group_keys, as_index=False)
    .agg(**{f"cy_total_{cy}": ("transaction_today_amt", "sum")})
    for cy in range(2013, 2026)
]

# Outer-merge the yearly frames one after another into a single wide table,
# so a category/agency pair present in any year gets a row.
cy_frames = iter(yearly_spending_list)
total_cy_spending = next(cy_frames)
for cy_frame in cy_frames:
    total_cy_spending = total_cy_spending.merge(cy_frame, on=cy_group_keys, how="outer")

# Years in which a pair has no rows become 0; order by 2024, the most recent
# full calendar year, largest first.
total_cy_spending = (
    total_cy_spending
    .fillna(0)
    .sort_values(by="cy_total_2024", ascending=False)
    .reset_index(drop=True)
)

# Persist the wide table for further analysis.
total_cy_spending.to_csv("output/total_annual_spending_CY_live.csv", index=False)

# Schema and the top rows of the wide table.
total_cy_spending.info()
total_cy_spending.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 124 entries, 0 to 123
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   category       124 non-null    object 
 1   agency         124 non-null    object 
 2   cy_total_2013  124 non-null    float64
 3   cy_total_2014  124 non-null    float64
 4   cy_total_2015  124 non-null    float64
 5   cy_total_2016  124 non-null    float64
 6   cy_total_2017  124 non-null    float64
 7   cy_total_2018  124 non-null    float64
 8   cy_total_2019  124 non-null    float64
 9   cy_total_2020  124 non-null    float64
 10  cy_total_2021  124 non-null    float64
 11  cy_total_2022  124 non-null    float64
 12  cy_total_2023  124 non-null    float64
 13  cy_total_2024  124 non-null    float64
 14  cy_total_2025  124 non-null    float64
dtypes: float64(13), object(2)
memory usage: 14.7+ KB


Unnamed: 0,category,agency,cy_total_2013,cy_total_2014,cy_total_2015,cy_total_2016,cy_total_2017,cy_total_2018,cy_total_2019,cy_total_2020,cy_total_2021,cy_total_2022,cy_total_2023,cy_total_2024,cy_total_2025
0,Social Security Benefit Payments,SSA,733297.0,770085.0,828724.0,802027.0,848662.0,881750.0,934338.0,996462.0,987040.0,1092176.0,1234004.0,1314180.0,500595.0
1,Medicare,HHS,575039.0,617792.0,639005.0,671211.0,702102.0,732098.0,786710.0,933490.0,856194.0,921773.0,1044782.0,1144779.0,450566.0
2,Medicaid,HHS,259718.0,309563.0,351968.0,367109.0,366781.0,383679.0,409163.0,476323.0,528138.0,594407.0,612060.0,627333.0,222390.0
3,Interest on Treasury Securities,Treasury,218238.0,226243.0,235495.0,237461.0,248086.0,276158.0,306855.0,311758.0,292967.0,303270.0,406904.0,534805.0,184902.0
4,Department of Defense,DOD,332566.0,311008.0,293467.0,287796.0,281109.0,325855.0,361517.0,372204.0,372111.0,402349.0,398241.0,459096.0,152267.0


In [8]:
# FISCAL-YEAR totals per (category, agency): the same aggregation as the
# calendar-year table, keyed on record_fiscal_year instead.

fy_group_keys = ["category", "agency"]

# One frame per fiscal year 2013..2025. record_fiscal_year is stored as text,
# hence the str(year) comparison. Each frame has the group keys plus a
# fy_total_<year> column holding the summed transaction_today_amt.
yearly_spending_list = [
    withdrawals.loc[withdrawals["record_fiscal_year"].eq(str(fy))]
    .groupby(fy_group_keys, as_index=False)
    .agg(**{f"fy_total_{fy}": ("transaction_today_amt", "sum")})
    for fy in range(2013, 2026)
]

# Outer-merge the yearly frames one after another into a single wide table,
# so a category/agency pair present in any year gets a row.
fy_frames = iter(yearly_spending_list)
total_fy_spending = next(fy_frames)
for fy_frame in fy_frames:
    total_fy_spending = total_fy_spending.merge(fy_frame, on=fy_group_keys, how="outer")

# Years in which a pair has no rows become 0; order by fiscal 2024, the most
# recent full fiscal year, largest first.
total_fy_spending = (
    total_fy_spending
    .fillna(0)
    .sort_values(by="fy_total_2024", ascending=False)
    .reset_index(drop=True)
)

# Persist the wide table for further analysis.
total_fy_spending.to_csv("output/total_annual_spending_FY_live.csv", index=False)

# Schema and the top rows of the wide table.
total_fy_spending.info()
total_fy_spending.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   category       125 non-null    object 
 1   agency         125 non-null    object 
 2   fy_total_2013  125 non-null    float64
 3   fy_total_2014  125 non-null    float64
 4   fy_total_2015  125 non-null    float64
 5   fy_total_2016  125 non-null    float64
 6   fy_total_2017  125 non-null    float64
 7   fy_total_2018  125 non-null    float64
 8   fy_total_2019  125 non-null    float64
 9   fy_total_2020  125 non-null    float64
 10  fy_total_2021  125 non-null    float64
 11  fy_total_2022  125 non-null    float64
 12  fy_total_2023  125 non-null    float64
 13  fy_total_2024  125 non-null    float64
 14  fy_total_2025  125 non-null    float64
dtypes: float64(13), object(2)
memory usage: 14.8+ KB


Unnamed: 0,category,agency,fy_total_2013,fy_total_2014,fy_total_2015,fy_total_2016,fy_total_2017,fy_total_2018,fy_total_2019,fy_total_2020,fy_total_2021,fy_total_2022,fy_total_2023,fy_total_2024,fy_total_2025
0,Social Security Benefit Payments,SSA,719760.0,761805.0,796360.0,820464.0,843112.0,873049.0,921207.0,964474.0,1000040.0,1070680.0,1198329.0,1293434.0,833467.0
1,Medicare,HHS,573184.0,604703.0,630499.0,686482.0,700247.0,702285.0,774237.0,915480.0,858398.0,935606.0,1029054.0,1067157.0,741655.0
2,Medicaid,HHS,258121.0,295015.0,342555.0,361788.0,368584.0,382403.0,404214.0,454212.0,514623.0,583853.0,610671.0,616138.0,380977.0
3,Interest on Treasury Securities,Treasury,224700.0,223299.0,233086.0,242560.0,239930.0,260375.0,305671.0,314679.0,294828.0,299450.0,372808.0,501518.0,327755.0
4,Department of Defense,DOD,343094.0,315607.0,296113.0,289703.0,278699.0,315345.0,350188.0,373641.0,370108.0,394136.0,382716.0,455503.0,276979.0


In [9]:
# Compare spending over the same post-Jan-20 window in every year.
# The window length is the number of days between Jan. 20 of the latest
# year in the data and the latest record_date.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# If the newest record falls before Jan. 20 of its year the window is negative,
# every yearly slice would be empty, and the outputs would silently be blank.
if days_elapsed < 0:
    raise ValueError(
        f"Latest record_date {max_date} is before January 20, {current_year}; "
        "the first-days window is empty"
    )

# One frame per year: category plus a column named after the year.
yearly_spending_list = []

# Loop through each year up to and including the latest year in the data
# (derived from current_year so the range cannot drift from days_elapsed).
for year in range(2013, current_year + 1):
    # Window for this year: Jan. 20 through Jan. 20 + days_elapsed, inclusive.
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)

    # Withdrawals recorded inside this year's window.
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]

    # Sum transaction_today_amt per standardized category.
    yearly_spending = (
        filtered_withdrawals
        .groupby("category", as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"{year}"})
    )

    yearly_spending_list.append(yearly_spending)

# Outer-merge the yearly frames into one wide table keyed by category.
first_days_spending = yearly_spending_list[0]
for yearly_spending in yearly_spending_list[1:]:
    first_days_spending = first_days_spending.merge(yearly_spending, on="category", how="outer")

# Categories absent from a given year's window get 0 for that year.
first_days_spending = first_days_spending.fillna(0)

# Change from the previous year's window to the latest year's window.
latest_year_column = str(current_year)
previous_year_column = str(current_year - 1)
first_days_spending["change_1yr"] = (
    first_days_spending[latest_year_column] - first_days_spending[previous_year_column]
)

# Largest year-over-year increases first.
first_days_spending = first_days_spending.sort_values(by="change_1yr", ascending=False).reset_index(drop=True)

# Persist the wide table for further analysis.
first_days_spending.to_csv("output/yearly_first_days_bycategory_live.csv", index=False)

# Schema, the window length, and the top rows of the wide table.
first_days_spending.info()
print(f"Days elapsed since January 20, {current_year}: {days_elapsed}")
first_days_spending.head(50)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   category    121 non-null    object 
 1   2013        121 non-null    float64
 2   2014        121 non-null    float64
 3   2015        121 non-null    float64
 4   2016        121 non-null    float64
 5   2017        121 non-null    float64
 6   2018        121 non-null    float64
 7   2019        121 non-null    float64
 8   2020        121 non-null    float64
 9   2021        121 non-null    float64
 10  2022        121 non-null    float64
 11  2023        121 non-null    float64
 12  2024        121 non-null    float64
 13  2025        121 non-null    float64
 14  change_1yr  121 non-null    float64
dtypes: float64(14), object(1)
memory usage: 14.3+ KB
Days elapsed since January 20, 2025: 102


Unnamed: 0,category,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,change_1yr
0,Social Security Benefit Payments,192756.0,228379.0,251492.0,233864.0,226458.0,235362.0,249961.0,283560.0,290313.0,293261.0,331686.0,352789.0,416174.0,63385.0
1,Medicare,175380.0,182953.0,191143.0,199050.0,211039.0,219208.0,246022.0,347764.0,263138.0,271447.0,312867.0,331462.0,373271.0,41809.0
2,Interest on Treasury Securities,60544.0,61689.0,65345.0,60653.0,68543.0,73546.0,86095.0,90547.0,82466.0,81677.0,112779.0,142477.0,175154.0,32677.0
3,Unclassified,91889.0,89398.0,89521.0,89848.0,87368.0,86641.0,91305.0,101898.0,146691.0,96067.0,119378.0,66127.0,87171.0,21044.0
4,Medicaid,72097.0,83738.0,100379.0,100818.0,104701.0,109731.0,117035.0,131629.0,142915.0,166045.0,179064.0,176391.0,189358.0,12967.0
5,Department of Defense,93849.0,90594.0,84832.0,83752.0,80096.0,88738.0,101095.0,107676.0,104061.0,109864.0,105532.0,121255.0,133359.0,12104.0
6,Department of Veterans Affairs,13384.0,15119.0,16459.0,16000.0,17454.0,17885.0,19617.0,17137.0,18296.0,21132.0,23688.0,25814.0,34043.0,8229.0
7,Department of Agriculture,1979.0,1712.0,1554.0,1696.0,1618.0,1685.0,2457.0,2862.0,12392.0,6898.0,6821.0,8049.0,15783.0,7734.0
8,DOD Health,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7678.0,7678.0
9,Individual Tax Refunds,219605.0,226681.0,227585.0,229923.0,236040.0,241232.0,236349.0,387948.0,221710.0,255800.0,229051.0,245602.0,253079.0,7477.0


In [10]:
# Keep categories whose 2025 window total is at least 100 (the $100 million
# threshold for this table) and leave out the catch-all "Unclassified" bucket.
at_or_above_threshold = first_days_spending["2025"].ge(100)
not_unclassified = first_days_spending["category"].ne("Unclassified")
first_days_spending_100m = first_days_spending.loc[at_or_above_threshold & not_unclassified]

# Print the filtered table for inspection.
print(first_days_spending_100m)

# Persist the filtered table.
first_days_spending_100m.to_csv("output/yearly_first_days_bycategory_100m_live.csv", index=False)



                                         category      2013      2014  \
0                Social Security Benefit Payments  192756.0  228379.0   
1                                        Medicare  175380.0  182953.0   
2                 Interest on Treasury Securities   60544.0   61689.0   
4                                        Medicaid   72097.0   83738.0   
5                           Department of Defense   93849.0   90594.0   
..                                            ...       ...       ...   
116  Agency for International Development (USAID)    1671.0    2353.0   
117                          Business Tax Refunds    9324.0    6926.0   
118             Federal Communications Commission       0.0       0.0   
119          Federal Employees Insurance Payments   20411.0   21340.0   
120                       Department of Education   57334.0   58480.0   

         2015      2016      2017      2018      2019      2020      2021  \
0    251492.0  233864.0  226458.0  235362.0  2

In [11]:
def _first_days_rows_for(categories):
    """Return the first_days_spending rows for the given categories.

    The result keeps only rows whose "category" is in `categories` and omits
    the change_1yr column, leaving category plus the per-year columns.
    """
    selected = first_days_spending[first_days_spending["category"].isin(categories)]
    return selected.drop(columns=["change_1yr"])


# Medicare, Medicaid and Social Security.
senior_programs = _first_days_rows_for([
    "Medicare",
    "Medicaid",
    "Social Security Benefit Payments",
])

# Interest on Treasury securities.
interest_payments = _first_days_rows_for(["Interest on Treasury Securities"])

# Military active-duty pay, military retirement and veterans benefits.
military_veterans_direct = _first_days_rows_for([
    "DOD Military Active Duty Pay",
    "DOD Military Retirement",
    "Veterans Affairs Benefits",
])

# Federal employee salaries and civil service retirement.
federal_employee_salaries = _first_days_rows_for([
    "Federal Employee Salaries",
    "Civil Service Retirement & Disability",
])

# The set of agencies and programs tracked under the "decreasing" label.
decreasing_agencies = _first_days_rows_for([
    "Department of Education",
    "Agency for International Development (USAID)",
    "Department of State",
    "United States Postal Service",
    "Federal Communications Commission",
    "Federal Employees Insurance Payments",
    "HHS Payments to States",
])

# Write all five extracts to CSV.
senior_programs.to_csv("output/senior_programs_first_days_live.csv", index=False)
interest_payments.to_csv("output/interest_payments_first_days_live.csv", index=False)
military_veterans_direct.to_csv("output/military_veterans_first_days_live.csv", index=False)
federal_employee_salaries.to_csv("output/federal_employee_salaries_first_days_live.csv", index=False)
decreasing_agencies.to_csv("output/decreasing_agencies_first_days_live.csv", index=False)


In [12]:
# Grand total of all withdrawals in the same post-Jan-20 window, per year.

# Window length: days between Jan. 20 of the latest year in the data and the
# latest record_date.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days
window_length = pd.Timedelta(days=days_elapsed)

# One {"year", "total_spending"} record per year 2013..2025.
yearly_totals = []
for window_year in range(2013, 2026):
    # Window for this year: Jan. 20 through Jan. 20 + days_elapsed, inclusive.
    window_start = pd.Timestamp(f"{window_year}-01-20")
    in_window = withdrawals["record_date"].between(window_start, window_start + window_length)

    yearly_totals.append({
        "year": window_year,
        "total_spending": withdrawals.loc[in_window, "transaction_today_amt"].sum(),
    })

# Collect the records into a two-column frame.
yearly_first_days_totals = pd.DataFrame(yearly_totals)

# Persist the yearly totals for further analysis.
yearly_first_days_totals.to_csv("output/yearly_first_days_totals_live.csv", index=False)

# Schema and all rows of the yearly totals.
yearly_first_days_totals.info()
yearly_first_days_totals.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   year            13 non-null     int64
 1   total_spending  13 non-null     int64
dtypes: int64(2)
memory usage: 340.0 bytes


Unnamed: 0,year,total_spending
0,2013,1357248
1,2014,1402849
2,2015,1464556
3,2016,1457624
4,2017,1473121
5,2018,1540146
6,2019,1645528
7,2020,2264582
8,2021,2513583
9,2022,2076496


In [13]:
# Scratch arithmetic: the difference between two hand-entered figures.
# NOTE(review): the operands are presumably spending totals copied from an
# earlier output; their source is not shown here — confirm, or compute the
# difference from the DataFrames instead of literals.
2193317-1974921


218396

In [14]:
# Calendar scaffold for the day-by-day web charts: one row per date with its
# day number counted from Jan. 20 of that date's year (Jan. 20 itself is 1).
all_dates = pd.date_range(start="2013-01-20", end="2025-04-30", freq='D')
all_dates_df = pd.DataFrame({"record_date": all_dates})

# Days since Jan. 20 of the same year, plus one. Dates before Jan. 20 come out
# as zero or negative; dates late in the year come out above 100.
jan_20_same_year = pd.to_datetime(all_dates_df["record_date"].dt.year.astype(str) + "-01-20")
all_dates_df["day_number"] = (all_dates_df["record_date"] - jan_20_same_year).dt.days + 1

# Keep only day numbers 1 through 100 of each year; the original row labels
# are retained.
all_dates_df = all_dates_df[all_dates_df["day_number"].between(1, 100)]

# Schema and rows of the calendar scaffold.
all_dates_df.info()
all_dates_df.head(2000)


<class 'pandas.core.frame.DataFrame'>
Index: 1300 entries, 0 to 4482
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   record_date  1300 non-null   datetime64[ns]
 1   day_number   1300 non-null   int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 30.5 KB


Unnamed: 0,record_date,day_number
0,2013-01-20,1
1,2013-01-21,2
2,2013-01-22,3
3,2013-01-23,4
4,2013-01-24,5
...,...,...
4478,2025-04-25,96
4479,2025-04-26,97
4480,2025-04-27,98
4481,2025-04-28,99


In [15]:
# Daily and running totals of withdrawals inside the post-Jan-20 window of
# each year, stacked into one long frame.

# Window length: days between Jan. 20 of the latest year in the data and the
# latest record_date.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days
window_length = pd.Timedelta(days=days_elapsed)

# One frame per year 2013..2025.
daily_spending_list = []
for window_year in range(2013, 2026):
    # Window for this year: Jan. 20 through Jan. 20 + days_elapsed, inclusive.
    window_start = pd.Timestamp(f"{window_year}-01-20")
    in_window = withdrawals["record_date"].between(window_start, window_start + window_length)

    # Total per record_date, then the running total within this year's window
    # and the year label. day_number is not computed here; the calendar frame
    # all_dates_df carries it and is joined on record_date afterwards.
    daily_spending_list.append(
        withdrawals.loc[in_window]
        .groupby("record_date", as_index=False)
        .agg(daily_total=("transaction_today_amt", "sum"))
        .assign(
            cumulative_total=lambda days: days["daily_total"].cumsum(),
            year=window_year,
        )
    )

# Stack the yearly frames with a fresh 0..n-1 index.
all_daily_spending = pd.concat(daily_spending_list, ignore_index=True)

# Schema and the first rows of the stacked frame.
all_daily_spending.info()
all_daily_spending.head(200)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 940 entries, 0 to 939
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   record_date       940 non-null    datetime64[ns]
 1   daily_total       940 non-null    int64         
 2   cumulative_total  940 non-null    int64         
 3   year              940 non-null    int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 29.5 KB


Unnamed: 0,record_date,daily_total,cumulative_total,year
0,2013-01-22,12417,12417,2013
1,2013-01-23,23528,35945,2013
2,2013-01-24,13291,49236,2013
3,2013-01-25,9961,59197,2013
4,2013-01-28,9566,68763,2013
...,...,...,...,...
195,2015-04-01,58839,1069061,2015
196,2015-04-02,10306,1079367,2015
197,2015-04-03,34590,1113957,2015
198,2015-04-06,9097,1123054,2015


In [16]:
# Left-merge the daily spending onto all_dates_df so every date in all_dates_df keeps a row,
# including dates that have no matching row in all_daily_spending
all_dates_with_spending = all_dates_df.merge(
    all_daily_spending,
    on="record_date",
    how="left"
)

# Make sure we're sorted by record_date (the forward fill below depends on this order)
all_dates_with_spending = all_dates_with_spending.sort_values(by="record_date").reset_index(drop=True)

# Dates without a spending row get a daily total of 0
all_dates_with_spending["daily_total"] = all_dates_with_spending["daily_total"].fillna(0)

# For Jan 20-21 of 2013 force daily_total and cumulative_total to 0.
# Compare against real datetimes: matching a datetime64 column against strings in isin()
# is deprecated in pandas and is slated to stop matching.
first_two_days_2013 = pd.to_datetime(["2013-01-20", "2013-01-21"])
all_dates_with_spending.loc[
    all_dates_with_spending["record_date"].isin(first_two_days_2013),
    ["daily_total", "cumulative_total"]
] = 0.0

# Start each year's window at 0 when day 1 has no cumulative figure. This also guarantees
# the forward fill below never carries a value from one year's window into the next.
all_dates_with_spending.loc[
    (all_dates_with_spending["day_number"] == 1) & (all_dates_with_spending["cumulative_total"].isna()),
    "cumulative_total"
] = 0.0

# Fill remaining gaps in cumulative_total with the prior figure
# (Series.ffill() replaces the deprecated fillna(method='ffill'))
all_dates_with_spending["cumulative_total"] = all_dates_with_spending["cumulative_total"].ffill()

# Repopulate year from record_date, since the merged year is NaN on dates without spending rows
all_dates_with_spending["year"] = all_dates_with_spending["record_date"].dt.year

# Display the info and first few rows of the merged DataFrame
all_dates_with_spending.info()
all_dates_with_spending.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300 entries, 0 to 1299
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   record_date       1300 non-null   datetime64[ns]
 1   day_number        1300 non-null   int64         
 2   daily_total       1300 non-null   float64       
 3   cumulative_total  1300 non-null   float64       
 4   year              1300 non-null   int32         
dtypes: datetime64[ns](1), float64(2), int32(1), int64(1)
memory usage: 45.8 KB


  (all_dates_with_spending["record_date"].isin(["2013-01-20", "2013-01-21"])),
  all_dates_with_spending["cumulative_total"] = all_dates_with_spending["cumulative_total"].fillna(method='ffill')


Unnamed: 0,record_date,day_number,daily_total,cumulative_total,year
0,2013-01-20,1,0.0,0.0,2013
1,2013-01-21,2,0.0,0.0,2013
2,2013-01-22,3,12417.0,12417.0,2013
3,2013-01-23,4,23528.0,35945.0,2013
4,2013-01-24,5,13291.0,49236.0,2013


In [17]:
# Wide table for the web charts: cumulative spending over the first 100 days, one column per year

# Reshape so day_number forms the rows, year forms the columns,
# and each cell holds that day's cumulative_total
pivoted_spending = (
    all_dates_with_spending
    .set_index(["day_number", "year"])["cumulative_total"]
    .unstack("year")
)

# Write the wide table to CSV for the charts
pivoted_spending.to_csv("output/first100_cum_spending_by_year_live.csv")

# Summarize the wide table and preview up to 100 rows
pivoted_spending.info()
pivoted_spending.head(100)



<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 1 to 100
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2013    100 non-null    float64
 1   2014    100 non-null    float64
 2   2015    100 non-null    float64
 3   2016    100 non-null    float64
 4   2017    100 non-null    float64
 5   2018    100 non-null    float64
 6   2019    100 non-null    float64
 7   2020    100 non-null    float64
 8   2021    100 non-null    float64
 9   2022    100 non-null    float64
 10  2023    100 non-null    float64
 11  2024    100 non-null    float64
 12  2025    100 non-null    float64
dtypes: float64(13)
memory usage: 10.9 KB


year,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025
day_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0.0,0.0,11507.0,23720.0,11292.0,0.0,0.0,0.0,42616.0,13414.0,22687.0,0.0,0.0
2,0.0,11777.0,36902.0,34578.0,11292.0,0.0,0.0,10948.0,60861.0,37977.0,22687.0,0.0,17941.0
3,12417.0,35430.0,50071.0,45044.0,11292.0,15722.0,13842.0,45781.0,84738.0,37977.0,22687.0,22248.0,72520.0
4,35945.0,45847.0,59281.0,45044.0,19369.0,24658.0,42858.0,58553.0,84738.0,37977.0,39159.0,36415.0,90137.0
5,49236.0,55294.0,59281.0,45044.0,31206.0,51227.0,55805.0,71666.0,84738.0,52043.0,53367.0,74451.0,104818.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,1254754.0,1275941.0,1335082.0,1339925.0,1332846.0,1417038.0,1514113.0,2047260.0,2324908.0,1871140.0,1969649.0,1974921.0,2193317.0
97,1267066.0,1275941.0,1335082.0,1351572.0,1367144.0,1432749.0,1523678.0,2047260.0,2341448.0,1887102.0,2019134.0,1993566.0,2193317.0
98,1267066.0,1275941.0,1345874.0,1365503.0,1379777.0,1441755.0,1523678.0,2047260.0,2359532.0,1938125.0,2037578.0,2013836.0,2193317.0
99,1267066.0,1286712.0,1355689.0,1395907.0,1390784.0,1441755.0,1523678.0,2066862.0,2404579.0,1954258.0,2062124.0,2013836.0,2211159.0
