In [61]:
import pandas as pd
import datawrapper as dw
import requests

In [62]:
# Load the latest clean withdrawals data (the parquet file refreshed by the update step)
withdrawals_combined_file_path = "data/withdrawals.parquet"
withdrawals = pd.read_parquet(withdrawals_combined_file_path)

# Sanity check: span of record_date, then the schema and the first ten rows
first_record_date = withdrawals["record_date"].min()
last_record_date = withdrawals["record_date"].max()
print(f"Date range of this data: from {first_record_date} to {last_record_date}")
withdrawals.info()
withdrawals.head(10)


Date range of this data: from 2012-10-01 00:00:00 to 2025-04-22 00:00:00
<class 'pandas.core.frame.DataFrame'>
Index: 183312 entries, 0 to 187583
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              183312 non-null  datetime64[ns]
 1   account_type             183312 non-null  object        
 2   transaction_type         183312 non-null  object        
 3   transaction_catg         183312 non-null  object        
 4   transaction_catg_desc    183312 non-null  object        
 5   transaction_today_amt    183312 non-null  int64         
 6   transaction_mtd_amt      183312 non-null  object        
 7   transaction_fytd_amt     183312 non-null  object        
 8   table_nbr                183312 non-null  object        
 9   table_nm                 183312 non-null  object        
 10  src_line_nbr             183312 non-null  object        
 11  record_fis

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2012-10-01,Federal Reserve Account,Withdrawals,Commodity Credit Corporation programs,,39,39,39,II,Deposits and Withdrawals of Operating Cash,33,2013,1,2012,4,10,1
1,2012-10-01,Federal Reserve Account,Withdrawals,Temporary Assistance for Needy Families (HHS),,31,31,31,II,Deposits and Withdrawals of Operating Cash,56,2013,1,2012,4,10,1
2,2012-10-01,Federal Reserve Account,Withdrawals,Transportation Security Admin. (DHS),,0,0,0,II,Deposits and Withdrawals of Operating Cash,57,2013,1,2012,4,10,1
3,2012-10-01,Federal Reserve Account,Withdrawals,Unemployment Insurance Benefits,,210,210,210,II,Deposits and Withdrawals of Operating Cash,58,2013,1,2012,4,10,1
4,2012-10-01,Federal Reserve Account,Withdrawals,Veterans Affairs programs,,491,491,491,II,Deposits and Withdrawals of Operating Cash,59,2013,1,2012,4,10,1
5,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Civil Service Retirement (EFT),4892,4892,4892,II,Deposits and Withdrawals of Operating Cash,60,2013,1,2012,4,10,1
6,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Emergency Prep & Response (DHS),63,63,63,II,Deposits and Withdrawals of Operating Cash,61,2013,1,2012,4,10,1
7,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Federal Financing Bank,113,113,113,II,Deposits and Withdrawals of Operating Cash,63,2013,1,2012,4,10,1
8,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,International Monetary Fund,100,100,100,II,Deposits and Withdrawals of Operating Cash,64,2013,1,2012,4,10,1
9,2012-10-01,Federal Reserve Account,Withdrawals,Supple. Nutrition Assist. Program (SNAP),,24,24,24,II,Deposits and Withdrawals of Operating Cash,55,2013,1,2012,4,10,1


In [63]:
# Load the crosswalk that maps raw transaction categories to standardized ones.
# keep_default_na=False leaves blank cells as empty strings rather than NaN.
category_crosswalk_file_path = "data/category_crosswalk.csv"
category_crosswalk = pd.read_csv(
    category_crosswalk_file_path,
    keep_default_na=False,
)

# Inspect the schema and the first ten rows of the crosswalk
category_crosswalk.info()
category_crosswalk.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 214 entries, 0 to 213
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   transaction_catg       214 non-null    object
 1   transaction_catg_desc  214 non-null    object
 2   category               214 non-null    object
 3   agency                 214 non-null    object
 4   cbs_notes              214 non-null    object
dtypes: object(5)
memory usage: 8.5+ KB


Unnamed: 0,transaction_catg,transaction_catg_desc,category,agency,cbs_notes
0,District of Columbia,,District of Columbia,DC,
1,Other Withdrawals,District of Columbia,District of Columbia,DC,
2,DHS - Customs & Border Protection (CBP),,Customs & Border Protection,DHS,
3,Other Withdrawals,Customs and Borders Protection (DHS),Customs & Border Protection,DHS,
4,Dept of Homeland Security (DHS) - misc,,Department of Homeland Security,DHS,
5,DHS - Fed Emergency Mgmt Agency (FEMA),,Federal Emergency Management Agency,DHS,
6,Other Withdrawals,Emergency Prep & Response (DHS),Federal Emergency Management Agency,DHS,
7,Unemployment Assist - FEMA Lost Wage Pmt,,FEMA Lost Wages Payments,DHS,
8,DHS - Transportation Security Admn (TSA),,Transportation Security Administration,DHS,
9,Transportation Security Admin. (DHS),,Transportation Security Administration,DHS,


In [64]:
# Merge data with crosswalk to standardize categories by adding new columns
# (every crosswalk column other than the two join keys).
crosswalk_keys = ["transaction_catg", "transaction_catg_desc"]
crosswalk_value_columns = [
    column for column in category_crosswalk.columns if column not in crosswalk_keys
]

# Drop any crosswalk columns left over from a previous run of this cell so that
# re-running it does not produce suffixed duplicates (category_x / category_y).
withdrawals = (
    withdrawals
    .drop(columns=crosswalk_value_columns, errors="ignore")
    .merge(
        category_crosswalk,
        how="left",
        on=crosswalk_keys,
        # Each key pair must appear only once in the crosswalk; a duplicate
        # would silently duplicate withdrawal rows and inflate later totals.
        validate="many_to_one",
    )
)

# Display the info and first few rows of the updated DataFrame
withdrawals.info()
withdrawals.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 183312 entries, 0 to 183311
Data columns (total 20 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              183312 non-null  datetime64[ns]
 1   account_type             183312 non-null  object        
 2   transaction_type         183312 non-null  object        
 3   transaction_catg         183312 non-null  object        
 4   transaction_catg_desc    183312 non-null  object        
 5   transaction_today_amt    183312 non-null  int64         
 6   transaction_mtd_amt      183312 non-null  object        
 7   transaction_fytd_amt     183312 non-null  object        
 8   table_nbr                183312 non-null  object        
 9   table_nm                 183312 non-null  object        
 10  src_line_nbr             183312 non-null  object        
 11  record_fiscal_year       183312 non-null  object        
 12  record_fiscal_qu

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day,category,agency,cbs_notes
0,2012-10-01,Federal Reserve Account,Withdrawals,Commodity Credit Corporation programs,,39,39,39,II,Deposits and Withdrawals of Operating Cash,33,2013,1,2012,4,10,1,USDA Commodity Credit Corporation,USDA,
1,2012-10-01,Federal Reserve Account,Withdrawals,Temporary Assistance for Needy Families (HHS),,31,31,31,II,Deposits and Withdrawals of Operating Cash,56,2013,1,2012,4,10,1,Temporary Assistance for Needy Families,HHS,
2,2012-10-01,Federal Reserve Account,Withdrawals,Transportation Security Admin. (DHS),,0,0,0,II,Deposits and Withdrawals of Operating Cash,57,2013,1,2012,4,10,1,Transportation Security Administration,DHS,
3,2012-10-01,Federal Reserve Account,Withdrawals,Unemployment Insurance Benefits,,210,210,210,II,Deposits and Withdrawals of Operating Cash,58,2013,1,2012,4,10,1,Unemployment Insurance Benefits,DOL,
4,2012-10-01,Federal Reserve Account,Withdrawals,Veterans Affairs programs,,491,491,491,II,Deposits and Withdrawals of Operating Cash,59,2013,1,2012,4,10,1,Veterans Affairs Benefits,VA,
5,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Civil Service Retirement (EFT),4892,4892,4892,II,Deposits and Withdrawals of Operating Cash,60,2013,1,2012,4,10,1,Civil Service Retirement & Disability,OPM,
6,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Emergency Prep & Response (DHS),63,63,63,II,Deposits and Withdrawals of Operating Cash,61,2013,1,2012,4,10,1,Federal Emergency Management Agency,DHS,
7,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,Federal Financing Bank,113,113,113,II,Deposits and Withdrawals of Operating Cash,63,2013,1,2012,4,10,1,Federal Financing Bank,Treasury,
8,2012-10-01,Federal Reserve Account,Withdrawals,Other Withdrawals,International Monetary Fund,100,100,100,II,Deposits and Withdrawals of Operating Cash,64,2013,1,2012,4,10,1,International Monetary Fund,IMF,
9,2012-10-01,Federal Reserve Account,Withdrawals,Supple. Nutrition Assist. Program (SNAP),,24,24,24,II,Deposits and Withdrawals of Operating Cash,55,2013,1,2012,4,10,1,Supplemental Nutrition Assistance Program (SNAP),USDA,


In [65]:
# Count how many withdrawal rows fall under each standardized category
category_counts = (
    withdrawals["category"]
    .value_counts()
    .rename_axis("category")
    .reset_index(name="count")
)

# Show the schema and the ten most frequent categories
category_counts.info()
category_counts.head(10)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 124 entries, 0 to 123
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   category  124 non-null    object
 1   count     124 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 2.1+ KB


Unnamed: 0,category,count
0,Medicare and CMS,5729
1,Excluded Transfers Should Be Empty,4796
2,Veterans Affairs Benefits,3403
3,Department of Housing and Urban Development,3153
4,Federal Employees Insurance Payments,3153
5,Federal Highway Administration,3153
6,Federal Employee Salaries,3153
7,General Services Administration,3153
8,Department of Education,3153
9,Interest on Treasury Securities,3153


In [66]:
# Quick report on this year so far
# Filter for spending between Jan. 20, 2025, and the most recent date
filtered_withdrawals = withdrawals[
    withdrawals["record_date"].between("2025-01-20", pd.Timestamp.now())
]

# Sum transaction_today_amt per standardized category, largest totals first
category_spending = (
    filtered_withdrawals
    .groupby("category", as_index=False)
    .agg(total_spending=("transaction_today_amt", "sum"))
    .sort_values(by="total_spending", ascending=False)
    .reset_index(drop=True)
)

# Display the info and first few rows of the category spending DataFrame.
# info() writes its own report and returns None, so it is called directly:
# wrapping it in print() would add a stray "None" line to the output.
category_spending.info()
print(category_spending.head(10))

total_spending_2025 = category_spending["total_spending"].sum()
# NOTE(review): transaction_today_amt appears to be reported in millions of
# dollars (Daily Treasury Statement convention) -- confirm, and consider
# labelling the unit in the message below.
print(f"Total spending in Trump Administration since Jan. 20: ${total_spending_2025:,.2f}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97 entries, 0 to 96
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   category        97 non-null     object
 1   total_spending  97 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1.6+ KB
None
                           category  total_spending
0  Social Security Benefit Payments          360369
1                  Medicare and CMS          259009
2            Individual Tax Refunds          227647
3                          Medicaid          169611
4   Interest on Treasury Securities          147433
5             Department of Defense          120887
6                      Unclassified           74466
7         Federal Employee Salaries           61474
8           Department of Education           60354
9         Veterans Affairs Benefits           51847
Total spending in Trump Administration since Jan. 20: $2,100,342.00


In [67]:
# REMEMBER THIS IS FOR CALENDAR YEAR TOTALS
# FOR INTEGRITY AND SANITY CHECKS

# Calendar years to report, and the year whose totals order the rows
# (2024 is the most recent full year in the data).
cy_first_year, cy_last_year = 2013, 2025
cy_sort_year = 2024

# record_calendar_year is compared as text, so build the year labels as strings
cy_labels = [str(year) for year in range(cy_first_year, cy_last_year + 1)]

# One pass: sum transaction_today_amt per (category, agency, calendar year),
# then spread the years into columns to form the pivot table.
total_cy_spending = (
    withdrawals[withdrawals["record_calendar_year"].isin(cy_labels)]
    .groupby(["category", "agency", "record_calendar_year"])["transaction_today_amt"]
    .sum()
    # Categories that don't appear in some years get 0 for those years
    .unstack("record_calendar_year", fill_value=0)
    # Guarantee one column per requested year, even a year with no rows at all
    .reindex(columns=cy_labels, fill_value=0)
    # Keep the totals as floats so the saved CSV keeps its existing format
    .astype(float)
    .add_prefix("cy_total_")
    .rename_axis(columns=None)
    .reset_index()
    # Order rows by spending in the most recent full year, largest first
    .sort_values(by=f"cy_total_{cy_sort_year}", ascending=False)
    .reset_index(drop=True)
)

# Save the pivot table to a CSV file for further analysis
total_cy_spending.to_csv("output/total_annual_spending_CY.csv", index=False)

# Display the info and first few rows of the pivot table
total_cy_spending.info()
total_cy_spending.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 125 entries, 0 to 124
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   category       125 non-null    object 
 1   agency         125 non-null    object 
 2   cy_total_2013  125 non-null    float64
 3   cy_total_2014  125 non-null    float64
 4   cy_total_2015  125 non-null    float64
 5   cy_total_2016  125 non-null    float64
 6   cy_total_2017  125 non-null    float64
 7   cy_total_2018  125 non-null    float64
 8   cy_total_2019  125 non-null    float64
 9   cy_total_2020  125 non-null    float64
 10  cy_total_2021  125 non-null    float64
 11  cy_total_2022  125 non-null    float64
 12  cy_total_2023  125 non-null    float64
 13  cy_total_2024  125 non-null    float64
 14  cy_total_2025  125 non-null    float64
dtypes: float64(13), object(2)
memory usage: 14.8+ KB


Unnamed: 0,category,agency,cy_total_2013,cy_total_2014,cy_total_2015,cy_total_2016,cy_total_2017,cy_total_2018,cy_total_2019,cy_total_2020,cy_total_2021,cy_total_2022,cy_total_2023,cy_total_2024,cy_total_2025
0,Social Security Benefit Payments,SSA,733297.0,770085.0,828724.0,802027.0,848662.0,881750.0,934338.0,996462.0,987040.0,1092176.0,1234004.0,1314180.0,444790.0
1,Medicare and CMS,HHS,575039.0,557084.0,387283.0,393878.0,404010.0,416309.0,426827.0,766623.0,763882.0,809424.0,927544.0,1012977.0,320821.0
2,Medicaid,HHS,259718.0,309563.0,351968.0,367109.0,366781.0,383679.0,409163.0,476323.0,528138.0,594407.0,612060.0,627333.0,202643.0
3,Interest on Treasury Securities,Treasury,218238.0,226243.0,235495.0,237461.0,248086.0,276158.0,306855.0,311758.0,292967.0,303270.0,406904.0,534805.0,157181.0
4,Department of Defense,DOD,332566.0,311008.0,293467.0,287796.0,281109.0,325855.0,361517.0,372204.0,372111.0,402349.0,398241.0,459096.0,139795.0


In [68]:
# REPEATING TOTALS FOR FISCAL YEARS

# Fiscal years to report, and the year whose totals order the rows
# (fiscal 2024 is the most recent full fiscal year in the data).
fy_first_year, fy_last_year = 2013, 2025
fy_sort_year = 2024

# record_fiscal_year is compared as text, so build the year labels as strings
fy_labels = [str(year) for year in range(fy_first_year, fy_last_year + 1)]

# One pass: sum transaction_today_amt per (category, agency, fiscal year),
# then spread the years into columns to form the pivot table.
total_fy_spending = (
    withdrawals[withdrawals["record_fiscal_year"].isin(fy_labels)]
    .groupby(["category", "agency", "record_fiscal_year"])["transaction_today_amt"]
    .sum()
    # Categories that don't appear in some years get 0 for those years
    .unstack("record_fiscal_year", fill_value=0)
    # Guarantee one column per requested year, even a year with no rows at all
    .reindex(columns=fy_labels, fill_value=0)
    # Keep the totals as floats so the saved CSV keeps its existing format
    .astype(float)
    .add_prefix("fy_total_")
    .rename_axis(columns=None)
    .reset_index()
    # Order rows by spending in the most recent full fiscal year, largest first
    .sort_values(by=f"fy_total_{fy_sort_year}", ascending=False)
    .reset_index(drop=True)
)

# Save the pivot table to a CSV file for further analysis
total_fy_spending.to_csv("output/total_annual_spending_FY.csv", index=False)

# Display the info and first few rows of the pivot table
total_fy_spending.info()
total_fy_spending.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126 entries, 0 to 125
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   category       126 non-null    object 
 1   agency         126 non-null    object 
 2   fy_total_2013  126 non-null    float64
 3   fy_total_2014  126 non-null    float64
 4   fy_total_2015  126 non-null    float64
 5   fy_total_2016  126 non-null    float64
 6   fy_total_2017  126 non-null    float64
 7   fy_total_2018  126 non-null    float64
 8   fy_total_2019  126 non-null    float64
 9   fy_total_2020  126 non-null    float64
 10  fy_total_2021  126 non-null    float64
 11  fy_total_2022  126 non-null    float64
 12  fy_total_2023  126 non-null    float64
 13  fy_total_2024  126 non-null    float64
 14  fy_total_2025  126 non-null    float64
dtypes: float64(13), object(2)
memory usage: 14.9+ KB


Unnamed: 0,category,agency,fy_total_2013,fy_total_2014,fy_total_2015,fy_total_2016,fy_total_2017,fy_total_2018,fy_total_2019,fy_total_2020,fy_total_2021,fy_total_2022,fy_total_2023,fy_total_2024,fy_total_2025
0,Social Security Benefit Payments,SSA,719760.0,761805.0,796360.0,820464.0,843112.0,873049.0,921207.0,964474.0,1000040.0,1070680.0,1198329.0,1293434.0,777662.0
1,Medicare and CMS,HHS,573184.0,604703.0,387303.0,392510.0,400080.0,411629.0,425856.0,687306.0,760631.0,818692.0,913142.0,951082.0,579029.0
2,Medicaid,HHS,258121.0,295015.0,342555.0,361788.0,368584.0,382403.0,404214.0,454212.0,514623.0,583853.0,610671.0,616138.0,361230.0
3,Interest on Treasury Securities,Treasury,224700.0,223299.0,233086.0,242560.0,239930.0,260375.0,305671.0,314679.0,294828.0,299450.0,372808.0,501518.0,300034.0
4,Department of Defense,DOD,343094.0,315607.0,296113.0,289703.0,278699.0,315345.0,350188.0,373641.0,370108.0,394136.0,382716.0,455503.0,264507.0


In [69]:
# Compare spending over the same number of days after Jan. 20 in every year.

# The window length is set by the latest record in the data: the number of
# days between Jan. 20 of that record's year and the record itself.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
previous_year = current_year - 1
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# First year to compare; the loop runs through the data's current year so it
# stays in step with days_elapsed instead of relying on a hard-coded end year.
first_comparison_year = 2013

# Create an empty list to store yearly spending DataFrames
yearly_spending_list = []

# Loop through each year's Jan. 20 window
for year in range(first_comparison_year, current_year + 1):
    # Window runs from Jan. 20 of this year through days_elapsed days later
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)

    # Keep only withdrawals dated inside the window (both ends inclusive)
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]

    # Sum transaction_today_amt for each standardized category
    yearly_spending = (
        filtered_withdrawals
        .groupby("category", as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"total_spending_{year}"})
    )

    # Append the yearly spending DataFrame to the list
    yearly_spending_list.append(yearly_spending)

# Merge all yearly spending DataFrames into a single pivot table
first_days_spending = yearly_spending_list[0]
for yearly_spending in yearly_spending_list[1:]:
    first_days_spending = first_days_spending.merge(yearly_spending, on="category", how="outer")

# Fill NaN values with 0 for categories that don't appear in some years
first_days_spending = first_days_spending.fillna(0)

# Add a column for the change in spending from the previous year to the current year
first_days_spending["change_1yr"] = (
    first_days_spending[f"total_spending_{current_year}"]
    - first_days_spending[f"total_spending_{previous_year}"]
)

# Sort the pivot table by the one-year change, largest increase first
first_days_spending = first_days_spending.sort_values(by="change_1yr", ascending=False).reset_index(drop=True)

# Save the pivot table to a CSV file for further analysis
first_days_spending.to_csv("output/yearly_first_days_bycategory.csv", index=False)

# Display the info and first few rows of the pivot table
first_days_spending.info()
# Print days elapsed since Jan. 20 of the current year
print(f"Days elapsed since January 20, {current_year}: {days_elapsed}")
first_days_spending.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 122 entries, 0 to 121
Data columns (total 15 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   category             122 non-null    object 
 1   total_spending_2013  122 non-null    float64
 2   total_spending_2014  122 non-null    float64
 3   total_spending_2015  122 non-null    float64
 4   total_spending_2016  122 non-null    float64
 5   total_spending_2017  122 non-null    float64
 6   total_spending_2018  122 non-null    float64
 7   total_spending_2019  122 non-null    float64
 8   total_spending_2020  122 non-null    float64
 9   total_spending_2021  122 non-null    float64
 10  total_spending_2022  122 non-null    float64
 11  total_spending_2023  122 non-null    float64
 12  total_spending_2024  122 non-null    float64
 13  total_spending_2025  122 non-null    float64
 14  change_1yr           122 non-null    float64
dtypes: float64(14), object(1)
memory usage: 

Unnamed: 0,category,total_spending_2013,total_spending_2014,total_spending_2015,total_spending_2016,total_spending_2017,total_spending_2018,total_spending_2019,total_spending_2020,total_spending_2021,total_spending_2022,total_spending_2023,total_spending_2024,total_spending_2025,change_1yr
0,Social Security Benefit Payments,180364.0,190870.0,227192.0,218961.0,210442.0,218314.0,231529.0,241788.0,270149.0,270752.0,305889.0,324972.0,360369.0,35397.0
1,Interest on Treasury Securities,54393.0,55602.0,58888.0,60654.0,61321.0,64984.0,74881.0,79783.0,74049.0,72757.0,94063.0,119036.0,147433.0,28397.0
2,Medicare and CMS,146205.0,152577.0,102925.0,103138.0,103893.0,103638.0,109537.0,227471.0,194500.0,196009.0,225459.0,234294.0,259009.0,24715.0
3,Unclassified,80097.0,78402.0,80066.0,81535.0,76691.0,74663.0,78378.0,84277.0,135836.0,85322.0,104517.0,56788.0,74466.0,17678.0
4,HHS Marketplace Payments,0.0,0.0,9829.0,14522.0,15760.0,18852.0,17790.0,14164.0,18313.0,24229.0,31834.0,29845.0,46501.0,16656.0
5,Individual Tax Refunds,203441.0,211307.0,217399.0,215261.0,220833.0,219783.0,218023.0,345916.0,209907.0,235044.0,207607.0,213975.0,227647.0,13672.0
6,Department of Defense,83896.0,80545.0,76645.0,76727.0,72820.0,78864.0,91925.0,96206.0,95539.0,101672.0,95740.0,108512.0,120887.0,12375.0
7,Medicare Prescription Drugs,0.0,0.0,58044.0,63644.0,70557.0,75515.0,85437.0,70330.0,22870.0,25887.0,27045.0,29251.0,40297.0,11046.0
8,Department of Veterans Affairs,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15280.0,16424.0,18781.0,21173.0,21785.0,30562.0,8777.0
9,Medicaid,64170.0,75087.0,91707.0,92641.0,93782.0,97208.0,105896.0,116471.0,131971.0,153938.0,165958.0,161582.0,169611.0,8029.0


In [70]:
# Repeat this to get the grand totals across all of government for each year during this same time frame

# The window length is set by the latest record in the data: the number of
# days between Jan. 20 of that record's year and the record itself.
max_date = withdrawals["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# First year to compare; the loop runs through the data's current year so it
# stays in step with days_elapsed instead of relying on a hard-coded end year.
first_comparison_year = 2013

# Create a list to store total spending for each year
yearly_totals = []

# Loop through each year's Jan. 20 window
for year in range(first_comparison_year, current_year + 1):
    # Window runs from Jan. 20 of this year through days_elapsed days later
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)

    # Keep only withdrawals dated inside the window (both ends inclusive)
    filtered_withdrawals = withdrawals[
        (withdrawals["record_date"] >= start_date) &
        (withdrawals["record_date"] <= end_date)
    ]

    # Total of transaction_today_amt across every row in the window
    total_spending = filtered_withdrawals["transaction_today_amt"].sum()

    # Append the total spending to the list
    yearly_totals.append({"year": year, "total_spending": total_spending})

# Convert the yearly totals to a DataFrame
yearly_first_days_totals = pd.DataFrame(yearly_totals)

# Save the yearly totals DataFrame to a CSV file for further analysis
yearly_first_days_totals.to_csv("output/yearly_first_days_totals.csv", index=False)

# Display the info and first few rows of the yearly totals DataFrame
yearly_first_days_totals.info()
yearly_first_days_totals.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   year            13 non-null     int64
 1   total_spending  13 non-null     int64
dtypes: int64(2)
memory usage: 340.0 bytes


Unnamed: 0,year,total_spending
0,2013,1203918
1,2014,1222337
2,2015,1310908
3,2016,1326053
4,2017,1308815
5,2018,1357376
6,2019,1449014
7,2020,1902640
8,2021,2305574
9,2022,1852300
