In [1]:
import pandas as pd
import datawrapper as dw
import requests

In [2]:
# Load the latest clean deposits data from Treasury API following update
deposits_combined_file_path = "data/deposits.parquet"
deposits = pd.read_parquet(deposits_combined_file_path)

# Quick show of date range, info, head and date range of record_date
print(f"Date range of this data: from {deposits['record_date'].min()} to {deposits['record_date'].max()}")
deposits.info()
deposits.head(10)


Date range of this data: from 2012-10-01 00:00:00 to 2025-05-02 00:00:00
<class 'pandas.core.frame.DataFrame'>
Index: 129672 entries, 0 to 130798
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              129672 non-null  datetime64[ns]
 1   account_type             129672 non-null  object        
 2   transaction_type         129672 non-null  object        
 3   transaction_catg         129672 non-null  object        
 4   transaction_catg_desc    129672 non-null  object        
 5   transaction_today_amt    129672 non-null  int64         
 6   transaction_mtd_amt      129672 non-null  object        
 7   transaction_fytd_amt     129672 non-null  object        
 8   table_nbr                129672 non-null  object        
 9   table_nm                 129672 non-null  object        
 10  src_line_nbr             129672 non-null  object        
 11  record_fis

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2012-10-01,Federal Reserve Account,Deposits,Agriculture Loan Repayments (misc),,77,77,77,II,Deposits and Withdrawals of Operating Cash,1,2013,1,2012,4,10,1
1,2012-10-01,Short-Term Cash Investments,Deposits,Transfers from Federal Reserve Account (Table V),,0,0,0,II,Deposits and Withdrawals of Operating Cash,31,2013,1,2012,4,10,1
2,2012-10-01,Federal Reserve Account,Deposits,Transfers from Depositaries,,0,0,0,II,Deposits and Withdrawals of Operating Cash,29,2013,1,2012,4,10,1
3,2012-10-01,Federal Reserve Account,Deposits,Change in Balance of Uncollected Funds,,0,0,0,II,Deposits and Withdrawals of Operating Cash,28,2013,1,2012,4,10,1
4,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,TARP,73,73,73,II,Deposits and Withdrawals of Operating Cash,26,2013,1,2012,4,10,1
5,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Natl Railroad Retirement Inv Trust,30,30,30,II,Deposits and Withdrawals of Operating Cash,25,2013,1,2012,4,10,1
6,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Minerals Management Svc/Collections,468,468,468,II,Deposits and Withdrawals of Operating Cash,24,2013,1,2012,4,10,1
7,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Medicare Premiums,100,100,100,II,Deposits and Withdrawals of Operating Cash,23,2013,1,2012,4,10,1
8,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Interior,58,58,58,II,Deposits and Withdrawals of Operating Cash,22,2013,1,2012,4,10,1
9,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Federal Housing Admin: Note Sales,75,75,75,II,Deposits and Withdrawals of Operating Cash,21,2013,1,2012,4,10,1


In [3]:
# Read category_crosswalk.csv
deposits_category_crosswalk_file_path = "data/deposits_category_crosswalk.csv"
deposits_category_crosswalk = pd.read_csv(deposits_category_crosswalk_file_path, keep_default_na=False)
# Display the info and first few rows of the crosswalk DataFrame
deposits_category_crosswalk.info()
deposits_category_crosswalk.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   transaction_catg       157 non-null    object
 1   transaction_catg_desc  157 non-null    object
 2   category               157 non-null    object
 3   agency                 157 non-null    object
 4   cbs_notes              157 non-null    object
dtypes: object(5)
memory usage: 6.3+ KB


Unnamed: 0,transaction_catg,transaction_catg_desc,category,agency,cbs_notes
0,Legislative Branch - misc,,Legislative Branch,Congress,
1,Legislative Branch - Library of Congress,,Library of Congress,Congress,
2,Other Deposits,Library of Congress,Library of Congress,Congress,
3,Judicial Branch - Courts,,Judicial Branch,Courts,
4,District of Columbia,,District of Columbia,DC,
5,Customs and Certain Excise Taxes,,Customs and Certain Excise Taxes,DHS,
6,DHS - Customs and Certain Excise Taxes,,Customs and Certain Excise Taxes,DHS,
7,Dept of Homeland Security (DHS) - misc,,Department of Homeland Security,DHS,
8,Other Deposits,Emergency Prep & Response (DHS),Emergency Preparedness & Response,DHS,
9,DHS - Fed Emergency Mgmt Agency (FEMA),,FEMA,DHS,


In [4]:
# Merge data with crosswalk to standardize categories by adding new columns
deposits = deposits.merge(
    deposits_category_crosswalk, 
    how='left', 
    left_on=['transaction_catg', 'transaction_catg_desc'], 
    right_on=['transaction_catg', 'transaction_catg_desc']
)

# Display the info and first few rows of the updated DataFrame
deposits.info()
deposits.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 129672 entries, 0 to 129671
Data columns (total 20 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              129672 non-null  datetime64[ns]
 1   account_type             129672 non-null  object        
 2   transaction_type         129672 non-null  object        
 3   transaction_catg         129672 non-null  object        
 4   transaction_catg_desc    129672 non-null  object        
 5   transaction_today_amt    129672 non-null  int64         
 6   transaction_mtd_amt      129672 non-null  object        
 7   transaction_fytd_amt     129672 non-null  object        
 8   table_nbr                129672 non-null  object        
 9   table_nm                 129672 non-null  object        
 10  src_line_nbr             129672 non-null  object        
 11  record_fiscal_year       129672 non-null  object        
 12  record_fiscal_qu

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day,category,agency,cbs_notes
0,2012-10-01,Federal Reserve Account,Deposits,Agriculture Loan Repayments (misc),,77,77,77,II,Deposits and Withdrawals of Operating Cash,1,2013,1,2012,4,10,1,Agriculture Loan Repayments,USDA,
1,2012-10-01,Short-Term Cash Investments,Deposits,Transfers from Federal Reserve Account (Table V),,0,0,0,II,Deposits and Withdrawals of Operating Cash,31,2013,1,2012,4,10,1,Transfers from Federal Reserve Account (Table V),,
2,2012-10-01,Federal Reserve Account,Deposits,Transfers from Depositaries,,0,0,0,II,Deposits and Withdrawals of Operating Cash,29,2013,1,2012,4,10,1,Transfers from Depositaries,,
3,2012-10-01,Federal Reserve Account,Deposits,Change in Balance of Uncollected Funds,,0,0,0,II,Deposits and Withdrawals of Operating Cash,28,2013,1,2012,4,10,1,Change in Balance of Uncollected Funds,,
4,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,TARP,73,73,73,II,Deposits and Withdrawals of Operating Cash,26,2013,1,2012,4,10,1,Troubled Asset Relief Program,Treasury,
5,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Natl Railroad Retirement Inv Trust,30,30,30,II,Deposits and Withdrawals of Operating Cash,25,2013,1,2012,4,10,1,Railroad Retirement Investment Trust,,
6,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Minerals Management Svc/Collections,468,468,468,II,Deposits and Withdrawals of Operating Cash,24,2013,1,2012,4,10,1,Minerals Management Services Collections,DOI,
7,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Medicare Premiums,100,100,100,II,Deposits and Withdrawals of Operating Cash,23,2013,1,2012,4,10,1,Medicare Premiums,HHS,
8,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Interior,58,58,58,II,Deposits and Withdrawals of Operating Cash,22,2013,1,2012,4,10,1,Department of Interior,DOI,
9,2012-10-01,Federal Reserve Account,Deposits,Other Deposits,Federal Housing Admin: Note Sales,75,75,75,II,Deposits and Withdrawals of Operating Cash,21,2013,1,2012,4,10,1,Federal Housing Administration Note Sales,HUD,


In [5]:
# count instances of each category in deposits and 
category_counts_deposits = deposits['transaction_catg'].value_counts().reset_index()
category_counts_deposits.columns = ['transaction_catg', 'count']
# Display the info and first few rows of the category counts DataFrame
category_counts_deposits.info()
category_counts_deposits.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 108 entries, 0 to 107
Data columns (total 2 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   transaction_catg  108 non-null    object
 1   count             108 non-null    int64 
dtypes: int64(1), object(1)
memory usage: 1.8+ KB


Unnamed: 0,transaction_catg,count
0,Other Deposits,6636
1,Deposits by States,4212
2,Federal Reserve Earnings,3161
3,Postal Service,2763
4,Cash FTD's Received (Table IV),2604


In [6]:
# Count instances of each category and subcategory in deposits
subcategory_counts_deposits = deposits.groupby(['transaction_catg', 'transaction_catg_desc']).size().reset_index(name='count')

# Display the info and first few rows of the category counts DataFrame
subcategory_counts_deposits.info()
subcategory_counts_deposits.head()

# Output the category counts to a CSV file
subcategory_counts_deposits_file_path = "data/subcategory_counts_deposits.csv"
subcategory_counts_deposits.to_csv(subcategory_counts_deposits_file_path, index=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 157 entries, 0 to 156
Data columns (total 3 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   transaction_catg       157 non-null    object
 1   transaction_catg_desc  157 non-null    object
 2   count                  157 non-null    int64 
dtypes: int64(1), object(2)
memory usage: 3.8+ KB


In [7]:
# Quick report on this year so far
# Filter for deposits between Jan. 20, 2025, and the most recent date
filtered_deposits = deposits[
    (deposits["record_date"] >= "2025-01-20") & 
    (deposits["record_date"] <= pd.Timestamp.now())
]

# Group by transaction_catg and sum the transaction_today_amt for each category
category_deposits = (
    filtered_deposits
    .groupby("category", as_index=False)
    .agg({"transaction_today_amt": "sum"})
    .rename(columns={"transaction_today_amt": "total_deposits"})
)

# Sort by total_deposits in descending order
category_deposits = category_deposits.sort_values(by="total_deposits", ascending=False).reset_index(drop=True)

# Display the info and first few rows of the category deposits DataFrame
print(category_deposits.info())
print(category_deposits.head(10))

total_deposits_2025 = category_deposits["total_deposits"].sum()
print(f"Total deposits since Jan. 20, 2025: ${total_deposits_2025:,.2f}")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 78 entries, 0 to 77
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   category        78 non-null     object
 1   total_deposits  78 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 1.3+ KB
None
                                          category  total_deposits
0             Public Debt Cash Issues (Table IIIB)         8894178
1                      Taxes (Withheld Individual)         1074367
2  Taxes (Non Withheld Individual SECA Electronic)          367525
3       Taxes (Non Withheld Individual SECA Other)          251680
4                         Taxes (Corporate Income)          159324
5                 Customs and Certain Excise Taxes           44028
6           Federal Retirement Thrift Savings Plan           39517
7                                   Taxes (Excise)           31046
8                     United States Postal Service           277

In [8]:
# REMEMBER THIS IS FOR CALENDAR YEAR TOTALS ON DEPOSITS
# FOR INTEGRITY AND SANITY CHECKS

# Create an empty list to store yearly deposits DataFrames IN CALENDAR YEARS
yearly_cy_deposits_list = []

# Loop through each CALENDAR year
for year in range(2013, 2026):
    # Filter deposits for the given CALENDAR year using record_calendar_year
    filtered_deposits = deposits[deposits["record_calendar_year"] == str(year)]
    
    # Group by transaction_catg and transaction_catg_desc, and sum the transaction_today_amt for each category
    yearly_cy_deposits = (
        filtered_deposits
        .groupby(["category","agency"], as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"cy_total_{year}"})
    )
    
    # Append the yearly deposits DataFrame to the list
    yearly_cy_deposits_list.append(yearly_cy_deposits)

# Merge all yearly deposits DataFrames into a single pivot table
total_cy_deposits = yearly_cy_deposits_list[0]
for yearly_cy_deposits in yearly_cy_deposits_list[1:]:
    total_cy_deposits = total_cy_deposits.merge(
        yearly_cy_deposits, 
        on=["category", "agency"], 
        how="outer"
    )

# Fill NaN values with 0 for categories that don't appear in some years
total_cy_deposits = total_cy_deposits.fillna(0)

# Sort the pivot table by total deposits in the most recent year (descending order) for the most recent full year
total_cy_deposits = total_cy_deposits.sort_values(by=f"cy_total_2024", ascending=False).reset_index(drop=True)

# Save the pivot table to a CSV file for further analysis
total_cy_deposits.to_csv("output/total_annual_deposits_CY_live.csv", index=False)

# Display the info and first few rows of the pivot table
total_cy_deposits.info()
total_cy_deposits.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116 entries, 0 to 115
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   category       116 non-null    object 
 1   agency         116 non-null    object 
 2   cy_total_2013  116 non-null    float64
 3   cy_total_2014  116 non-null    float64
 4   cy_total_2015  116 non-null    float64
 5   cy_total_2016  116 non-null    float64
 6   cy_total_2017  116 non-null    float64
 7   cy_total_2018  116 non-null    float64
 8   cy_total_2019  116 non-null    float64
 9   cy_total_2020  116 non-null    float64
 10  cy_total_2021  116 non-null    float64
 11  cy_total_2022  116 non-null    float64
 12  cy_total_2023  116 non-null    float64
 13  cy_total_2024  116 non-null    float64
 14  cy_total_2025  116 non-null    float64
dtypes: float64(13), object(2)
memory usage: 13.7+ KB


Unnamed: 0,category,agency,cy_total_2013,cy_total_2014,cy_total_2015,cy_total_2016,cy_total_2017,cy_total_2018,cy_total_2019,cy_total_2020,cy_total_2021,cy_total_2022,cy_total_2023,cy_total_2024,cy_total_2025
0,Public Debt Cash Issues (Table IIIB),,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5259391.0,23066959.0,29841122.0,10277579.0
1,Taxes (Withheld Individual),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2684906.0,3295672.0,1253550.0
2,Taxes (Non Withheld Individual SECA Electronic),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,412135.0,555701.0,440501.0
3,Taxes (Corporate Income),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,462692.0,500689.0,179066.0
4,Taxes (Non Withheld Individual SECA Other),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,392497.0,456694.0,267957.0


In [9]:
# REPEATING DEPOSIT TOTALS FOR FISCAL YEARS

# Create an empty list to store yearly deposits DataFrames IN CALENDAR YEARS
yearly_fy_deposits_list = []

# Loop through each CALENDAR year
for year in range(2013, 2026):
    # Filter deposits for the given CALENDAR year using record_calendar_year
    filtered_deposits = deposits[deposits["record_fiscal_year"] == str(year)]
    
    # Group by transaction_catg and transaction_catg_desc, and sum the transaction_today_amt for each category
    yearly_fy_deposits = (
        filtered_deposits
        .groupby(["category","agency"], as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"fy_total_{year}"})
    )
    
    # Append the yearly deposits DataFrame to the list
    yearly_fy_deposits_list.append(yearly_fy_deposits)

# Merge all yearly deposits DataFrames into a single pivot table
total_fy_deposits = yearly_fy_deposits_list[0]
for yearly_fy_deposits in yearly_fy_deposits_list[1:]:
    total_fy_deposits = total_fy_deposits.merge(
        yearly_fy_deposits, 
        on=["category", "agency"], 
        how="outer"
    )

# Fill NaN values with 0 for categories that don't appear in some years
total_fy_deposits = total_fy_deposits.fillna(0)

# Sort the pivot table by total deposits in the most recent year (descending order) for the most recent full year
total_fy_deposits = total_fy_deposits.sort_values(by=f"fy_total_2024", ascending=False).reset_index(drop=True)

# Save the pivot table to a CSV file for further analysis
total_fy_deposits.to_csv("output/total_annual_deposits_FY_live.csv", index=False)

# Display the info and first few rows of the pivot table
total_fy_deposits.info()
total_fy_deposits.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117 entries, 0 to 116
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   category       117 non-null    object 
 1   agency         117 non-null    object 
 2   fy_total_2013  117 non-null    float64
 3   fy_total_2014  117 non-null    float64
 4   fy_total_2015  117 non-null    float64
 5   fy_total_2016  117 non-null    float64
 6   fy_total_2017  117 non-null    float64
 7   fy_total_2018  117 non-null    float64
 8   fy_total_2019  117 non-null    float64
 9   fy_total_2020  117 non-null    float64
 10  fy_total_2021  117 non-null    float64
 11  fy_total_2022  117 non-null    float64
 12  fy_total_2023  117 non-null    float64
 13  fy_total_2024  117 non-null    float64
 14  fy_total_2025  117 non-null    float64
dtypes: float64(13), object(2)
memory usage: 13.8+ KB


Unnamed: 0,category,agency,fy_total_2013,fy_total_2014,fy_total_2015,fy_total_2016,fy_total_2017,fy_total_2018,fy_total_2019,fy_total_2020,fy_total_2021,fy_total_2022,fy_total_2023,fy_total_2024,fy_total_2025
0,Public Debt Cash Issues (Table IIIB),,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,983652.0,20292348.0,28859579.0,18309472.0
1,Taxes (Withheld Individual),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1901122.0,3234407.0,2098599.0
2,Taxes (Non Withheld Individual SECA Electronic),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,334151.0,582115.0,492071.0
3,Taxes (Corporate Income),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,308715.0,540255.0,293477.0
4,Taxes (Non Withheld Individual SECA Other),IRS,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,338724.0,473936.0,304488.0


In [10]:
# Calculate the max date so far this year
max_date = deposits["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# Create an empty list to store yearly deposits DataFrames
yearly_deposits_list = []

# Loop through each fiscal year
for year in range(2013,2026):
    # Define the start and end dates for the fiscal year
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)
    
    # Filter for the given fiscal year and time period
    filtered_deposits = deposits[
        (deposits["record_date"] >= start_date) &
        (deposits["record_date"] <= end_date)
    ]
    
    # Group by transaction_catg and sum the transaction_today_amt for each category
    yearly_deposits = (
        filtered_deposits
        .groupby("category", as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": f"{year}"})
    )
    
    # Append the yearly deposits DataFrame to the list
    yearly_deposits_list.append(yearly_deposits)

# Merge all yearly deposits DataFrames into a single pivot table
first_days_deposits = yearly_deposits_list[0]
for yearly_deposits in yearly_deposits_list[1:]:
    first_days_deposits = first_days_deposits.merge(yearly_deposits, on="category", how="outer")

# Fill NaN values with 0 for categories that don't appear in some years
first_days_deposits = first_days_deposits.fillna(0)

# add a column for increase in deposits from 2024 to 2025
first_days_deposits["change_1yr"] = first_days_deposits["2025"] - first_days_deposits["2024"]

# Sort the pivot table by total deposits in the most recent year (descending order)
first_days_deposits = first_days_deposits.sort_values(by=f"change_1yr", ascending=False).reset_index(drop=True)

# Save the pivot table to a CSV file for further analysis
first_days_deposits.to_csv("output/yearly_first_days_deposits_bycategory_live.csv", index=False)

# Display the info and first few rows of the pivot table
first_days_deposits.info()
# print days elapsed since jan 20
print(f"Days elapsed since January 20, 2025: {days_elapsed}")
first_days_deposits.head(50)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 105 entries, 0 to 104
Data columns (total 15 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   category    105 non-null    object 
 1   2013        105 non-null    float64
 2   2014        105 non-null    float64
 3   2015        105 non-null    float64
 4   2016        105 non-null    float64
 5   2017        105 non-null    float64
 6   2018        105 non-null    float64
 7   2019        105 non-null    float64
 8   2020        105 non-null    float64
 9   2021        105 non-null    float64
 10  2022        105 non-null    float64
 11  2023        105 non-null    float64
 12  2024        105 non-null    float64
 13  2025        105 non-null    float64
 14  change_1yr  105 non-null    float64
dtypes: float64(14), object(1)
memory usage: 12.4+ KB
Days elapsed since January 20, 2025: 102


Unnamed: 0,category,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025,change_1yr
0,Public Debt Cash Issues (Table IIIB),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5560743.0,8256545.0,8894178.0,637633.0
1,Taxes (Non Withheld Individual SECA Electronic),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,218738.0,283651.0,367525.0,83874.0
2,Taxes (Withheld Individual),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,733351.0,997151.0,1074367.0,77216.0
3,Federal Retirement Thrift Savings Plan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8045.0,10234.0,39517.0,29283.0
4,Taxes (Corporate Income),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,112272.0,137890.0,159324.0,21434.0
5,Taxes (Non Withheld Individual SECA Other),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,189779.0,231336.0,251680.0,20344.0
6,Customs and Certain Excise Taxes,12583.0,13390.0,14105.0,13686.0,13765.0,15353.0,24719.0,25895.0,29712.0,37659.0,30246.0,29470.0,44028.0,14558.0
7,Taxes (Excise),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20138.0,25374.0,31046.0,5672.0
8,Unemployment Insurance Deposits by States,20410.0,18405.0,15277.0,9778.0,14811.0,16895.0,16454.0,13505.0,11727.0,18502.0,18696.0,16629.0,18157.0,1528.0
9,Federal Reserve Earnings,18665.0,30725.0,24285.0,28200.0,23574.0,22811.0,13913.0,22427.0,22992.0,36829.0,88.0,655.0,2082.0,1427.0


In [11]:
# Filter first_days_deposits to show only categories with more than $100 million
first_days_deposits_100m = first_days_deposits[first_days_deposits["2025"] >= 100]
# filter out the category "Unclassified"
first_days_deposits_100m = first_days_deposits_100m[first_days_deposits_100m["category"] != "Unclassified"]

# show me the whole table
print(first_days_deposits_100m)

# Save the filtered DataFrame to a CSV file
first_days_deposits_100m.to_csv("output/yearly_first_days_deposits_bycategory_100m_live.csv", index=False)



                                            category    2013    2014    2015  \
0               Public Debt Cash Issues (Table IIIB)     0.0     0.0     0.0   
1    Taxes (Non Withheld Individual SECA Electronic)     0.0     0.0     0.0   
2                        Taxes (Withheld Individual)     0.0     0.0     0.0   
3             Federal Retirement Thrift Savings Plan     0.0     0.0     0.0   
4                           Taxes (Corporate Income)     0.0     0.0     0.0   
..                                               ...     ...     ...     ...   
100               Independent Agencies miscellaneous     0.0     0.0     0.0   
101                      International Monetary Fund  1779.0  3366.0  6240.0   
102                                    Seized Assets     0.0     0.0     0.0   
103                           Foreign Military Sales  7090.0  9092.0  7662.0   
104                   Federal Deposit Insurance Corp     0.0     0.0     0.0   

        2016    2017     2018     2019 

In [12]:
# Repeat this to get the grand total deposits across all of government for each year during this same time frame

# Calculate the max date so far this year
max_date = deposits["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# Create a list to store total deposits for each year
yearly_totals = []

# Loop through each fiscal year
for year in range(2013, 2026):
    # Define the start and end dates for the fiscal year
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)
    
    # Filter deposits for the given fiscal year and time period
    filtered_deposits = deposits[
        (deposits["record_date"] >= start_date) &
        (deposits["record_date"] <= end_date)
    ]
    
    # Calculate the total deposits for the year
    total_deposits = filtered_deposits["transaction_today_amt"].sum()
    
    # Append the total deposits to the list
    yearly_totals.append({"year": year, "total_deposits": total_deposits})

# Convert the yearly totals to a DataFrame
yearly_first_days_totals = pd.DataFrame(yearly_totals)

# Save the yearly totals DataFrame to a CSV file for further analysis
yearly_first_days_totals.to_csv("output/yearly_first_days_deposits_totals_live.csv", index=False)

# Display the info and first few rows of the yearly totals DataFrame
yearly_first_days_totals.info()
yearly_first_days_totals.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13 entries, 0 to 12
Data columns (total 2 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   year            13 non-null     int64
 1   total_deposits  13 non-null     int64
dtypes: int64(2)
memory usage: 340.0 bytes


Unnamed: 0,year,total_deposits
0,2013,3657839
1,2014,3359820
2,2015,3462914
3,2016,3697790
4,2017,3846308
5,2018,4334982
6,2019,5036003
7,2020,6528970
8,2021,7386210
9,2022,7167204


In [13]:
# To create day by day charts for web, we're going to create a df with every date and number of days since Jan 20
# Filter all_dates to include only dates from January 20 to April 30 for each year
all_dates = pd.date_range(start="2013-01-20", end="2025-04-30", freq='D')
all_dates_df = pd.DataFrame({
    "record_date": all_dates,
    "day_number": all_dates.map(lambda date: (date - pd.Timestamp(f"{date.year}-01-20")).days + 1 if date <= pd.Timestamp(f"{date.year}-04-30") else 0)
})

# Ensure that only valid day numbers are included (greater than 0)
all_dates_df = all_dates_df[all_dates_df["day_number"] > 0]
# Remove rows with day_number greater than 100
all_dates_df = all_dates_df[all_dates_df["day_number"] <= 100]

# Show info and header of the filtered all_dates_df
all_dates_df.info()
all_dates_df.head(2000)


<class 'pandas.core.frame.DataFrame'>
Index: 1300 entries, 0 to 4482
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   record_date  1300 non-null   datetime64[ns]
 1   day_number   1300 non-null   int64         
dtypes: datetime64[ns](1), int64(1)
memory usage: 30.5 KB


Unnamed: 0,record_date,day_number
0,2013-01-20,1
1,2013-01-21,2
2,2013-01-22,3
3,2013-01-23,4
4,2013-01-24,5
...,...,...
4478,2025-04-25,96
4479,2025-04-26,97
4480,2025-04-27,98
4481,2025-04-28,99


In [14]:
# Create a new cell for daily totals, cumulative totals, and day numbers

# Calculate the max date so far this year
max_date = deposits["record_date"].max()
current_year = max_date.year
days_elapsed = (max_date - pd.to_datetime(f"{current_year}-01-20")).days

# Create an empty list to store daily deposits DataFrames
daily_deposits_list = []

# Loop through each fiscal year
for year in range(2013, 2026):
    # Define the start and end dates for the fiscal year
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_elapsed)
    
    # Filter deposits for the given fiscal year and time period
    filtered_deposits = deposits[
        (deposits["record_date"] >= start_date) &
        (deposits["record_date"] <= end_date)
    ]
    
    # Group by record_date and calculate the total deposits for each day
    daily_deposits = (
        filtered_deposits
        .groupby("record_date", as_index=False)
        .agg({"transaction_today_amt": "sum"})
        .rename(columns={"transaction_today_amt": "daily_total"})
    )
    
    # Add a cumulative total column
    daily_deposits["cumulative_total"] = daily_deposits["daily_total"].cumsum()
    
    # Add the year column for reference
    daily_deposits["year"] = year
    
    # Add the day_number column
    # daily_deposits["day_number"] = (daily_deposits["record_date"] - start_date).dt.days + 1
    
    # Append the daily deposits DataFrame to the list
    daily_deposits_list.append(daily_deposits)

# Concatenate all daily deposits DataFrames into a single DataFrame
all_daily_deposits = pd.concat(daily_deposits_list, ignore_index=True)

# Display the info and first few rows of the daily deposits DataFrame
all_daily_deposits.info()
all_daily_deposits.head(200)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 940 entries, 0 to 939
Data columns (total 4 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   record_date       940 non-null    datetime64[ns]
 1   daily_total       940 non-null    int64         
 2   cumulative_total  940 non-null    int64         
 3   year              940 non-null    int64         
dtypes: datetime64[ns](1), int64(3)
memory usage: 29.5 KB


Unnamed: 0,record_date,daily_total,cumulative_total,year
0,2013-01-22,23287,23287,2013
1,2013-01-23,26888,50175,2013
2,2013-01-24,102298,152473,2013
3,2013-01-25,11061,163534,2013
4,2013-01-28,15835,179369,2013
...,...,...,...,...
195,2015-04-01,28912,2256511,2015
196,2015-04-02,123834,2380345,2015
197,2015-04-03,12423,2392768,2015
198,2015-04-06,16054,2408822,2015


In [15]:
# Merge the all_dates_df with the all_daily_deposits DataFrame to include all dates
all_dates_with_deposits = all_dates_df.merge(
    all_daily_deposits, 
    on="record_date", 
    how="left"
)

# Make sure we're sorted by record_date
all_dates_with_deposits = all_dates_with_deposits.sort_values(by="record_date").reset_index(drop=True)

# Fill NaN values in daily_total with 0
all_dates_with_deposits["daily_total"] = all_dates_with_deposits["daily_total"].fillna(0)

# In first two rows for dates Jan 20-21 of 2013 replace daily_total and cumulative_total with 0
all_dates_with_deposits.loc[
    (all_dates_with_deposits["record_date"].isin(["2013-01-20", "2013-01-21"])), 
    ["daily_total", "cumulative_total"]
] = 0.0

# Replace cumulative_total with 0 where day_number equals 1 and cumulative_total is NaN
all_dates_with_deposits.loc[
    (all_dates_with_deposits["day_number"] == 1) & (all_dates_with_deposits["cumulative_total"].isna()), 
    "cumulative_total"
] = 0.0

# Fill NaN values in cumulative_total with the prior figure
all_dates_with_deposits["cumulative_total"] = all_dates_with_deposits["cumulative_total"].fillna(method='ffill')

# Repopulate year with the year from the record_date
all_dates_with_deposits["year"] = all_dates_with_deposits["record_date"].dt.year

# Display the info and first few rows of the daily deposits DataFrame
all_dates_with_deposits.info()
all_dates_with_deposits.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1300 entries, 0 to 1299
Data columns (total 5 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   record_date       1300 non-null   datetime64[ns]
 1   day_number        1300 non-null   int64         
 2   daily_total       1300 non-null   float64       
 3   cumulative_total  1300 non-null   float64       
 4   year              1300 non-null   int32         
dtypes: datetime64[ns](1), float64(2), int32(1), int64(1)
memory usage: 45.8 KB


  (all_dates_with_deposits["record_date"].isin(["2013-01-20", "2013-01-21"])),
  all_dates_with_deposits["cumulative_total"] = all_dates_with_deposits["cumulative_total"].fillna(method='ffill')


Unnamed: 0,record_date,day_number,daily_total,cumulative_total,year
0,2013-01-20,1,0.0,0.0,2013
1,2013-01-21,2,0.0,0.0,2013
2,2013-01-22,3,23287.0,23287.0,2013
3,2013-01-23,4,26888.0,50175.0,2013
4,2013-01-24,5,102298.0,152473.0,2013


In [16]:
# Create pivoted table for use in web charts of first 100 days total deposits by year

# Pivot the DataFrame to have dates as rows, years as columns, and cumulative totals as values
pivoted_deposits = all_dates_with_deposits.pivot(index="day_number", columns="year", values="cumulative_total")

# Save the pivoted DataFrame to a CSV file for further analysis
pivoted_deposits.to_csv("output/first100_cum_deposits_by_year_live.csv")

# Display the info and first few rows of the pivoted DataFrame
pivoted_deposits.info()
pivoted_deposits.head(100)



<class 'pandas.core.frame.DataFrame'>
Index: 100 entries, 1 to 100
Data columns (total 13 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   2013    100 non-null    float64
 1   2014    100 non-null    float64
 2   2015    100 non-null    float64
 3   2016    100 non-null    float64
 4   2017    100 non-null    float64
 5   2018    100 non-null    float64
 6   2019    100 non-null    float64
 7   2020    100 non-null    float64
 8   2021    100 non-null    float64
 9   2022    100 non-null    float64
 10  2023    100 non-null    float64
 11  2024    100 non-null    float64
 12  2025    100 non-null    float64
dtypes: float64(13)
memory usage: 10.9 KB


year,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,2024,2025
day_number,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,0.0,0.0,24370.0,43898.0,12081.0,0.0,0.0,0.0,30477.0,149089.0,23050.0,0.0,0.0
2,0.0,24469.0,70306.0,171679.0,12081.0,0.0,0.0,105043.0,231901.0,173066.0,23050.0,0.0,287576.0
3,23287.0,61580.0,166260.0,183553.0,12081.0,24874.0,101051.0,136876.0,253645.0,173066.0,23050.0,36606.0,304723.0
4,50175.0,143908.0,177238.0,183553.0,38315.0,40170.0,131430.0,241710.0,253645.0,173066.0,57323.0,282671.0,701824.0
5,152473.0,153765.0,177238.0,183553.0,58618.0,64932.0,231705.0,258897.0,253645.0,209658.0,310874.0,319691.0,727146.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
96,3329962.0,3029639.0,3121336.0,3436794.0,3444632.0,3910879.0,4623005.0,5821987.0,6703590.0,6518512.0,6697938.0,9324274.0,10151249.0
97,3347609.0,3029639.0,3121336.0,3473457.0,3467493.0,4089267.0,4640116.0,5821987.0,6730482.0,6666525.0,6729126.0,9557669.0,10151249.0
98,3347609.0,3029639.0,3149429.0,3515997.0,3634940.0,4109959.0,4640116.0,5821987.0,6862744.0,6703350.0,6863674.0,9581418.0,10151249.0
99,3347609.0,3053618.0,3175723.0,3536982.0,3673178.0,4109959.0,4640116.0,5838704.0,6882732.0,6838961.0,6912322.0,9581418.0,10197607.0
