In [30]:
import pandas as pd
import numpy as np
import datawrapper as dw
import janitor
import requests

In [31]:
# Read the Daily Treasury Statement extract of operating-cash deposits and
# withdrawals (file name covers 2015-04-17 through 2025-04-16). The column at
# position 4 is read as a string.
# Documentation for the Daily Treasury Statement (link to be expanded later):
# https://fiscal.treasury.gov/reports-statements/dts/
operating_cash = (
    pd.read_csv("data/DTS_OpCashDpstWdrl_20150417_20250416.csv", dtype={4: str})
    # Normalize the column names with pyjanitor
    .clean_names()
    # Parse record_date into datetime values
    .assign(record_date=lambda frame: pd.to_datetime(frame["record_date"]))
)

# Summarize the frame, then preview its first rows
operating_cash.info()
operating_cash.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 279003 entries, 0 to 279002
Data columns (total 17 columns):
 #   Column                            Non-Null Count   Dtype         
---  ------                            --------------   -----         
 0   record_date                       279003 non-null  datetime64[ns]
 1   type_of_account                   279003 non-null  object        
 2   transaction_type                  279003 non-null  object        
 3   transaction_category              277501 non-null  object        
 4   transaction_category_description  17328 non-null   object        
 5   transactions_today                279003 non-null  int64         
 6   transactions_month_to_date        279003 non-null  int64         
 7   transactions_fiscal_year_to_date  279003 non-null  int64         
 8   table_number                      279003 non-null  object        
 9   table_name                        279003 non-null  object        
 10  source_line_number              

Unnamed: 0,record_date,type_of_account,transaction_type,transaction_category,transaction_category_description,transactions_today,transactions_month_to_date,transactions_fiscal_year_to_date,table_number,table_name,source_line_number,fiscal_year,fiscal_quarter_number,calendar_year,calendar_quarter_number,calendar_month_number,calendar_day_number
0,2025-04-16,Treasury General Account (TGA),Deposits,Dept of Agriculture (USDA) - misc,,13,303,2303,II,Deposits and Withdrawals of Operating Cash,1,2025,3,2025,2,4,16
1,2025-04-16,Treasury General Account (TGA),Deposits,USDA - Commodity Credit Corporation,,36,379,4583,II,Deposits and Withdrawals of Operating Cash,2,2025,3,2025,2,4,16
2,2025-04-16,Treasury General Account (TGA),Deposits,USDA - Federal Crop Insurance Corp Fund,,0,0,33,II,Deposits and Withdrawals of Operating Cash,3,2025,3,2025,2,4,16
3,2025-04-16,Treasury General Account (TGA),Deposits,USDA - Loan Repayments,,12,325,4501,II,Deposits and Withdrawals of Operating Cash,4,2025,3,2025,2,4,16
4,2025-04-16,Treasury General Account (TGA),Deposits,Dept of Commerce (DOC),,16,189,2990,II,Deposits and Withdrawals of Operating Cash,5,2025,3,2025,2,4,16


In [32]:
# Build a dataframe holding only withdrawals.
# Rows whose transaction_category exactly equals one of the two
# "Public Debt Cash Redemp." labels are left out, because those withdrawals
# do not indicate spending.
# More explanation and a link to the transaction_category documentation
# will be added here.
is_withdrawal = operating_cash["transaction_type"].eq("Withdrawals")
is_debt_redemption = operating_cash["transaction_category"].isin(
    [
        "Public Debt Cash Redemp. (Table IIIB)",
        "Public Debt Cash Redemp. (Table III-B)",
    ]
)

# Copy so later column assignments do not touch operating_cash
withdrawals = operating_cash.loc[is_withdrawal & ~is_debt_redemption].copy()

# Summary followed by a preview
withdrawals.info()
withdrawals.head()


<class 'pandas.core.frame.DataFrame'>
Index: 162135 entries, 30 to 279002
Data columns (total 17 columns):
 #   Column                            Non-Null Count   Dtype         
---  ------                            --------------   -----         
 0   record_date                       162135 non-null  datetime64[ns]
 1   type_of_account                   162135 non-null  object        
 2   transaction_type                  162135 non-null  object        
 3   transaction_category              161384 non-null  object        
 4   transaction_category_description  9861 non-null    object        
 5   transactions_today                162135 non-null  int64         
 6   transactions_month_to_date        162135 non-null  int64         
 7   transactions_fiscal_year_to_date  162135 non-null  int64         
 8   table_number                      162135 non-null  object        
 9   table_name                        162135 non-null  object        
 10  source_line_number                16

Unnamed: 0,record_date,type_of_account,transaction_type,transaction_category,transaction_category_description,transactions_today,transactions_month_to_date,transactions_fiscal_year_to_date,table_number,table_name,source_line_number,fiscal_year,fiscal_quarter_number,calendar_year,calendar_quarter_number,calendar_month_number,calendar_day_number
30,2025-04-16,Treasury General Account (TGA),Withdrawals,Unclassified,,1279,17193,144029,II,Deposits and Withdrawals of Operating Cash,182,2025,3,2025,2,4,16
81,2025-04-16,Treasury General Account (TGA),Withdrawals,Corporation for Public Broadcasting,,0,0,535,II,Deposits and Withdrawals of Operating Cash,82,2025,3,2025,2,4,16
82,2025-04-16,Treasury General Account (TGA),Withdrawals,Dept of Agriculture (USDA) - misc,,73,1341,16915,II,Deposits and Withdrawals of Operating Cash,83,2025,3,2025,2,4,16
83,2025-04-16,Treasury General Account (TGA),Withdrawals,USDA - Child Nutrition,,260,1303,17986,II,Deposits and Withdrawals of Operating Cash,84,2025,3,2025,2,4,16
84,2025-04-16,Treasury General Account (TGA),Withdrawals,USDA - Commodity Credit Corporation,,17,145,9469,II,Deposits and Withdrawals of Operating Cash,85,2025,3,2025,2,4,16


In [33]:
# Sum withdrawals by transaction_category (and description) over the same
# post-January-20 window in each year from 2016 through 2025.

# Most recent date present in the withdrawals data
max_date = withdrawals["record_date"].max()

# Number of days between January 20, 2025 and max_date; the same window
# length is applied to every year so the periods are comparable
days_passed = (max_date - pd.to_datetime("2025-01-20")).days

# Fill missing descriptions with the placeholder string "NA" once, before the
# loop (the fill does not depend on the year). Without a placeholder, groupby
# would drop every row whose description is missing.
# NOTE: pandas.read_csv treats the literal "NA" as missing by default, so the
# placeholder reads back as NaN from the saved CSV unless keep_default_na=False.
# NOTE(review): rows with a missing transaction_category are still dropped by
# the groupby below (default dropna=True) — confirm that is intended.
withdrawals["transaction_category_description"] = withdrawals["transaction_category_description"].fillna("NA")

# Collect one single-column frame of spending per year
spending_years = []

# Loop through each year from 2016 to 2025 (range end is exclusive)
for year in range(2016, 2026):
    start_date = pd.to_datetime(f"{year}-01-20")
    end_date = start_date + pd.Timedelta(days=days_passed)

    # Keep the rows inside this year's window (both ends inclusive) and
    # total transactions_today per category/description pair
    yearly_spending = (
        withdrawals[
            (withdrawals["record_date"] >= start_date) &
            (withdrawals["record_date"] <= end_date)
        ]
        .groupby(["transaction_category", "transaction_category_description"])
        .agg({"transactions_today": "sum"})
        .rename(columns={"transactions_today": f"spending_{year}"})
    )

    # Append the yearly spending to the list
    spending_years.append(yearly_spending)

# Align the yearly frames side by side on (category, description); a pair
# absent in a given year gets NaN in that year's column
spending = pd.concat(spending_years, axis=1).reset_index()

# Show the structure of the spending dataframe
spending.info()

# Save the spending dataframe to a CSV file for further analysis
spending.to_csv("agency_spending_by_year.csv", index=False)

# Display the first rows to confirm the data
spending.head(25)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 202 entries, 0 to 201
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   transaction_category              202 non-null    object 
 1   transaction_category_description  202 non-null    object 
 2   spending_2016                     68 non-null     float64
 3   spending_2017                     67 non-null     float64
 4   spending_2018                     69 non-null     float64
 5   spending_2019                     69 non-null     float64
 6   spending_2020                     93 non-null     float64
 7   spending_2021                     104 non-null    float64
 8   spending_2022                     114 non-null    float64
 9   spending_2023                     107 non-null    float64
 10  spending_2024                     100 non-null    float64
 11  spending_2025                     101 non-null    float64
dtypes: float

Unnamed: 0,transaction_category,transaction_category_description,spending_2016,spending_2017,spending_2018,spending_2019,spending_2020,spending_2021,spending_2022,spending_2023,spending_2024,spending_2025
0,Commodity Credit Corporation programs,,2068.0,2030.0,1981.0,4591.0,4539.0,,,,,
1,Defense Vendor Payments (EFT),,71568.0,66720.0,73093.0,86339.0,88177.0,88546.0,92936.0,87938.0,,
2,Education Department programs,,50252.0,48309.0,45314.0,46682.0,,,,,,
3,Energy Department programs,,8274.0,8003.0,7930.0,8702.0,,,,,,
4,Fed. Highway Administration programs,,8493.0,7732.0,8173.0,7879.0,9321.0,,,,,
5,Federal Employees Insurance Payments,,18730.0,18638.0,19104.0,19887.0,21389.0,21959.0,22324.0,,,
6,Federal Salaries (EFT),,40224.0,41129.0,42959.0,45436.0,45649.0,50904.0,52506.0,55286.0,56700.0,60321.0
7,Food and Nutrition Service (misc),,7662.0,7718.0,7770.0,7483.0,7489.0,,,,,
8,GSA programs,,4641.0,4871.0,5054.0,5493.0,,,,,,
9,Health and Human Services Grants (misc),,25582.0,26668.0,27610.0,29917.0,17098.0,,,,,


In [34]:
# Collapse the per-category table into one total per year: drop the two label
# columns, then add up each remaining spending_<year> column.
yearly_columns = spending.drop(
    columns=["transaction_category", "transaction_category_description"]
)
spending_totals = (
    yearly_columns.sum()
    .rename_axis("year")
    .reset_index(name="total_spending")
)

# Turn labels such as "spending_2016" into the integer 2016
spending_totals["year"] = (
    spending_totals["year"].str.replace("spending_", "").astype(int)
)

# Write the yearly totals to disk
spending_totals.to_csv("total_spending_by_year.csv", index=False)

# Show the resulting table
spending_totals

Unnamed: 0,year,total_spending
0,2016,1261887.0
1,2017,1227202.0
2,2018,1284396.0
3,2019,1368684.0
4,2020,1741253.0
5,2021,2199428.0
6,2022,1734202.0
7,2023,3632816.0
8,2024,1788200.0
9,2025,2020041.0


In [None]:
import requests
import pandas as pd

# Define the base URL and the query parameters shared by every page request
url = "https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/dts/deposits_withdrawals_operating_cash"
params = {
    "fields": "record_date,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt",
    "filter": "transaction_type:eq:Withdrawals",
    "sort": "-record_date",
    "page[size]": 10000  # Number of records per page
}

# Initialize an empty list to store the rows from all pages
all_data = []

# Request pages 1 through 19 (range end is exclusive), newest records first.
# NOTE(review): this caps the download at 19 * 10000 rows; anything older than
# that is not fetched — confirm the cap covers the period you need.
for page_number in range(1, 20):
    print(f"Fetching page {page_number}...")
    params["page[number]"] = page_number  # Add the current page number to the parameters

    # A timeout makes a stalled connection raise instead of hanging the cell
    response = requests.get(url, params=params, timeout=60)

    # Stop on the first failed request
    if response.status_code != 200:
        print(f"Request failed on page {page_number} with status code {response.status_code}: {response.text}")
        break

    data = response.json()  # Parse the JSON response

    # Stop when the page has no rows: either the 'data' field is missing or it
    # is an empty list, which means we are past the last page
    page_rows = data.get("data")
    if not page_rows:
        print(f"No data found on page {page_number}.")
        break

    all_data.extend(page_rows)

# Convert the list of rows to a DataFrame
withdrawals_api = pd.DataFrame(all_data)

# Only touch record_date when rows were returned; an empty frame has no columns
if not withdrawals_api.empty:
    # Convert record_date to datetime
    withdrawals_api["record_date"] = pd.to_datetime(withdrawals_api["record_date"])
    # Clean column names using pyjanitor
    withdrawals_api = withdrawals_api.clean_names()

# Display DataFrame info
withdrawals_api.info()

# Print the range of dates in the data
if not withdrawals_api.empty:
    max_date_api = withdrawals_api["record_date"].max()
    print(f"Max date in API data: {max_date_api}")
    min_date_api = withdrawals_api["record_date"].min()
    print(f"Min date in API data: {min_date_api}")

withdrawals_api.head(25)

In [41]:
import requests
import pandas as pd

# Define the base URL
url = "https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/dts/deposits_withdrawals_operating_cash"

# Initialize an empty list to store the data
all_data = []

# Loop through fiscal years 2016 to 2024 (range end is exclusive) and their quarters
for year in range(2016, 2025):
    for quarter in range(1, 5):  # Quarters 1 to 4
        print(f"Fetching data for FY {year}, Quarter {quarter}...")

        # Define the parameters dynamically for each year and quarter.
        # NOTE(review): only one page of up to 10000 records is requested per
        # quarter; a quarter with more matching rows would be cut short —
        # confirm the per-quarter counts stay below the page size.
        params = {
            "filter": f"transaction_type:eq:Withdrawals,record_fiscal_year:eq:{year},record_fiscal_quarter:eq:{quarter}",
            "sort": "-record_date",
            "page[size]": 10000  # Number of records per page
        }

        # Make the GET request; the timeout makes a stalled connection raise
        # instead of hanging the cell indefinitely
        response = requests.get(url, params=params, timeout=60)

        # Check the response status
        if response.status_code == 200:
            data = response.json()  # Parse the JSON response

            # Extract the 'data' field and append it to the list
            if "data" in data:
                all_data.extend(data["data"])
            else:
                print(f"No data found for FY {year}, Quarter {quarter}.")
        else:
            # A failed quarter is reported and skipped, so the result can be
            # incomplete; check the printed log before relying on the totals
            print(f"Request failed for FY {year}, Quarter {quarter} with status code {response.status_code}: {response.text}")

# Convert the list of data to a DataFrame
withdrawals_api = pd.DataFrame(all_data)

# Convert record_date to datetime (skipped when nothing was downloaded,
# because an empty frame has no record_date column)
if not withdrawals_api.empty:
    withdrawals_api["record_date"] = pd.to_datetime(withdrawals_api["record_date"])
    # Clean column names using pyjanitor
    withdrawals_api = withdrawals_api.clean_names()

# Display DataFrame info
withdrawals_api.info()

# Print the range of dates in the data
if not withdrawals_api.empty:
    max_date_api = withdrawals_api["record_date"].max()
    min_date_api = withdrawals_api["record_date"].min()
    print(f"Max date in API data: {max_date_api}")
    print(f"Min date in API data: {min_date_api}")

# Display the first few rows
withdrawals_api.head(25)

Fetching data for FY 2016, Quarter 1...
Fetching data for FY 2016, Quarter 2...
Fetching data for FY 2016, Quarter 3...
Fetching data for FY 2016, Quarter 4...
Fetching data for FY 2017, Quarter 1...
Fetching data for FY 2017, Quarter 2...
Fetching data for FY 2017, Quarter 3...
Fetching data for FY 2017, Quarter 4...
Fetching data for FY 2018, Quarter 1...
Fetching data for FY 2018, Quarter 2...
Fetching data for FY 2018, Quarter 3...
Fetching data for FY 2018, Quarter 4...
Fetching data for FY 2019, Quarter 1...
Fetching data for FY 2019, Quarter 2...
Fetching data for FY 2019, Quarter 3...
Fetching data for FY 2019, Quarter 4...
Fetching data for FY 2020, Quarter 1...
Fetching data for FY 2020, Quarter 2...
Fetching data for FY 2020, Quarter 3...
Fetching data for FY 2020, Quarter 4...
Fetching data for FY 2021, Quarter 1...
Fetching data for FY 2021, Quarter 2...
Fetching data for FY 2021, Quarter 3...
Fetching data for FY 2021, Quarter 4...
Fetching data for FY 2022, Quarter 1...


Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2015-12-31,Federal Reserve Account,Withdrawals,Commodity Credit Corporation programs,,87,2352,11308,II,Deposits and Withdrawals of Operating Cash,33,2016,1,2015,4,12,31
1,2015-12-31,Federal Reserve Account,Withdrawals,Defense Vendor Payments (EFT),,1113,29035,74472,II,Deposits and Withdrawals of Operating Cash,34,2016,1,2015,4,12,31
2,2015-12-31,Federal Reserve Account,Withdrawals,Education Department programs,,964,11165,34592,II,Deposits and Withdrawals of Operating Cash,35,2016,1,2015,4,12,31
3,2015-12-31,Federal Reserve Account,Withdrawals,Energy Department programs,,239,2938,8333,II,Deposits and Withdrawals of Operating Cash,36,2016,1,2015,4,12,31
4,2015-12-31,Federal Reserve Account,Withdrawals,Federal Employees Insurance Payments,,292,6526,18729,II,Deposits and Withdrawals of Operating Cash,37,2016,1,2015,4,12,31
5,2015-12-31,Federal Reserve Account,Withdrawals,Fed. Highway Administration programs,,2,3390,10421,II,Deposits and Withdrawals of Operating Cash,38,2016,1,2015,4,12,31
6,2015-12-31,Federal Reserve Account,Withdrawals,Federal Salaries (EFT),,3375,17042,43898,II,Deposits and Withdrawals of Operating Cash,39,2016,1,2015,4,12,31
7,2015-12-31,Federal Reserve Account,Withdrawals,Food and Nutrition Service (misc),,89,3050,8010,II,Deposits and Withdrawals of Operating Cash,40,2016,1,2015,4,12,31
8,2015-12-31,Federal Reserve Account,Withdrawals,GSA programs,,47,1589,4403,II,Deposits and Withdrawals of Operating Cash,41,2016,1,2015,4,12,31
9,2015-12-31,Federal Reserve Account,Withdrawals,Health and Human Services Grants (misc),,233,8482,23854,II,Deposits and Withdrawals of Operating Cash,42,2016,1,2015,4,12,31


In [42]:
import requests
import pandas as pd

# Define the base URL
url = "https://api.fiscaldata.treasury.gov/services/api/fiscal_service/v1/accounting/dts/deposits_withdrawals_operating_cash"

# Initialize an empty list to store the data
latest_data = []

# Loop through the quarters of FY2025 only (range end is exclusive);
# widen the range to pick up later fiscal years
for year in range(2025, 2026):
    for quarter in range(1, 5):  # Quarters 1 to 4
        print(f"Fetching data for FY {year}, Quarter {quarter}...")

        # Define the parameters dynamically for each year and quarter.
        # NOTE(review): only one page of up to 10000 records is requested per
        # quarter; a quarter with more matching rows would be cut short —
        # confirm the per-quarter counts stay below the page size.
        params = {
            "filter": f"transaction_type:eq:Withdrawals,record_fiscal_year:eq:{year},record_fiscal_quarter:eq:{quarter}",
            "sort": "-record_date",
            "page[size]": 10000  # Number of records per page
        }

        # Make the GET request; the timeout makes a stalled connection raise
        # instead of hanging the cell indefinitely
        response = requests.get(url, params=params, timeout=60)

        # Check the response status
        if response.status_code == 200:
            data = response.json()  # Parse the JSON response

            # Extract the 'data' field and append it to the list
            if "data" in data:
                latest_data.extend(data["data"])
            else:
                print(f"No data found for FY {year}, Quarter {quarter}.")
        else:
            # A failed quarter is reported and skipped, so the result can be
            # incomplete; check the printed log before relying on the totals
            print(f"Request failed for FY {year}, Quarter {quarter} with status code {response.status_code}: {response.text}")

# Convert the list of data to a DataFrame
latest_withdrawals = pd.DataFrame(latest_data)

# Convert record_date to datetime (skipped when nothing was downloaded,
# because an empty frame has no record_date column)
if not latest_withdrawals.empty:
    latest_withdrawals["record_date"] = pd.to_datetime(latest_withdrawals["record_date"])
    # Clean column names using pyjanitor
    latest_withdrawals = latest_withdrawals.clean_names()

# Display DataFrame info
latest_withdrawals.info()

# Print the range of dates in the data
if not latest_withdrawals.empty:
    max_date_latest = latest_withdrawals["record_date"].max()
    min_date_latest = latest_withdrawals["record_date"].min()
    print(f"Max date in latest data: {max_date_latest}")
    print(f"Min date in latest data: {min_date_latest}")

# Display the first few rows
latest_withdrawals.head(25)

Fetching data for FY 2025, Quarter 1...
Fetching data for FY 2025, Quarter 2...
Fetching data for FY 2025, Quarter 3...
Fetching data for FY 2025, Quarter 4...
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14008 entries, 0 to 14007
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   record_date              14008 non-null  datetime64[ns]
 1   account_type             14008 non-null  object        
 2   transaction_type         14008 non-null  object        
 3   transaction_catg         14008 non-null  object        
 4   transaction_catg_desc    14008 non-null  object        
 5   transaction_today_amt    14008 non-null  object        
 6   transaction_mtd_amt      14008 non-null  object        
 7   transaction_fytd_amt     14008 non-null  object        
 8   table_nbr                14008 non-null  object        
 9   table_nm                 14008 non-null  object        

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2024-12-31,Treasury General Account (TGA),Withdrawals,Corporation for Public Broadcasting,,0,0,535,II,Deposits and Withdrawals of Operating Cash,82,2025,1,2024,4,12,31
1,2024-12-31,Treasury General Account (TGA),Withdrawals,Dept of Agriculture (USDA) - misc,,77,2607,8797,II,Deposits and Withdrawals of Operating Cash,83,2025,1,2024,4,12,31
2,2024-12-31,Treasury General Account (TGA),Withdrawals,USDA - Child Nutrition,,110,3150,8471,II,Deposits and Withdrawals of Operating Cash,84,2025,1,2024,4,12,31
3,2024-12-31,Treasury General Account (TGA),Withdrawals,USDA - Commodity Credit Corporation,,70,2002,7304,II,Deposits and Withdrawals of Operating Cash,85,2025,1,2024,4,12,31
4,2024-12-31,Treasury General Account (TGA),Withdrawals,USDA - Federal Crop Insurance Corp Fund,,48,2123,9573,II,Deposits and Withdrawals of Operating Cash,86,2025,1,2024,4,12,31
5,2024-12-31,Treasury General Account (TGA),Withdrawals,USDA - Loan Payments,,22,1421,3694,II,Deposits and Withdrawals of Operating Cash,87,2025,1,2024,4,12,31
6,2024-12-31,Treasury General Account (TGA),Withdrawals,USDA - Other Farm Service,,1,76,481,II,Deposits and Withdrawals of Operating Cash,88,2025,1,2024,4,12,31
7,2024-12-31,Treasury General Account (TGA),Withdrawals,USDA - Supp Nutrition Assist Prog (SNAP),,194,9390,27759,II,Deposits and Withdrawals of Operating Cash,89,2025,1,2024,4,12,31
8,2024-12-31,Treasury General Account (TGA),Withdrawals,USDA - Supp Nutrition Assist Prog (WIC),,28,728,1968,II,Deposits and Withdrawals of Operating Cash,90,2025,1,2024,4,12,31
9,2024-12-31,Treasury General Account (TGA),Withdrawals,Dept of Commerce (DOC),,19,2027,5393,II,Deposits and Withdrawals of Operating Cash,91,2025,1,2024,4,12,31


In [43]:
import pandas as pd

# `withdrawals_api` holds the data fetched for FY2016–FY2024 and
# `latest_withdrawals` holds the data fetched for FY2025.

# Stack the two frames, drop exact duplicate rows, make sure record_date is a
# datetime column, and put the rows in chronological order with a fresh index
combined_withdrawals = (
    pd.concat([withdrawals_api, latest_withdrawals], ignore_index=True)
    .drop_duplicates()
    .assign(record_date=lambda frame: pd.to_datetime(frame["record_date"]))
    .sort_values(by="record_date")
    .reset_index(drop=True)
)

# Display the combined DataFrame info
combined_withdrawals.info()

# Print the range of dates in the combined data.
# NOTE: the variable names are kept from the original cell, so they overwrite
# the max_date_latest / min_date_latest values set when the FY2025 data was
# fetched.
if not combined_withdrawals.empty:
    max_date_latest = combined_withdrawals["record_date"].max()
    min_date_latest = combined_withdrawals["record_date"].min()
    print(f"Max date in combined data: {max_date_latest}")
    print(f"Min date in combined data: {min_date_latest}")

# Display the first few rows of the combined DataFrame
combined_withdrawals.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 160559 entries, 0 to 160558
Data columns (total 17 columns):
 #   Column                   Non-Null Count   Dtype         
---  ------                   --------------   -----         
 0   record_date              160559 non-null  datetime64[ns]
 1   account_type             160559 non-null  object        
 2   transaction_type         160559 non-null  object        
 3   transaction_catg         160559 non-null  object        
 4   transaction_catg_desc    160559 non-null  object        
 5   transaction_today_amt    160559 non-null  object        
 6   transaction_mtd_amt      160559 non-null  object        
 7   transaction_fytd_amt     160559 non-null  object        
 8   table_nbr                160559 non-null  object        
 9   table_nm                 160559 non-null  object        
 10  src_line_nbr             160559 non-null  object        
 11  record_fiscal_year       160559 non-null  object        
 12  record_fiscal_qu

Unnamed: 0,record_date,account_type,transaction_type,transaction_catg,transaction_catg_desc,transaction_today_amt,transaction_mtd_amt,transaction_fytd_amt,table_nbr,table_nm,src_line_nbr,record_fiscal_year,record_fiscal_quarter,record_calendar_year,record_calendar_quarter,record_calendar_month,record_calendar_day
0,2015-10-01,Federal Reserve Account,Withdrawals,Defense Vendor Payments (EFT),,1121,1121,1121,II,Deposits and Withdrawals of Operating Cash,35,2016,1,2015,4,10,1
1,2015-10-01,Federal Reserve Account,Withdrawals,Medicare Advantage - Part C&D Payments,,19342,19342,19342,II,Deposits and Withdrawals of Operating Cash,52,2016,1,2015,4,10,1
2,2015-10-01,Federal Reserve Account,Withdrawals,Medicare and Other CMS Payments,,1287,1287,1287,II,Deposits and Withdrawals of Operating Cash,51,2016,1,2015,4,10,1
3,2015-10-01,Federal Reserve Account,Withdrawals,Medicaid,,1033,1033,1033,II,Deposits and Withdrawals of Operating Cash,50,2016,1,2015,4,10,1
4,2015-10-01,Federal Reserve Account,Withdrawals,Labor Dept. prgms (excl. unemployment),,100,100,100,II,Deposits and Withdrawals of Operating Cash,49,2016,1,2015,4,10,1


In [48]:
# Drop the rows whose transaction_catg is exactly one of the two
# "Public Debt Cash Redemp." labels; every other row is kept.
debt_redemption_labels = [
    "Public Debt Cash Redemp. (Table IIIB)",
    "Public Debt Cash Redemp. (Table III-B)",
]
keep_rows = ~combined_withdrawals["transaction_catg"].isin(debt_redemption_labels)
combined_withdrawals = combined_withdrawals[keep_rows]

In [49]:
# For every fiscal year / fiscal quarter pair in the API data, count the
# distinct record dates and the total number of rows.
unique_counts = (
    combined_withdrawals
    .groupby(["record_fiscal_year", "record_fiscal_quarter"])
    .agg(
        unique_dates=pd.NamedAgg(column="record_date", aggfunc="nunique"),
        # "size" counts all rows in the group, not distinct ones
        unique_records=pd.NamedAgg(column="record_date", aggfunc="size"),
    )
)

# Move the group keys back into columns and apply display-friendly headers
unique_counts_df = unique_counts.reset_index().rename(
    columns={
        "record_fiscal_year": "Fiscal Year",
        "record_fiscal_quarter": "Fiscal Quarter",
        "unique_dates": "Unique Dates",
        "unique_records": "Unique Records",
    }
)

# Print the table
print(unique_counts_df)

   Fiscal Year Fiscal Quarter  Unique Dates  Unique Records
0         2016              1            62            2206
1         2016              2            62            2157
2         2016              3            64            2209
3         2016              4            64            2266
4         2017              1            61            2207
5         2017              2            62            2183
6         2017              3            64            2221
7         2017              4            63            2247
8         2018              1            62            2253
9         2018              2            62            2213
10        2018              3            64            2229
11        2018              4            63            2260
12        2019              1            62            2292
13        2019              2            61            2177
14        2019              3            64            2288
15        2019              4           

In [45]:
# Same per-quarter check for the CSV-based withdrawals frame: count the
# distinct record dates and the total number of rows in every
# fiscal year / fiscal quarter pair.
withdrawals_by_quarter = withdrawals.groupby(["fiscal_year", "fiscal_quarter_number"])
unique_counts_withdrawals = withdrawals_by_quarter.agg(
    unique_dates=("record_date", "nunique"),
    # "size" counts all rows in the group, not distinct ones
    unique_records=("record_date", "size"),
)

# Move the group keys back into columns and apply display-friendly headers
header_labels = {
    "fiscal_year": "Fiscal Year",
    "fiscal_quarter_number": "Fiscal Quarter",
    "unique_dates": "Unique Dates",
    "unique_records": "Unique Records",
}
unique_counts_withdrawals_df = unique_counts_withdrawals.reset_index().rename(columns=header_labels)

# Print the table
print(unique_counts_withdrawals_df)

    Fiscal Year  Fiscal Quarter  Unique Dates  Unique Records
0          2015               3            52            1779
1          2015               4            65            2295
2          2016               1            62            2206
3          2016               2            62            2157
4          2016               3            64            2209
5          2016               4            64            2266
6          2017               1            61            2207
7          2017               2            62            2183
8          2017               3            64            2221
9          2017               4            63            2247
10         2018               1            62            2253
11         2018               2            62            2213
12         2018               3            64            2229
13         2018               4            63            2260
14         2019               1            62            2292
15      