In [22]:
import pandas as pd
import sys
import os
import requests
from datetime import datetime
from dataclasses import dataclass, field

parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(parent_dir)

from functions.display import display

In [23]:
# Lower bound for the `record_date` filter. Despite the name this is a full
# ISO date string, not just a year.
year = '2023-01-01'

# Location of the dataset: service root plus the dataset-specific path.
base_url = 'https://api.fiscaldata.treasury.gov/services/api/fiscal_service'
endpoint = '/v1/accounting/dts/deposits_withdrawals_operating_cash'
url = f'{base_url}{endpoint}'

# Query-string parameters sent with every request. All values are strings;
# 'page[number]' is the page the download starts from.
params = dict([
    ('page[size]', '5000'),
    ('page[number]', '1'),
    ('filter', 'record_date:gte:' + year),
])

# Module-level accumulator; nothing in this cell reads it.
all_data = []

In [24]:
@dataclass
class Data:
    """Paginated downloader for one JSON API endpoint, plus a menu of actions.

    Attributes:
        url: Full endpoint URL queried by `search`.
        rows: Number of rows in `df` after the most recent search.
        df: Results of the most recent search, or None when nothing is loaded.
        last_accessed: Local time at which the most recent search that
            returned data finished.
        all_data: Raw records from the most recent search, taken from the
            'data' list of each response.
        params: Query-string parameters sent with every request. The optional
            'page[number]' entry is the page the download starts from.
        columns: Maps a display label (e.g. "Record date") to the DataFrame
            column name it was derived from (e.g. "record_date").
    """

    url: str
    rows: int = 0
    df: pd.DataFrame = None
    last_accessed: datetime = None
    all_data: list = field(default_factory=list)
    params: dict = field(default_factory=dict)
    columns: dict = field(default_factory=dict)

    # Seconds to wait for each HTTP request before giving up. Not annotated,
    # so the dataclass machinery does not treat it as an init field.
    REQUEST_TIMEOUT = 30

    def __post_init__(self):
        # Nested menu description: label -> bound method, sub-menu dict, or the
        # string 'back'. It is passed to the project's `display` helper, whose
        # handling of these values is not visible from this file.
        self.sub_menu = {
            "Back": 'back',
            "Search": self.search,
            "Parameters": {
                "Back": 'back',
                "List parameters": self.list_parameters,
                "Edit parameters": self.edit_parameters
            },
            "Data": {
                "Back": 'back',
                "View data": self.view_data,
                "View data size": self.view_data_size,
                "List columns": self.list_columns,
            }
        }

    def search(self) -> "pd.DataFrame | None":
        """Download every page from `self.url` and store the result.

        Pages are requested one after another, starting at
        `self.params['page[number]']` (page 1 when the key is absent), until a
        page comes back empty or a request fails. A failure is reported and
        stops the download; rows fetched before it are kept.

        Each call replaces the results of the previous one (`all_data`, `df`,
        `rows`, `columns`). `self.params` is never modified, so the dict the
        caller passed in keeps its original starting page.

        Returns:
            The DataFrame built from the fetched records, or None when no
            records were fetched.
        """
        page = int(self.params.get('page[number]', 1))
        records = []

        while True:
            # Per-request copy: the page counter must not leak into
            # self.params, which may be shared with the caller.
            request_params = {**self.params, 'page[number]': str(page)}

            try:
                response = requests.get(
                    self.url,
                    params=request_params,
                    timeout=self.REQUEST_TIMEOUT,
                )
            except requests.RequestException as exc:
                print(f"Request for page {page} failed: {exc}")
                break

            if response.status_code != 200:
                print(f"Request for page {page} failed with HTTP status {response.status_code}.")
                break

            try:
                data = response.json().get('data', [])
            except ValueError as exc:
                # The body was not valid JSON.
                print(f"Could not decode the response for page {page}: {exc}")
                break

            # An empty page marks the end of the dataset.
            if not data:
                print("No more data returned. All data fetched.")
                break

            records.extend(data)
            print(f"Fetched {len(data)} entries from page {page}. Total entries so far: {len(records)}")
            page += 1

        self.all_data = records
        if records:
            self.df = pd.DataFrame(records)
            self.last_accessed = datetime.now()
            # Rebuilt from scratch so labels from an earlier search do not linger.
            self.columns = {
                column.replace('_', ' ').capitalize(): column
                for column in self.df.columns
            }
            self.rows = len(self.df)
            print(f"Total rows fetched: {self.rows}")
        else:
            self.df = None
            self.rows = 0
            self.columns = {}
            print("No data found.")

        return self.df

    def list_parameters(self):
        """Print the name of every request parameter, one per line."""
        for item in self.params:
            print(item)

    def edit_parameters(self, params=None):
        """Show `params` through the `display` helper and store it as `self.params`.

        Args:
            params: Parameter dict to show and adopt. Defaults to the current
                `self.params`, so the method can be invoked without arguments
                (it is registered as a bare callable in `sub_menu`).

        NOTE(review): whether `display` edits the dict in place is not visible
        from this file; its return value is not used here.
        """
        if params is None:
            params = self.params
        display(params)
        self.params = params

    def list_columns(self):
        """Print the display label of every known column, one per line."""
        for column in self.columns:
            print(column)

    def pick_columns(self):
        """Pass the label -> column mapping to `display` and return its result."""
        return display(self.columns)

    def view_data_size(self):
        """Print the number of rows held from the most recent search."""
        print(f'Total entries in dataset: {self.rows}')

    def view_data(self):
        """Print summary statistics of the loaded data, if any is loaded."""
        if self.df is None:
            print("No data loaded. Run a search first.")
            return
        print(self.df.describe())

In [None]:
# Build the downloader for the configured endpoint and hand its menu to the
# project's `display` helper (imported from functions.display).
data = Data(params=params, url=url)
display(data.sub_menu)

In [26]:
# df['transaction_today_amt'] = pd.to_numeric(df['transaction_today_amt'])
# df_sorted = df['transaction_today_amt'].sort_values(ascending=False)
# print(df_sorted)