In [1]:
from IPython.core.interactiveshell import InteractiveShell
import sys, os

InteractiveShell.ast_node_interactivity = "all"
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..","..")))

In [2]:
import pandas as pd
from paths import DATA_PATH

In [None]:
from datetime import datetime

def loadData(start_date, end_date):
    files = os.listdir(DATA_PATH)
    files_no_ext = [os.path.splitext(f)[0] for f in files]
    start = datetime.strptime(start_date, "%Y-%m-%d")
    end = datetime.strptime(end_date, "%Y-%m-%d")
    filtered_files = []
    for f in files_no_ext:
        try:
            file_date = datetime.strptime(f, "%Y-%m-%d")
            if start <= file_date <= end:
                filtered_files.append(f)
        except ValueError:
            continue 
    df_list = []
    for file in filtered_files:
        temp_df = pd.read_csv(os.path.join(DATA_PATH,(file) + ".csv"))
        df_list.append(temp_df)
    if df_list:
        df = pd.concat(df_list, ignore_index=True)
    else:
        df = pd.DataFrame()
    if (df.empty):
        return pd.DataFrame()
    
    return df

In [4]:
final_df = loadData("2025-05-14", "2025-05-17")

In [None]:
final_df

In [6]:
from typing import Callable
from langchain.tools import BaseTool
from pydantic import PrivateAttr

class DataLoadingTool(BaseTool):
    name: str = "data_loader"
    description: str = (
        "Loads data."
        "Given a start and end date, this tool loads data from CSV files "
        "into a pandas DataFrame for further analysis and processing."
        "the start and end dates must follow the format YYYY-MM-DD."
        "they must be concatenated into one param ('start_date, end_date')."
    )

    # Use PrivateAttr to store a callback function without validation
    _set_df_callback: Callable[[pd.DataFrame], None] = PrivateAttr()

    def __init__(self, set_df_callback: Callable[[pd.DataFrame], None], **kwargs):
        super().__init__(**kwargs)
        self._set_df_callback = set_df_callback

    def _run(self, query) -> str:
        try:
            start_date, end_date = query.split(",")
            start_date = start_date.strip()
            end_date = end_date.strip()
            df = loadData(start_date,end_date)  # Load the data
            self._set_df_callback(df)  # Call the callback to set the DataFrame
            return "Data loaded successfully"
        except Exception as e:
            return f"Failed to load data: {e}"

    async def _arun(self) -> str:
        return self._run()

In [6]:
class TestDataLoadingTool:
    def __init__(self):
        self.df =None

    def set_df(self, df: pd.DataFrame):
        self.df = df

    def get_dataframe(self):
        return self.df

In [7]:
test= TestDataLoadingTool()

In [8]:
data_loading_tool = DataLoadingTool(set_df_callback=test.set_df)

In [9]:
data_loading_tool("2025-05-14, 2025-05-14")

  data_loading_tool("2025-05-14, 2025-05-14")


'Data loaded successfully'

In [5]:
test.get_dataframe()["OrderDate"].min(), test.get_dataframe()["OrderDate"].max()

NameError: name 'test' is not defined

In [None]:
test.get_dataframe()