In [12]:
from langchain_core.tools import tool
import pandas as pd
import numpy as np
import json

In [15]:
@tool
def get_dataframe_info() -> str:
    """Get basic information about the DataFrame including shape, columns, and data types."""
    # Reads the notebook-level `df`. The docstring above is left verbatim
    # because the @tool decorator exposes it as the tool description.

    # Map each column to the string form of its dtype so it can be serialized.
    dtype_names = {}
    for name, dtype in df.dtypes.items():
        dtype_names[name] = str(dtype)

    # deep=True also counts the payload of object columns, not just pointers.
    total_bytes = df.memory_usage(deep=True).sum()

    summary = {
        "shape": df.shape,
        "columns": list(df.columns),
        "dtypes": dtype_names,
        "memory_usage": f"{total_bytes} bytes",
    }
    return json.dumps(summary, indent=2)

@tool
def get_column_stats(column_name: str) -> str:
    """Get statistical summary for a specific column."""
    # Contract (kept out of the docstring, which @tool exposes as the tool
    # description):
    #   column_name -- name of a column in the notebook-level `df`.
    #   returns     -- a JSON string. Integer/float columns yield
    #                  count/mean/std/min/quartiles/max; every other column
    #                  yields count/unique/top/freq. An unknown column yields
    #                  a plain-text message listing the available columns.

    def _to_builtin(value):
        # json.dumps only calls this for objects it cannot encode natively.
        # NumPy scalars (np.int64 from count()/min()/max(), ...) expose
        # .item() to obtain the equivalent Python value; anything else
        # (timestamps, pd.NA, ...) falls back to its string form.
        item = getattr(value, "item", None)
        return item() if callable(item) else str(value)

    if column_name not in df.columns:
        return f"Column '{column_name}' not found. Available columns: {list(df.columns)}"

    col = df[column_name]

    # dtype.kind is 'i' (signed int), 'u' (unsigned int) or 'f' (float) for
    # every real-valued numeric width, so int32/float32/uint8 columns get the
    # numeric summary too instead of only int64/float64. Booleans ('b') and
    # complex ('c') stay on the categorical branch, as before.
    if col.dtype.kind in ("i", "u", "f"):
        stats = {
            "count": col.count(),
            "mean": col.mean(),
            "std": col.std(),
            "min": col.min(),
            "25%": col.quantile(0.25),
            "50%": col.median(),
            "75%": col.quantile(0.75),
            "max": col.max(),
        }
    else:
        # Compute each helper Series once instead of twice.
        modes = col.mode()
        counts = col.value_counts()
        stats = {
            "count": col.count(),
            "unique": col.nunique(),
            "top": modes.iloc[0] if not modes.empty else "N/A",
            "freq": counts.iloc[0] if len(counts) > 0 else 0,
        }

    # Without `default`, json.dumps raises TypeError on NumPy integer scalars.
    return json.dumps(stats, indent=2, default=_to_builtin)

@tool
def get_missing_values() -> str:
    """Check for missing values in the DataFrame."""
    # Returns a JSON string with three entries: the total number of missing
    # cells in the notebook-level `df`, the missing count per column, and the
    # per-column percentage of rows that are missing (rounded to 2 decimals).
    missing = df.isnull().sum()
    # Cast to plain int so the counts are JSON-serializable.
    missing_dict = {col: int(count) for col, count in missing.items()}
    total_missing = int(missing.sum())

    # A DataFrame with columns but no rows would otherwise divide by zero;
    # report 0.0 for every column in that case.
    row_count = len(df)
    if row_count:
        percentage_missing = {
            col: round((count / row_count) * 100, 2)
            for col, count in missing_dict.items()
        }
    else:
        percentage_missing = {col: 0.0 for col in missing_dict}

    return json.dumps({
        "total_missing_values": total_missing,
        "missing_by_column": missing_dict,
        "percentage_missing": percentage_missing
    }, indent=2)

In [16]:
# Collect the three DataFrame inspection tools into a single list.
tools = [
    get_dataframe_info,
    get_column_stats,
    get_missing_values,
]