From b09b36b5e1bc06aca7f573c03078e9c4e9f1dded Mon Sep 17 00:00:00 2001
From: Adarsh Kumar
Date: Sun, 19 Oct 2025 13:31:32 +0530
Subject: [PATCH] feat: Add /process/preview endpoint for CSV previews

---
 app/routers/process.py     | 68 +++++++++++++++++++++++++++++++++++++-
 app/services/processing.py | 34 +++++++++++++++++++
 2 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/app/routers/process.py b/app/routers/process.py
index a1e2e16..7d0dd7b 100644
--- a/app/routers/process.py
+++ b/app/routers/process.py
@@ -22,7 +22,7 @@
     ProcessRequestBody,
     ProcessResponse,
 )
-from app.services.processing import parse_input, process_dataframe
+from app.services.processing import generate_preview, parse_input, process_dataframe
 from app.utils.file_utils import download_to_temp
 
 router = APIRouter()
@@ -113,3 +113,69 @@ async def process_csv(
             status_code=400,
             content=ProcessResponse(data=None, stats=None, download_url=None, errors=[{"message": str(e)}]).dict(),
         )
+
+
+@router.post("/preview")
+async def get_csv_preview(
+    # Accepts the same input types as the main processing endpoint.
+    file: Optional[UploadFile] = File(default=None, description="CSV/Excel file to upload"),
+    url: Optional[str] = Form(default=None, description="URL to CSV/Excel file"),
+    raw_csv: Optional[str] = Form(default=None, description="Raw CSV text"),
+):
+    """Generate a JSON preview of an uploaded file, a URL, or raw CSV text.
+
+    The input is parsed with default ``ParsingOptions`` and handed to
+    ``generate_preview``.
+
+    Returns:
+        A 200 ``JSONResponse`` with the column names and the first 5 rows,
+        or a 400 ``JSONResponse`` shaped ``{"errors": [{"message": ...}]}``
+        when any step other than the URL download fails.
+
+    Raises:
+        HTTPException: 400 if the URL cannot be downloaded.
+    """
+    try:
+        # Step 1: Fetch the URL to a temporary path, if one was given.
+        # NOTE(review): the downloaded file is not removed here -- confirm
+        # whether download_to_temp or its caller owns cleanup.
+        url_path: Optional[Path] = None
+        if url:
+            try:
+                url_path, _ = await download_to_temp(url)
+            except Exception as e:
+                raise HTTPException(
+                    status_code=400, detail=f"Failed to download URL: {e}"
+                ) from e
+
+        # Step 2: Read the uploaded file into memory, if one was given.
+        file_bytes: Optional[bytes] = None
+        filename: Optional[str] = None
+        if file is not None:
+            filename = file.filename
+            file_bytes = await file.read()
+
+        # Step 3: Reuse parse_input to build the DataFrame. Default
+        # ParsingOptions are enough; a preview needs no customization.
+        parsed = parse_input(
+            file_bytes=file_bytes,
+            filename=filename,
+            url_path=url_path,
+            raw_csv=raw_csv,
+            parsing=ParsingOptions(),
+        )
+
+        # Step 4: Build the preview payload from the DataFrame.
+        preview_data = generate_preview(parsed.df)
+
+        return JSONResponse(status_code=200, content=preview_data)
+
+    except HTTPException:
+        # Re-raise known HTTP exceptions untouched.
+        raise
+    except Exception as e:
+        # Structured error for any other failure, matching the project's style.
+        return JSONResponse(
+            status_code=400,
+            content={"errors": [{"message": str(e)}]},
+        )
diff --git a/app/services/processing.py b/app/services/processing.py
index 7c667c3..ac0ee57 100644
--- a/app/services/processing.py
+++ b/app/services/processing.py
@@ -337,3 +337,37 @@ def parse_input(
         return ParsedInput(df=df, source_name=filename)
 
     raise ValueError("No input provided: expected file upload, URL, or raw_csv text")
+
+
+def generate_preview(df: pd.DataFrame, n_rows: int = 5) -> dict:
+    """Build a JSON-serializable preview of a DataFrame.
+
+    Missing scalar values (NaN, NaT, pd.NA) are replaced with ``None``:
+    NaN is not valid JSON, and strict encoders such as
+    ``json.dumps(..., allow_nan=False)`` raise ``ValueError`` on it.
+
+    Args:
+        df: The input DataFrame.
+        n_rows: Maximum number of leading rows to include. Defaults to 5.
+
+    Returns:
+        A dictionary with ``columns`` (the column labels) and ``rows``
+        (one dict per row, keyed by column label).
+
+    Raises:
+        ValueError: If ``n_rows`` is negative.
+    """
+    if n_rows < 0:
+        raise ValueError("n_rows must be non-negative")
+
+    records = df.head(n_rows).to_dict(orient="records")
+    rows = [
+        {
+            # is_scalar guards pd.isna against list-like cell values.
+            label: None if pd.api.types.is_scalar(value) and pd.isna(value) else value
+            for label, value in record.items()
+        }
+        for record in records
+    ]
+
+    return {"columns": df.columns.tolist(), "rows": rows}