Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 53 additions & 1 deletion app/routers/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
ProcessRequestBody,
ProcessResponse,
)
from app.services.processing import parse_input, process_dataframe
from app.services.processing import parse_input, process_dataframe, generate_preview
from app.utils.file_utils import download_to_temp

router = APIRouter()
Expand Down Expand Up @@ -113,3 +113,55 @@ async def process_csv(
status_code=400,
content=ProcessResponse(data=None, stats=None, download_url=None, errors=[{"message": str(e)}]).dict(),
)

@router.post("/preview")
async def get_csv_preview(
    # Accepts the same three input kinds as the main processing endpoint.
    file: Optional[UploadFile] = File(default=None, description="CSV/Excel file to upload"),
    url: Optional[str] = Form(default=None, description="URL to CSV/Excel file"),
    raw_csv: Optional[str] = Form(default=None, description="Raw CSV text"),
):
    """Generate a JSON preview of a file, URL, or raw CSV.

    The input is parsed with default ``ParsingOptions`` and handed to
    ``generate_preview``, whose result is returned as the response body.

    Args:
        file: Optional uploaded CSV/Excel file.
        url: Optional URL of a CSV/Excel file; downloaded to a temporary path.
        raw_csv: Optional raw CSV text.

    Returns:
        A 200 ``JSONResponse`` carrying the preview on success, or a 400
        ``JSONResponse`` of the form ``{"errors": [{"message": ...}]}`` when
        parsing or preview generation fails (including when no input is given,
        in which case ``parse_input`` raises).

    Raises:
        HTTPException: 400 if the URL could not be downloaded.
    """
    # Declared outside the try so the finally clause can always see it.
    url_path: Optional[Path] = None
    try:
        # Step 1: fetch the remote file, if a URL was supplied.
        if url:
            try:
                url_path, _ = await download_to_temp(url)
            except Exception as e:
                # Chain the cause so the original failure stays in the traceback.
                raise HTTPException(
                    status_code=400, detail=f"Failed to download URL: {e}"
                ) from e

        # Step 2: read the uploaded file, if one was supplied.
        file_bytes: Optional[bytes] = None
        filename: Optional[str] = None
        if file is not None:
            filename = file.filename
            file_bytes = await file.read()

        # Step 3: reuse parse_input to build the DataFrame. Default parsing
        # options are used because a preview needs no customization.
        parsed = parse_input(
            file_bytes=file_bytes,
            filename=filename,
            url_path=url_path,
            raw_csv=raw_csv,
            parsing=ParsingOptions(),
        )

        # Step 4: build the preview payload from the parsed DataFrame.
        preview_data = generate_preview(parsed.df)

        return JSONResponse(status_code=200, content=preview_data)

    except HTTPException:
        # Known HTTP errors propagate unchanged so FastAPI renders them.
        raise
    except Exception as e:
        # Any other failure becomes a structured 400, matching the project's style.
        return JSONResponse(
            status_code=400,
            content={"errors": [{"message": str(e)}]},
        )
    finally:
        # The downloaded copy is only needed while parsing; remove it so
        # repeated previews do not accumulate temp files.
        # NOTE(review): assumes download_to_temp hands ownership of the file
        # to the caller -- confirm against app.utils.file_utils.
        if url_path is not None:
            try:
                url_path.unlink()
            except OSError:
                # Best-effort cleanup: never mask the real response or error.
                pass
17 changes: 17 additions & 0 deletions app/services/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,3 +337,20 @@ def parse_input(
return ParsedInput(df=df, source_name=filename)

raise ValueError("No input provided: expected file upload, URL, or raw_csv text")

def generate_preview(df: pd.DataFrame, n_rows: int = 5) -> dict:
    """Generate a JSON-friendly preview from a DataFrame.

    Missing values (NaN, NaT, ``pd.NA``, ``None``) are emitted as ``None``
    so the result can be serialized by strict JSON encoders, which reject
    NaN.

    Args:
        df: The input DataFrame.
        n_rows: Number of leading rows to include; passed to
            ``DataFrame.head``. Defaults to 5.

    Returns:
        A dictionary with two keys: ``"columns"``, the list of column labels,
        and ``"rows"``, a list of one dict per previewed row mapping column
        label to cell value.
    """

    def _json_safe(value):
        # Only test scalars: pd.isna on a list-like cell returns an array,
        # which has no single truth value.
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return None
        return value

    columns = df.columns.tolist()
    records = df.head(n_rows).to_dict(orient="records")

    rows = [
        {key: _json_safe(value) for key, value in record.items()}
        for record in records
    ]

    return {"columns": columns, "rows": rows}