From 1b2fdddf3197ef29ad63ac178978bafd526ccc10 Mon Sep 17 00:00:00 2001 From: jiankaiii Date: Fri, 26 Sep 2025 14:07:23 +0800 Subject: [PATCH] feat: enable content generation without source for Summary (#349) --- README.md | 68 +- backend/main.py | 126 +- backend/requirements.txt | 2 +- ecosystem.config.cjs | 2 +- frontend/next.config.mjs | 39 +- frontend/package.json | 3 +- .../(app)/workspace/courses/create/page.tsx | 6 +- .../workspace/courses/edit/[id]/page.tsx | 6 +- .../src/app/(app)/workspace/summary/page.tsx | 39 +- .../src/app/api/programmes/download/route.ts | 81 +- frontend/src/app/api/summary/route.ts | 325 ++--- frontend/src/components/sources-list.tsx | 8 +- install_win.bat | 74 ++ run_win.bat | 98 ++ scripts/path-resolver.mjs | 4 +- scripts/utils.mjs | 1107 ++++++++++++++--- scripts/win/install.ps1 | 576 +++++++++ scripts/win/run.ps1 | 71 ++ scripts/win/setup.ps1 | 146 +++ scripts/win/stop.ps1 | 44 + scripts/win/uninstall.ps1 | 318 +++++ setup_win.bat | 37 + stop_win.bat | 161 +++ uninstall_win.bat | 42 + 24 files changed, 2973 insertions(+), 410 deletions(-) create mode 100644 install_win.bat create mode 100644 run_win.bat create mode 100644 scripts/win/install.ps1 create mode 100644 scripts/win/run.ps1 create mode 100644 scripts/win/setup.ps1 create mode 100644 scripts/win/stop.ps1 create mode 100644 scripts/win/uninstall.ps1 create mode 100644 setup_win.bat create mode 100644 stop_win.bat create mode 100644 uninstall_win.bat diff --git a/README.md b/README.md index 4e43d48..e35b98f 100644 --- a/README.md +++ b/README.md @@ -16,9 +16,10 @@ Powered by Intel CPUs and GPUs, the tool ensures accurate, contextually relevant - **Operating System**: - | Type | Version | - |-------------|---------------------------| - | Linux | Ubuntu 24.04 Desktop LTS | + | Type | Version | + | ------- | ------------------------ | + | Linux | Ubuntu 24.04 Desktop LTS | + | Windows | Windows 11 | ### Hardware Requirements @@ -45,13 +46,13 @@ Powered by Intel CPUs and GPUs, 
the tool ensures accurate, contextually relevant > **Important Notice:** This software is currently in pre-production status, designed to run locally on a single system only. For more stable version, please refer to our latest tagged pre-release. -## Quick Start +## Quick Start Linux > **Pre-requisite:** Follow the [Edge Developer Kit Reference Scripts](https://github.com/intel/edge-developer-kit-reference-scripts) to install the necessary drivers and compute-related packages for Intel® GPUs 1. **Setup** - Install system dependencies: - > **Note:** Setup requires administrator privileges as it installs system-level dependencies. + > **Note**: Setup requires administrator privileges as it installs system-level dependencies. ```bash sudo ./setup.sh @@ -81,5 +82,62 @@ Powered by Intel CPUs and GPUs, the tool ensures accurate, contextually relevant ./uninstall.sh ``` +## Quick Start Windows + +1. System-level setup (Administrator required) + + ```powershell + # Double click on setup_win.bat and select "Yes" + .\setup_win.bat + ``` + This script will perform or install the following if not present and may take a while to complete: + - Winget + - Python 3.12 (if not installed or lower version) + - Enable PowerShell script execution if needed + +2. Install application and its dependencies + + ```powershell + # Double click on install_win.bat + .\install_win.bat + ``` + This will automatically proceed to installation of application (without administrator privilege) which does the following: + - Download and install Node.js locally (22.16.0) + - Download and install jq locally + - Install npm dependencies + - Set up Python virtual environment + - Download and configure Ollama + - Create environment configuration files + +3. Start the application + ```powershell + # Double-click to run + .\run_win.bat + ``` + Running this command will automatically open a web browser with `http://localhost:8080` + +4. 
Stop the application + ```powershell + # Double click to stop all services + .\stop_win.bat + ``` + >**IMPORTANT**: Please make sure to close all command or terminal prompts that are open after running `stop_win.bat` + +5. Uninstall the application + ```powershell + # Double click to run uninstall script + .\uninstall_win.bat + ``` + +## Limitations + +On Windows, when running `run_win.bat`, PM2 launches several command prompt windows during operation. These windows can be minimized, but they will remain open. To stop the application, run `stop_win.bat` and manually close the command prompt windows to properly shut down all services. + +## Troubleshooting + +1. Unable to unzip file from GitHub for Windows + + If you have trouble unzipping the downloaded zip file from GitHub on Windows, try extracting it to a folder with a shorter name or path. This issue is caused by Windows' maximum file path length limitation. + ## Disclaimer Intel is committed to respecting human rights and avoiding causing or contributing to adverse impacts on human rights. See [Intel’s Global Human Rights Principles](https://www.intel.com/content/dam/www/central-libraries/us/en/documents/policy-human-rights.pdf). Intel’s products and software are intended only to be used in applications that do not cause or contribute to adverse impacts on human rights. Users should comply with all requirements to notify relevant parties that AI was used in the production of materials, as mandated by their employers or professional standards. 
diff --git a/backend/main.py b/backend/main.py index 4691396..acf66da 100644 --- a/backend/main.py +++ b/backend/main.py @@ -11,7 +11,7 @@ from generate_caption import generate_dynamic_caption from generate_image_embedding import generate_image_embedding from fastapi.responses import FileResponse, JSONResponse -from generate_pptx import create_pptx +from generate_pptx import create_pptx from starlette.background import BackgroundTask import tempfile import imagehash @@ -25,61 +25,66 @@ OUTPUT_DIR = BASE_DIR / "images" OUTPUT_DIR.mkdir(parents=True, exist_ok=True) + @app.post("/parse") async def parse_pdf(file: UploadFile = File(...)): """ Endpoint to parse a PDF file uploaded via multipart/form-data. Extracts images, generates captions and embeddings, and returns the data. """ + temp_file_path = None try: - with tempfile.NamedTemporaryFile(delete=True, suffix=".pdf") as temp_file: + # Create temp file with delete=False to avoid Windows file locking issues + with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: temp_file.write(await file.read()) temp_file_path = temp_file.name - print(f"DEBUG : Temporary PDF file created at: {temp_file_path}") - # Open the PDF file using PyMuPDF - pdf_file = fitz.open(str(temp_file_path)) - image_data = [] - image_order = 1 - seen_hashes = set() - extracted_text = [] - - for page_index in range(len(pdf_file)): - page = pdf_file.load_page(page_index) - extracted_text.append(page.get_text()) - - # Extract images from the page - image_list = page.get_images(full=True) - for image_index, img in enumerate(image_list, start=1): - xref = img[0] - base_image = pdf_file.extract_image(xref) - image_bytes = base_image["image"] - image_ext = base_image["ext"] - - # Compute perceptual hash - pil_img = Image.open(io.BytesIO(image_bytes)) - phash = str(imagehash.phash(pil_img)) - if phash in seen_hashes: - print(f"DEBUG: Skipping duplicate/similar image (hash: {phash})") - continue - seen_hashes.add(phash) - - image_name = 
f"image{page_index+1}_{image_index}.{image_ext}" - - # Generate caption and embedding for the image - try: - caption = generate_dynamic_caption(image_bytes) - if caption is not None: - embedding = generate_image_embedding(image_bytes) - image_data.append({ + print(f"DEBUG : Temporary PDF file created at: {temp_file_path}") + # Open the PDF file using PyMuPDF (now works on Windows since file is closed) + pdf_file = fitz.open(str(temp_file_path)) + image_data = [] + image_order = 1 + seen_hashes = set() + extracted_text = [] + + for page_index in range(len(pdf_file)): + page = pdf_file.load_page(page_index) + extracted_text.append(page.get_text()) + + # Extract images from the page + image_list = page.get_images(full=True) + for image_index, img in enumerate(image_list, start=1): + xref = img[0] + base_image = pdf_file.extract_image(xref) + image_bytes = base_image["image"] + image_ext = base_image["ext"] + + # Compute perceptual hash + pil_img = Image.open(io.BytesIO(image_bytes)) + phash = str(imagehash.phash(pil_img)) + if phash in seen_hashes: + print(f"DEBUG: Skipping duplicate/similar image (hash: {phash})") + continue + seen_hashes.add(phash) + + image_name = f"image{page_index+1}_{image_index}.{image_ext}" + + # Generate caption and embedding for the image + try: + caption = generate_dynamic_caption(image_bytes) + if caption is not None: + embedding = generate_image_embedding(image_bytes) + image_data.append( + { "filename": image_name, "embedding": embedding, "order": image_order, - "image_bytes": image_bytes.hex() - }) - image_order += 1 - except Exception as e: - print(f"Error processing image {image_name}: {e}") + "image_bytes": image_bytes.hex(), + } + ) + image_order += 1 + except Exception as e: + print(f"Error processing image {image_name}: {e}") # Prepare the response data response_data = { @@ -93,11 +98,25 @@ async def parse_pdf(file: UploadFile = File(...)): except Exception as e: print(f"Error processing PDF: {e}") - raise 
HTTPException(status_code=500, detail=f"An error occurred while processing the PDF: {e}") + raise HTTPException( + status_code=500, detail=f"An error occurred while processing the PDF: {e}" + ) + finally: + # Clean up temporary file on Windows + if temp_file_path and os.path.exists(temp_file_path): + try: + os.unlink(temp_file_path) + print(f"DEBUG: Cleaned up temporary file: {temp_file_path}") + except Exception as cleanup_error: + print( + f"Warning: Failed to clean up temporary file {temp_file_path}: {cleanup_error}" + ) + class PPTXRequest(BaseModel): content: dict + def validate_and_transform_content(content: dict) -> dict: """ Validate and transform the incoming content to match the expected format @@ -160,6 +179,7 @@ def validate_and_transform_content(content: dict) -> dict: return transformed_content + @app.post("/generate-pptx") async def generate_pptx(request: PPTXRequest): """Endpoint to generate a PowerPoint presentation.""" @@ -170,7 +190,9 @@ async def generate_pptx(request: PPTXRequest): transformed_content = validate_and_transform_content(request.content) # Create a temporary file for the PPTX - with tempfile.NamedTemporaryFile(delete=False, suffix=".pptx", dir=dir_slide) as temp_pptx_file: + with tempfile.NamedTemporaryFile( + delete=False, suffix=".pptx", dir=dir_slide + ) as temp_pptx_file: temp_pptx_path = temp_pptx_file.name print(temp_pptx_path) @@ -195,20 +217,24 @@ async def cleanup_temp_file(): path=temp_pptx_path, media_type="application/vnd.openxmlformats-officedocument.presentationml.presentation", filename="generated_presentation.pptx", - background=BackgroundTask(cleanup_temp_file) + background=BackgroundTask(cleanup_temp_file), ) except Exception as e: print(f"Error generating PPTX: {e}") - raise HTTPException(status_code=500, detail=f"An error occurred while generating the PPTX file: {e}") + raise HTTPException( + status_code=500, + detail=f"An error occurred while generating the PPTX file: {e}", + ) + if __name__ == "__main__": 
import uvicorn import os - + # Get host and port from environment variables with defaults host = os.environ.get("BACKEND_HOST", "127.0.0.1") port = int(os.environ.get("BACKEND_PORT", 8016)) - + print(f"Starting backend server on {host}:{port}") - uvicorn.run(app, host=host, port=port) \ No newline at end of file + uvicorn.run(app, host=host, port=port) diff --git a/backend/requirements.txt b/backend/requirements.txt index b1185c7..2d28232 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,7 +1,7 @@ python-pptx==1.0.2 fastapi==0.115.11 uvicorn==0.34.0 -PyMuPDF==1.22.3 +PyMuPDF==1.26.4 Pillow==11.1.0 python-dotenv==1.0.0 torch diff --git a/ecosystem.config.cjs b/ecosystem.config.cjs index 7fbd0be..081979f 100644 --- a/ecosystem.config.cjs +++ b/ecosystem.config.cjs @@ -96,7 +96,7 @@ module.exports = { env: { NODE_ENV: 'production', HOSTNAME: readEnvVariable('FRONTEND_HOST', '127.0.0.1'), - PORT: readEnvVariable('FRONTEND_PORT', 8080) + PORT: readEnvVariable('FRONTEND_PORT', 8080), }, watch: false, autorestart: true, diff --git a/frontend/next.config.mjs b/frontend/next.config.mjs index 2d0bf9b..cc9828d 100644 --- a/frontend/next.config.mjs +++ b/frontend/next.config.mjs @@ -1,17 +1,17 @@ -import { withPayload } from "@payloadcms/next/withPayload" -import { readFileSync } from "fs" -import path from "path" +import { withPayload } from '@payloadcms/next/withPayload' +import { readFileSync } from 'fs' +import path from 'path' -const isStandalone = process.env.STANDALONE_BUILD === "true" -const packageJsonPath = path.resolve(process.cwd(), "package.json") -const packageJson = JSON.parse(readFileSync(packageJsonPath, "utf-8")) -const persona = process.env.PERSONA || "faculty" -const personaView = process.env.PERSONA_VIEW || "default" +const isStandalone = process.env.STANDALONE_BUILD === 'true' +const packageJsonPath = path.resolve(process.cwd(), 'package.json') +const packageJson = JSON.parse(readFileSync(packageJsonPath, 'utf-8')) +const persona 
= process.env.PERSONA || 'faculty' +const personaView = process.env.PERSONA_VIEW || 'default' /** @type {import('next').NextConfig} */ const nextConfig = { distDir: isStandalone ? path.join(`next-${persona}`) : `.next`, - output: isStandalone ? "standalone" : undefined, + output: isStandalone ? 'standalone' : undefined, outputFileTracingRoot: isStandalone ? path.resolve(process.cwd()) // Use a separate folder for standalone output : process.cwd(), @@ -19,8 +19,25 @@ const nextConfig = { NEXT_PUBLIC_APP_NAME: packageJson.name, NEXT_PUBLIC_APP_VERSION: packageJson.version, NEXT_PUBLIC_PERSONA: persona, - NEXT_PUBLIC_PERSONA_VIEW: personaView - } + NEXT_PUBLIC_PERSONA_VIEW: personaView, + }, + // Windows-specific: Include libsql native dependencies in standalone build + ...(process.platform === 'win32' && + isStandalone && { + outputFileTracingIncludes: { + '*': ['./node_modules/@libsql/**/*'], + }, + webpack: (config, { isServer }) => { + if (isServer) { + // Windows-specific: keep the libsql native binding external (loaded via commonjs require at runtime) instead of bundling it + config.externals = config.externals || [] + config.externals.push({ + '@libsql/win32-x64-msvc': 'commonjs @libsql/win32-x64-msvc', + }) + } + return config + }, + }), } export default withPayload(nextConfig) diff --git a/frontend/package.json b/frontend/package.json index 86a790d..69ff8aa 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -8,7 +8,8 @@ "dev:lecturer": "cross-env NODE_OPTIONS=--no-deprecation PERSONA=lecturer next dev -p 8080", "dev:student": "cross-env NODE_OPTIONS=--no-deprecation PERSONA=student next dev -p 8080", "build": "cross-env NODE_OPTIONS=--no-deprecation next build", - "prebuild:faculty": "if [ ! 
-d node_modules ]; then npm install; fi && npm run build:lecturer && npm run build:student", + "prebuild:faculty": "npm run ensure-deps && npm run build:lecturer && npm run build:student", + "ensure-deps": "npm install", "build:faculty": "cross-env NODE_OPTIONS=--no-deprecation STANDALONE_BUILD=true next build", "build:lecturer": "cross-env NODE_OPTIONS=--no-deprecation STANDALONE_BUILD=true PERSONA=lecturer next build", "build:student": "cross-env NODE_OPTIONS=--no-deprecation STANDALONE_BUILD=true PERSONA=student next build", diff --git a/frontend/src/app/(app)/workspace/courses/create/page.tsx b/frontend/src/app/(app)/workspace/courses/create/page.tsx index 375ade2..2b9b55c 100644 --- a/frontend/src/app/(app)/workspace/courses/create/page.tsx +++ b/frontend/src/app/(app)/workspace/courses/create/page.tsx @@ -68,7 +68,9 @@ const courseFormSchema = z.object({ .refine((val) => /^\d{4}\.\d{1,2}\.\d{1,3}$/.test(val), { message: 'Version must be in YYYY.MM.MICRO format (e.g., 2025.01.0)', }), - description: z.string().optional(), + description: z.string().min(10, { + message: 'Course description must be at least 10 characters.', + }), model: z.object({ name: z.string(), modified_at: z.string(), @@ -640,7 +642,7 @@ export default function CreateCoursePage() { name="description" render={({ field }) => ( - Course Description (Optional) + Course Description