In [None]:
#%% [code]
# Import necessary modules
import os
import json
import threading
import pdf2image
import ollama
from flask import Flask, request, jsonify

In [None]:
# Create Flask app
app = Flask(__name__)

In [None]:
def findname(filename):
    """Return ``filename`` with the text after its last dot removed.

    The dot itself is kept, so ``"report.pdf"`` becomes ``"report."``; this
    matches what the helper has always returned for dotted names. A name
    that contains no dot has no extension and is returned unchanged.

    Args:
        filename: File name to strip.

    Returns:
        Everything up to and including the last ``.``, or ``filename``
        itself when it contains no dot.
    """
    stem, dot, _extension = filename.rpartition(".")
    # A dot-less name used to be treated as "all extension" and came back
    # as an empty string; return it untouched instead.
    if not dot:
        return filename
    return stem + dot

In [None]:
def createimages(pdf_path, poppler_path="poppler-24.08.0/Library/bin", output_dir="temp"):
    """Render every page of a PDF to a PNG file.

    Pages are saved as ``output_page_<n>.png`` (numbered from 1) inside
    ``output_dir``.

    Args:
        pdf_path: Path of the PDF to convert.
        poppler_path: Directory containing the Poppler binaries, forwarded to
            ``pdf2image.convert_from_path``. Defaults to the relative path
            this function previously hard-coded; pass ``None`` to let
            pdf2image locate Poppler on ``PATH``.
        output_dir: Directory that receives the page images. It is created
            if it does not exist yet.

    Raises:
        Exception: Any error raised while converting or saving is logged
            and re-raised to the caller.
    """
    try:
        # Do not rely on another cell having created the folder already.
        os.makedirs(output_dir, exist_ok=True)
        images = pdf2image.convert_from_path(pdf_path, poppler_path=poppler_path)
        for page_number, image in enumerate(images, start=1):
            image_path = os.path.join(output_dir, f"output_page_{page_number}.png")
            image.save(image_path, 'PNG')
        print(f"Images created successfully! Total pages: {len(images)}")
    except Exception as e:
        print("Error converting PDF to images:", e)
        raise

In [None]:
def _page_sort_key(filename):
    """Sort key that orders image files by the number ending their stem."""
    stem = os.path.splitext(filename)[0]
    digits = ""
    for char in reversed(stem):
        if not char.isdecimal():
            break
        digits = char + digits
    # Files without a trailing number sort after every numbered page, by name.
    if digits:
        return (0, int(digits), filename)
    return (1, 0, filename)


def inferenceimages():
    """Describe every PNG in 'temp' with the vision model and merge the results.

    Each image is sent to the ``granite3.2-vision`` model through
    ``ollama.chat``. The replies are concatenated (each preceded by a
    newline) and also written to ``output/explanation.txt``.

    Returns:
        The merged description text.

    Raises:
        Exception: If 'temp' holds no PNG files, or if listing, inference or
            writing fails. The error is logged and re-raised.
    """
    try:
        # Order by page number, not lexicographically: a plain string sort
        # would place "output_page_10.png" before "output_page_2.png".
        images = sorted(
            (name for name in os.listdir("temp") if name.endswith(".png")),
            key=_page_sort_key,
        )
        if not images:
            raise Exception("No images found in the temp folder.")

        descriptions = []
        for image_file in images:
            image_path = os.path.join("temp", image_file)
            print(f"Processing image: {image_path}")

            # The page image is attached to the message via its 'images' field.
            # NOTE(review): the image rides on a 'system' message; confirm the
            # model actually consumes images supplied with that role.
            res = ollama.chat(
                model="granite3.2-vision",
                messages=[
                    {
                        'role': 'system',
                        'content': (
                            "Analyze the given image. This image is a part of multiset. Understand the image, extract the important content related points and explain them in concised bullet points."
                        ),
                        'images': [image_path]
                    }
                ]
            )
            descriptions.append(res['message']['content'])

        # Each description is preceded by a newline, as before.
        merged_text = "".join("\n" + description for description in descriptions)

        # Save merged text to an output file.
        os.makedirs("output", exist_ok=True)
        with open(os.path.join("output", "explanation.txt"), "w", encoding='utf-8') as fout:
            fout.write(merged_text)
        print("Image inference completed!")
    except Exception as e:
        print("Error during image inference:", e)
        raise
    return merged_text

In [None]:
def textfiletoquiz(ques=10, input_text=""):
    """Ask the model for quiz questions about ``input_text`` and save the reply.

    The system prompt instructs the model to return a JSON document with a
    ``questions`` list. The reply is written to ``quiz_out/latest_quiz.json``
    and returned.

    Args:
        ques: Number of quiz questions to request.
        input_text: Source text (output of the vision step) sent as the user
            message.

    Returns:
        The model's reply as a string.
    """
    print("Making quiz questions!")
    prompt = f"""You are an edututor substitute model, whose function is to create quizzes. You will receive a query. This query is an output from a Vision Model.
Clean the text to remove irrelevant data, and generate {ques} quiz questions from the given data.
You must not only generate a question and subsequent 4 options, but also one of the option must be true. You must also return 1 correct option from the given options, and explaination if possible. return NA if no explaination.
Return the output as valid JSON in the following format:
{{
   "questions": [
     {{
        "question": "Question text",
        "options": [provide 4 options here],
        "answer": provide the answer here
     }}
     // Repeat for each question
   ]
}}
Ensure the JSON is valid.
"""
    # JSON mode makes Ollama constrain the reply to a JSON document; without
    # it the model may add prose or code fences, and the saved file would
    # then fail to parse when it is read back with json.load.
    res = ollama.chat(
        model="granite3.2-vision",
        messages=[
            {'role': 'system', 'content': prompt},
            {'role': 'user', 'content': input_text}
        ],
        format="json"
    )
    quizdesc = res['message']['content']

    # Surface a malformed reply (e.g. a truncated one) right away instead of
    # only when the file is read back later. The reply is still saved and
    # returned unchanged.
    try:
        json.loads(quizdesc)
    except ValueError as e:
        print("Warning: generated quiz is not valid JSON:", e)

    os.makedirs("quiz_out", exist_ok=True)
    with open(os.path.join("quiz_out", "latest_quiz.json"), "w", encoding='utf-8') as fout:
        fout.write(quizdesc)
    print("Quiz questions generated and saved.")
    return quizdesc

In [None]:
def cleanup():
    """Delete every file under the 'temp' folder, leaving directories in place.

    Failures are reported on stdout and never propagate to the caller.
    """
    try:
        stale_paths = [
            os.path.join(dirpath, name)
            for dirpath, _subdirs, names in os.walk("temp")
            for name in names
        ]
        for path in stale_paths:
            os.remove(path)
        print("Temp cleanup complete!")
    except Exception as err:
        print("Error during cleanup:", err)

In [None]:
# Create the working directories up front; existing ones are left untouched.
for folder in ("content", "temp", "output", "quiz_out"):
    os.makedirs(folder, exist_ok=True)

# Endpoint to process an uploaded PDF and generate quiz questions.
@app.route('/extract', methods=['POST'])
def extract():
    """Handle a PDF upload and respond with quiz questions generated from it.

    The upload is expected in the ``file`` form field. It is saved under
    'content', rendered to page images, described by the vision model and
    turned into a quiz.

    Returns:
        A ``(response, status)`` pair: 200 with ``message`` and ``quiz`` on
        success, 400 for a missing, unnamed or non-PDF upload, 500 with an
        ``error`` message if processing fails.
    """
    if 'file' not in request.files:
        return jsonify({"error": "No file provided"}), 400
    file = request.files['file']
    # The client controls the filename. Keep only its last path component
    # (treating both "/" and "\\" as separators) so the upload cannot be
    # written outside 'content'. A missing filename is treated as empty.
    filename = os.path.basename((file.filename or "").replace("\\", "/"))
    if filename == "":
        return jsonify({"error": "No file selected"}), 400
    if not filename.lower().endswith('.pdf'):
        return jsonify({"error": "Uploaded file is not a PDF"}), 400
    # Save the PDF to 'content'
    pdf_path = os.path.join("content", filename)
    file.save(pdf_path)
    print("PDF file saved to:", pdf_path)
    try:
        createimages(pdf_path)
        merged_text = inferenceimages()
        quiz_json = textfiletoquiz(ques=10, input_text=merged_text)
    except Exception as e:
        return jsonify({"error": f"Processing error: {str(e)}"}), 500
    finally:
        # Clear page images even when processing fails; otherwise leftovers
        # in 'temp' would be picked up while handling the next upload.
        cleanup()
    return jsonify({"message": "PDF processed successfully", "quiz": quiz_json}), 200

# Endpoint to return the latest quiz questions JSON.
@app.route('/extractQuestions', methods=['GET'])
def extract_questions():
    """Serve the most recently saved quiz from quiz_out/latest_quiz.json.

    Returns:
        A ``(response, status)`` pair: the parsed quiz with 200, or an
        ``error`` message with 500 when the file cannot be read or parsed.
    """
    quiz_file = os.path.join("quiz_out", "latest_quiz.json")
    try:
        with open(quiz_file, "r", encoding='utf-8') as handle:
            raw_quiz = handle.read()
        parsed_quiz = json.loads(raw_quiz)
        return jsonify(parsed_quiz), 200
    except Exception as err:
        return jsonify({"error": f"Error reading quiz data: {str(err)}"}), 500

In [None]:
# Function to run the Flask app in a separate thread.
def run_app():
    """Serve the Flask app on all interfaces, port 5000.

    Debug mode is off unless the ``FLASK_DEBUG`` environment variable is set
    to ``1``, ``true``, ``yes`` or ``on``. The server binds to ``0.0.0.0``,
    and Flask's interactive debugger can execute arbitrary Python, so it must
    not be enabled unconditionally on a network-reachable address.
    """
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in ("1", "true", "yes", "on")
    # The reloader stays disabled, as before; this function is started from
    # a background thread below.
    app.run(host='0.0.0.0', port=5000, debug=debug_enabled, use_reloader=False)

# Start the Flask server in a new thread.
thread = threading.Thread(target=run_app)
thread.start()

In [None]:
# Ad-hoc run: build a 12-question quiz from previously saved text.
# None of the cells shown here write output/output.txt (inferenceimages
# saves output/explanation.txt), so the file has to be supplied separately.
# Skip with a message when it is absent instead of failing the cell.
saved_text_path = 'output/output.txt'
if os.path.isfile(saved_text_path):
    with open(saved_text_path, 'r', encoding="utf-8") as f:
        textfiletoquiz(12, input_text=f.read())
else:
    print(f"Skipping ad-hoc quiz generation: {saved_text_path} not found.")