In [None]:
!pip install transformers accelerate torch torchvision pillow pyngrok
!pip install flask flask-ngrok
!pip install -U transformers
!pip install pyvips
!apt-get install -y libvips libvips-dev
!pip install pdf2image PyMuPDF
!apt install poppler-utils -y
!pip install flask-cors nltk scikit-learn

Collecting pyngrok
  Downloading pyngrok-7.2.4-py3-none-any.whl.metadata (8.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting n

In [None]:
# app.py (Colab Flask)
import base64
import io
import os
import tempfile
import threading

import nltk
import torch
from flask import Flask, request, send_file, jsonify
from flask_cors import CORS
from nltk.corpus import wordnet as wn
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from pdf2image import convert_from_path
from PIL import Image
from pyngrok import ngrok, conf
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoModelForCausalLM

# NLTK data used by the /check-plagiarism route: tokenizer models and WordNet.
for resource in ("punkt", "punkt_tab", "wordnet"):
    nltk.download(resource)

# Setup
# Read the ngrok token from the environment so the real secret never has to be
# written into (and shared with) the notebook. The placeholder is kept as a
# fallback so editing it in place still works as before.
conf.get_default().auth_token = os.environ.get("NGROK_AUTHTOKEN", "YOUR_NGROK_TOKEN")
app = Flask(__name__)
CORS(app)  # default flask-cors configuration: cross-origin requests are accepted

# Use the GPU when one is available; otherwise fall back to the CPU instead of
# failing at load time on a runtime without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# trust_remote_code executes Python shipped in the model repository; the pinned
# revision keeps that code (and the weights) fixed between runs.
model = AutoModelForCausalLM.from_pretrained(
    "vikhyatk/moondream2",
    revision="2025-01-09",
    trust_remote_code=True,
    device_map={"": DEVICE},
)

# Shared lemmatizer used when normalising text for plagiarism comparison.
lemmatizer = WordNetLemmatizer()

@app.route("/")
def home():
    """Liveness endpoint: reply with a small JSON status message."""
    status = {"message": "📝 Server is running"}
    return jsonify(status)

@app.route("/upload", methods=["POST"])
def upload_pdf():
    """Extract handwritten text from an uploaded PDF and return it as a download.

    Expects a multipart/form-data POST whose ``file`` part is the PDF. Every
    page is rasterised with pdf2image and passed to the vision model with a
    fixed extraction prompt.

    Returns:
        ("No file", 400) when the request carries no ``file`` part.
        ("PDF error: ...", 500) when the PDF cannot be converted to images.
        Otherwise the extracted text as an attachment named
        ``handwritten_extracted.txt``.
    """
    file = request.files.get("file")
    if not file:
        return "No file", 400

    # A per-request temporary directory replaces the fixed "uploaded.pdf" path:
    # simultaneous uploads can no longer overwrite each other's file, and the
    # PDF is removed as soon as its pages have been rendered.
    with tempfile.TemporaryDirectory() as workdir:
        path = os.path.join(workdir, "uploaded.pdf")
        file.save(path)
        try:
            images = convert_from_path(path)
        except Exception as e:
            return f"PDF error: {str(e)}", 500

    page_texts = []
    for idx, img in enumerate(images):
        try:
            enc = model.encode_image(img)
            extracted = model.answer_question(enc, "Extract all the handwritten text from the image.")
            page_texts.append(extracted + "\n")
        except Exception as e:
            # Best effort: a page that fails is logged and skipped so the
            # remaining pages are still returned.
            print(f"Page {idx} error: {str(e)}")

    text = "".join(page_texts).strip()

    # Stream the result from memory rather than a shared "output.txt", which a
    # concurrent request could rewrite before this response has been sent.
    return send_file(
        io.BytesIO(text.encode("utf-8")),
        mimetype="text/plain",
        as_attachment=True,
        download_name="handwritten_extracted.txt",
    )

def _normalize_for_similarity(text):
    """Tokenise, lower-case and lemmatise text, mapping words to a canonical synonym.

    Each lemma that WordNet knows is replaced by the first lemma name of its
    first synset, so different synonyms can collapse onto the same token before
    TF-IDF vectorisation. Words without a synset are kept as their lemma.
    """
    normalized = []
    for token in word_tokenize(text):
        lemma = lemmatizer.lemmatize(token.lower())
        synsets = wn.synsets(lemma)
        normalized.append(synsets[0].lemmas()[0].name() if synsets else lemma)
    return " ".join(normalized)


def _classify_similarity(score):
    """Map a cosine-similarity score to its plagiarism label."""
    if score >= 0.95:
        return "Complete Plagiarism"
    if score >= 0.70:
        return "Direct Plagiarism"
    if score >= 0.50:
        return "Paraphrased"
    return "No Plagiarism"


@app.route("/check-plagiarism", methods=["POST"])
def check_plagiarism():
    """Compare every pair of submitted texts and classify their similarity.

    Expects a JSON body of the form
    ``{"files": [{"email": ..., "text": "..."}, ...]}``.

    Returns:
        200 with ``{"results": [...]}``: one entry per unordered pair holding
        ``studentA``, ``studentB``, ``similarityScore`` (rounded to 4 decimals)
        and ``classification``.
        400 with ``{"error": ...}`` when the body is missing or malformed, or
        when no text contains any usable term.
        500 with ``{"error": ...}`` on any other failure.
    """
    try:
        # silent=True yields None for a missing or invalid JSON body, so bad
        # client input is answered with 400 instead of surfacing as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "No data"}), 400

        files = data.get("files", [])
        if not files:
            return jsonify({"error": "No data"}), 400

        entries_valid = isinstance(files, list) and all(
            isinstance(f, dict) and isinstance(f.get("text"), str) for f in files
        )
        if not entries_valid:
            return jsonify({"error": "Each entry in 'files' must be an object with a string 'text'"}), 400

        emails = [f.get("email") for f in files]
        processed = [_normalize_for_similarity(f["text"]) for f in files]

        try:
            tfidf = TfidfVectorizer().fit_transform(processed)
        except ValueError as e:
            # Raised by scikit-learn when no document contributes a term
            # (for example, every text is empty).
            return jsonify({"error": str(e)}), 400

        # One call yields the full pairwise matrix; the sparse TF-IDF matrix is
        # used directly, so nothing is densified or recomputed per pair.
        similarity = cosine_similarity(tfidf)

        results = []
        for i in range(len(files)):
            for j in range(i + 1, len(files)):
                # Plain float so the value serialises independently of numpy.
                score = float(similarity[i][j])
                results.append({
                    "studentA": emails[i],
                    "studentB": emails[j],
                    "similarityScore": round(score, 4),
                    "classification": _classify_similarity(score),
                })

        return jsonify({"results": results})

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# Route to handle Base64 PDF upload and extract handwritten text
@app.route("/upload-base64", methods=["POST"])
def upload_pdf_base64():
    """Extract handwritten text from a Base64-encoded PDF sent as JSON.

    Expects a JSON body ``{"file": "<base64 PDF>"}``. A ``data:`` URL (as
    produced by browser file readers) is also accepted; its header is dropped
    before decoding.

    Returns:
        400 with ``{"error": ...}`` when the body is missing, ``file`` is
        absent or not a string, or the payload is not valid Base64.
        500 with ``{"error": ...}`` on any other failure (e.g. an unreadable PDF).
        Otherwise the extracted text as an attachment named
        ``handwritten_extracted.txt``.
    """
    try:
        # silent=True yields None for a missing or invalid JSON body, so the
        # request is rejected with 400 rather than failing on `.get` below.
        data = request.get_json(silent=True)
        base64_pdf = data.get("file") if isinstance(data, dict) else None

        if not base64_pdf:
            return jsonify({"error": "No file provided"}), 400
        if not isinstance(base64_pdf, str):
            return jsonify({"error": "'file' must be a Base64 string"}), 400

        # Drop a "data:<mediatype>;base64," header; left in place, its
        # characters would be decoded as payload and corrupt the PDF.
        if base64_pdf.startswith("data:"):
            base64_pdf = base64_pdf.partition(",")[2]

        # Decode the Base64 PDF (binascii.Error is a ValueError subclass)
        try:
            pdf_data = base64.b64decode(base64_pdf)
        except ValueError:
            return jsonify({"error": "Invalid Base64 data"}), 400
        if not pdf_data:
            return jsonify({"error": "No file provided"}), 400

        # Write the PDF into a per-request temporary directory so concurrent
        # requests cannot overwrite each other's file, then convert it to images.
        with tempfile.TemporaryDirectory() as workdir:
            pdf_path = os.path.join(workdir, "uploaded_base64.pdf")
            with open(pdf_path, "wb") as f:
                f.write(pdf_data)
            images = convert_from_path(pdf_path)

        # Extract handwritten text from each page using the Moondream2 model
        page_texts = []
        for idx, img in enumerate(images):
            try:
                enc = model.encode_image(img)
                extracted = model.answer_question(enc, "Extract all the handwritten text from the image.")
                page_texts.append(extracted + "\n")
            except Exception as e:
                # Best effort: a failing page is logged and skipped.
                print(f"Page {idx} error: {str(e)}")

        text = "".join(page_texts).strip()

        # Send the text from memory instead of a shared output file on disk.
        return send_file(
            io.BytesIO(text.encode("utf-8")),
            mimetype="text/plain",
            as_attachment=True,
            download_name="handwritten_extracted.txt",
        )

    except Exception as e:
        return jsonify({"error": str(e)}), 500

# Launch with ngrok
# Single source of truth for the port: the Flask server and the ngrok tunnel
# must agree on it.
PORT = 5000


def run_app(port=PORT):
    """Run the Flask development server on ``port`` (blocks until stopped)."""
    app.run(port=port)


def start_ngrok(port=PORT):
    """Open an ngrok tunnel to the local ``port`` and print the tunnel details."""
    public_url = ngrok.connect(port)
    print("⚡ Public URL:", public_url)


# The tunnel is opened on a background thread because run_app() blocks the cell.
threading.Thread(target=start_ngrok).start()
run_app()


ModuleNotFoundError: No module named 'pyngrok'