In [1]:
!pip install sentence-transformers PyMuPDF ipywidgets --quiet


In [6]:
# 📚 Imports
import fitz  # PyMuPDF
from sentence_transformers import SentenceTransformer, util
import ipywidgets as widgets
from IPython.display import display, HTML
import torch
import time
import re
from nltk.corpus import stopwords
import nltk

# Fetch the NLTK stop-word corpus, then keep the English list as a set
# (clean_text tests every word for membership in it).
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# Load the SentenceTransformer model used for every embedding in this notebook
model = SentenceTransformer('all-MiniLM-L6-v2')

# 📄 Extract text from PDF
def extract_text_from_pdf(file_content):
    """Return the concatenated text of every page of an in-memory PDF.

    Args:
        file_content: The raw PDF data, handed to ``fitz.open`` as ``stream``.
            A ``memoryview`` is converted to ``bytes`` first.

    Returns:
        The text of all pages joined in page order, as one string.
    """
    # ipywidgets 8 delivers uploaded content as a memoryview; some PyMuPDF
    # versions only accept bytes-like stream types, so normalise it here.
    if isinstance(file_content, memoryview):
        file_content = file_content.tobytes()

    # The context manager closes the document even if a page fails to parse.
    with fitz.open(stream=file_content, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)

# 🧹 Clean text (remove stopwords & special characters)
def clean_text(text):
    """Normalise text: ASCII letters only, lower-cased, stop words removed.

    Args:
        text: The raw text to clean.

    Returns:
        The remaining words joined by single spaces.
    """
    # Drop every character that is neither an ASCII letter nor whitespace.
    letters_only = re.sub(r'[^a-zA-Z\s]', '', text)

    kept = []
    for token in letters_only.lower().split():
        if token in stop_words:
            continue
        kept.append(token)
    return ' '.join(kept)

# ✂️ Split into chunks
def chunk_text(text, chunk_size=100):
    """Split text into consecutive chunks of at most ``chunk_size`` words.

    Args:
        text: The text to split; words are separated on any whitespace.
        chunk_size: Maximum number of words per chunk.

    Returns:
        A list of chunks, each a string of words joined by single spaces.
        The last chunk may be shorter; empty text yields an empty list.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), chunk_size):
        window = tokens[start:start + chunk_size]
        pieces.append(' '.join(window))
    return pieces

# 🧠 Plagiarism Calculation
def calculate_chunked_plagiarism(study_texts, student_text):
    """Score a student text against chunked reference texts.

    Each reference text is cleaned and split into word chunks; every chunk
    and the cleaned student text are embedded with ``model``, and the score
    is the highest cosine similarity between the student text and any chunk.

    Args:
        study_texts: Iterable of raw reference texts.
        student_text: The raw student submission.

    Returns:
        A tuple ``(percent, chunks, similarities)``:
        ``percent`` is the best similarity as a percentage in [0, 100],
        rounded to two decimals; ``chunks`` is the list of reference chunks;
        ``similarities`` holds one similarity score per chunk, in order.

    Raises:
        ValueError: If the reference texts contain no usable words, so there
            is nothing to compare against.
    """
    all_chunks = []
    for study_text in study_texts:
        all_chunks.extend(chunk_text(clean_text(study_text)))

    # Without this guard an empty chunk list reaches the encoder and the
    # tensor ops below, which fail with an error unrelated to the real cause.
    if not all_chunks:
        raise ValueError("No usable text found in the reference documents.")

    # NOTE(review): the student text is embedded as a single vector; very long
    # submissions may be truncated by the model's input limit - confirm.
    chunk_embeddings = model.encode(all_chunks, convert_to_tensor=True)
    student_embedding = model.encode(clean_text(student_text), convert_to_tensor=True)
    similarities = util.pytorch_cos_sim(student_embedding, chunk_embeddings)[0]

    # Cosine similarity can be negative (or marginally above 1 from rounding);
    # clamp so the reported percentage always stays within 0-100.
    max_similarity = min(1.0, max(0.0, float(similarities.max().item())))
    return round(max_similarity * 100, 2), all_chunks, similarities

# ===========================
# Faculty upload - multiple PDFs per subject
# ===========================
# One uploader per subject; the click handler picks the one that matches
# the subject chosen in the dropdown.
pdf_uploader_dbms = widgets.FileUpload(multiple=True, accept='.pdf')
pdf_uploader_coa = widgets.FileUpload(multiple=True, accept='.pdf')

# Each heading is shown directly above its uploader.
display(
    widgets.HTML("<h3>üì§ Upload Notes for DBMS (Multiple files allowed)</h3>"),
    pdf_uploader_dbms,
)
display(
    widgets.HTML("<h3>üì§ Upload Notes for COA (Multiple files allowed)</h3>"),
    pdf_uploader_coa,
)

# ===========================
# Student inputs
# ===========================
# The answer can be typed into the text area or uploaded as a single PDF.
student_name = widgets.Text(description="Name:")
student_text = widgets.Textarea(
    layout=widgets.Layout(height='150px', width='100%'),
    placeholder="Write your assignment answer here...",
)
student_pdf_upload = widgets.FileUpload(multiple=False, accept='.pdf')

# 'Select Subject' is a placeholder entry that the click handler rejects.
subject_dropdown = widgets.Dropdown(
    description='Subject:',
    options=['Select Subject', 'DBMS', 'Computer Organization'],
    value='Select Subject',
    style={'description_width': 'initial'},
)

# Render the form top to bottom: heading, name, answer box, PDF option, subject.
display(
    widgets.HTML("<h3>üßë‚Äçüéì Student Submission</h3>"),
    student_name,
    student_text,
    widgets.HTML("<h4>üìé Or upload assignment as PDF</h4>"),
    student_pdf_upload,
    subject_dropdown,
)

# ===========================
# Button + output area
# ===========================
# The handler writes all of its messages and the final report into `output`.
check_button = widgets.Button(button_style='success', description="Check Plagiarism")
output = widgets.Output()

display(check_button)
display(output)

# ===========================
# Button click handler
# ===========================
def _uploaded_files(uploader):
    """Return the files held by a FileUpload widget as a list of dicts.

    ipywidgets 8 exposes ``value`` as a tuple of per-file dicts, while
    ipywidgets 7 exposes a dict keyed by filename; both shapes are accepted.
    """
    value = uploader.value
    if isinstance(value, dict):
        return list(value.values())
    return list(value)


def _grade_for(plagiarism_percent):
    """Map a plagiarism percentage to a ``(grade, remark, emoji)`` triple."""
    # (inclusive upper bound, grade, remark, emoji), checked in ascending order.
    bands = (
        (10, "A+", "Excellent! Original work.", "üåü"),
        (30, "B", "Good effort, but some copied content found.", "üëç"),
        (50, "C", "Fair attempt, moderate plagiarism.", "ü§î"),
        (70, "D", "Needs improvement. High plagiarism detected.", "‚ö†Ô∏è"),
    )
    for upper_bound, grade, remark, emoji in bands:
        if plagiarism_percent <= upper_bound:
            return grade, remark, emoji
    return "F", "Unacceptable. Mostly plagiarized.", "‚ùå"


def on_button_click(b):
    """Validate the form, score the submission and render the report.

    Everything printed or displayed here is captured by the ``output``
    widget. The handler stops early, with a message, when the name, subject,
    reference notes or student answer is missing or contains no usable text.

    Args:
        b: The clicked button, as passed by ipywidgets (unused).
    """
    # Standard-library helper, imported locally so the handler needs no
    # change to the notebook's import cell.
    from html import escape

    with output:
        output.clear_output()

        if not student_name.value.strip():
            print("‚ùå Please enter your name.")
            return

        subject = subject_dropdown.value
        if subject == 'Select Subject':
            print("‚ùå Please select a subject.")
            return

        # Fetch the faculty files for the chosen subject
        if subject == 'DBMS':
            uploaded_files = _uploaded_files(pdf_uploader_dbms)
        elif subject == 'Computer Organization':
            uploaded_files = _uploaded_files(pdf_uploader_coa)
        else:
            uploaded_files = []

        if not uploaded_files:
            print(f"‚ùå Please upload at least one PDF for {subject} notes.")
            return

        # Fetch the student input: an uploaded PDF takes precedence over typed text
        student_files = _uploaded_files(student_pdf_upload)
        if student_files:
            student_text_raw = extract_text_from_pdf(student_files[0]['content'])
        elif student_text.value.strip():
            student_text_raw = student_text.value
        else:
            print("‚ùå Please either write your assignment or upload a PDF.")
            return

        # A submission with no words left after cleaning (e.g. a scanned PDF
        # or digits only) cannot be scored meaningfully.
        if not clean_text(student_text_raw):
            print("‚ùå No readable text found in the student submission.")
            return

        # Spinner
        display(HTML("""
        <style>
        .loader {
            border: 6px solid #f3f3f3;
            border-top: 6px solid #3498db;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: auto;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        </style>
        <div style="text-align:center; margin-top:20px;">
            <div class="loader"></div>
            <p style="color:#3498db; font-size:18px;">Analyzing Plagiarism...</p>
        </div>
        """))
        # Fixed pause kept from the original flow; presumably it gives the
        # spinner time to render before the blocking analysis starts.
        time.sleep(2)

        # Extract faculty notes
        study_texts = [extract_text_from_pdf(f['content']) for f in uploaded_files]
        if not any(clean_text(text) for text in study_texts):
            output.clear_output()
            print(f"‚ùå No readable text found in the uploaded {subject} notes.")
            return

        # Check plagiarism
        plagiarism_percent, chunks, similarities = calculate_chunked_plagiarism(study_texts, student_text_raw)

        # Keep the score inside 0-100 and round the derived values so the
        # report never shows float artefacts such as 35.790000000000006.
        plagiarism_percent = round(min(100.0, max(0.0, plagiarism_percent)), 2)
        unique_percent = round(100 - plagiarism_percent, 2)

        # Grade
        grade, remark, emoji = _grade_for(plagiarism_percent)
        result = "PASS" if plagiarism_percent <= 30 else "FAIL"
        marks = unique_percent

        # Highlight every reference chunk whose similarity reaches the threshold
        threshold = 0.65
        matched_chunks = [
            chunk for chunk, sim in zip(chunks, similarities) if float(sim) >= threshold
        ]
        highlighted_matches = "<br><br>".join(
            f"<span style='background:yellow;'>{escape(match)}</span>" for match in matched_chunks
        ) or "No strong matches found."

        # The name is free text typed by the user, so escape it before it is
        # embedded in the HTML report.
        safe_name = escape(student_name.value)

        output.clear_output()
        display(HTML(f"""
        <style>
        .card {{
            padding:25px;
            border-radius:20px;
            text-align:center;
            flex:1;
            color:white;
            animation: fadeIn 0.6s ease;
            backdrop-filter: blur(10px);
            transition: transform 0.3s ease-in-out, box-shadow 0.3s ease-in-out;
        }}
        .plagiarism {{
            background: rgba(255, 0, 0, 0.6);
        }}
        .unique {{
            background: rgba(76, 175, 80, 0.6);
        }}
        .percent {{
            font-size: 40px;
            font-weight:bold;
            margin-top: 10px;
        }}
        .label {{
            font-size:22px;
            margin-bottom:15px;
            font-weight:500;
        }}
        .card:hover {{
            transform: scale(1.05);
            box-shadow: 0 0 20px rgba(255, 255, 255, 0.6);
        }}
        .plagiarism:hover {{
            background: rgba(255, 0, 0, 1);
            box-shadow: 0 0 25px rgba(255, 0, 0, 1);
        }}
        .unique:hover {{
            background: rgba(76, 175, 80, 1);
            box-shadow: 0 0 25px rgba(76, 175, 80, 1);
        }}
        </style>

        <div style="display:flex; gap:30px; margin-top:20px;">
            <div class="card plagiarism">
                <div class="label">Plagiarism</div>
                <div class="percent">{plagiarism_percent}%</div>
            </div>
            <div class="card unique">
                <div class="label">Unique</div>
                <div class="percent">{unique_percent}%</div>
            </div>
        </div>

        <div style="margin-top:20px; font-size:18px;">
            <strong>Student:</strong> {safe_name} <br>
            <strong>Subject:</strong> {subject} <br>
            <strong>Grade:</strong> {grade} {emoji} <br>
            <strong>Remark:</strong> {remark} <br>
            <strong>Result:</strong> {result} <br>
            <strong>Marks (out of 100):</strong> {marks}
        </div>

        <div style="margin-top:25px;">
            <h4>üîç <u>Matched Content:</u></h4>
            <div style="background:#f9f9f9; padding:15px; border-radius:12px; border:1px solid #ccc; font-size:15px;">
                {highlighted_matches}
            </div>
        </div>
        """))

# Register the handler so that clicking the button runs on_button_click
check_button.on_click(on_button_click)


[nltk_data] Downloading package stopwords to C:\Users\ANKAN
[nltk_data]     ROY\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


HTML(value='<h3>üì§ Upload Notes for DBMS (Multiple files allowed)</h3>')

FileUpload(value=(), accept='.pdf', description='Upload', multiple=True)

HTML(value='<h3>üì§ Upload Notes for COA (Multiple files allowed)</h3>')

FileUpload(value=(), accept='.pdf', description='Upload', multiple=True)

HTML(value='<h3>üßë\u200düéì Student Submission</h3>')

Text(value='', description='Name:')

Textarea(value='', layout=Layout(height='150px', width='100%'), placeholder='Write your assignment answer here‚Ä¶

HTML(value='<h4>üìé Or upload assignment as PDF</h4>')

FileUpload(value=(), accept='.pdf', description='Upload')

Dropdown(description='Subject:', options=('Select Subject', 'DBMS', 'Computer Organization'), style=Descriptio‚Ä¶

Button(button_style='success', description='Check Plagiarism', style=ButtonStyle())

Output()