In [1]:
!pip install -U bitsandbytes transformers accelerate gradio
!pip install PyMuPDF python-docx

Collecting bitsandbytes
  Downloading bitsandbytes-0.49.0-py3-none-win_amd64.whl.metadata (10 kB)
Downloading bitsandbytes-0.49.0-py3-none-win_amd64.whl (54.7 MB)
   ---------------------------------------- 0.0/54.7 MB ? eta -:--:--
   ---- ----------------------------------- 5.5/54.7 MB 27.9 MB/s eta 0:00:02
   ------- -------------------------------- 10.5/54.7 MB 25.2 MB/s eta 0:00:02
   --------- ------------------------------ 13.4/54.7 MB 22.1 MB/s eta 0:00:02
   ----------- ---------------------------- 16.0/54.7 MB 19.0 MB/s eta 0:00:03
   ------------- -------------------------- 18.6/54.7 MB 17.8 MB/s eta 0:00:03
   --------------- ------------------------ 21.8/54.7 MB 17.2 MB/s eta 0:00:02
   ------------------ --------------------- 25.4/54.7 MB 17.1 MB/s eta 0:00:02
   --------------------- ------------------ 29.6/54.7 MB 17.4 MB/s eta 0:00:02
   ------------------------ --------------- 33.6/54.7 MB 17.6 MB/s eta 0:00:02
   ------------------------- -------------- 35.1/54.7 MB 

In [2]:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import gradio as gr
import fitz  # PyMuPDF for PDF
import docx  # For DOCX files

In [3]:

# Device setup
# NOTE: in the cells visible here `device` is only printed; model placement is
# driven by `device_map="auto"` below and later code reads `model.device`.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device set to: {device}")

# 4-bit quantization config
# NF4 4-bit weights with double quantization enabled; compute dtype is float16.
# NOTE(review): whether 4-bit loading works without CUDA depends on the
# installed bitsandbytes build — confirm on CPU-only machines.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4"
)

# Load model and tokenizer
# Both are module-level globals used by generate_response().
model_id = "microsoft/phi-3-mini-4k-instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto"
)

Device set to: cpu


tokenizer_config.json: 0.00B [00:00, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/599 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/181 [00:00<?, ?B/s]

In [4]:

def chunk_text(text, max_tokens=1000):
    """Split ``text`` into sentence-aligned chunks of bounded size.

    Sentences are found by splitting on ``". "`` and packed greedily into
    chunks, joined by single spaces.

    Args:
        text: Document text to split. ``None`` or blank text yields no chunks.
        max_tokens: Soft upper bound for each chunk. Despite the name, the
            limit is measured in characters (``len``), not tokenizer tokens.
            A single sentence longer than the limit is emitted as its own
            oversized chunk rather than being cut.

    Returns:
        A list of non-empty chunk strings, each ending with sentence
        punctuation.
    """
    if not text or not text.strip():
        return []

    fragments = text.split('. ')
    last_index = len(fragments) - 1
    chunks = []
    current_chunk = ""

    for index, fragment in enumerate(fragments):
        sentence = fragment.strip()
        if not sentence:
            continue

        # split() consumed the ". " after every fragment except the last, so
        # restore the period there. The last fragment only gets one when it
        # lacks terminal punctuation, which avoids producing "..".
        if index < last_index or not sentence.endswith(('.', '!', '?')):
            sentence += "."

        candidate = f"{current_chunk} {sentence}" if current_chunk else sentence
        if len(candidate) <= max_tokens:
            current_chunk = candidate
        else:
            # Never emit an empty chunk when the very first sentence is
            # already longer than the limit.
            if current_chunk:
                chunks.append(current_chunk)
            current_chunk = sentence

    if current_chunk:
        chunks.append(current_chunk)

    return chunks

In [5]:

def generate_response(prompt):
    """Generate a sampled completion for ``prompt`` with the global model.

    The prompt is tokenized with the module-level ``tokenizer``, moved to
    ``model.device``, and passed to ``model.generate`` with sampling enabled.

    Args:
        prompt: Text to feed to the model.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed. NOTE(review): for causal LMs this presumably includes the
        prompt text as well — callers split on markers to isolate the reply.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)

    sampling_options = dict(
        max_new_tokens=150,
        temperature=0.7,
        top_k=50,
        top_p=0.95,
        do_sample=True,
        no_repeat_ngram_size=3,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        generated = model.generate(**encoded, **sampling_options)

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [6]:

import re

def read_pdf(file_path):
    """Extract the plain text of every page of a PDF.

    Args:
        file_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        The text of all pages concatenated in page order.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction raises part-way through.
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)

def read_docx(file_path):
    """Return the text of a .docx file, one paragraph per line.

    Args:
        file_path: Path of the Word document to open with python-docx.

    Returns:
        Every paragraph's text followed by a newline, concatenated in order.
    """
    document = docx.Document(file_path)
    return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)


# Fix common name typos
def fix_name_typos(text, replacements=None):
    """Replace known misspellings of names with their correct form.

    Each misspelling is matched as a whole word (``\\b`` boundaries) and
    treated as literal text.

    Args:
        text: The text to correct.
        replacements: Optional mapping of ``wrong -> correct`` spellings. When
            omitted, the built-in table below is used.

    Returns:
        ``text`` with every listed misspelling replaced.
    """
    if replacements is None:
        replacements = {
            "Gisbund": "Gisburn",
            "Gissburn": "Gisburn",
            "Gisbrown": "Gisburn",
            "Grindley": "Grindle",
            "Grindly": "Grindle",
            "Garbuck": "Gisburn",
            "Giesurn": "Gisburn",
            "Gaiesurn": "Gisburn",
            "Rickmam": "Rickham",
            "Rickmham": "Rickham",
            "Strud": "Stroud",
            "Mrs. Studrd": "Mrs. Stroud",
            "Mrs. Pardiggler": "Mrs. Stroud",
            "Mr. Strud": "Mr. Stroud",
            "Mrs. Pardiggle": "Mrs. Stroud"
        }

    for wrong, correct in replacements.items():
        # re.escape keeps characters such as "." in "Mrs. Studrd" literal
        # instead of letting them match any character. The callable
        # replacement prevents backslashes in `correct` from being read as
        # group references.
        pattern = rf"\b{re.escape(wrong)}\b"
        text = re.sub(pattern, lambda _match, fixed=correct: fixed, text)
    return text

# Remove repeated sentences
def remove_repeated_sentences(text):
    """Drop sentences that already appeared earlier in ``text``.

    Sentences are obtained by splitting on ``". "``. The first occurrence of
    each sentence is kept, in order.

    Args:
        text: Text whose duplicate sentences should be removed.

    Returns:
        The remaining sentences joined with ``". "``. If the input ended with
        a period, the result does too.
    """
    seen = set()
    kept = []
    for sentence in text.split('. '):
        sentence_clean = sentence.strip()
        # After the split only the final sentence still carries its ".", so
        # compare on a period-less key; otherwise a repeated last sentence
        # would never match its earlier occurrence.
        key = sentence_clean.rstrip('.').strip()
        if key and key not in seen:
            kept.append(sentence_clean)
            seen.add(key)

    result = '. '.join(kept).strip()
    # Restore the closing period if it was lost with a dropped duplicate.
    if result and text.rstrip().endswith('.') and not result.endswith('.'):
        result += '.'
    return result

# Ensure clean sentence endings
def clean_endings(text):
    """Flatten newlines and make sure the text ends with sentence punctuation.

    Args:
        text: Text to tidy. ``None`` or blank text yields an empty string.

    Returns:
        ``text`` with newlines replaced by spaces and surrounding whitespace
        removed. A period is appended unless the text already ends with
        ``.``, ``!`` or ``?``.
    """
    # Normalise whitespace before inspecting the ending, so trailing newlines
    # or spaces cannot hide an existing final period.
    flattened = (text or "").replace('\n', ' ').strip()
    if not flattened:
        return ""
    if not flattened.endswith(('.', '!', '?')):
        flattened += '.'
    return flattened

In [7]:

import traceback

def summarize_file(file):
    """Summarize an uploaded .txt, .pdf or .docx file chunk by chunk.

    The file is read, split with ``chunk_text``, and each chunk is summarized
    with ``generate_response``. The per-chunk summaries are joined and passed
    through ``fix_name_typos``, ``remove_repeated_sentences`` and
    ``clean_endings``.

    Args:
        file: The uploaded file, either an object whose ``name`` attribute is
            the file path or the path itself. ``None`` means nothing was
            uploaded.

    Returns:
        The combined summary, or a user-facing message when no file was
        given, the format is unsupported, the file has no text, or an
        unexpected error occurred (the traceback is included in that case).
    """
    if file is None:
        return "Please upload a .txt, .pdf, or .docx file."

    try:
        file_path = str(getattr(file, "name", file))
        # Compare extensions case-insensitively so "REPORT.PDF" is accepted.
        lowered_path = file_path.lower()

        if lowered_path.endswith(".pdf"):
            text = read_pdf(file_path)
        elif lowered_path.endswith(".docx"):
            text = read_docx(file_path)
        elif lowered_path.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as f:
                text = f.read()
        else:
            return "Unsupported file format. Upload .txt, .pdf, or .docx files only."

        # Do not spend a model call on a document without any text.
        if not text or not text.strip():
            return "No readable text was found in the uploaded file."

        chunk_summaries = []
        for chunk in chunk_text(text, max_tokens=1000):
            prompt = f"Summarize the following text in simple terms:\n\n{chunk}\n\nSummary:"
            summary = generate_response(prompt)
            # Keep only what follows the last "Summary:" marker, i.e. the
            # model's answer rather than the prompt.
            chunk_summaries.append(summary.split("Summary:")[-1].strip())
        combined_summary = " ".join(chunk_summaries)

        # Post-processing pipeline
        combined_summary = fix_name_typos(combined_summary)
        combined_summary = remove_repeated_sentences(combined_summary)
        combined_summary = clean_endings(combined_summary)

        return combined_summary

    except Exception:
        # Best effort for the UI: show the failure instead of crashing the app.
        return f"Error occurred:\n{traceback.format_exc()}"


In [8]:
def draft_content(topic, tone):
    """Draft an email about ``topic`` in the given ``tone`` and tidy the output.

    NOTE: another ``draft_content`` is defined later in this notebook; when
    the cells run top to bottom, that later definition replaces this one.

    Args:
        topic: Subject matter of the email. Blank or ``None`` returns a hint
            instead of calling the model.
        tone: Style word inserted into the prompt (e.g. "Formal"). Falls back
            to "Professional" when ``None`` or empty.

    Returns:
        The cleaned email text, a hint when no topic was given, or an error
        message containing the traceback if something unexpected failed.
    """
    try:
        topic_text = (topic or "").strip()
        if not topic_text:
            return "Please enter a topic for the email."
        tone_text = (tone or "Professional").lower()

        prompt = (
            f"Write a {tone_text} email about the following topic:\n\n{topic_text}\n\n"
            f"Only output the email content, including a professional closing and signature line."
        )
        prompt_tail = "including a professional closing and signature line."

        response = generate_response(prompt).strip()

        # The decoded output may start with the prompt itself. The prompt
        # begins with "Write a", which is also a stop phrase below, so an
        # echoed prompt must be removed first or the whole reply is cut away.
        if response.startswith(prompt):
            response = response[len(prompt):].strip()
        elif response.startswith("Write a") and prompt_tail in response:
            response = response.split(prompt_tail, 1)[-1].strip()

        # Remove leaked prompt before "Subject:"
        if "Subject:" in response:
            response = response.split("Subject:", 1)[-1].strip()
            response = "Subject: " + response

        # Auto-correct known typos
        response = response.replace("responsibilled", "responsibilities")
        response = response.replace("Prime Minster", "Prime Minister")
        response = response.replace("Moddi", "Modi")
        # Only add the missing "s"; a plain replace would turn an already
        # correct "India's" into "India'ss".
        response = re.sub(r"India'(?!s)", "India's", response)

        # Stop if the model starts another task or appends instructions
        stop_phrases = [
            "Write a", "Instruction:", "Task:", "Next:", "Question:"
        ]
        for phrase in stop_phrases:
            if phrase in response:
                response = response.split(phrase)[0].strip()

        # Remove unwanted signature fields
        for unwanted in ["[Title]", "[Company Name]"]:
            response = response.replace(unwanted, "").strip()

        # Ensure a clean sentence ending or a proper email signature ending
        signature_phrases = ["Sincerely,", "Regards,", "Best regards,", "Thank you,", "Yours sincerely,", "Warm regards,"]
        has_signature = any(sig in response for sig in signature_phrases)

        if not has_signature:
            if not response.endswith(('.', '!', '?')):
                # Cut back to the last complete sentence, if there is one.
                last_period = response.rfind('.')
                if last_period != -1:
                    response = response[:last_period+1]
                else:
                    response += "."

        else:
            # Remove trailing text after the signature if it leaks
            lines = response.splitlines()
            for i, line in enumerate(lines):
                if any(sig in line for sig in signature_phrases):
                    response = "\n".join(lines[:i+2])  # Keep signature + name
                    break

        # Remove leaked prompts or instructions AFTER the signature
        leaked_phrases = [
            "Compose an in-depth", "Please write", "Generate a",
            "Write an analysis", "Create a report", "Answer the following"
        ]
        for phrase in leaked_phrases:
            if phrase in response:
                response = response.split(phrase)[0].strip()

        return response

    except Exception:
        import traceback
        return f"Error occurred:\n{traceback.format_exc()}"

import re

def draft_content(topic, tone):
    """Draft an email about ``topic`` in the given ``tone`` and tidy the output.

    Topics that look like general questions (ending in "?" or starting with a
    question word) are rejected with an explanatory message instead of being
    sent to the model.

    Args:
        topic: Subject matter of the email. Blank or ``None`` returns a hint
            instead of calling the model.
        tone: Style word inserted into the prompt (e.g. "Formal"). Falls back
            to "Professional" when ``None`` or empty.

    Returns:
        The cleaned email text, a hint or rejection message, or an error
        message containing the traceback if something unexpected failed.
    """
    try:
        topic_text = (topic or "").strip()
        if not topic_text:
            return "Please enter a topic for the email."
        tone_text = (tone or "Professional").lower()

        # Detect general questions or unrelated prompts
        question_keywords = ["what", "why", "how", "when", "who", "which", "is", "are", "do", "does", "should"]
        question_pattern = r"^\s*(" + "|".join(question_keywords) + r")\b"
        if topic_text.endswith("?") or re.match(question_pattern, topic_text.lower()):
            return (
                "üìù This email drafting tool is designed for creating professional or friendly emails based on a specific topic.\n"
                "It looks like you've entered a general question. Please use a different tool or interface for general queries."
            )

        prompt = (
            f"Write a {tone_text} email about the following topic:\n\n{topic_text}\n\n"
            f"Only output the email content, including a professional closing and signature line."
        )
        prompt_tail = "including a professional closing and signature line."

        response = generate_response(prompt).strip()

        # The decoded output may start with the prompt itself. The prompt
        # begins with "Write a", which is also a stop phrase below, so an
        # echoed prompt must be removed first or the whole reply is cut away.
        if response.startswith(prompt):
            response = response[len(prompt):].strip()
        elif response.startswith("Write a") and prompt_tail in response:
            response = response.split(prompt_tail, 1)[-1].strip()

        # Remove leaked prompt before "Subject:"
        if "Subject:" in response:
            response = response.split("Subject:", 1)[-1].strip()
            response = "Subject: " + response

        # Auto-correct known typos
        response = response.replace("responsibilled", "responsibilities")

        # Stop if the model starts another task
        for stop_phrase in ["Write a", "Instruction:", "Task:", "Next:", "Question:"]:
            if stop_phrase in response:
                response = response.split(stop_phrase)[0].strip()

        # Remove unwanted signature fields
        for unwanted in ["[Title]", "[Company Name]"]:
            response = response.replace(unwanted, "").strip()

        # Ensure a clean sentence ending (if no signature exists)
        signature_phrases = ["Sincerely,", "Regards,", "Best regards,", "Thank you,", "Yours sincerely,"]
        has_signature = any(sig in response for sig in signature_phrases)

        if not has_signature:
            if not response.endswith(('.', '!', '?')):
                # Cut back to the last complete sentence, if there is one.
                last_period = response.rfind('.')
                if last_period != -1:
                    response = response[:last_period+1]
                else:
                    response += "..."

        return response

    except Exception:
        import traceback
        return f"Error occurred:\n{traceback.format_exc()}"

"""🔹 AI Response Generation Function

This function creates structured AI-generated text with controlled length and coherence.

Key Features:

✅ Processes input using tokenization.

✅ Generates text with constraints to prevent randomness and repetition.

✅ Ensures clean stopping using an end-of-sequence token.

✅ Decodes output into readable text.
"""

def generate_response(prompt):
    """Generate a deterministic (greedy) completion for ``prompt``.

    The prompt is tokenized with the module-level ``tokenizer``, moved to
    ``model.device``, and passed to ``model.generate`` without sampling.

    Args:
        prompt: Text to feed to the model.

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed. NOTE(review): for causal LMs this presumably includes the
        prompt text as well — callers split on markers to isolate the reply.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=350,  # Enough for full email + signature
            # Greedy decoding. `temperature` and `top_p` only apply when
            # sampling, so they are not passed here; with do_sample=False
            # they were ignored and only produced warnings.
            do_sample=False,
            no_repeat_ngram_size=3,
            eos_token_id=tokenizer.eos_token_id  # Stops cleanly
        )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

"""🌐 Unified Gradio Interface – VersaMind"""

# --- GRADIO INTERFACE ---
# Build a two-tab UI: one tab summarizes uploaded documents, the other drafts
# emails. Each button forwards its inputs to a function defined earlier in the
# notebook and writes the returned string into a textbox.
with gr.Blocks(title="üß† VersaMind ‚Äì Smart Summarizer & Email Drafter") as app:
    gr.Markdown("# üß† VersaMind ‚Äì Smart Summarizer & Email Drafter")
    gr.Markdown("Upload documents or enter a topic to quickly get AI-generated summaries or professional emails!")

    # Tab 1: file upload -> summarize_file -> summary textbox
    with gr.Tab("üìÑ Document Summarizer"):
        file_input = gr.File(label="üìÑ Upload .txt, .pdf, or .docx file")
        summary_output = gr.Textbox(label="üìù Summary", lines=10)
        summarize_btn = gr.Button("Summarize")
        summarize_btn.click(fn=summarize_file, inputs=file_input, outputs=summary_output)

    # Tab 2: topic + tone -> draft_content -> drafted email textbox
    with gr.Tab("‚úâÔ∏è Email Drafter"):
        topic_input = gr.Textbox(label="üìù Enter Topic or Prompt", lines=2)
        # No `value=` is given — presumably the callback receives None until a
        # choice is made; verify against the installed Gradio version.
        tone_input = gr.Radio(["Formal", "Friendly", "Professional", "Casual"], label="Select Style/Tone")
        draft_output = gr.Textbox(label="üñãÔ∏è Drafted Email", lines=10)
        draft_btn = gr.Button("Draft Email")
        # `draft_content` resolves to whichever definition was executed last.
        draft_btn.click(fn=draft_content, inputs=[topic_input, tone_input], outputs=draft_output)

# Start the local web server for the interface.
app.launch()

* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.


