# 📄 SOW Generator (Local LLM / GPU Version)

Generates a Statement of Work (SOW) PDF from Meeting Minutes using a **Local LLM** (Mistral-7B) running on Google Colab's GPU.

**Instructions:**
1.  **Runtime**: Ensure `Runtime > Change runtime type` is set to **T4 GPU** (or better).
2.  **Hugging Face Token**: Add your HF Token to Colab Secrets (`HF_TOKEN`) with `READ` permission, or enter it when prompted. You must accept the model license for `mistralai/Mistral-7B-Instruct-v0.3` on Hugging Face.
3.  **Template**: Upload `SOW-TEMPLATE.pdf` to the Files section.
4.  Run all cells.

In [None]:
# @title 1. Install Dependencies
# @markdown Installs `transformers`, `accelerate`, `bitsandbytes`, and PDF tools.
import subprocess
import sys

def install(package):
    """Install *package* with pip, using the interpreter that runs this code.

    The command executed is ``<sys.executable> -m pip install <package>``.
    ``subprocess.check_call`` raises ``subprocess.CalledProcessError`` when
    pip exits with a non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install"]
    pip_command.append(package)
    subprocess.check_call(pip_command)

# Probe every required package individually. Checking only a subset would
# skip installation of `accelerate` / `bitsandbytes` whenever the probed
# packages happen to be present, and the 4-bit model load needs both.
import importlib.util

print("Installing dependencies (this may take 1-2 mins)...")

# For all of these, the pip distribution name equals the importable module name.
required_packages = (
    "torch",
    "transformers",
    "accelerate",
    "bitsandbytes",
    "pypdf",
    "xhtml2pdf",
)

# find_spec() locates a module without importing it, so the check stays cheap.
missing_packages = [
    name for name in required_packages
    if importlib.util.find_spec(name) is None
]

for name in missing_packages:
    install(name)

if missing_packages:
    print("Dependencies installed!")

In [None]:
# @title 2. Load Local LLM (Mistral-7B)
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import os

# CONFIGURATION
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3" # @param {type:"string"}

# Resolve the Hugging Face token: try Colab Secrets first, and fall back to an
# interactive prompt whose wording depends on why the secret lookup failed.
_token_prompt = None
try:
    from google.colab import userdata
    HF_TOKEN = userdata.get('HF_TOKEN')
except ImportError:
    # google.colab could not be imported.
    _token_prompt = "Enter your Hugging Face Token:"
except Exception:
    # The import worked but reading the secret raised.
    _token_prompt = "Could not retrieve key from userData. Enter your HF Token:"

if _token_prompt is not None:
    import getpass
    print(_token_prompt)
    HF_TOKEN = getpass.getpass()

# An empty / None token cannot authenticate the download of the model.
if not HF_TOKEN:
    raise ValueError("HF Token is required to download the model!")

print(f"[+] Loading {MODEL_ID}... (This will take a few minutes)")

# 4-bit NF4 quantization (with double quantization) to fit in T4 GPU
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

try:
    _hub_auth = {"token": HF_TOKEN}

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, **_hub_auth)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        quantization_config=bnb_config,
        **_hub_auth,
    )

    # Text-generation pipeline shared by the later cells; the EOS token id is
    # reused as the padding token id.
    pipe = pipeline(
        "text-generation",
        tokenizer=tokenizer,
        model=model,
        pad_token_id=tokenizer.eos_token_id,
        max_new_tokens=2500,
    )
    print("‚úÖ Model loaded successfully on GPU!")
except Exception as load_error:
    # Failures are reported rather than raised; `pipe` is then left undefined.
    print(f"‚ùå Error loading model: {load_error}")
    print("Make sure you have accepted the model license on Hugging Face.")


In [None]:
# @title 3. Core Functions (Adapted for Local LLM)
from io import BytesIO
from pypdf import PdfReader
from xhtml2pdf import pisa

def extract_text_from_pdf(pdf_path):
    """Return the extracted text of every page of the PDF at *pdf_path*.

    Each page's text is followed by a newline and the pages are concatenated
    in document order.

    Args:
        pdf_path: Path (or other source accepted by ``PdfReader``) of the PDF.

    Returns:
        The concatenated page text. If anything goes wrong while opening or
        reading the PDF, no exception propagates; instead a string starting
        with ``"Error reading PDF template: "`` is returned.
        NOTE(review): callers cannot tell that message apart from real
        template text except by its prefix.
    """
    try:
        reader = PdfReader(pdf_path)
        # Build the result with one join instead of repeated `+=`, which
        # re-copies the accumulated text on every page.
        return "".join(page.extract_text() + "\n" for page in reader.pages)
    except Exception as e:
        return f"Error reading PDF template: {e}"

def generate_pdf_from_html(html_content):
    """Render an HTML string to a PDF and return the PDF as bytes.

    Args:
        html_content: The HTML document (``str``) to convert.

    Returns:
        The bytes of the generated PDF.

    Raises:
        RuntimeError: If xhtml2pdf reports errors for the conversion. Without
            this check a failed render would be returned (and later written
            to disk) as if it were a valid PDF.
    """
    pdf_buffer = BytesIO()
    status = pisa.CreatePDF(
        BytesIO(html_content.encode("utf-8")),
        dest=pdf_buffer,
        # The source is handed over as UTF-8 bytes, so declare the encoding
        # instead of leaving the parser to guess it.
        encoding="utf-8",
    )
    if status.err:
        raise RuntimeError(
            f"xhtml2pdf failed to render the HTML ({status.err} error(s))"
        )
    return pdf_buffer.getvalue()

def query_local_llm(messages, pipeline_obj):
    """Query the local text-generation pipeline using its chat template.

    Args:
        messages: Chat messages (list of ``{"role": ..., "content": ...}``
            dicts) rendered into a prompt with the tokenizer's chat template.
        pipeline_obj: Callable text-generation pipeline exposing a
            ``tokenizer`` attribute with ``apply_chat_template``.

    Returns:
        The newly generated text only, with surrounding whitespace stripped.
    """
    prompt = pipeline_obj.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    outputs = pipeline_obj(
        prompt,
        do_sample=True,
        temperature=0.3,
        top_p=0.9,
        # Ask the pipeline for the completion alone rather than slicing the
        # prompt off by character count, which silently corrupts the result
        # if the echoed prompt ever differs from the text that was sent.
        return_full_text=False,
    )
    return outputs[0]["generated_text"].strip()

def generate_sow_draft(mom_text, pipeline_obj):
    """Ask the local LLM for a Markdown SOW draft based on meeting minutes.

    Args:
        mom_text: The meeting-minutes text, embedded in the prompt between
            single quotes.
        pipeline_obj: The pipeline object forwarded to ``query_local_llm``.

    Returns:
        Whatever ``query_local_llm`` returns for the single-message prompt.
    """
    print("[+] Generating SOW Draft (this may take time)...")

    # One user turn carrying the whole instruction (single-shot prompt).
    prompt_parts = [
        "You are a Project Manager. Create a detailed Statement of Work (SOW) based on these Meeting Minutes:\n\n",
        f"'{mom_text}'\n\n",
        "Include sections: Project Overview, Scope, Deliverables, Timeline, Pricing.\n",
        "Use Markdown formatting.",
    ]
    user_turn = {"role": "user", "content": "".join(prompt_parts)}
    return query_local_llm([user_turn], pipeline_obj)

def format_sow_to_html(edited_text, template_text, pipeline_obj, max_template_chars=1000):
    """Ask the local LLM to convert SOW text into HTML styled like a template.

    Args:
        edited_text: The SOW content to convert.
        template_text: Text of the style template; only the first
            ``max_template_chars`` characters are sent to the model.
        pipeline_obj: The pipeline object forwarded to ``query_local_llm``.
        max_template_chars: Maximum number of template characters included
            in the prompt (default 1000).

    Returns:
        The model response with Markdown code-fence markers removed and
        surrounding whitespace stripped.
    """
    print("[+] Formatting SOW to HTML...")

    # Only mark the template as truncated when text was actually cut off;
    # an unconditional marker tells the model a complete template is partial.
    if len(template_text) > max_template_chars:
        short_template = template_text[:max_template_chars] + "...[truncated]"
    else:
        short_template = template_text

    messages = [
        {"role": "user", "content": (
            f"Convert this SOW text into HTML. Mimic the style of the following template.\n\n"
            f"TEMPLATE STYLE:\n{short_template}\n\n"
            f"SOW CONTENT:\n{edited_text}\n\n"
            f"Output ONLY valid HTML code. No markdown blocks."
        )}
    ]
    response = query_local_llm(messages, pipeline_obj)
    # The model may still wrap its answer in Markdown fences; drop them.
    return response.replace("```html", "").replace("```", "").strip()

In [None]:
# @title 4. Input & Generate
# @markdown Run this cell. Inputs MOM -> Generates Draft.

# Sample meeting minutes used as the input for the draft.
MOM_TEXT = """
Project: Customer Portal Redesign
Scope: Update UI/UX, add login, integrate Stripe payments.
Timeline: 4 weeks. Budget: $10k.
Team: 1 Designer, 2 Developers.
"""
# You can modify MOM_TEXT above or use input()

# The draft can only be generated once the model cell has defined `pipe`.
if 'pipe' in locals():
    draft_sow = generate_sow_draft(MOM_TEXT, pipe)

    # Print the draft framed by 40-character divider lines.
    divider = "=" * 40
    for line in ("\n" + divider, "GENERATED DRAFT SOW", divider, draft_sow, divider):
        print(line)
else:
    print("‚ùå Pipeline not loaded. Please run Cell 2 first.")

In [None]:
# @title 5. Generate PDF
# @markdown Generates the final PDF.

# Looked up through globals() so a missing draft is reported below instead of
# raising NameError when the draft cell has not been run successfully.
FINAL_DRAFT_SOW = globals().get("draft_sow") # Modify this if you want to edit the text programmatically
TEMPLATE_FILENAME = "SOW-TEMPLATE.pdf"

if not FINAL_DRAFT_SOW or "pipe" not in globals():
    # Both the loaded pipeline and a generated draft are prerequisites.
    print("[!] Model pipeline or draft SOW not available. Run Cells 2 and 4 first.")
elif os.path.exists(TEMPLATE_FILENAME):
    template_content = extract_text_from_pdf(TEMPLATE_FILENAME)
    html_out = format_sow_to_html(FINAL_DRAFT_SOW, template_content, pipe)
    pdf_bytes = generate_pdf_from_html(html_out)

    output_filename = "Local_SOW.pdf"
    with open(output_filename, "wb") as f:
        f.write(pdf_bytes)

    print(f"‚úÖ PDF Generated: {output_filename}")

    # The browser download helper only exists inside Colab; elsewhere the PDF
    # is already on disk, so report its location instead of crashing.
    try:
        from google.colab import files
    except ImportError:
        print(f"[+] Not running in Colab; PDF saved at {os.path.abspath(output_filename)}")
    else:
        files.download(output_filename)
else:
    # Use the configured name so the message stays correct if it is changed.
    print(f"‚ùå Template '{TEMPLATE_FILENAME}' not found. Upload it and run again.")