In [1]:
!pip install flask flask-cors pyngrok pdf2image pytesseract transformers bitsandbytes accelerate
!apt-get install -y poppler-utils
!apt-get update
!apt-get install -y tesseract-ocr

Collecting flask-cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting pdf2image
  Downloading pdf2image-1.17.0-py3-none-any.whl.metadata (6.2 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.3-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3,>=2.0->bitsandbytes)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Coll

In [None]:
import os
import torch
import tempfile
from pyngrok import ngrok
import pytesseract
from flask import Flask, request, jsonify
from flask_cors import CORS
from huggingface_hub import login
from pdf2image import convert_from_path
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Credentials are read from the environment so they never end up in the
# notebook source, its saved outputs, or version control.
# NOTE(review): the tokens that used to be hardcoded here must be treated as
# leaked and revoked in the Hugging Face and ngrok dashboards.
def _require_env(name):
    """Return the value of environment variable `name`, or fail with a clear message."""
    value = os.environ.get(name, "").strip()
    if not value:
        raise RuntimeError(
            f"Environment variable {name} is not set; "
            "export it before running this cell."
        )
    return value

# Authenticate Hugging Face API
login(token=_require_env("HF_TOKEN"))

# Authenticate ngrok
NGROK_AUTH_TOKEN = _require_env("NGROK_AUTH_TOKEN")
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Flask application object; CORS is applied to the whole app
app = Flask(__name__)
CORS(app)

# Open an ngrok tunnel to local port 5000 and report what it returned
public_url = ngrok.connect(5000)
print(f"Public API URL: {public_url}")

# Pick CUDA when torch reports it as available, otherwise stay on the CPU
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using Device: {device}")

# Checkpoint to load from the Hugging Face Hub.
# NOTE(review): this id names the stock instruct checkpoint; the "Fine-Tuned"
# wording in the status message below is kept unchanged.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# 4-bit NF4 quantization settings with float16 compute and double quantization
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Tokenizer first, then the quantized model with automatic device placement
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
)

print("Fine-Tuned Mistral Model Loaded Successfully")

# Text file that accumulates every processed resume
MISTRAL_RESUME_FILE = "mistral_resumes.txt"

# On first run the file is absent: create it empty so later reads succeed.
# Opening in "w" mode and closing immediately leaves a zero-length file.
if not os.path.exists(MISTRAL_RESUME_FILE):
    with open(MISTRAL_RESUME_FILE, "w", encoding="utf-8"):
        pass

def clean_text(text):
    """Strip OCR noise characters and collapse all whitespace to single spaces.

    Removes every "©" and "*" from `text`, then rejoins the remaining
    whitespace-separated words with exactly one space between them.
    The result has no leading or trailing whitespace and no newlines.
    """
    # Delete the two noise characters in a single pass
    noise_free = text.translate({ord("©"): None, ord("*"): None})
    # split() without arguments discards every whitespace run (newlines and
    # tabs included) as well as leading/trailing blanks
    words = noise_free.split()
    return " ".join(words)

def extract_text_from_pdf(pdf_path):
    """Run OCR over every page of a PDF and return the cleaned, joined text.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        The per-page OCR strings joined with single spaces and passed
        through clean_text.

    Raises:
        FileNotFoundError: if nothing exists at `pdf_path`.
    """
    if os.path.exists(pdf_path):
        # Render the pages to images, then OCR them one by one in page order
        page_images = convert_from_path(pdf_path)
        page_texts = []
        for page_image in page_images:
            page_texts.append(pytesseract.image_to_string(page_image))
        return clean_text(" ".join(page_texts))

    raise FileNotFoundError(f"PDF file not found: {pdf_path}")

def format_resume_with_mistral(extracted_text):
    """Ask the loaded Mistral model to restructure raw resume text.

    Args:
        extracted_text: Resume text (e.g. OCR output) to be formatted.

    Returns:
        The text generated by the model, stripped of surrounding whitespace.
        Only the model's continuation is returned; the prompt is not echoed.
    """

    prompt = f"""
    Structure the following resume text into a clean, professional format.

    Resume Text:
    {extracted_text}

    Instructions:
    - Keep the structure as close to the original resume as possible while making it professional and readable.
    - Do not include unnecessary Markdown formatting.
    - Ensure proper sentence structure instead of bullet points where possible.
    - Remove irrelevant sections like "Miscellaneous" or "Not Provided."
    - Use natural spacing instead of headings like '## Contact Information'.
    - Ensure the response is structured in an easy-to-read natural flow.

    Formatted Resume Output:
    """

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=2048,
            do_sample=False,
            repetition_penalty=1.2,
            pad_token_id=tokenizer.eos_token_id
        )

    # generate() on a decoder-only model returns the prompt tokens followed by
    # the new tokens. Slice the prompt off so the stored/returned resume does
    # not contain the instructions and the raw OCR text a second time.
    prompt_length = inputs["input_ids"].shape[1]
    generated_ids = outputs[0][prompt_length:]

    formatted_resume = tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
    formatted_resume = formatted_resume.replace("(Start your response from here)", "").strip()

    return formatted_resume

@app.route("/process_cv", methods=["POST"])
def process_cv():
    """Process one or more uploaded resumes: OCR, format, and persist them.

    Expects a multipart POST with one or more parts named "file".

    Returns:
        400 with an "error" key when no "file" part is present; otherwise a
        JSON body {"status": "success", "results": [...]} with one entry per
        upload. An entry carries "data" (the formatted resume) on success or
        "message" when nothing usable was produced.
    """
    if "file" not in request.files:
        return jsonify({"error": "No file uploaded"}), 400

    files = request.files.getlist("file")
    extracted_results = []

    for file in files:
        # A form field submitted without a chosen file arrives with an empty
        # filename; report it instead of failing the whole request.
        if not file.filename:
            extracted_results.append({"filename": file.filename, "message": "No file selected"})
            continue

        # Never build a path from the client-supplied filename: it can contain
        # "../" or an absolute path, and two uploads with the same name would
        # overwrite each other. mkstemp yields a unique, server-chosen path.
        fd, temp_pdf_path = tempfile.mkstemp(suffix=".pdf")
        os.close(fd)
        try:
            file.save(temp_pdf_path)
            extracted_text = extract_text_from_pdf(temp_pdf_path)
        finally:
            # Always remove the temporary copy, even if OCR raised.
            if os.path.exists(temp_pdf_path):
                os.remove(temp_pdf_path)

        structured_resume = format_resume_with_mistral(extracted_text)

        if structured_resume.strip():
            extracted_results.append({
                "filename": file.filename,
                "data": structured_resume
            })

            with open(MISTRAL_RESUME_FILE, "a", encoding="utf-8") as f:
                f.write(structured_resume + "\n\n")

        else:
            extracted_results.append({"filename": file.filename, "message": "No structured data returned"})

    return jsonify({"status": "success", "results": extracted_results})

@app.route("/store_cv", methods=["POST"])
def store_cv():
    """Append an already-structured resume to the resume storage file.

    Expects a JSON object with a string field "resume".

    Returns:
        400 with an "error" key when the body is missing, is not a JSON
        object, or "resume" is absent, non-string, or blank; otherwise 200
        with a confirmation message.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad payload reaches the 400 response below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    structured_resume = data.get("resume", "")

    # Guard against non-string values (e.g. a number or list), which have no
    # .strip() and would otherwise surface as a 500.
    if not isinstance(structured_resume, str) or not structured_resume.strip():
        return jsonify({"error": "No valid resume content received"}), 400

    with open(MISTRAL_RESUME_FILE, "a", encoding="utf-8") as f:
        f.write(structured_resume + "\n\n")

    return jsonify({"message": "Resume stored successfully"}), 200

@app.route("/query_cv", methods=["POST"])
def query_cv():
    """Answer a query about the stored resumes with a short candidate list.

    Expects a JSON object with a "query" field.

    Returns:
        400 with an "error" key when the query is missing or no resumes are
        stored; otherwise a JSON body {"query": ..., "response": ...} where
        "response" is the model's generated text only (prompt not echoed).
    """
    # silent=True yields None instead of raising on a missing/malformed body.
    data = request.get_json(silent=True)
    user_query = data.get("query") if isinstance(data, dict) else None

    if not user_query:
        return jsonify({"error": "No query provided"}), 400

    if not os.path.exists(MISTRAL_RESUME_FILE):
        return jsonify({"error": "No resumes stored"}), 400

    with open(MISTRAL_RESUME_FILE, "r", encoding="utf-8") as file:
        stored_resumes = file.read()

    # The storage file may exist but be empty; that is still "no resumes".
    if not stored_resumes.strip():
        return jsonify({"error": "No resumes stored"}), 400

    instruction = f'"{user_query}". List max 3 candidates: Name | 2-3 skills | Short experience (max 10 words).'

    # The resumes must be part of the prompt, otherwise the model answers
    # without ever seeing a candidate. Cap the resume context at 3584 tokens
    # so the instruction that follows it still fits inside the 4096-token
    # limit applied below instead of being truncated away.
    resume_ids = tokenizer(
        stored_resumes,
        add_special_tokens=False,
        truncation=True,
        max_length=3584,
    )["input_ids"]
    resume_context = tokenizer.decode(resume_ids, skip_special_tokens=True)

    prompt = f"Resumes:\n{resume_context}\n\n{instruction}"

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=4096).to(device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=100,
            # Explicit pad token, matching format_resume_with_mistral; avoids
            # the "Setting pad_token_id to eos_token_id" warning per request.
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt tokens followed by new tokens; keep only the
    # continuation so the response does not repeat the resumes and the query.
    prompt_length = inputs["input_ids"].shape[1]
    response_text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()

    return jsonify({"query": user_query, "response": response_text})

# Start the Flask server only when this code runs as the entry point.
# It listens on every interface at port 5000, the same port that is passed
# to ngrok.connect() earlier in this cell.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=5000)

Public API URL: NgrokTunnel: "https://b502-34-125-11-8.ngrok-free.app" -> "http://localhost:5000"
Using Device: cuda


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Fine-Tuned Mistral Model Loaded Successfully
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [03/Mar/2025 07:41:57] "POST /process_cv HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [03/Mar/2025 07:44:02] "POST /process_cv HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [03/Mar/2025 07:44:44] "POST /process_cv HTTP/1.1" 200 -
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
INFO:werkzeug:127.0.0.1 - - [03/Mar/2025 07:46:50] "POST /query_cv HTTP/1.1" 200 -
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
INFO:werkzeug:127.0.0.1 - - [03/Mar/2025 07:48:46] "POST /query_cv HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [03/Mar/2025 07:51:32] "POST /process_cv HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [03/Mar/2025 07:53:22] "POST /process_cv HTTP/1.1" 200 -
