In [3]:
!pip -q install "transformers>=4.43.0" "accelerate>=0.33.0" "bitsandbytes>=0.43.0" sentencepiece

In [4]:
import torch, json, re
from transformers import AutoTokenizer, AutoModelForCausalLM

MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.2"

print(f"Loading model: {MODEL_ID}...")
tok = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    load_in_4bit=True,
    torch_dtype=torch.float16
)
print("Model loaded successfully.")

Loading model: mistralai/Mistral-7B-Instruct-v0.2...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/596 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!
The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 3 files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/111 [00:00<?, ?B/s]

Model loaded successfully.


### Chat Function

In [5]:
def chat(messages, max_new_tokens=240, temperature=0.7, top_p=0.9, repetition_penalty=1.1):
    """
    Chat function compatible with Mistral-Instruct models.
    """
    text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tok([text], return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=temperature,
            top_p=top_p,
            repetition_penalty=repetition_penalty,
            eos_token_id=tok.eos_token_id # Add EOS token for Mistral
        )

    # Decode only the new tokens
    gen = out[0][inputs["input_ids"].shape[1]:]
    resp = tok.decode(gen, skip_special_tokens=True)
    return resp

### System Prompt

In [6]:
# Your improved System Prompt from Milestone 5
SYS_PROMPT = """
You write short, polite B2B cold emails.

Your job:
- Read the lead_profile and style_profile I give you.
- Then write a ready-to-send cold email for that specific person.

Hard rules:
- 60–120 words.
- One clear call to action.
- No links in the first email.
- Use ONLY information from lead_profile. Do not invent facts.
- Use greeting and closing from style_profile.
- DO NOT output instructions, guidelines, or placeholders.
- DO NOT explain what you are doing.

Output format (VERY IMPORTANT):
Return ONLY a single JSON object with this exact shape:
{"subject": "<short subject line>", "body": "<full email body text>"}
"""

### Helper Functions

In [7]:
MBTI_TONE = {
 "ENTJ":{"assertiveness":"high","formality":"high","avg_wps":18,"cta_style":"decisive"},
 "INTJ":{"assertiveness":"med-high","formality":"high","avg_wps":17,"cta_style":"analytical"},
 "ENFJ":{"assertiveness":"medium","formality":"medium","avg_wps":20,"cta_style":"collaborative"},
 "ENFP":{"assertiveness":"medium","formality":"medium","avg_wps":21,"cta_style":"story"},
}
DEFAULT_TONE = {"assertiveness":"medium","formality":"medium","avg_wps":19,"cta_style":"consultative"}

def mbti_to_style_profile(mbti: str):
    base = MBTI_TONE.get((mbti or "").upper(), DEFAULT_TONE)
    return {
        "style_name": f"MBTI:{mbti or 'Consultative'}",
        "avg_words": 95,
        "avg_wps": base["avg_wps"],
        "formality": base["formality"],
        "assertiveness": base["assertiveness"],
        "cta_style": base["cta_style"],
        "greeting": "Hi {first_name},",
        "closing": "Best regards,\n{sender_name}",
        "quirks": []
    }

def normalize_profile(j):
    j = {**{"objective":"", "optional_context":"", "profile_data":{}}, **j}
    p = j["profile_data"]
    p.setdefault("name","")
    p.setdefault("headline","")
    p.setdefault("career_history",[])
    p.setdefault("skills",[])
    p.setdefault("recent_activity",[])
    p.setdefault("company_updates",[])
    p.setdefault("technical_contributions",[])
    p.setdefault("inferred_mbti","")
    return j

def build_lead_profile(j):
    p = j["profile_data"]
    role = p["career_history"][0]["role"] if p.get("career_history") else ""
    company = p["career_history"][0]["company"] if p.get("career_history") else ""
    return {
      "name": p.get("name",""),
      "headline": p.get("headline",""),
      "role": role,
      "company": company,
      "skills": p.get("skills", [])[:8],
      "recent_activity": p.get("recent_activity", [])[:3],
      "company_updates": p.get("company_updates", [])[:2],
      "technical_contributions": p.get("technical_contributions", [])[:3],
      "objective": j.get("objective",""),
      "optional_context": j.get("optional_context","")
    }

### Email Generation Logic

In [8]:
def generate_email(user_json, sender_name="Team", address="Your Address"):
    j = normalize_profile(user_json)
    lead_profile = build_lead_profile(j)
    mbti = j["profile_data"].get("inferred_mbti","")
    style_profile = mbti_to_style_profile(mbti)

    user_prompt = (
      "lead_profile = " + json.dumps(lead_profile, ensure_ascii=False) + "\n" +
      "style_profile = " + json.dumps(style_profile, ensure_ascii=False)
    )

    raw = chat(
        [
          {"role":"system", "content": SYS_PROMPT},
          {"role":"user", "content": user_prompt}
        ],
        max_new_tokens=220,
        temperature=0.6,
        top_p=0.9,
        repetition_penalty=1.1,
    )

    print("--- RAW MODEL OUTPUT START ---")
    print(raw)
    print("--- RAW MODEL OUTPUT END ---")

    # Try to parse JSON
    email = {"subject":"", "body":"", "raw": raw}
    try:
        # if it already looks like pure JSON
        txt = raw.strip()
        # Handle cases where the model might add markdown ```json ... ```
        if txt.startswith("```json"):
            txt = txt[7:-3].strip() # Remove the markdown fences

        if txt.startswith("{") and txt.endswith("}"):
            parsed = json.loads(txt)
        else:
            # extract first {...}
            m = re.search(r'\{.*\}', txt, re.S)
            if m:
                parsed = json.loads(m.group(0))
            else:
                parsed = {}
        email["subject"] = parsed.get("subject","").strip()
        email["body"] = parsed.get("body","").strip()
    except Exception as e:
        print(f"\n[Warning] JSON parse failed: {e}. Falling back to raw text.")

    # Fallback: if body still empty, just use raw
    if not email["body"].strip():
        email["subject"] = email["subject"] or "Quick idea for you"
        email["body"] = raw.strip().lstrip("```json").rstrip("```") # Clean up markdown fences

    # Add signature/compliance
    if email["body"] and "unsubscribe" not in email["body"].lower():
        email["body"] += f"\n\n—\n{address}\nIf this isn’t relevant, reply “unsubscribe” and we won’t contact you again."

    # Replace placeholders
    first_name = lead_profile.get("name", "").split()[0] # Get first name
    email["body"] = email["body"].replace("{first_name}", first_name)
    email["body"] = email["body"].replace("{sender_name}", sender_name)
    return email

### Example Run

In [9]:
# Using user_json_2 as it is a good, detailed example
user_json_2 = {
  "objective": "Explore a pilot to improve trial-to-paid conversion in their SaaS funnels.",
  "optional_context": "We can piggyback on their upcoming Q1 pricing experiments.",
  "profile_data": {
    "name": "Rahul Mehta",
    "headline": "VP Product | B2B SaaS",
    "career_history": [
      {
        "role": "VP Product",
        "company": "Nimbus CRM",
        "duration": "2020–present",
        "description": "Owns product strategy for SMB and mid-market CRM suites."
      },
      {
        "role": "Director of Product",
        "company": "SalesFlow",
        "duration": "2016–2020",
        "description": "Led activation and onboarding initiatives across web and mobile."
      }
    ],
    "skills": ["Product Strategy", "Activation", "Onboarding", "A/B Testing"],
    "recent_activity": [
      "Posted about reducing time-to-value in SaaS trials.",
      "Commented on a thread about improving product-led growth motions."
    ],
    "company_updates": [
      "Rolled out a new 14-day free trial with in-app guides.",
      "Announced integrations with three major billing platforms."
    ],
    "technical_contributions": [
      "Co-authored an internal playbook on growth experiments."
    ],
    "inferred_mbti": "INTJ"
  }
}

print("\n--- GENERATING EMAIL FOR RAHUL MEHTA ---")
email = generate_email(user_json_2, sender_name="Sumit", address="123 OMR, Chennai")

print("\n--- FINAL PARSED EMAIL ---")
print("SUBJECT:", email["subject"])
print("BODY:\n", email["body"])

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.



--- GENERATING EMAIL FOR RAHUL MEHTA ---
--- RAW MODEL OUTPUT START ---
{"subject": "Improving Trial Conversion at Nimbus CRM - Let's Collaborate",
 "body": "Hi Rahul,\n\nAs a fellow VP Product at a B2B SaaS company, I've noticed your recent activity focusing on optimizing time-to-value and exploring product-led growth motions.\n\nI'd love to discuss how we might collaborate to improve trial-to-paid conversion at Nimbus CRM. With skills in Product Strategy, Activation, Onboarding, A/B Testing, and experience co-authoring growth experiment playbooks, I believe we could make a significant impact.\n\nYour recent company updates on the new 14-day free trial and integrations with major billing platforms are intriguing. Let's explore how these initiatives could influence our collaboration.\n\nBest regards,\n[Your Name]"}
--- RAW MODEL OUTPUT END ---

--- FINAL PARSED EMAIL ---
SUBJECT: Improving Trial Conversion at Nimbus CRM - Let's Collaborate
BODY:
 Hi Rahul,

As a fellow VP Product at a