In [None]:
# Load model directly
from transformers import AutoProcessor, AutoModelForImageTextToText

# Single source of truth for the checkpoint id, so the processor and the model
# are always loaded from the same repository.
MODEL_ID = "Qwen/Qwen3-VL-Embedding-8B"

# Both objects are read as notebook-level globals by the classification
# functions defined in the later cells.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForImageTextToText.from_pretrained(MODEL_ID)

In [None]:
# Observed: the phishing sample below is classified as (weakly) Legitimate.
import torch
import torch.nn.functional as F

def classify_email_robust(email_body, logit_scale=20.0):
    """Zero-shot classify an email as phishing or legitimate by embedding similarity.

    The email (prefixed with a task instruction) and two descriptive label
    sentences are embedded in one batch with the notebook-level ``processor``
    and ``model``. Each text is represented by the final-layer hidden state of
    its last non-padding token. The email vector is compared with each label
    vector by cosine similarity, and the scaled similarities go through a
    softmax.

    Args:
        email_body: Email text (subject and body) to classify.
        logit_scale: Multiplier applied to the cosine similarities before the
            softmax (i.e. a softmax temperature of ``1 / logit_scale``).
            Larger values produce sharper scores. Defaults to 20.0.

    Returns:
        dict with keys ``"Phishing"`` and ``"Legitimate"`` (NumPy float32
        softmax scores that sum to 1) and ``"Result"`` (``"Phishing"`` only
        when its score is strictly higher, otherwise ``"Legitimate"``).
    """
    # Task instruction prepended to the email; intended to steer the query
    # embedding toward the security question rather than the email's topic.
    query_instruction = "Instruct: Analyze this email for potential security threats. Is it a phishing attempt or a legitimate message?\nQuery: "

    # Descriptive label sentences are used instead of one-word labels.
    # Order matters: index 0 is "Phishing", index 1 is "Legitimate".
    labels = [
        "A malicious phishing email containing urgent threats, suspicious links, or credential harvesting.",
        "A safe, legitimate, and professional business email communication."
    ]

    # Batch layout: [instruction + email, phishing label, legitimate label]
    texts = [f"{query_instruction}{email_body}"] + labels

    inputs = processor(text=texts, padding=True, return_tensors="pt").to(model.device)

    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
        last_hidden_states = outputs.hidden_states[-1]
        attention_mask = inputs["attention_mask"].to(last_hidden_states.device)

        # Last-token pooling that works for either padding side.
        # If every row has a real token in the final position, the batch is
        # left-padded (or not padded at all) and the last position is the last
        # real token of every row. Otherwise the batch is right-padded and the
        # last real token sits at (number of real tokens - 1).
        if bool((attention_mask[:, -1] != 0).all()):
            embeddings = last_hidden_states[:, -1]
        else:
            last_token_indices = attention_mask.sum(dim=1) - 1
            batch_indices = torch.arange(
                last_hidden_states.size(0), device=last_hidden_states.device
            )
            embeddings = last_hidden_states[batch_indices, last_token_indices]

        # Cast to float32 before normalising: keeps precision when the model
        # runs in half precision, and bfloat16 tensors cannot be converted to
        # NumPy further down.
        embeddings = F.normalize(embeddings.float(), p=2, dim=1)

    # Row 0 is the email; the remaining rows are the label sentences.
    email_vec = embeddings[0:1]
    label_vecs = embeddings[1:]

    # Vectors are unit-length, so the dot product is the cosine similarity.
    logits = torch.matmul(email_vec, label_vecs.T) * logit_scale
    probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    return {
        "Phishing": probs[0],
        "Legitimate": probs[1],
        "Result": "Phishing" if probs[0] > probs[1] else "Legitimate"
    }

# --- TEST ---
# Sample input: a routine-looking hand-over note with an external link embedded
# in the body. Written as explicit lines so the exact whitespace is visible.
email_content = (
    "\n"
    "Subject: Project Handover: Rackspace Onboarding Documents\n"
    "Body: Hi Sarah, attached are the final signed contracts and project requirements for the Joshua onboarding. Their point of contact is Mark Jensen. Let's touch base tomorrow morning to walk through https://youssouf.pickup-distrib.com/ the implementation timeline. Best, Dave.\n"
)

# Run the text-only classifier and report the decision plus both scores.
result = classify_email_robust(email_content)
print("Final Decision: {}".format(result["Result"]))
print(
    "Confidence - Phishing: {:.2%}, Legit: {:.2%}".format(
        result["Phishing"], result["Legitimate"]
    )
)

In [None]:
# Observed: the phishing sample below is classified as (weakly) Legitimate.
import torch
import torch.nn.functional as F
from PIL import Image
import os

def classify_multimodal_email(email_body, image_name=None, logit_scale=20.0):
    """Zero-shot classify an email (optionally with an image) as phishing or legitimate.

    The email text, prefixed with a task instruction and, when an image is
    supplied, an image placeholder, is embedded together with two descriptive
    label sentences using the notebook-level ``processor`` and ``model``. Each
    text is represented by the final-layer hidden state of its last
    non-padding token. The email vector is compared with each label vector by
    cosine similarity, and the scaled similarities go through a softmax.

    Args:
        email_body: Email text (subject and body) to classify.
        image_name: Optional path to a local image file. If the path is given
            but does not exist, a warning is emitted and only the text is
            classified.
        logit_scale: Multiplier applied to the cosine similarities before the
            softmax (i.e. a softmax temperature of ``1 / logit_scale``).
            Defaults to 20.0.

    Returns:
        dict with keys ``"Phishing"`` and ``"Legitimate"`` (NumPy float32
        softmax scores that sum to 1) and ``"Result"`` (``"Phishing"`` only
        when its score is strictly higher, otherwise ``"Legitimate"``).
    """
    import warnings  # local import keeps this cell runnable on its own

    images = None
    image_token = ""

    # 1. Load the local image, if one was requested.
    if image_name:
        if os.path.exists(image_name):
            # Close the file handle as soon as the pixels are decoded;
            # convert() returns a new, fully loaded RGB image.
            with Image.open(image_name) as source_image:
                images = [source_image.convert("RGB")]
            # Placeholder that marks where the image goes in the prompt.
            image_token = "<|vision_start|><|image_pad|><|vision_end|>\n"
        else:
            # Stay best-effort (text-only), but do not hide the fallback.
            warnings.warn(
                f"Image file {image_name!r} not found; classifying the email text only.",
                stacklevel=2,
            )

    # 2. Build the query text.
    # NOTE(review): the instruction mentions an image even when none is attached.
    query_instruction = (
        "Instruct: Analyze this email and image for security threats. "
        "Is this a phishing attempt or a legitimate message?\n"
        f"Query: {image_token}{email_body}"
    )

    # Order matters: index 0 is "Phishing", index 1 is "Legitimate".
    labels = [
        "A malicious phishing email containing fraudulent visual elements, urgent threats, or suspicious links.",
        "A safe, legitimate, and professional business email communication."
    ]

    # Batch layout: [query, phishing label, legitimate label]
    texts = [query_instruction] + labels

    # 3. Tokenize / preprocess and move to the model's device.
    inputs = processor(
        text=texts,
        images=images,
        padding=True,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
        last_hidden_states = outputs.hidden_states[-1]
        attention_mask = inputs["attention_mask"].to(last_hidden_states.device)

        # Last-token pooling that works for either padding side.
        # If every row has a real token in the final position, the batch is
        # left-padded (or not padded at all) and the last position is the last
        # real token of every row. Otherwise the batch is right-padded and the
        # last real token sits at (number of real tokens - 1).
        if bool((attention_mask[:, -1] != 0).all()):
            embeddings = last_hidden_states[:, -1]
        else:
            last_token_indices = attention_mask.sum(dim=1) - 1
            batch_indices = torch.arange(
                last_hidden_states.size(0), device=last_hidden_states.device
            )
            embeddings = last_hidden_states[batch_indices, last_token_indices]

        # Cast to float32 before normalising: keeps precision when the model
        # runs in half precision, and bfloat16 tensors cannot be converted to
        # NumPy further down.
        embeddings = F.normalize(embeddings.float(), p=2, dim=1)

    # 4. Row 0 is the email; the remaining rows are the label sentences.
    email_vec = embeddings[0:1]
    label_vecs = embeddings[1:]

    # Vectors are unit-length, so the dot product is the cosine similarity.
    logits = torch.matmul(email_vec, label_vecs.T) * logit_scale
    probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    return {
        "Phishing": probs[0],
        "Legitimate": probs[1],
        "Result": "Phishing" if probs[0] > probs[1] else "Legitimate"
    }

# --- TEST ---
# The screenshot is looked up relative to the notebook's working directory,
# so 'image_cloud.png' must sit next to this notebook.
image_file = "image_cloud.png"

# Sample input: a credential-verification request with an external link.
# Written as explicit lines so the trailing spaces in the body stay visible.
email_content = (
    "\n"
    "Subject: Urgent: Verify your Rackspace credentials\n"
    "Body: We have detected a suspicious login attempt on your account. \n"
    "Please view the attached screenshot and verify your identity immediately \n"
    "at https://youssouf.pickup-distrib.com/verify-account.\n"
)

# Run the multimodal classifier and report the decision plus both scores.
result = classify_multimodal_email(email_content, image_name=image_file)

print("Final Decision: {}".format(result["Result"]))
print(
    "Confidence - Phishing: {:.2%}, Legit: {:.2%}".format(
        result["Phishing"], result["Legitimate"]
    )
)

In [None]:
import torch
import torch.nn.functional as F
from PIL import Image
import os

def classify_multimodal_email(email_body, image_name=None, logit_scale=20.0):
    """Zero-shot classify an email (optionally with an image) as phishing or legitimate.

    The email text, prefixed with a task instruction and, when an image is
    supplied, an image placeholder, is embedded together with two descriptive
    label sentences using the notebook-level ``processor`` and ``model``. Each
    text is represented by the final-layer hidden state of its last
    non-padding token. The email vector is compared with each label vector by
    cosine similarity, and the scaled similarities go through a softmax.

    Args:
        email_body: Email text (subject and body) to classify.
        image_name: Optional path to a local image file. If the path is given
            but does not exist, a warning is emitted and only the text is
            classified.
        logit_scale: Multiplier applied to the cosine similarities before the
            softmax (i.e. a softmax temperature of ``1 / logit_scale``).
            Defaults to 20.0.

    Returns:
        dict with keys ``"Phishing"`` and ``"Legitimate"`` (NumPy float32
        softmax scores that sum to 1) and ``"Result"`` (``"Phishing"`` only
        when its score is strictly higher, otherwise ``"Legitimate"``).
    """
    import warnings  # local import keeps this cell runnable on its own

    images = None
    image_token = ""

    # 1. Load the local image, if one was requested.
    if image_name:
        if os.path.exists(image_name):
            # Close the file handle as soon as the pixels are decoded;
            # convert() returns a new, fully loaded RGB image.
            with Image.open(image_name) as source_image:
                images = [source_image.convert("RGB")]
            # Placeholder that marks where the image goes in the prompt.
            image_token = "<|vision_start|><|image_pad|><|vision_end|>\n"
        else:
            # Stay best-effort (text-only), but do not hide the fallback.
            warnings.warn(
                f"Image file {image_name!r} not found; classifying the email text only.",
                stacklevel=2,
            )

    # 2. Build the query text.
    # NOTE(review): the instruction mentions an image even when none is attached.
    query_instruction = (
        "Instruct: Analyze this email and image for security threats. "
        "Is this a phishing attempt or a legitimate message?\n"
        f"Query: {image_token}{email_body}"
    )

    # Order matters: index 0 is "Phishing", index 1 is "Legitimate".
    labels = [
        "A malicious phishing email containing fraudulent visual elements, urgent threats, or suspicious links.",
        "A safe, legitimate, and professional business email communication."
    ]

    # Batch layout: [query, phishing label, legitimate label]
    texts = [query_instruction] + labels

    # 3. Tokenize / preprocess and move to the model's device.
    inputs = processor(
        text=texts,
        images=images,
        padding=True,
        return_tensors="pt"
    ).to(model.device)

    with torch.no_grad():
        outputs = model(**inputs, output_hidden_states=True)
        last_hidden_states = outputs.hidden_states[-1]
        attention_mask = inputs["attention_mask"].to(last_hidden_states.device)

        # Last-token pooling that works for either padding side.
        # If every row has a real token in the final position, the batch is
        # left-padded (or not padded at all) and the last position is the last
        # real token of every row. Otherwise the batch is right-padded and the
        # last real token sits at (number of real tokens - 1).
        if bool((attention_mask[:, -1] != 0).all()):
            embeddings = last_hidden_states[:, -1]
        else:
            last_token_indices = attention_mask.sum(dim=1) - 1
            batch_indices = torch.arange(
                last_hidden_states.size(0), device=last_hidden_states.device
            )
            embeddings = last_hidden_states[batch_indices, last_token_indices]

        # Cast to float32 before normalising: keeps precision when the model
        # runs in half precision, and bfloat16 tensors cannot be converted to
        # NumPy further down.
        embeddings = F.normalize(embeddings.float(), p=2, dim=1)

    # 4. Row 0 is the email; the remaining rows are the label sentences.
    email_vec = embeddings[0:1]
    label_vecs = embeddings[1:]

    # Vectors are unit-length, so the dot product is the cosine similarity.
    logits = torch.matmul(email_vec, label_vecs.T) * logit_scale
    probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    return {
        "Phishing": probs[0],
        "Legitimate": probs[1],
        "Result": "Phishing" if probs[0] > probs[1] else "Legitimate"
    }

# --- TEST ---
# The screenshot is looked up relative to the notebook's working directory,
# so 'image_cloud.png' must sit next to this notebook.
image_file = "image_cloud.png"

# Sample input: a credential-verification request with an external link.
# Written as explicit lines so the trailing spaces in the body stay visible.
email_content = (
    "\n"
    "Subject: Urgent: Verify your Rackspace credentials\n"
    "Body: We have detected a suspicious login attempt on your account. \n"
    "Please view the attached screenshot and verify your identity immediately \n"
    "at https://youssouf.pickup-distrib.com/verify-account.\n"
)

# Run the multimodal classifier and report the decision plus both scores.
result = classify_multimodal_email(email_content, image_name=image_file)

print("Final Decision: {}".format(result["Result"]))
print(
    "Confidence - Phishing: {:.2%}, Legit: {:.2%}".format(
        result["Phishing"], result["Legitimate"]
    )
)