In [9]:
"""
PhishGuard: Corrected Dual-Model Phishing Detection System
Fixed decision logic for all scenarios with proper OR logic for threat detection
"""

import json
import re
import time
import warnings
from typing import Any, Dict, List, Optional
from urllib.parse import urlparse

import numpy as np
import torch
from openai import OpenAI
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

warnings.filterwarnings('ignore')

# ================================
# SYSTEM CONFIGURATION
# ================================
class Config:
    """Shared tunables read by the analyzers, the classifier and the fusion step."""

    # --- Models -----------------------------------------------------------
    # Identifier passed as ``model`` to the OpenAI chat-completions call.
    GPT_MODEL_ID: str = "ft:gpt-4o-mini-2024-07-18:personal::C1GDgRus"
    # Directory handed to ``from_pretrained`` when loading the URL model.
    MODERNBERT_MODEL_PATH: str = "/content/drive/MyDrive/dhanya_modernBERT_unzipped"

    # --- Input limits -----------------------------------------------------
    # Email bodies are cut to this many characters before being sent.
    MAX_EMAIL_LENGTH: int = 2000
    # ``max_length`` given to the URL classification pipeline.
    MAX_URL_LENGTH: int = 256

    # --- Decision ---------------------------------------------------------
    # Minimum combined score reported for anything judged phishing.
    PHISHING_THRESHOLD: float = 0.5

    # --- Fusion weights (email 60% / URLs 40%) ----------------------------
    EMAIL_WEIGHT: float = 0.6
    URL_WEIGHT: float = 0.4

    # --- Diagnostics ------------------------------------------------------
    # When true, the email analyzer prints the start of each raw model reply.
    DEBUG_MODE: bool = False

# ================================
# URL EXTRACTION AND ANALYSIS
# ================================
class URLExtractor:
    """Find URLs in free text and attach simple lexical risk indicators.

    Extraction runs in two passes:

    1. URLs with an explicit scheme (``http``, ``https``, ``ftp``) are taken
       from the untouched text and their spans are blanked out.
    2. Email addresses, money amounts and stand-alone decimals are removed
       from what is left, and only then are ``www.`` and bare ``domain.tld``
       candidates collected.

    Blanking each accepted span prevents a later, looser pattern from
    re-matching the inside of a URL that was already reported (which would
    otherwise yield e.g. both ``http://x.ml/a`` and ``https://x.ml/a``).
    """

    # Lookbehind for "start of a token": the previous character is absent,
    # whitespace, or an opening bracket/quote. It keeps the numeric clean-up
    # away from digits that sit inside a host name or URL path.
    _TOKEN_START = r'(?<![^\s(\[{<"\'])'
    _MONEY_RE = re.compile(_TOKEN_START + r'\$[\d,]+\.?\d*')
    _DECIMAL_RE = re.compile(_TOKEN_START + r'\d+\.\d+')
    # Email addresses are not links; without this their domain part would be
    # picked up by the bare-domain pattern.
    _EMAIL_RE = re.compile(r'(?<![^\s(\[{<"\':])[\w.%+-]+@[\w-]+(?:\.[\w-]+)+')
    _IPV4_RE = re.compile(r'(?:\d{1,3}\.){3}\d{1,3}')
    _SCHEMES = ('http://', 'https://', 'ftp://')

    def __init__(self):
        """Compile the URL patterns and set up the risk-indicator word lists."""

        # Patterns that carry their own scheme; matched against the raw text.
        scheme_patterns = [
            r'https?://[^\s<>"{}|\\^`\[\]]+',  # Standard HTTP/HTTPS
            r'ftp://[^\s<>"{}|\\^`\[\]]+',  # FTP
        ]
        # Patterns without a scheme; matched after scheme URLs were blanked.
        schemeless_patterns = [
            r'(?:www\.)[a-zA-Z0-9][a-zA-Z0-9-]+[a-zA-Z0-9]\.[^\s<>"{}|\\^`\[\]]+',  # www. URLs
            r'(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,}(?:/[^\s]*)?',  # Domain.tld/path
        ]

        self.url_patterns = scheme_patterns + schemeless_patterns
        self._scheme_regexes = [re.compile(p, re.IGNORECASE) for p in scheme_patterns]
        self._schemeless_regexes = [re.compile(p, re.IGNORECASE) for p in schemeless_patterns]
        self.compiled_patterns = self._scheme_regexes + self._schemeless_regexes

        # URL shorteners
        self.url_shorteners = {
            'bit.ly', 'tinyurl.com', 'goo.gl', 'ow.ly', 'is.gd',
            't.co', 'buff.ly', 'j.mp', 'clicky.me', 'budurl.com'
        }

        # Suspicious TLDs (each entry includes its leading dot)
        self.suspicious_tlds = ['.tk', '.ml', '.ga', '.cf', '.pw']

        # Phishing keywords
        self.phishing_keywords = [
            'verify', 'suspend', 'secure', 'update', 'confirm',
            'validate', 'alert', 'account', 'billing', 'refund',
            'prize', 'free', 'click'
        ]

    def extract_urls(self, text: str) -> List[Dict[str, Any]]:
        """Return one metadata dict (see ``_analyze_url``) per distinct URL in *text*.

        Scheme-less candidates are normalised to ``https://``. Results are
        de-duplicated on the normalised URL string. Scheme URLs come first,
        followed by ``www.`` and bare-domain matches. An empty or ``None``
        *text* yields an empty list.
        """
        if not text:
            return []

        seen = set()
        url_metadata: List[Dict[str, Any]] = []

        def claim(match):
            """Record the matched candidate (if acceptable) and blank its span."""
            url = match.group(0).strip().rstrip('.,;:!?')

            # Patterns are case-insensitive, so compare the scheme that way too;
            # otherwise "HTTP://x.com" would be prefixed a second time.
            if url and not url.lower().startswith(self._SCHEMES):
                if url.startswith('www.') or self._looks_like_url(url):
                    url = 'https://' + url
                else:
                    url = ''

            if url and url not in seen and self._is_valid_url(url):
                seen.add(url)
                url_metadata.append(self._analyze_url(url))
            return ' '

        remaining = text
        for regex in self._scheme_regexes:
            remaining = regex.sub(claim, remaining)

        # Clean-up happens only now, so it can no longer damage scheme URLs
        # (previously "http://192.168.1.100/x" was reduced to "http://./x").
        remaining = self._EMAIL_RE.sub(' ', remaining)
        remaining = self._MONEY_RE.sub('', remaining)
        remaining = self._DECIMAL_RE.sub('', remaining)

        for regex in self._schemeless_regexes:
            remaining = regex.sub(claim, remaining)

        return url_metadata

    def _looks_like_url(self, text: str) -> bool:
        """Return True if a scheme-less candidate plausibly is ``host.tld[/path]``.

        Requires at least one dot, a final label of 2-6 characters (measured
        up to the first ``/``), and at least one dot-separated part that is
        not purely numeric.
        """
        parts = text.split('.')
        if len(parts) < 2:
            return False

        tld = parts[-1].split('/')[0]
        if not 2 <= len(tld) <= 6:
            return False

        return not all(part.isdigit() for part in parts)

    def _is_valid_url(self, url: str) -> bool:
        """Return True if *url* parses with both a scheme and a network location."""
        try:
            result = urlparse(url)
        except ValueError:
            return False
        return bool(result.scheme and result.netloc)

    def _analyze_url(self, url: str) -> Dict[str, Any]:
        """Derive risk indicators for a single URL.

        Returns a dict with keys ``url``, ``domain`` (lower-cased netloc with
        a leading ``www.`` removed), ``path``, ``has_ip``, ``is_shortened``,
        ``has_suspicious_tld``, ``suspicious_keywords`` and
        ``subdomain_count``. If the URL cannot be parsed, the dict holds
        ``url``, ``domain='unknown'`` and ``error`` instead.
        """
        try:
            parsed = urlparse(url)
            domain = parsed.netloc.lower()
            if domain.startswith('www.'):
                domain = domain[4:]

            # Host-based checks use the hostname (no port / userinfo) and exact
            # or suffix comparisons. Substring tests flagged "microsoft.com"
            # as the shortener "t.co" and "api.mlflow.org" as a ".ml" domain.
            host = (parsed.hostname or '').rstrip('.')
            if host.startswith('www.'):
                host = host[4:]

            is_ip = bool(self._IPV4_RE.fullmatch(host))
            label_count = len(host.split('.')) if host else 0

            return {
                'url': url,
                'domain': domain,
                'path': parsed.path,
                'has_ip': is_ip,
                'is_shortened': any(
                    host == shortener or host.endswith('.' + shortener)
                    for shortener in self.url_shorteners
                ),
                'has_suspicious_tld': any(host.endswith(tld) for tld in self.suspicious_tlds),
                'suspicious_keywords': self._check_suspicious_keywords(url),
                # An IP literal has no subdomains; never report a negative count.
                'subdomain_count': 0 if is_ip else max(label_count - 2, 0),
            }

        except ValueError as e:
            return {'url': url, 'domain': 'unknown', 'error': str(e)}

    def _check_suspicious_keywords(self, url: str) -> bool:
        """Return True if any configured phishing keyword occurs in *url* (case-insensitive substring)."""
        url_lower = url.lower()
        return any(keyword in url_lower for keyword in self.phishing_keywords)

# ================================
# EMAIL ANALYZER (GPT-4o)
# ================================
class EmailAnalyzer:
    """Classify an email's subject and body through the OpenAI chat-completions API."""

    # First verdict keyword in the model's reply.
    _LABEL_RE = re.compile(r"\b(phishing|safe|legitimate)\b")
    # A negation immediately in front of that keyword, e.g. "not phishing",
    # "non-phishing", "isn't a phishing", "not safe".
    _NEGATION_RE = re.compile(r"(?:\bnot|\bno|\bnon|n't)[\s-]+(?:an?\s+)?$")

    def __init__(self, api_key: str):
        """Create the OpenAI client and remember the configured model id."""
        self.client = OpenAI(api_key=api_key)
        self.model_id = Config.GPT_MODEL_ID
        print("GPT-4o Email Analyzer initialized")

    @classmethod
    def _parse_verdict(cls, raw_response: Optional[str]):
        """Turn the model's free-text reply into ``(is_phishing, confidence)``.

        The first whole-word verdict keyword decides the label, flipped if a
        negation directly precedes it. A plain substring test would read
        "Safe. No phishing indicators." or "not phishing" as phishing, and
        "unsafe" as safe. A reply without any keyword gives ``(False, 0.5)``.
        The confidence for a recognised verdict is the fixed value 0.9.
        """
        text = (raw_response or "").strip().lower()
        match = cls._LABEL_RE.search(text)
        if match is None:
            return False, 0.5

        negated = bool(cls._NEGATION_RE.search(text[:match.start()]))
        says_phishing = match.group(1) == 'phishing'
        return (says_phishing != negated), 0.9

    def analyze_email(self, subject: str, body: str, max_retries: int = 3) -> Dict[str, Any]:
        """Classify one email.

        Args:
            subject: Email subject line.
            body: Email body; converted with ``str()`` and cut to
                ``Config.MAX_EMAIL_LENGTH`` characters.
            max_retries: Number of attempts; values below 1 are treated as 1.

        Returns:
            A dict with ``is_phishing``, ``confidence``, ``prediction_text``
            and ``model``. If every attempt fails, the dict instead reports
            ``is_phishing=False``, ``confidence=0.0``,
            ``prediction_text='error'`` and an ``error`` message.
        """
        email_content = f"Subject: {subject}\n\nBody: {str(body)[:Config.MAX_EMAIL_LENGTH]}"

        # Always make at least one attempt so a dict is returned for every input.
        attempts = max(1, max_retries)
        last_error = "no attempt made"

        for attempt in range(attempts):
            try:
                response = self.client.chat.completions.create(
                    model=self.model_id,
                    messages=[
                        {
                            "role": "system",
                            "content": "You are a phishing email classifier. Classify the email as either 'safe' or 'phishing'. Respond with the classification and confidence."
                        },
                        {
                            "role": "user",
                            "content": email_content
                        }
                    ],
                    max_tokens=150,
                    temperature=0.1
                )

                # The message content may be None; treat that as an empty reply.
                raw_response = response.choices[0].message.content or ""

                if Config.DEBUG_MODE:
                    print(f"    DEBUG: {raw_response[:100]}")

                is_phishing, confidence = self._parse_verdict(raw_response)

                return {
                    'is_phishing': is_phishing,
                    'confidence': confidence,
                    'prediction_text': raw_response[:200],
                    'model': 'GPT-4o'
                }

            except Exception as e:
                # Best-effort boundary around the remote call: back off and
                # retry; the last failure is reported in the result below.
                last_error = str(e)
                if attempt < attempts - 1:
                    time.sleep((attempt + 1) * 2)

        return {
            'is_phishing': False,
            'confidence': 0.0,
            'prediction_text': 'error',
            'error': last_error,
            'model': 'GPT-4o'
        }

# ================================
# URL CLASSIFIER (ModernBERT)
# ================================
class URLClassifier:
    """Classify URLs as benign or phishing with a sequence-classification model."""

    def __init__(self, model_path: str):
        """Load tokenizer and model from *model_path* and build the pipeline.

        Loading is best-effort: on any failure a warning is printed and
        ``self.classifier`` stays ``None``, in which case ``classify_url``
        returns an error result instead of raising.
        """
        print("Loading ModernBERT model...")

        self.tokenizer = None
        self.model = None
        self.classifier = None

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(model_path)
            # NOTE(review): trust_remote_code=True lets code shipped with the
            # checkpoint run at load time; confirm this checkpoint needs it.
            self.model = AutoModelForSequenceClassification.from_pretrained(
                model_path,
                torch_dtype=torch.float32,
                trust_remote_code=True
            )

            # Pipeline device convention: 0 = first CUDA device, -1 = CPU.
            self.device = 0 if torch.cuda.is_available() else -1

            # NOTE(review): newer transformers releases prefer top_k=None over
            # return_all_scores and may return a flat list of label dicts;
            # classify_url accepts both output shapes.
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                device=self.device,
                return_all_scores=True,
                truncation=True,
                max_length=Config.MAX_URL_LENGTH
            )

            print("ModernBERT model loaded successfully")

        except Exception as e:
            print(f"Warning: Could not load ModernBERT: {e}")
            self.classifier = None

    @staticmethod
    def _label_scores(result) -> Dict[str, float]:
        """Map lower-cased label -> score from a pipeline result.

        Accepts both the nested ``[[{label, score}, ...]]`` form and the flat
        ``[{label, score}, ...]`` form.
        """
        items = result
        while isinstance(items, list) and items and isinstance(items[0], list):
            items = items[0]
        return {str(item['label']).lower(): item['score'] for item in items}

    def classify_url(self, url: str) -> Dict[str, Any]:
        """Classify a single URL.

        The URL is lower-cased and stripped of a leading ``http(s)://`` and
        ``www.`` before being passed to the model. Labels ``LABEL_0`` /
        ``Benign`` are read as the benign score and ``LABEL_1`` / ``Phishing``
        as the phishing score, case-insensitively.

        Returns:
            On success: ``url``, ``is_phishing``, ``confidence`` (the larger
            of the two scores), ``benign_score``, ``phishing_score``,
            ``model``. On failure, including when the model is not loaded or
            its labels are unrecognised, a dict with ``is_phishing=False``,
            ``confidence=0.0`` and an ``error`` message.
        """
        if not self.classifier:
            return {
                'url': url,
                'is_phishing': False,
                'confidence': 0.0,
                'error': 'Model not loaded'
            }

        try:
            # Preprocess
            processed_url = url.lower()
            if processed_url.startswith(('http://', 'https://')):
                processed_url = processed_url.split('://', 1)[1]
            if processed_url.startswith('www.'):
                processed_url = processed_url[4:]

            # Classify
            scores = self._label_scores(self.classifier(processed_url))

            benign_score = scores.get('label_0', scores.get('benign'))
            phishing_score = scores.get('label_1', scores.get('phishing'))
            if benign_score is None and phishing_score is None:
                # Reporting "benign, confidence 0" for an unknown label set
                # would hide a model/label mismatch; surface it as an error.
                raise ValueError(f"Unrecognized model labels: {sorted(scores)}")
            benign_score = 0.0 if benign_score is None else benign_score
            phishing_score = 0.0 if phishing_score is None else phishing_score

            return {
                'url': url,
                'is_phishing': phishing_score > benign_score,
                'confidence': max(benign_score, phishing_score),
                'benign_score': benign_score,
                'phishing_score': phishing_score,
                'model': 'ModernBERT'
            }

        except Exception as e:
            return {
                'url': url,
                'is_phishing': False,
                'confidence': 0.0,
                'error': str(e),
                'model': 'ModernBERT'
            }

    def batch_classify(self, urls: List[str]) -> List[Dict[str, Any]]:
        """Classify each URL in order and return the results as a list.

        No pause is inserted between items: the classifier is a pipeline built
        in this process, so the former per-URL sleep only added latency.
        """
        return [self.classify_url(url) for url in urls]

# ================================
# CORRECTED DECISION FUSION ENGINE
# ================================
class DecisionFusion:
    """
    Fuse the email verdict and the per-URL verdicts into one decision.

    Key principle: ANY phishing signal (email OR URL) = PHISHING.
    The weighted score only expresses severity; it never overrides that rule.
    """

    def __init__(self):
        """Read weights and threshold from ``Config`` and set the risk multipliers."""
        self.email_weight = Config.EMAIL_WEIGHT
        self.url_weight = Config.URL_WEIGHT
        self.phishing_threshold = Config.PHISHING_THRESHOLD

        # Risk multipliers (applied ONLY to confidence, not to binary decision)
        self.risk_multipliers = {
            'has_ip': 1.3,
            'is_shortened': 1.2,
            'has_suspicious_tld': 1.25,
            'suspicious_keywords': 1.15
        }

    def combine_results(self,
                       email_result: Dict[str, Any],
                       url_results: List[Dict[str, Any]],
                       url_metadata: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Combine one email result with the URL results of the same message.

        Decision logic:

        1. Email is phishing OR any usable URL result is phishing = PHISHING.
        2. Severity score:
           - No usable URL results (no URLs, or every classification carries
             an ``error``): 100% email score.
           - Otherwise: email_weight * email + url_weight * URL score, where
             URL score = 0.7 * mean + 0.3 * max over usable URLs (a benign
             URL contributes 0).
           - A phishing verdict is never scored below the phishing threshold.
        3. Risk multipliers raise the confidence of URLs already classified
           as phishing (capped at 1.0); they never change a verdict.
        4. ``confidence`` is the strongest phishing signal for a phishing
           verdict, and the weakest "safe" signal for a safe verdict.

        Args:
            email_result: Dict with ``is_phishing`` and ``confidence``.
            url_results: Per-URL classification dicts; entries containing an
                ``error`` key are ignored.
            url_metadata: Per-URL metadata dicts, paired with ``url_results``
                by position.

        Returns:
            Dict with ``is_phishing``, ``combined_score``, ``confidence``,
            ``risk_level``, ``email_is_phishing``, ``urls_are_phishing``,
            ``decision_factors`` (at most five), ``decision_path`` and
            ``decision_logic``.
        """
        email_is_phishing = email_result.get('is_phishing', False)
        email_confidence = email_result.get('confidence', 0.0)

        url_scores = []            # per usable URL: boosted confidence if phishing, else 0
        phishing_url_scores = []   # boosted confidences of URLs classified as phishing
        safe_url_confidences = []  # classifier confidences of URLs classified as benign

        for url_result, metadata in zip(url_results or [], url_metadata or []):
            if 'error' in url_result:
                continue

            url_confidence = url_result.get('confidence', 0.0)
            if url_result.get('is_phishing', False):
                # Boost confidence based on risk factors
                for risk_factor, multiplier in self.risk_multipliers.items():
                    if metadata.get(risk_factor):
                        url_confidence = min(url_confidence * multiplier, 1.0)
                phishing_url_scores.append(url_confidence)
                url_scores.append(url_confidence)
            else:
                safe_url_confidences.append(url_confidence)
                url_scores.append(0)

        phishing_url_count = len(phishing_url_scores)
        urls_are_phishing = phishing_url_count > 0

        # OR logic for threat detection
        is_phishing = email_is_phishing or urls_are_phishing

        email_score = email_confidence if email_is_phishing else 0

        if not url_scores:
            # No usable URL evidence: do not dilute the email score with an
            # empty URL term.
            combined_score = email_score
            decision_path = f"Email-only: {'PHISHING' if email_is_phishing else 'SAFE'} (conf: {email_confidence:.2f})"
        else:
            avg_url_score = sum(url_scores) / len(url_scores)
            url_score = 0.7 * avg_url_score + 0.3 * max(url_scores)

            # Combined score for severity
            combined_score = self.email_weight * email_score + self.url_weight * url_score

            # Decision path (shows the weights actually in use)
            email_status = "PHISHING" if email_is_phishing else "SAFE"
            url_status = f"{phishing_url_count} PHISHING" if urls_are_phishing else "ALL SAFE"
            decision_path = (
                f"Email={email_status}({email_score:.2f})*{self.email_weight:g} + "
                f"URLs={url_status}({url_score:.2f})*{self.url_weight:g} = {combined_score:.3f}"
            )

        # Ensure score is normalized
        combined_score = min(max(combined_score, 0.0), 1.0)

        # For phishing cases, ensure minimum score reflects threat
        if is_phishing and combined_score < self.phishing_threshold:
            combined_score = self.phishing_threshold

        # Confidence in the verdict that was actually reached
        if is_phishing:
            # Only signals that voted "phishing" count; the confidence of a
            # "safe" email verdict says nothing about the phishing verdict.
            phishing_signals = list(phishing_url_scores)
            if email_is_phishing:
                phishing_signals.append(email_confidence)
            confidence = max(phishing_signals)
        else:
            # A safe verdict is only as reliable as its weakest safe signal.
            confidence = min([email_confidence, *safe_url_confidences])

        # Risk level based on combined score
        if combined_score >= 0.8:
            risk_level = "CRITICAL"
        elif combined_score >= 0.6:
            risk_level = "HIGH"
        elif combined_score >= 0.5:
            risk_level = "PHISHING"
        elif combined_score >= 0.4:
            risk_level = "MEDIUM"
        elif combined_score >= 0.2:
            risk_level = "LOW"
        else:
            risk_level = "MINIMAL"

        # Decision factors
        factors = []
        if email_is_phishing:
            factors.append(f"Email classified as phishing (confidence: {email_confidence:.2f})")
        if urls_are_phishing:
            factors.append(f"Found {phishing_url_count} phishing URL(s)")

        for metadata in url_metadata:
            if metadata.get('has_ip'):
                factors.append(f"URL uses IP address: {metadata['domain']}")
            if metadata.get('is_shortened'):
                factors.append(f"URL shortener detected: {metadata['domain']}")
            if metadata.get('has_suspicious_tld'):
                factors.append("Suspicious domain extension")

        return {
            'is_phishing': is_phishing,
            'combined_score': combined_score,
            'confidence': confidence,
            'risk_level': risk_level if is_phishing else "SAFE",
            'email_is_phishing': email_is_phishing,
            'urls_are_phishing': urls_are_phishing,
            'decision_factors': factors[:5],
            'decision_path': decision_path,
            'decision_logic': "OR logic: Email=PHISHING OR URL=PHISHING → PHISHING"
        }

# ================================
# MAIN PHISHGUARD SYSTEM
# ================================
class PhishGuardSystem:
    """Pipeline front end: email verdict, URL extraction, URL verdicts, fusion."""

    def __init__(self, openai_api_key: str):
        """Construct the four pipeline components, reporting progress on stdout."""
        print("Initializing PhishGuard System...")
        print("-" * 50)

        self.url_extractor = URLExtractor()
        self.email_analyzer = EmailAnalyzer(openai_api_key)
        self.url_classifier = URLClassifier(Config.MODERNBERT_MODEL_PATH)
        self.decision_fusion = DecisionFusion()

        print("PhishGuard System initialized")
        print("=" * 50)

    def analyze(self, subject: str, body: str, show_details: bool = True) -> Dict[str, any]:
        """Run the full pipeline on one email and print a step-by-step report.

        Returns a dict with the keys ``timestamp``, ``email``,
        ``email_analysis``, ``urls_found``, ``url_analysis``,
        ``url_metadata`` and ``final_decision``. ``show_details`` only
        controls whether the fusion logic and decision path lines are printed.
        """
        thin_rule = "-" * 50
        thick_rule = "=" * 50

        print("\nAnalyzing email...")
        print(thin_rule)

        report = {
            'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'),
            'email': {'subject': subject[:100], 'body_length': len(body)},
        }

        # Stage 1 - email content verdict
        print("Step 1: Email content analysis (GPT-4o)")
        email_verdict = self.email_analyzer.analyze_email(subject, body)
        report['email_analysis'] = email_verdict
        email_label = "PHISHING" if email_verdict['is_phishing'] else "SAFE"
        print(f"  Result: {email_label} (confidence: {email_verdict['confidence']:.2%})")

        # Stage 2 - pull URLs out of subject + body
        print("\nStep 2: URL extraction")
        extracted = self.url_extractor.extract_urls(f"{subject} {body}")
        url_total = len(extracted)
        report['urls_found'] = url_total
        print(f"  Found {url_total} URLs")

        # Stage 3 - classify whatever was extracted
        verdicts = []
        if not extracted:
            print("\nStep 3: No URLs to classify")
        else:
            print("\nStep 3: URL classification (ModernBERT)")
            verdicts = self.url_classifier.batch_classify([entry['url'] for entry in extracted])

            position = 0
            for entry, verdict in zip(extracted, verdicts):
                position += 1
                if 'error' in verdict:
                    # Failed classifications are not listed.
                    continue
                label = "PHISHING" if verdict['is_phishing'] else "SAFE"
                print(f"  URL {position}: {entry['domain'][:30]} - {label} ({verdict['confidence']:.2%})")

        report['url_analysis'] = verdicts
        report['url_metadata'] = extracted

        # Stage 4 - fuse both signals
        print("\nStep 4: Decision fusion (OR logic)")
        fused = self.decision_fusion.combine_results(email_verdict, verdicts, extracted)
        report['final_decision'] = fused

        if show_details:
            print(f"  Logic: {fused['decision_logic']}")
            print(f"  Path: {fused['decision_path']}")

        # Summary banner
        print("\n" + thick_rule)
        print("FINAL DECISION")
        print(thick_rule)
        verdict_text = 'PHISHING DETECTED' if fused['is_phishing'] else 'SAFE'
        print(f"Classification: {verdict_text}")
        print(f"Risk Level: {fused['risk_level']}")
        print(f"Combined Score: {fused['combined_score']:.3f}")
        print(f"Confidence: {fused['confidence']:.2%}")

        key_factors = fused['decision_factors']
        if key_factors:
            print("\nKey Factors:")
            for factor in key_factors:
                print(f"  - {factor}")

        return report

# ================================
# COMPREHENSIVE TEST SUITE
# ================================
def _load_api_key() -> Optional[str]:
    """Return the OpenAI API key from Colab secrets, the environment, or a prompt.

    Sources are tried in that order. The prompt uses ``getpass`` so the key
    is not echoed. Returns ``None`` or an empty string if no key was given.
    """
    try:
        from google.colab import userdata
        api_key = userdata.get('OPENAI_API_KEY')
    except Exception:
        # Not running in Colab, or the secret is missing/inaccessible:
        # fall through to the other sources.
        api_key = None

    if api_key:
        print("API key loaded")
        return api_key

    import getpass
    import os

    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        api_key = getpass.getpass("Enter API key: ").strip()
    return api_key


def _build_test_scenarios() -> List[Dict[str, str]]:
    """Return the seven scenario definitions (name, expected, subject, body)."""
    return [
        # SCENARIO 1: Phishing email, no URLs
        {
            "name": "1. Phishing Email (No URLs)",
            "expected": "PHISHING (email threat only)",
            "subject": "Urgent Account Suspension",
            "body": """
            Your account will be permanently deleted in 24 hours.
            Reply immediately with your password to prevent this.

            Account Services
            """
        },

        # SCENARIO 2: Safe email, no URLs
        {
            "name": "2. Safe Email (No URLs)",
            "expected": "SAFE (no threats)",
            "subject": "Team Meeting Tomorrow",
            "body": """
            Hi team,

            Reminder: We have our weekly meeting tomorrow at 10 AM.
            Please prepare your updates.

            Thanks,
            John
            """
        },

        # SCENARIO 3: Phishing email + safe URL
        {
            "name": "3. Phishing Email + Safe URL",
            "expected": "PHISHING (email threat, safe URL)",
            "subject": "Account Verification Required",
            "body": """
            We detected suspicious activity on your account.
            Reply with your account details immediately.

            Learn about security at: https://www.google.com/security

            Security Team
            """
        },

        # SCENARIO 4: Phishing email + phishing URL
        {
            "name": "4. Phishing Email + Phishing URL",
            "expected": "PHISHING (both threats)",
            "subject": "Your Account Is Locked",
            "body": """
            Your account has been locked for security.

            Unlock it here: https://secure-bank-verify.tk/unlock

            Act within 24 hours or lose access permanently.

            Support Team
            """
        },

        # SCENARIO 5: Safe email + phishing URL (CRITICAL TEST)
        {
            "name": "5. Safe Email + Phishing URL",
            "expected": "PHISHING (URL threat)",
            "subject": "Check This Out",
            "body": """
            Hey,

            I found this weird link someone sent me:
            http://192.168.1.100/free-prizes

            Looks suspicious, right?

            Sarah
            """
        },

        # SCENARIO 6: Safe email + safe URL
        {
            "name": "6. Safe Email + Safe URL",
            "expected": "SAFE (no threats)",
            "subject": "Project Resources",
            "body": """
            Hi team,

            Here's the documentation for our project:
            https://www.wikipedia.org/wiki/Project_management

            Let me know if you need anything else.

            Mike
            """
        },

        # SCENARIO 7: Safe email + mixed URLs
        {
            "name": "7. Safe Email + Mixed URLs",
            "expected": "PHISHING (at least one URL threat)",
            "subject": "Security Newsletter",
            "body": """
            Monthly security update:

            Official guide: https://www.microsoft.com/security
            Warning - phishing site: http://phishing-test.ml/fake
            Our blog: https://www.company.com/blog

            Stay safe!
            IT Team
            """
        }
    ]


def run_all_scenarios():
    """
    Test all 7 possible scenarios:

    1. Phishing email (no URLs)
    2. Safe email (no URLs)
    3. Phishing email + safe URLs
    4. Phishing email + phishing URLs
    5. Safe email + phishing URLs
    6. Safe email + safe URLs
    7. Mixed: Safe email + mixed URLs (some safe, some phishing)

    Each scenario is analyzed and its verdict compared with the first word of
    the scenario's ``expected`` text (PHISHING or SAFE). A per-scenario
    PASS/FAIL status is printed and stored, and the results are written to a
    timestamped JSON file in the current directory.
    """

    # Setup API key
    api_key = _load_api_key()
    if not api_key:
        print("ERROR: API key required")
        return

    # Initialize
    print("\n" + "=" * 70)
    print("PHISHGUARD COMPREHENSIVE TEST - ALL 7 SCENARIOS")
    print("Using OR Logic: Any phishing signal = PHISHING")
    print("=" * 70)

    phishguard = PhishGuardSystem(api_key)

    test_scenarios = _build_test_scenarios()

    # Run all tests
    all_results = []

    for i, scenario in enumerate(test_scenarios, 1):
        print("\n" + "=" * 70)
        print(f"SCENARIO {i}: {scenario['name']}")
        print(f"Expected: {scenario['expected']}")
        print("=" * 70)

        # Analyze
        results = phishguard.analyze(
            scenario["subject"],
            scenario["body"],
            show_details=True
        )

        # Store results
        decision = results["final_decision"]
        verdict = "PHISHING" if decision["is_phishing"] else "SAFE"
        # The expected text always starts with the expected verdict word.
        expected_verdict = scenario["expected"].split()[0]
        all_results.append({
            "scenario": scenario["name"],
            "expected": scenario["expected"],
            "result": verdict,
            "passed": verdict == expected_verdict,
            "score": decision["combined_score"],
            "email_threat": decision["email_is_phishing"],
            "url_threat": decision["urls_are_phishing"],
            "decision_path": decision["decision_path"]
        })

        # Short pause between scenarios (each one issues a remote API call).
        time.sleep(1)

    # Summary
    print("\n" + "=" * 70)
    print("COMPLETE TEST SUMMARY - CORRECTED LOGIC")
    print("=" * 70)

    for r in all_results:
        print(f"\n{r['scenario']}")
        print(f"  Expected: {r['expected']}")
        print(f"  Result: {r['result']} (Score: {r['score']:.3f})")
        print(f"  Status: {'PASS' if r['passed'] else 'FAIL'}")
        print(f"  Email: {'PHISHING' if r['email_threat'] else 'SAFE'}")
        print(f"  URLs: {'PHISHING' if r['url_threat'] else 'SAFE'}")
        print(f"  Decision: {r['decision_path']}")

    # Save results
    filename = f"phishguard_corrected_{time.strftime('%Y%m%d_%H%M%S')}.json"
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(all_results, f, indent=2, default=str)

    print(f"\n[Results saved to {filename}]")

    # Decision logic explanation
    print("\n" + "=" * 70)
    print("CORRECTED DECISION LOGIC")
    print("=" * 70)
    print("""
    PRIMARY RULE: Email=PHISHING OR URL=PHISHING → PHISHING

    This implements proper security logic where ANY threat = PHISHING

    SCORING (for severity/confidence):
    - No URLs: Score = Email confidence (if phishing) or 0
    - With URLs: Score = (Email * 0.6) + (URL * 0.4)
    - URL Score = (Average * 0.7) + (Max * 0.3) of phishing URLs

    RISK LEVELS (based on combined score):
    - Score >= 0.8: CRITICAL
    - Score >= 0.6: HIGH
    - Score >= 0.5: PHISHING (threshold)
    - Score < 0.5: SAFE (only if no threats detected)

    KEY PRINCIPLE: We use OR logic for detection (security-first)
    but weighted scoring for severity assessment.
    """)

    # Only claim success when every verdict matched its expectation.
    failed = sum(1 for r in all_results if not r["passed"])
    print("\n" + "=" * 70)
    if failed:
        print(f"{failed} of {len(all_results)} scenarios did not match expectations")
    else:
        print("All scenarios tested successfully!")
    print("=" * 70)

# ================================
# MAIN EXECUTION
# ================================
# Run the scenario suite only when this module is executed directly,
# not when it is imported.
if __name__ == "__main__":
    run_all_scenarios()

API key loaded

PHISHGUARD COMPREHENSIVE TEST - ALL 7 SCENARIOS
Using OR Logic: Any phishing signal = PHISHING
Initializing PhishGuard System...
--------------------------------------------------
GPT-4o Email Analyzer initialized
Loading ModernBERT model...


Device set to use cuda:0


ModernBERT model loaded successfully
PhishGuard System initialized

SCENARIO 1: 1. Phishing Email (No URLs)
Expected: PHISHING (email threat only)

Analyzing email...
--------------------------------------------------
Step 1: Email content analysis (GPT-4o)
  Result: PHISHING (confidence: 90.00%)

Step 2: URL extraction
  Found 0 URLs

Step 3: No URLs to classify

Step 4: Decision fusion (OR logic)
  Logic: OR logic: Email=PHISHING OR URL=PHISHING → PHISHING
  Path: Email-only: PHISHING (conf: 0.90)

FINAL DECISION
Classification: PHISHING DETECTED
Risk Level: CRITICAL
Combined Score: 0.900
Confidence: 90.00%

Key Factors:
  - Email classified as phishing (confidence: 0.90)

SCENARIO 2: 2. Safe Email (No URLs)
Expected: SAFE (no threats)

Analyzing email...
--------------------------------------------------
Step 1: Email content analysis (GPT-4o)
  Result: SAFE (confidence: 90.00%)

Step 2: URL extraction
  Found 0 URLs

Step 3: No URLs to classify

Step 4: Decision fusion (OR logic)
 