# In [None]:
import re
from urllib.parse import urlsplit

# Matches http(s) URLs up to whitespace, angle brackets or quotes.
_URL_PATTERN = re.compile(r'https?://[^\s<>"\']+', re.IGNORECASE)

# Sentence punctuation that often trails a URL in running text.
_URL_TRAILING_PUNCTUATION = '.,;:!?)]}'

# Brand words shorter than this are too ambiguous to match on their own.
_MIN_BRAND_LENGTH = 3


def _normalize_domains(domains):
    """Return the non-empty string entries of *domains*, lower-cased and trimmed."""
    cleaned = (d.strip().strip('.').lower() for d in domains or () if isinstance(d, str))
    return [d for d in cleaned if d]


def _belongs_to_company(host, company_domains):
    """Return True if *host* equals, or is a subdomain of, one of *company_domains*."""
    host = host.lower().rstrip('.')
    return any(host == domain or host.endswith('.' + domain) for domain in company_domains)


def _mentions_company(text, company_domains):
    """Return True if lower-cased *text* names a company by full domain or by brand word."""
    for domain in company_domains:
        if domain in text:
            return True
        labels = domain.split('.')
        # Skip a leading "www" so the brand of "www.example.com" is "example".
        brand = labels[1] if labels[0] == 'www' and len(labels) > 1 else labels[0]
        if len(brand) < _MIN_BRAND_LENGTH:
            continue
        # Whole-word match only: "live" must not match inside "delivery".
        if re.search(r'(?<![a-z0-9])' + re.escape(brand) + r'(?![a-z0-9])', text):
            return True
    return False


def _url_host(url):
    """Return the lower-cased host of *url* (no userinfo/port), or '' if it cannot be parsed."""
    try:
        return urlsplit(url).hostname or ''
    except ValueError:
        return ''


def analyze_email(email_body, from_address, known_companies):
    """
    Analyzes an email for phishing indicators.

    Five independent checks each add a fixed number of points to the risk
    score: sender spoofing (30), generic greeting (15), urgency wording (25),
    links that leave the claimed company's domains (30) and common scam
    phrases (10).

    A company counts as "claimed" when the text contains one of its domains
    or, as a whole word, the leading label of a domain (e.g. "microsoft" for
    "microsoft.com"). A host counts as legitimate when it equals a known
    domain or is a subdomain of one.

    Args:
        email_body (str): The full text of the email. None is treated as empty.
        from_address (str): The 'From' email address. None is treated as empty.
        known_companies (list): A list of company domains the email might be impersonating.

    Returns:
        dict: A report with the keys "risk_score" (int, capped at 100),
        "warnings" (list of str), "details" (dict holding "sender_domain" and,
        when any were found, "suspicious_links") and "verdict" (str).
    """

    risk_score = 0
    warnings = []
    details = {}

    raw_body = email_body or ''
    body_text = raw_body.lower()  # lower-case once; every keyword check reuses it
    sender = (from_address or '').strip().lower()
    company_domains = _normalize_domains(known_companies)

    # 1. Check Sender Domain
    # rstrip('>') tolerates the "Display Name <user@host>" form.
    sender_domain = sender.rsplit('@', 1)[-1].rstrip('>').strip()
    details['sender_domain'] = sender_domain

    is_impersonating = (
        _mentions_company(sender, company_domains)
        and not _belongs_to_company(sender_domain, company_domains)
    )
    if is_impersonating:
        risk_score += 30
        warnings.append(f"SENDER SPOOFING: Email claims to be from {known_companies} but comes from '{sender_domain}'.")

    # 2. Check for Generic Greeting
    generic_greetings = ['dear customer', 'dear user', 'dear valued member', 'dear account holder', 'hello user']
    if any(greeting in body_text for greeting in generic_greetings):
        risk_score += 15
        warnings.append("GENERIC GREETING: Email uses a generic greeting instead of your name.")

    # 3. Check for Urgency
    urgency_keywords = ['urgent', 'immediately', 'action required', 'suspended', 'verify now', 'within 24 hours']
    found_urgency = [word for word in urgency_keywords if word in body_text]
    if found_urgency:
        risk_score += 25
        warnings.append(f"SENSE OF URGENCY: Email creates pressure with words: {found_urgency}.")

    # 4. Check for Suspicious Links (This is a simplified check)
    suspicious_links = []
    # Links are only judged when the body claims a known company; this does
    # not depend on the URL, so it is evaluated once.
    if _mentions_company(body_text, company_domains):
        for raw_url in _URL_PATTERN.findall(raw_body):
            url = raw_url.rstrip(_URL_TRAILING_PUNCTUATION)
            domain = _url_host(url)
            if domain and not _belongs_to_company(domain, company_domains):
                suspicious_links.append(f"Claimed: {known_companies}, Actual: {domain} -> {url}")

    if suspicious_links:
        risk_score += 30
        warnings.append("SUSPICIOUS LINKS: Found links that do not match the claimed company.")
        details['suspicious_links'] = suspicious_links

    # 5. Basic Check for Poor Grammar (Common Misspellings in Scams)
    common_scam_words = ['winning prize', 'kindly', 'congratulations you won', 'free gift']
    found_scam_words = [word for word in common_scam_words if word in body_text]
    if found_scam_words:
        risk_score += 10
        warnings.append(f"POOR GRAMMAR/SCAM PHRASES: Found common scam phrases: {found_scam_words}.")

    # Generate Final Report
    report = {
        "risk_score": min(risk_score, 100),  # Cap at 100%
        "warnings": warnings,
        "details": details
    }

    # Simple verdict
    if risk_score > 50:
        report["verdict"] = "🚨 HIGH RISK: This email is highly likely to be a phishing attempt."
    elif risk_score > 25:
        report["verdict"] = "⚠️  MEDIUM RISK: Be very cautious. This email shows several suspicious signs."
    else:
        report["verdict"] = "✅ LOW RISK: This email appears safe, but always stay vigilant."

    return report

# --- Example Usage ---
def _print_report(from_address, report):
    """Print the sender, verdict, risk score, warnings and any flagged links of *report*."""
    print(f"From: {from_address}")
    print(f"Verdict: {report['verdict']}")
    print(f"Risk Score: {report['risk_score']}%")
    for warning in report['warnings']:
        print(f" - {warning}")
    # 'suspicious_links' is only present when links were flagged.
    for link in report['details'].get('suspicious_links', []):
        print(f"   * {link}")


# Example 1: A classic fake Microsoft email
print("Example 1: Fake Microsoft Email\n" + "="*40)

fake_microsoft_email = """
Dear User,

We've detected unusual login activity on your Microsoft account. 
To prevent your account from being suspended, you must verify your identity immediately.

Click here to secure your account: https://micros0ft-verify.secure-login.xyz/confirm

If you do not take action within 24 hours, access to your services will be restricted.

Sincerely,
The Microsoft Security Team
"""

from_address_fake = "security@microsoft.secure-alert.com"
companies_impersonated = ["microsoft.com", "live.com", "outlook.com"]

report = analyze_email(fake_microsoft_email, from_address_fake, companies_impersonated)
_print_report(from_address_fake, report)

print("\n"*2)

# Example 2: A (likely) legitimate email
print("Example 2: Legitimate Netflix Email\n" + "="*40)

legit_netflix_email = """
Hello Jane Doe,

Your monthly Netflix membership has been charged to your credit card ending in 1234. Thank you for your payment.

If you have any questions, you can visit our help center at https://help.netflix.com.

- The Netflix Team
"""

from_address_legit = "info@mail.netflix.com"
companies_impersonated_netflix = ["netflix.com"]

report_legit = analyze_email(legit_netflix_email, from_address_legit, companies_impersonated_netflix)
_print_report(from_address_legit, report_legit)