<a href="https://colab.research.google.com/github/bhaskarfx/Compiler-Design-Lab-using-LEX-Programs-for-Lexical-Analysis-Design/blob/main/email_forensics_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive so later cells can read files stored there.
# NOTE: `google.colab` is imported here, so this cell is expected to run inside Google Colab.

from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import email
from email import policy
from email.parser import BytesParser
import os

In [None]:
# Function to parse the .eml file
def parse_eml(file_path):
    """Read an .eml file from disk and parse it into an email message object.

    Args:
        file_path: Path of the .eml file; it is opened in binary mode.

    Returns:
        The message object produced by ``BytesParser`` with ``policy.default``.
    """
    eml_parser = BytesParser(policy=policy.default)
    with open(file_path, 'rb') as eml_handle:
        return eml_parser.parse(eml_handle)

In [None]:
# Function to extract email headers
def extract_headers(msg):
    """Collect the headers of interest from a parsed email message.

    Args:
        msg: A parsed email message supporting ``msg[name]`` and ``msg.get_all(name)``.

    Returns:
        dict with the keys "From", "To", "Subject", "Date", "Message-ID",
        "Reply-To" and "Received". A header that is absent maps to ``None``.
        "Received" holds every Received header of the message, one per line
        (joined with "\\n") in the order they appear, or ``None`` if there
        are none.
    """
    # msg["Received"] would only yield the first occurrence; a message normally
    # carries one Received header per mail hop, so gather all of them.
    received_values = msg.get_all("Received")
    received = "\n".join(str(value) for value in received_values) if received_values else None

    headers = {
        "From": msg["From"],
        "To": msg["To"],
        "Subject": msg["Subject"],
        "Date": msg["Date"],
        "Message-ID": msg["Message-ID"],
        "Reply-To": msg["Reply-To"],
        "Received": received,
    }
    return headers

In [None]:
def _decode_text_part(part):
    """Decode one non-multipart MIME part to str, tolerating a missing or unknown charset."""
    raw = part.get_payload(decode=True)
    if raw is None:
        return ""
    # Fall back to UTF-8 when the part does not declare a charset.
    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, 'ignore')
    except LookupError:
        # The declared charset is not a codec Python knows about.
        return raw.decode('utf-8', 'ignore')


def _iter_inline_leaves(container):
    """Yield the non-multipart parts under a multipart part, depth-first, skipping attachments."""
    for child in container.get_payload():
        if child.get_content_disposition() == 'attachment':
            continue
        if child.is_multipart():
            yield from _iter_inline_leaves(child)
        else:
            yield child


def extract_body(msg):
    """Return the text body of an email message as a string.

    For a multipart message the MIME tree is searched depth-first (so a
    body nested inside e.g. multipart/mixed -> multipart/alternative is
    found) and the first text/plain or text/html part that is not marked
    as an attachment is decoded and returned. For a non-multipart message
    the payload itself is decoded.

    Args:
        msg: A parsed email message.

    Returns:
        The decoded body text, or "" when a multipart message contains no
        text/plain or text/html body part. Undecodable bytes are dropped.
    """
    if not msg.is_multipart():
        return _decode_text_part(msg)

    for part in _iter_inline_leaves(msg):
        if part.get_content_type() in ('text/plain', 'text/html'):
            return _decode_text_part(part)
    # No text/plain or text/html found: return an empty string so callers can slice it.
    return ""

In [None]:
# Function to extract attachments
def extract_attachments(msg):
    """Collect the attachments of an email message.

    Every part of a multipart message is visited with ``msg.walk()``, so
    attachments nested inside inner multipart containers are found too.
    A part counts as an attachment only when its Content-Disposition
    *value* is "attachment"; a plain substring test would also match e.g.
    ``inline; filename="attachment.png"``.

    Args:
        msg: A parsed email message.

    Returns:
        list of dicts ``{"filename": ..., "content": ...}`` in the order the
        parts are visited. "filename" is ``part.get_filename()`` (may be
        ``None``); "content" is ``part.get_payload(decode=True)``, which is
        ``None`` when the attachment part is itself a container. Returns an
        empty list for a non-multipart message.
    """
    attachments = []
    if not msg.is_multipart():
        return attachments

    for part in msg.walk():
        if part.get_content_disposition() != "attachment":
            continue
        attachments.append({
            "filename": part.get_filename(),
            "content": part.get_payload(decode=True),
        })
    return attachments


In [None]:
def check_suspicious_headers(headers, max_received=3):
    """Flag header patterns that this notebook treats as suspicious.

    Currently one check: the number of Received entries in
    ``headers["Received"]`` (a newline-separated string). Blank lines are
    ignored, and a line that starts with whitespace is treated as the
    folded continuation of the previous header rather than as a new one,
    so a single long Received header wrapped over several lines counts once.

    Args:
        headers: dict of header values; only the "Received" key is read.
        max_received: Largest number of Received entries that is still
            considered normal. Defaults to 3.

    Returns:
        list of human-readable warning strings (empty when nothing is flagged).
    """
    suspicious = []
    received = headers.get("Received")
    if received:
        hops = [
            line
            for line in str(received).split("\n")
            if line.strip() and not line[0].isspace()
        ]
        if len(hops) > max_received:
            suspicious.append("Multiple Received headers detected, possible spoofing attempt.")

    return suspicious

In [None]:
# Main forensic analysis function
def _safe_attachment_name(filename):
    """Reduce a filename taken from the email to a bare file name, or None if unusable."""
    if not filename:
        return None
    # Treat backslashes as separators too, then drop every directory component
    # so names like "../../x" or "/etc/x" cannot escape the output directory.
    name = os.path.basename(str(filename).replace("\\", "/"))
    if name in ("", ".", "..") or "\x00" in name:
        return None
    return name


def email_forensics(file_path, output_dir="."):
    """Parse an .eml file, print a forensic summary and save its attachments.

    Prints the selected headers, any warnings from
    ``check_suspicious_headers``, the first 500 characters of the body and
    the attachment filenames. Each attachment that has a usable filename
    and decodable content is written into ``output_dir``.

    Args:
        file_path: Path of the .eml file to analyse.
        output_dir: Directory that receives the saved attachments; created
            if missing. Defaults to the current working directory.

    Returns:
        None. Results are printed and attachments are written to disk.
    """
    msg = parse_eml(file_path)
    headers = extract_headers(msg)
    body = extract_body(msg)
    attachments = extract_attachments(msg)
    suspicious = check_suspicious_headers(headers)

    # Print headers
    print("Email Headers:")
    for key, value in headers.items():
        print(f"{key}: {value}")

    # Print suspicious activity
    if suspicious:
        print("\nSuspicious Activity Detected:")
        for activity in suspicious:
            print(activity)

    # Print body content (First 500 characters of the body for preview)
    print("\nEmail Body Preview (First 500 chars):")
    print(body[:500])

    # Handle attachments
    if attachments:
        print("\nAttachments:")
        os.makedirs(output_dir, exist_ok=True)
        for attachment in attachments:
            print(f"Filename: {attachment['filename']}")
            # The filename comes from the (untrusted) email, so never use it as a path directly.
            safe_name = _safe_attachment_name(attachment['filename'])
            content = attachment['content']
            if safe_name is None or content is None:
                # Nothing that can be written: no usable name, or no decodable payload.
                continue
            with open(os.path.join(output_dir, safe_name), 'wb') as f:
                f.write(content)
            print(f"Downloaded: {safe_name}")
    else:
        print("No attachments found.")

In [None]:
# Run the forensic analysis on the .eml file.
# NOTE(review): the path below is hard-coded to one file under the mounted Drive folder;
# change it to analyse a different message.
file_path = '/content/drive/MyDrive/forensics/Invitation For National Level Annual Techno-Cultural Fest _Technorollix-2025_.eml'
email_forensics(file_path)

Email Headers:
From: VC Office OPJU <vc.office@opju.ac.in>
To: opjunewsletter@opju.ac.in
Subject: Invitation For National Level Annual Techno-Cultural Fest "Technorollix-2025"
Date: Tue, 18 Mar 2025 10:48:12 +0530
Message-ID: <CAEqVzL95=WVOb3Lz+TqOuNy2=3==Na2+j_A=NDa7HWeejy2oJQ@mail.gmail.com>
Reply-To: None
Received: by 2002:a05:6200:440c:b0:594:7c44:861c with SMTP id t12csp2836519qnz;        Mon, 17 Mar 2025 22:22:23 -0700 (PDT)

Email Body Preview (First 500 chars):


Attachments:
Filename: Technorollix 2025 Poster.jpg
Downloaded: Technorollix 2025 Poster.jpg
Filename: Technorollix Invitation Card.pdf
Downloaded: Technorollix Invitation Card.pdf
