# **Part 1: Header Analysis**

In [1]:
pip install ipapi

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
import email
import os
import re
import ipapi

def extract_headers(msg):
    headers = {
        'Received-SPF': msg.get('Received-SPF'),
        'ARC-Authentication-Results': msg.get('ARC-Authentication-Results')
    }
    return headers

def check_spf(spf_header):
    if spf_header:
        spf_status = spf_header.lower()
        if 'pass' in spf_status:
            # Extract IP address from the SPF result
            ip_match = re.search(r'designates\s(.*?)\s', spf_header)
            if ip_match:
                ip = ip_match.group(1)
                return f"Pass", ip
            else:
                return "Pass", None
        else:
            return "Not Pass", None
    else:
        return "Not Available", None

def check_dkim(arc_auth_results):
    if arc_auth_results:
        # Extract DKIM information from ARC-Authentication-Results header
        dkim_match = re.search(r'dkim=(\w+).*?header.i=(\S+)', arc_auth_results)
        if dkim_match:
            dkim_status = dkim_match.group(1)
            domain = dkim_match.group(2)
            return dkim_status, domain
        else:
            return "Not Available", None
    else:
        return "Not Available", None

def check_dmarc(arc_auth_results):
    if arc_auth_results:
        # Extract DMARC information from ARC-Authentication-Results header
        dmarc_match = re.search(r'dmarc=(\w+)', arc_auth_results)
        if dmarc_match:
            dmarc_status = dmarc_match.group(1)
            return dmarc_status
        else:
            return "Not Available"
    else:
        return "Not Available"

def get_ip_info(ip_address):
    """
    Retrieves information about the given IP address using the ipapi library.

    Args:
        ip_address (str): The IP address to get information about.

    Returns:
        dict: The information about the IP address.
    """
    # Get the IP information using the ipapi library
    ip_info = ipapi.location(ip_address)

    # Return IP information in a dictionary
    return {
        "IP Address": ip_info.get('ip'),
        "City": ip_info.get('city'),
        "Region": ip_info.get('region'),
        "Country": ip_info.get('country_name'),
        "Postal Code": ip_info.get('postal'),
        "Latitude": ip_info.get('latitude'),
        "Longitude": ip_info.get('longitude'),
        "Timezone": ip_info.get('timezone'),
        "ISP": ip_info.get('org')
    }

def analyze_email(eml_file_path):
    if os.path.exists(eml_file_path):
        with open(eml_file_path, 'r') as eml_file:
            msg = email.message_from_file(eml_file)

            # Extract specific headers
            headers = extract_headers(msg)
            print("Extracted Information:")
            for header, value in headers.items():
                print(f"{header}: {value}")

            # Check SPF, DKIM, DMARC
            spf_status, spf_ip = check_spf(headers.get('Received-SPF'))
            dkim_status, dkim_domain = check_dkim(headers.get('ARC-Authentication-Results'))
            dmarc_status = check_dmarc(headers.get('ARC-Authentication-Results'))

            # Extract IP address from SPF result
            ip_match = re.search(r'designates\s(.*?)\s', headers.get('Received-SPF', ''))
            ip = ip_match.group(1) if ip_match else None

            # Get information about the IP address
            ip_info = get_ip_info(ip) if ip else None

            # Return analysis results in a dictionary
            return {
                "SPF Status": spf_status,
                "SPF IP": spf_ip,
                "DKIM Status": dkim_status,
                "DKIM Domain": dkim_domain,
                "DMARC Status": dmarc_status,
                "IP Info": ip_info
            }
    else:
        return {"Error": "File not found."}

In [3]:
# Usage
eml_file_path = '../Example_emails/email_1.eml'  # Replace with your email file path
analysis_results = analyze_email(eml_file_path)
print(analysis_results)

Extracted Information:
Received-SPF: pass (google.com: domain of linguorank@gmail.com designates 209.85.220.41 as permitted sender) client-ip=209.85.220.41;
ARC-Authentication-Results: i=1; mx.google.com;
       dkim=pass header.i=@gmail.com header.s=20230601 header.b=lP6ePK8U;
       spf=pass (google.com: domain of linguorank@gmail.com designates 209.85.220.41 as permitted sender) smtp.mailfrom=linguorank@gmail.com;
       dmarc=pass (p=NONE sp=QUARANTINE dis=NONE) header.from=gmail.com
{'SPF Status': 'Pass', 'SPF IP': '209.85.220.41', 'DKIM Status': 'pass', 'DKIM Domain': '@gmail.com', 'DMARC Status': 'pass', 'IP Info': {'IP Address': '209.85.220.41', 'City': 'Mountain View', 'Region': 'California', 'Country': 'United States', 'Postal Code': '94043', 'Latitude': 37.4043, 'Longitude': -122.0748, 'Timezone': 'America/Los_Angeles', 'ISP': 'GOOGLE'}}


In [8]:
import pandas as pd
df = pd.read_csv("https://raw.githubusercontent.com/Steemhunt/temporary-email-blacklist/master/blacklist.txt")
df.head(10)

Unnamed: 0,0-00.usa.cc
0,0-mail.com
1,001.igg.biz
2,027168.com
3,0815.ru
4,0815.ry
5,0815.su
6,0845.ru
7,0ak.org
8,0box.eu
9,0clickemail.com
