In [50]:
# Standard libraries
import os
import bz2
import tarfile
import glob
from copy import deepcopy

# Data libraries
import pandas as pd
import re
import numpy as np

# Visualisation libraries
import matplotlib.pyplot as plt
%matplotlib inline

from PIL import Image
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator

# Language processing
import string

import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')
nltk.download('punkt')
from nltk.tokenize import word_tokenize

# URL libs
import requests
from bs4 import BeautifulSoup

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Liam\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Liam\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [6]:
# Global variables to specify where to get the email data (lib_url) and where to save it (main_dir, sub_dir)
# These will be referenced in the data curation phase of execution.
lib_url = "https://spamassassin.apache.org/old/publiccorpus/"
main_dir = "data"
sub_dir = "extracted"

In [7]:
def create_directory_structure(main_dir, sub_dir):
    """
    Function to create the directory structure on disk in the case that it doesn't already exist
    The directory will be created in the same directory as the source file and will use the structure main_dir/sub_dir
    Input:
        main_dir: the top level of the directory structure
        sub_dir: sublevel in the directory structure
    Returns:
        No return.
    """
    try:
        os.mkdir(".\\" + main_dir)
    except:
        print("Directory already exists.")
        try:
            os.mkdir(".\\"+ main_dir +"\\"+ sub_dir +"\\")
        except:
            print("Directory already exists.")
    print("Directory Structure Created")

In [8]:
def download_email_records(url):
    """
    Function to download the page source from online directory and create a list of filenames with .tar.bz2 extensions
    Input:
        url: the lib_url defined in the source
    Returns:
        downloadable: a list of urls made of the lib_url web address and the filenames extracted from the hyperlines in the page source.
    """
    soup = BeautifulSoup(requests.get(lib_url).text)
    urls = soup.find_all('a')
    filenames = [url['href'] for url in urls if "bz2" in str(url)]
    downloadable = [lib_url + filename for filename in filenames]
    print("Email archive urls extracted")
    return downloadable

In [9]:
def save_email_records(file_urls):
    """
    Function to download the email archives and write them to disk in the directory hierarchy
    Input:
        file_urls: a list of urls pointing to the email archives
    Returns:
        No return.
    """
    for i, url in enumerate(file_urls):
        dl = requests.get(url, allow_redirects = True)
        open(".//" + main_dir + "//"+file_urls[i].split("/")[-1], 'wb').write(dl.content)
    print("Email archives downloaded from url")

In [10]:
def extract_email_records(main_dir, sub_dir):
    """
    Function to extract the email records from the downloaded email archives.
    Loops through all files in each level of the directory hierarchy and extracts the data from all tar.bz2 archives to disk.
    Input:
        main_dir: the top level of the directory structure
        sub_dir: sublevel in the directory structure
    Returns:
        No return.
    """
    for filepath in glob.glob(".\\" + main_dir + "\\*.tar.bz2"):
        #zipfile = bz2.BZ2File(filepath)
        #data = zipfile.read()
        #newfile = filepath[:-4]
        #open(newfile, "wb").write(data)
        tar = tarfile.open(filepath, "r:bz2")
        tar.extractall(os.path.join(main_dir+ "\\"+ sub_dir, filepath[7:-8]))
        tar.close()
    print("Email records extracted")

In [11]:
def dl_and_create_email_records(url, main_dir, sub_dir):
    """
    Wrapper function to download and create the email records from the archive.
    """
    urls = download_email_records(url)
    create_directory_structure(main_dir = main_dir, sub_dir = sub_dir)
    save_email_records(file_urls = urls)
    extract_email_records(main_dir = main_dir, sub_dir = sub_dir)
    print("Email records downloaded & extracted")

In [12]:
def get_target_directory_details(target_dir, sub_dir):
    """
    Function to traverse a directory and record the target type of each folder by checking if the folder
    contains either "HAM" or "SPAM" in the name.
    Input:
        target_dir: the target directory
        sub_dir: the sub directory
    Returns:
        email_type_names: a list containing the folder path, in the sub_dir, and the target type based on the folder name.
    """
    sub_directories = glob.glob(target_dir + "\\"+ sub_dir +"\\*\\*")
#     print(target_dir)
#     print(os.path.join(target_dir, "\\extracted\\*\\*"))
#     print(sub_directories)
    names = [(x.split("\\")[-1], "HAM" if x.find("ham") >=0 else "SPAM") for x in sub_directories]
    email_type_names = list(zip(names, sub_directories))
    print("Target directories extracted")
    return email_type_names

In [13]:
def parse_email(email, line_names, target):
    """
    Function to take in the filename of an email document.
    Extract any information relating to the predefined tags
    Store any information after the subject line as body - to be further processed later
    Input:
        email: the email text file, extracted from the email archive
        line_names: a predefined list of line start strings that will correspond to column headers later
        target: the target type extracted from the home folder of the email file.
    Returns:
        value_dict: a dictionary with the extracted body text, target type and key-value pairs for the line_names values.
    """
    try:
        with open(email) as file:
            body_start = False # Changed to True after reading the subject tag.
            body = []
            value_dict = {}
            value_dict['target'] = target

            for line in file.readlines():
                line_start = line.split(":")[0]+":"
                if body_start:
                    body.append(line.strip())
                if line_start in line_names:
                    line_contents = re.findall(r":\s(.*)", line)[0]
                    value_dict[line_start] = line_contents
                if line_start == "Subject:":
                    body_start = True
            value_dict['body'] = "\n".join(body)
            return value_dict
    except Exception as e:
        print(f"{e}: Error: Can't read file {email}")

In [14]:
def get_email_target_mappings(main_dir):
    """
    Function to map the target type to the folder name.
    Used to map the target to the individual email text files later.
    Input:
        main_dir: The directory that needs to be mapped
    Returns:
        target_mapping: list of tuples containing the folder path & the target type ("HAM" or "SPAM").    
    """
    email_type_names = get_target_directory_details(".\\" + main_dir, sub_dir)
    directories = [x[1] for x in email_type_names]
    targets = [x[0][1] for x in email_type_names]
    target_mapping = list(zip(directories, targets))
    print("Target mappings extracted")
    return target_mapping

In [15]:
def get_directory_file_listing(dir_path):
    """
    Function to create a list of all the file paths in a directory
    Input:
        dir_path: the file path of a directory
    Returns:
        a list of all files in the directory.
    """
    print(dir_path + "\\")
    return glob.glob(dir_path + "\\*")

In [16]:
def extract_email_data_to_dictionary(main_dir):
    """
    Function to extract the details from the email text files and store in a dictionary
    Inputs:
        main_dir: the directory containing the email text files
    Returns:
        email_contents: dictionary containing the extracted dictionaries from the function parse_email().
    """
    line_names = ["To:", "From:", "MIME-Version:", "Content-Type:",
                 "Content-Transfer-Encoding:", "X-Mailer:", "Subject:",
                 "Precedence:"]

    target_mapping = get_email_target_mappings(main_dir = main_dir)
    email_contents = {}
    for target in target_mapping:
        for file in get_directory_file_listing(target[0]):
            email_contents[file.split("\\")[-1]] = parse_email(file, line_names, target[1])
    print("Emails extracted to dictionary")
    return email_contents

In [17]:
def convert_dict_to_dataframe(email_dict):
    """
    Function to convert a dictionary to a dataframe and tranpose the resulting dataframe.
    Input:
        email_dict: a dictionary containing dictionaries with extracted email information
    Returns:
        df: dataframe generated from the dictionary, transposed to keep keys as the columns and not as the rows.
    """
    df = pd.DataFrame.from_dict(email_dict).transpose().reset_index()
    print("DataFrame generated")
    return df

In [18]:
def generate_base_email_dataframe():
    """
    Wrapper function to return a dataframe from the extracted email archives
    """
    email_dict = extract_email_data_to_dictionary(main_dir = main_dir)
    print("Base dataframe ready for cleansing")
    return convert_dict_to_dataframe(email_dict)

In [19]:
dl_and_create_email_records(lib_url, main_dir, sub_dir)
base_email_df = generate_base_email_dataframe()

Target directories extracted
Target mappings extracted
.\data\extracted\20021010_easy_ham\easy_ham\
.\data\extracted\20021010_hard_ham\hard_ham\
.\data\extracted\20021010_spam\spam\
'charmap' codec can't decode byte 0x81 in position 3082: character maps to <undefined>: Error: Can't read file .\data\extracted\20021010_spam\spam\0123.68e87f8b736959b1ab5c4b5f2ce7484a
list index out of range: Error: Can't read file .\data\extracted\20021010_spam\spam\0255.42a6feb4435a0a68929075c0926f085d
'charmap' codec can't decode byte 0x81 in position 2588: character maps to <undefined>: Error: Can't read file .\data\extracted\20021010_spam\spam\0273.51c482172b47ce926021aa7cc2552549
'charmap' codec can't decode byte 0x81 in position 2503: character maps to <undefined>: Error: Can't read file .\data\extracted\20021010_spam\spam\0330.a4df526233e524104c3b3554dd8ab5a8
'charmap' codec can't decode byte 0x81 in position 2682: character maps to <undefined>: Error: Can't read file .\data\extracted\20021010_spam

In [20]:
base_email_df

Unnamed: 0,index,target,From:,To:,Subject:,MIME-Version:,Content-Type:,Precedence:,body,X-Mailer:,Content-Transfer-Encoding:
0,0001.ea7e79d3153e7469e7a9c3e0af6a357e,HAM,Robert Elz <kre@munnari.OZ.AU>,Chris Garrigues <cwg-dated-1030377287.06fa6d@D...,Re: New Sequences Window,1.0,text/plain; charset=us-ascii,bulk,In-Reply-To: <1029945287.4797.TMDA@deepeddy.vi...,,
1,0002.b3120c4bcbf3101e661161ee7efcb8bf,HAM,Steve Burt <Steve_Burt@cursor-system.com>,"""'zzzzteana@yahoogroups.com'"" <zzzzteana@yahoo...",[zzzzteana] RE: Alexander,1.0,text/plain; charset=US-ASCII,bulk,Reply-To: zzzzteana@yahoogroups.com\nContent-T...,Internet Mail Service (5.5.2653.19),7bit
2,0003.acfc5ad94bbd27118a0d8685d18c89dd,HAM,"""Tim Chapman"" <timc@2ubh.com>",zzzzteana <zzzzteana@yahoogroups.com>,[zzzzteana] Moscow bomber,1.0,text/plain; charset=US-ASCII,bulk,Reply-To: zzzzteana@yahoogroups.com\nContent-T...,Microsoft Outlook Express Macintosh Edition - ...,7bit
3,0004.e8d5727378ddde5c3be181df593f1712,HAM,Monty Solomon <monty@roscom.com>,undisclosed-recipient: ;,[IRR] Klez: The Virus That Won't Die,1.0,"text/plain; charset=""us-ascii""",bulk,Sender: irregulars-admin@tb.tf\nErrors-To: irr...,,
4,0005.8c3b9e9c0f3f183ddaf7592a11b99957,HAM,Tony Nugent <tony@linuxworks.com.au>,Exmh Users Mailing List <exmh-users@example.com>,Re: Insert signature,,,bulk,X-Loop: exmh-users@example.com\nSender: exmh-u...,nmh-1.0.4 exmh-2.4,
...,...,...,...,...,...,...,...,...,...,...,...
9345,01396.e80a10644810bc2ae3c1b58c5fd38dfa,SPAM,Professional_Career_Development_Institute@Frug...,yyyy@netnoteinc.com,Busy? Home Study Makes Sense!,,text/html,,Id-Frugaljoe: yyyy####netnoteinc.com\nDate: Tu...,,
9346,01397.f75f0dd0dd923faefa3e9cc5ecb8c906,SPAM,"""IQ - TBA"" <tba@insiq.us>",<yyyy@spamassassin.taint.org>,Preferred Non-Smoker Rates for Smokers,1.0,text/html;,,"To: <yyyy@spamassassin.taint.org>\nDate: Tue, ...",Microsoft CDO for Windows 2000,quoted-printable
9347,01398.8ca7045aae4184d56e8509dc5ad6d979,SPAM,Mike <raye@yahoo.lv>,Mailing.List@user2.pro-ns.net,"How to get 10,000 FREE hits per day to any web...",,"text/plain; charset=""iso-8859-1""",,Sender: Mike <raye@yahoo.lv>\nMime-Version: 1....,Microsoft Outlook Build 10.0.2616,
9348,01399.2319643317e2c5193d574e40a71809c2,SPAM,"""Mr. Clean"" <cweqx@dialix.oz.au>",<Undisclosed.Recipients@webnote.net>,Cannabis Difference,1.0,text/plain;,,"Date: Wed, 05 Aug 2020 04:01:50 -1900\nMIME-Ve...",,7bit


In [21]:
cleansed_email_df = deepcopy(base_email_df)

In [22]:
cleansed_email_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9350 entries, 0 to 9349
Data columns (total 11 columns):
 #   Column                      Non-Null Count  Dtype 
---  ------                      --------------  ----- 
 0   index                       9350 non-null   object
 1   target                      9331 non-null   object
 2   From:                       9329 non-null   object
 3   To:                         9006 non-null   object
 4   Subject:                    9322 non-null   object
 5   MIME-Version:               6208 non-null   object
 6   Content-Type:               8052 non-null   object
 7   Precedence:                 5304 non-null   object
 8   body                        9331 non-null   object
 9   X-Mailer:                   3650 non-null   object
 10  Content-Transfer-Encoding:  4604 non-null   object
dtypes: object(11)
memory usage: 803.6+ KB


In [23]:
def rename_columns_remove_colon_from_column_name(df):
    """
    Function to remove the colon from the column headers and force the text to lowercase
    Input:
        df: the target dataframe
    Returns:
        df: df with renamed columns
    """
    df.columns = [x.replace(":", "").lower() for x in df.columns]
    return df

In [24]:
def extract_email_components_to_features(df, user_types):
    """
    Function to extract components of the to & from columns to new features
        fullname: the fullname of the sender that prefixs the email address
        email: the full email address contained in '<email_address>'
        username: the username from the email address (everything before @)
        domain: the domain of the email address (everything after @)
    Inputs:
        df: the target dataframe
        user_types: list of types of user that will be processed i.e. ['to'], ['from'], ['to', 'from']
    Returns:
        df: df with additional features added.
    """
    for user_type in user_types:
        # Split the FROM column into full name, username and domain
        # df[str(user_type + '_fullname')] = df[str(user_type)].str.split("<", n = 1).str[0].str.replace('"', "")
        df[str(user_type + '_fullname')] = df[str(user_type)].str.extract(r'[$\s\"]?([\w\d\s]*)[\s\"]')[0]

        # Extract the from email
        # df[str(user_type + '_email')] = df[str(user_type)].str.split("<").str[1].str.replace('>', "")
        # df[str(user_type + '_email')] = df[str(user_type)].str.extract(r'[\s<]?([\w\d\+]*@.*\.[\w\d]*)')[0]
        df[str(user_type + '_email')] = df[str(user_type)].str.extract(r'([\w\d\+]+@[\w\d]+\.[\w\d]+)')[0]
        df[str(user_type + '_email_count')] = df[str(user_type)].str.count(r'([\w\d\+]+@[\w\d]+\.[\w\d]+)')
        
        # Extract the from username
        df[str(user_type + '_username')] = df[str(user_type + '_email')].str.extract(r'(.*)[@]')

        # Extract the from domain
        df[str(user_type + '_domain')] = df[str(user_type + '_email')].str.extract(r'[@](.*)')
    
    return df

In [25]:
def exclude_invalid_to_from_subject_target_records(df):
    """
    Function to exclude records with invalid target, to, from & subject.
    Input:
        df: email contents dataframe
    Returns:
        df: email contents dataframe without invaid target, to, from & subject rows.
    
    """
    df = df[df['target'].notna()]
    df = df[df['to'].notna()]
    df = df[df['from'].notna()]
    df = df[df['subject'].notna()]
    df = df[df['to_email'].notna()]
    df = df[df['from_email'].notna()]
    
    return df

In [26]:
def extract_content_type_info_from_content_type_records(df):
    """
    Function to extract format, type, encoding & character set information from the content-type string
    Input:
        df: email contents dataframe
    Returns:
        df: email contents dataframe with additional columns for content-type data
    
    """
    df['content-type-format'] = df['content-type'].str.lower().str.extract(r'^(\w+)/')
    df['content-type-type'] = df['content-type'].str.lower().str.extract(r'^\w+/(\w+)[;\s]?')
    df['content-type-charset'] = df['content-type'].str.lower().str.extract(r'charset[\s]?=[\"]?([\w\d-]+)[\"\s]?')
    df['content-type-encoding'] = df['content-type'].str.lower().str.extract(r'encoding[\s]?=[\"]?([\w\d-]+)[\"\s]?')
    
    return df

In [27]:
# try functions
cleansed_email_df = deepcopy(base_email_df)
cleansed_email_df = rename_columns_remove_colon_from_column_name(cleansed_email_df)
cleansed_email_df = extract_email_components_to_features(cleansed_email_df, ['to', 'from'])
cleansed_email_df = exclude_invalid_to_from_subject_target_records(cleansed_email_df)
cleansed_email_df = extract_content_type_info_from_content_type_records(cleansed_email_df)

In [28]:
# Columns to drop after review
# mime-version: no appreciable relevance - all values are 1 with an insignificant qty including additional info (approx 2%)
# content-type: feature extraction is complete
# Precedence: no appreciable relevance - no alignment between bulk and to_email_count and no obvious way to infer type. Possibly revisit or attempt to create a feature independently
# Content-transfer-encoding: Not enough data to add menaingful information - 7 bit appears to have a higher frequency with HAM email.
# x-mailer: emails with x-mailer seem more likely to be HAM but this can be explored further in future iterations.

dropping = ['mime-version', 'content-type', 'precedence', 'content-transfer-encoding', 'x-mailer']
cleansed_email_df.drop(dropping, axis = 1, inplace = True)

In [29]:
print(cleansed_email_df.info())
df = deepcopy(cleansed_email_df)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8517 entries, 0 to 9349
Data columns (total 20 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   index                  8517 non-null   object 
 1   target                 8517 non-null   object 
 2   from                   8517 non-null   object 
 3   to                     8517 non-null   object 
 4   subject                8517 non-null   object 
 5   body                   8517 non-null   object 
 6   to_fullname            2403 non-null   object 
 7   to_email               8517 non-null   object 
 8   to_email_count         8517 non-null   float64
 9   to_username            8517 non-null   object 
 10  to_domain              8517 non-null   object 
 11  from_fullname          7213 non-null   object 
 12  from_email             8517 non-null   object 
 13  from_email_count       8517 non-null   float64
 14  from_username          8517 non-null   object 
 15  from

In [34]:
#pd.set_option('display.max_rows', None)
pd.reset_option('display.max_rows')

In [30]:
df['body'][0]

'In-Reply-To: <1029945287.4797.TMDA@deepeddy.vircio.com>\nReferences: <1029945287.4797.TMDA@deepeddy.vircio.com>\n<1029882468.3116.TMDA@deepeddy.vircio.com> <9627.1029933001@munnari.OZ.AU>\n<1029943066.26919.TMDA@deepeddy.vircio.com>\n<1029944441.398.TMDA@deepeddy.vircio.com>\nMIME-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nMessage-Id: <13258.1030015585@munnari.OZ.AU>\nX-Loop: exmh-workers@example.com\nSender: exmh-workers-admin@example.com\nErrors-To: exmh-workers-admin@example.com\nX-Beenthere: exmh-workers@example.com\nX-Mailman-Version: 2.0.1\nPrecedence: bulk\nList-Help: <mailto:exmh-workers-request@example.com?subject=help>\nList-Post: <mailto:exmh-workers@example.com>\nList-Subscribe: <https://listman.example.com/mailman/listinfo/exmh-workers>,\n<mailto:exmh-workers-request@redhat.com?subject=subscribe>\nList-Id: Discussion list for EXMH developers <exmh-workers.example.com>\nList-Unsubscribe: <https://listman.example.com/mailman/listinfo/exmh-workers>,\n<mailto:e

In [31]:
def get_unique_words(df):
    """
    Function to encapsulate the process of generating a list of unique cuisines from the cusines column.
    Input:
        df: target dataframe
    Output:
        unique_words: a list of all unique words contained in the body column of the target dataframe df.
    """
    all_words = df[['body']].drop_duplicates()
    all_words['split'] = all_words['body'].astype(str).map(lambda x: x.split(" "))
    unique_words = (list(set([a.strip() for b in all_words['split'].tolist() for a in b])))
    return unique_words

In [32]:
def prep_words_row(df_row):
    return [a.strip() for a in re.split(r'[\s,\n\.]+', df_row)]

In [33]:
def get_word_map(words, row):
    """
    Passing in a list of unique cuisines that has been generated previously.
    We compare the cuisines in a particular row and return a binary list of 0 for False, 1 for True in the cuisines is
    in the row values.
    """
    
    return [(a in row)*1 for a in words]

In [34]:
def parse_words_df(df):
    df_full = pd.DataFrame(df)
    df_full = df_full[df_full['body'].notna()]
    #print(df_full.head())
    try:
        df_full.reset_index(inplace = True)
    except ValueError as ve:
        print(ve)
        print("Index not reset")
    mapping_dict = {}
    unique_words = get_unique_words(df_full)
    for i in range(df_full.shape[0]):
        try:
            df_row = prep_words_row(df_full['body'][i])
        except:
            print(df_full['body'][i])
            print(type(df_full['body'][i]))
        word_map = get_word_map(unique_words, df_row)
        mapping_dict[df_full['index'][i]] = word_map
    word_mapping_df = pd.DataFrame.from_dict(mapping_dict, orient = 'index', columns = unique_words)
    return word_mapping_df

In [82]:
word_mapping_df = parse_words_df(df)
word_mapping_df.shapen

KeyboardInterrupt: 

In [44]:
df['words']= df['body'].apply(nltk.word_tokenize)

In [81]:
punc_map = str.maketrans(dict.fromkeys(string.punctuation, ''))

In [87]:
df['words'] = df['body'].str.translate(punc_map).str.split(" ")

In [83]:
df['body'][0]

'In-Reply-To: <1029945287.4797.TMDA@deepeddy.vircio.com>\nReferences: <1029945287.4797.TMDA@deepeddy.vircio.com>\n<1029882468.3116.TMDA@deepeddy.vircio.com> <9627.1029933001@munnari.OZ.AU>\n<1029943066.26919.TMDA@deepeddy.vircio.com>\n<1029944441.398.TMDA@deepeddy.vircio.com>\nMIME-Version: 1.0\nContent-Type: text/plain; charset=us-ascii\nMessage-Id: <13258.1030015585@munnari.OZ.AU>\nX-Loop: exmh-workers@example.com\nSender: exmh-workers-admin@example.com\nErrors-To: exmh-workers-admin@example.com\nX-Beenthere: exmh-workers@example.com\nX-Mailman-Version: 2.0.1\nPrecedence: bulk\nList-Help: <mailto:exmh-workers-request@example.com?subject=help>\nList-Post: <mailto:exmh-workers@example.com>\nList-Subscribe: <https://listman.example.com/mailman/listinfo/exmh-workers>,\n<mailto:exmh-workers-request@redhat.com?subject=subscribe>\nList-Id: Discussion list for EXMH developers <exmh-workers.example.com>\nList-Unsubscribe: <https://listman.example.com/mailman/listinfo/exmh-workers>,\n<mailto:e

In [88]:
df['words'][0]

['InReplyTo',
 '10299452874797TMDAdeepeddyvirciocom\nReferences',
 '10299452874797TMDAdeepeddyvirciocom\n10298824683116TMDAdeepeddyvirciocom',
 '96271029933001munnariOZAU\n102994306626919TMDAdeepeddyvirciocom\n1029944441398TMDAdeepeddyvirciocom\nMIMEVersion',
 '10\nContentType',
 'textplain',
 'charsetusascii\nMessageId',
 '132581030015585munnariOZAU\nXLoop',
 'exmhworkersexamplecom\nSender',
 'exmhworkersadminexamplecom\nErrorsTo',
 'exmhworkersadminexamplecom\nXBeenthere',
 'exmhworkersexamplecom\nXMailmanVersion',
 '201\nPrecedence',
 'bulk\nListHelp',
 'mailtoexmhworkersrequestexamplecomsubjecthelp\nListPost',
 'mailtoexmhworkersexamplecom\nListSubscribe',
 'httpslistmanexamplecommailmanlistinfoexmhworkers\nmailtoexmhworkersrequestredhatcomsubjectsubscribe\nListId',
 'Discussion',
 'list',
 'for',
 'EXMH',
 'developers',
 'exmhworkersexamplecom\nListUnsubscribe',
 'httpslistmanexamplecommailmanlistinfoexmhworkers\nmailtoexmhworkersrequestredhatcomsubjectunsubscribe\nListArchive',
 