# Make sure to install the dependencies first

pip install tqdm

# asyncio ships with Python's standard library (3.4+); it does not need to be installed with pip

pip install aiofiles


In [115]:
import os
import re
import asyncio
import aiofiles
from tqdm import tqdm

In [116]:
# Name of the mbox file to process (opened relative to mbox_dir).
mobox_filename = "mbox.full"
# mobox_filename = "mbox_example"
# Directory that holds the input mbox file and receives the "_reversed" output.
mbox_dir = 'Resources/mbox/'
# Directory that receives the per-email split files.
split_dir = 'Resources/Split/'

# Marker text on which the mbox content is cut into individual emails.
SPLIT_SIGNATURE = '1.4.0.g6f2b'

In [117]:
async def emailHeader(split_content):
    '''
    Return the leading header lines of one email.

    The header is taken to be every line from the start of ``split_content``
    up to and including the first line that contains ``'Subject:'``.

    Args:
        split_content: The email as an iterable of lines.

    Returns:
        A new list with the header lines. When no line contains
        ``'Subject:'`` an empty list is returned (instead of ``None``), so
        callers can always take ``len()`` of the result; the whole input is
        then treated as having no recognised header.
    '''
    header_lines = []
    for line in split_content:
        header_lines.append(line)
        if 'Subject:' in line:
            return header_lines
    # No subject line was found: report "no header" rather than implicitly
    # returning None, which would break len() in the caller.
    return []
        
async def reformat_Mbox(content):
    '''
    Reverse the body lines of a single email.

    The header (as returned by ``emailHeader``) stays at the top in its
    original order. When ``SPLIT_SIGNATURE`` occurs in ``content`` the last
    two lines (expected to be the signature delimiter and the signature
    itself) are kept verbatim at the bottom; everything in between is emitted
    in reverse line order.

    Args:
        content: Full text of one email.

    Returns:
        The reformatted email text, or ``''`` when ``content`` contains only
        whitespace.
    '''
    if not content.strip():
        # Whitespace-only chunk: nothing to emit.
        return ''
    split_content = content.split('\n')
    # emailHeader() may yield nothing when no 'Subject:' line exists; treat
    # that as "no header" instead of crashing on len(None).
    emailHeaderList = await emailHeader(split_content) or []
    body_start = len(emailHeaderList)
    if SPLIT_SIGNATURE in content:
        # Keep the trailing two lines exactly as they were rather than
        # re-creating them, so the delimiter (e.g. '-- ') is not rewritten.
        # max() stops the tail from overlapping lines already in the header.
        body_end = max(body_start, len(split_content) - 2)
    else:
        body_end = len(split_content)
    msgbodyList = split_content[body_start:body_end][::-1]
    signatureList = split_content[body_end:]
    # Merge header, reversed body and untouched signature.
    return '\n'.join(emailHeaderList + msgbodyList + signatureList)

In [118]:
async def splitFile(filename):
    '''
    Split the mbox file into separate individual email files.

    The file ``mbox_dir + filename`` is read as ISO-8859-1 and cut at every
    occurrence of ``SPLIT_SIGNATURE``. Piece number ``n`` (1-based) is written
    to ``<split_dir>/<filename>_File_<n>``; the signature is appended again to
    every piece except the last one, which is whatever followed the final
    signature.

    The pieces are written as ISO-8859-1 as well, matching the encoding
    ``processFile`` uses to read them back, so non-ASCII bytes survive the
    round trip unchanged.

    Args:
        filename: Name of the mbox file inside ``mbox_dir``.

    Errors raised while reading or writing are printed and otherwise ignored
    (best effort); an error opening the input file propagates.
    '''
    async with aiofiles.open(f"{mbox_dir}{filename}", mode='r', encoding="ISO-8859-1") as f:
        try:
            content = await f.read()
            parts = content.split(SPLIT_SIGNATURE)
            last_index = len(parts) - 1
            for i, part in enumerate(parts):
                # split() consumed the signature; restore it on every piece
                # except the trailing remainder.
                text = part if i == last_index else f"{part}{SPLIT_SIGNATURE}"
                target = os.path.join(split_dir, f"{filename}_File_{i + 1}")
                async with aiofiles.open(target, mode="w", encoding="ISO-8859-1") as newfile:
                    await newfile.write(text)
        except Exception as exp:
            print(f"Error file {filename} {exp}")
      
async def processFile(filename):
    '''
    Read one split email file and return its reformatted content.

    Args:
        filename: Name of the file inside ``split_dir``.

    Returns:
        The text produced by ``reformat_Mbox`` for the file's content, or
        ``''`` when reading or reformatting fails (the error is printed), so
        the caller can always write the result.
    '''
    async with aiofiles.open(f"{split_dir}{filename}", mode='r', encoding="ISO-8859-1") as f:
        try:
            content = await f.read()
            # Reformat mbox
            return await reformat_Mbox(content)
        except Exception as exp:
            print(f"Error file {filename} {exp}")
            # Explicit empty result instead of an implicit None, which the
            # caller could not pass to write().
            return ''
       
async def mbox(filename):
    '''
    Build ``<mbox_dir><filename>_reversed`` from ``<mbox_dir><filename>``.

    1. Split the mbox file into separate individual email files
       (``splitFile``).
    2. Process every split file (``processFile``) in numeric order and write
       the results, back to back, into the output file.

    Args:
        filename: Name of the mbox file inside ``mbox_dir``.

    NOTE(review): split files left in ``split_dir`` by an earlier run with
    more emails are still picked up here; clear the directory between runs if
    that matters.
    '''
    # 1. Split the mbox file into separate individual email files.
    print('Spliting mbox file')
    await splitFile(filename)

    # 2. Generate the new mbox file with all the changes.
    print('Processing files')
    output_path = f"{mbox_dir}{filename}_reversed"

    # Select only "<filename>_File_<n>" entries and order them by n:
    # os.listdir() gives no ordering guarantee, and a plain string sort would
    # put "_File_10" before "_File_2".
    split_name_pattern = re.compile(re.escape(filename) + r"_File_(\d+)")
    numbered_files = []
    for entry in os.listdir(split_dir):
        match = split_name_pattern.fullmatch(entry)
        if match:
            numbered_files.append((int(match.group(1)), entry))
    numbered_files.sort()

    # Mode "w" truncates any previous output; the file is flushed and closed
    # when the context manager exits.
    async with aiofiles.open(output_path, mode="w", encoding="ISO-8859-1") as out:
        for _, split_name in tqdm(numbered_files):
            processed = await processFile(split_name)
            # processFile may yield nothing for blank or failed files.
            if processed:
                await out.write(processed)
    print(f"Input file: {mbox_dir}{filename}")
    print(f"Output file is generated: {output_path}")
    
if __name__ == "__main__":
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # Plain script: no event loop is running yet, so start one and run
        # the job to completion.
        asyncio.run(mbox(mobox_filename))
    else:
        # A loop is already running (e.g. inside Jupyter/IPython): schedule
        # the job on it, as before.
        asyncio.ensure_future(mbox(mobox_filename))

Spliting mbox file
Processing files


100%|██████████| 8/8 [00:00<00:00, 571.71it/s]


Input file: Resources/mbox/mbox.full
Output file is generated: Resources/mbox/mbox.full_reversed
