In [None]:
import json
import mailbox
import email
import html
import mimetypes
import imghdr
import os
import re
from datetime import datetime

# Your own display name, exactly as it appears as 'sender_name' in the export.
# convert_folder() drops this name from the participant list so it does not
# end up in the generated folder titles; set it to None to keep every name.
myname='Christian Gollwitzer'

In [None]:
def shorten(string, N):
    """Cut ``string`` down to its first ``N`` characters and mark the cut with '...'.

    Strings of at most ``N`` characters come back unchanged. The ellipsis is
    appended on top of the ``N`` kept characters, so a shortened result is
    ``N + 3`` characters long.
    """
    fits = len(string) <= N
    return string if fits else string[:N] + '...'

In [None]:
def shortennames(names, maxlen):
    """Build a compact, comma-separated label from a collection of names.

    Three strategies are tried in order:

    1. Join the names unchanged; use that if it is shorter than ``maxlen``.
    2. Abbreviate the second word of every name to its first letter plus a
       dot (anything after it is dropped); use that if it is shorter than
       ``maxlen``.
    3. Truncate the abbreviated label with ``shorten`` and append the number
       of names in the form ``(<count>P)``.
    """
    separator = ', '

    # First try: the plain joined names.
    full = separator.join(names)
    if len(full) < maxlen:
        return full

    # Second try: reduce each second name to a single letter.
    initialled = separator.join(
        re.sub(r'^(\S+\s+\S).*', r'\1.', name) for name in names
    )
    if len(initialled) < maxlen:
        return initialled

    # Last resort: cut the label and state how many people it stands for.
    return shorten(initialled, maxlen - 5) + '(' + str(len(names)) + 'P)'

In [None]:
def subsanitize(string):
    """Replace every run of whitespace in ``string`` with a single space.

    Used to turn (possibly multi-line) message text into a one-line subject.
    Leading and trailing whitespace is collapsed too, not stripped.
    """
    whitespace_run = re.compile(r'\s+')
    return whitespace_run.sub(' ', string)

In [None]:
def utfdemangle(s):
    """Undo the double encoding of strings in the Facebook JSON export.

    The export stores each UTF-8 byte of a string as a separate character;
    mapping the characters back to bytes via Latin-1 and decoding those
    bytes as UTF-8 recovers the intended text.
    """
    raw_bytes = s.encode('latin1')
    return raw_bytes.decode('utf8')

In [None]:
def group_messages(jdata, threshold_ms=300000):
    """Group consecutive messages of one sender into multi-paragraph groups.

    Messages are ordered by ``timestamp_ms``. A new group starts whenever the
    sender changes or more than ``threshold_ms`` milliseconds lie between a
    message and the one directly before it.

    Parameters
    ----------
    jdata : dict
        Parsed conversation; ``jdata['messages']`` is a list of dicts that
        each carry ``'sender_name'`` and ``'timestamp_ms'``. The list is
        sorted in place.
    threshold_ms : int, optional
        Largest gap between two consecutive messages that still keeps them
        in the same group. Defaults to 300000 (5 minutes).

    Returns
    -------
    list of dict
        One dict per group with the keys ``'sender_name'``, ``'timestamp_ms'``
        (both taken from the group's first message) and ``'messages'`` (the
        original message dicts). Empty if there are no messages.
    """
    rawmsgs = jdata['messages']
    rawmsgs.sort(key=lambda m: m['timestamp_ms'])

    groups = []
    group = None
    oldts = 0
    for msg in rawmsgs:
        sender = msg['sender_name']
        ts = msg['timestamp_ms']
        if (group is None
                or sender != group['sender_name']
                or abs(ts - oldts) > threshold_ms):
            # Register the group as soon as it is opened, so an empty
            # conversation yields [] rather than a stray None entry.
            group = {'sender_name': sender, 'timestamp_ms': ts, 'messages': []}
            groups.append(group)

        group['messages'].append(msg)
        # The gap is measured against the previous message, not the group start.
        oldts = ts

    return groups

In [None]:
def format_email(msg):
    """Turn one message group into an ``email.message.EmailMessage``.

    Parameters
    ----------
    msg : dict
        A group with the keys ``'sender_name'``, ``'timestamp_ms'`` and
        ``'messages'``. Each entry of ``'messages'`` may carry ``'content'``
        (text) and/or ``'photos'`` (a list of dicts with a ``'uri'`` that is
        opened as a file path).

    Returns
    -------
    email.message.EmailMessage
        A message with a plain-text part and an HTML alternative. Photos are
        attached to the HTML part as related parts and referenced inline by
        Content-ID. The subject is the shortened first text paragraph; no
        Subject header is set if the group contains no text.

    Raises
    ------
    OSError
        If a referenced photo file cannot be opened.
    """
    m = email.message.EmailMessage()
    m['From'] = utfdemangle(msg['sender_name'])

    # timestamp_ms is in milliseconds. formatdate() renders the same UTC
    # date string ("-0000" zone) as format_datetime(utcfromtimestamp(...))
    # did, without relying on the deprecated datetime.utcfromtimestamp().
    m['Date'] = email.utils.formatdate(msg['timestamp_ms'] / 1000)

    plaintext = ''
    htmltext = '<html><body>\n'
    attachments = {}  # Content-ID (with angle brackets) -> image file path
    subject = None
    for paragraph in msg['messages']:
        if 'content' in paragraph:
            text = utfdemangle(paragraph['content'])
            plaintext += text + '\n'
            htmltext += '<p>' + html.escape(text) + '</p>\n'
            if subject is None:
                subject = shorten(subsanitize(text), 30)
        if 'photos' in paragraph:
            # Code from Stackoverflow to construct HTML email
            # with inlined images https://stackoverflow.com/a/49098251
            photos = paragraph['photos']
            # Terminate the line so following text starts on its own line.
            plaintext += str(len(photos)) + ' Photos\n'
            htmltext += '<p>\n'
            for photo in photos:
                image_cid = email.utils.make_msgid(domain='facebookexporter.local')
                # make_msgid() wraps the id in '<...>'; the cid: URL in the
                # HTML needs it without the angle brackets.
                htmlcid = image_cid[1:-1]
                htmltext += f'<img src="cid:{htmlcid}" style="width: 50%"> \n'
                attachments[image_cid] = photo['uri']
            htmltext += '</p>'
    htmltext += '</body> </html>\n'
    m.set_content(plaintext)
    m.add_alternative(htmltext, subtype='html')

    if subject is not None:
        m['Subject'] = subject

    # Attach images, if there are any, to the HTML alternative (payload 1).
    for cid, fn in attachments.items():
        # guess_type() returns None for unknown extensions; fall back to a
        # generic binary type instead of failing on None.split().
        guessed = mimetypes.guess_type(fn)[0] or 'application/octet-stream'
        maintype, subtype = guessed.split('/', 1)
        with open(fn, 'rb') as img:
            m.get_payload()[1].add_related(img.read(),
                                           maintype=maintype,
                                           subtype=subtype,
                                           cid=cid)
    return m


In [None]:
def convert_folder(path, outpath='FBMessages'):
    """Convert one conversation folder into an mbox file below ``outpath``.

    Reads ``<path>/message_1.json``, groups its messages and writes one email
    per group. The mbox file is named after the conversation's senders
    (without ``myname``); if that name is already taken, a running number is
    appended.

    NOTE(review): only ``message_1.json`` is read; should an export split a
    conversation over further ``message_<n>.json`` files, those are not
    picked up here - confirm against the export format.

    Parameters
    ----------
    path : str
        Folder containing ``message_1.json``.
    outpath : str, optional
        Directory for the generated mbox files; created if missing.
    """
    os.makedirs(outpath, exist_ok=True)

    # Close the JSON file deterministically instead of leaking the handle.
    with open(os.path.join(path, 'message_1.json'), encoding='utf-8') as f:
        jdata = json.load(f)
    mgroups = group_messages(jdata)

    # Generate the folder name from the participants.
    # Due to "left group" messages this is reconstructed from the messages.
    uniqparticipants = {utfdemangle(m['sender_name']) for m in jdata['messages']}
    if myname is not None:
        uniqparticipants.discard(myname)

    # Sort so the generated name does not depend on set iteration order,
    # which can differ from one interpreter run to the next.
    label = shortennames(sorted(uniqparticipants), 30)
    # A path separator inside a name would point into a non-existent
    # subdirectory, so neutralise it.
    for sep in ('/', os.sep):
        label = label.replace(sep, '_')

    fnbase = outpath + '/' + label
    outfn = fnbase
    i = 1
    while os.path.exists(outfn):
        outfn = fnbase + str(i)
        i += 1

    print('Processing: ', outfn)
    mbox = mailbox.mbox(outfn, create=True)
    try:
        for msg in mgroups:
            mbox.add(str(format_email(msg)).encode('utf-8'))
    finally:
        # Flush pending writes and release the file even if a message fails.
        mbox.close()


In [None]:
def convert_inbox(topdir='messages/inbox'):
    """Convert every conversation folder found directly below ``topdir``.

    A sub-folder counts as a conversation if it contains ``message_1.json``;
    each one is passed to ``convert_folder``. Folders are processed in sorted
    order, so the numeric suffixes given to clashing output names are
    reproducible between runs.

    Parameters
    ----------
    topdir : str, optional
        Directory holding one sub-folder per conversation. Defaults to
        ``'messages/inbox'``, relative to the current working directory.
    """
    candidates = [topdir + '/' + entry for entry in sorted(os.listdir(topdir))]
    msgdirs = [d for d in candidates if os.path.exists(d + '/message_1.json')]
    for d in msgdirs:
        convert_folder(d)

In [None]:
# Run the conversion for every conversation folder that convert_inbox() finds.
convert_inbox()