# Memespector Converter: Postprocess Results of Memespector GUI
This **Notebook-as-Tool** allows you **to upload results from [Memespector GUI](https://github.com/jason-chao/memespector-gui) and postprocess these results**.

- The combined JSON output of the Memespector GUI tool is split into separate files, one per API/AI and analyzed feature.

- If a base path is passed for local files analyzed with Memespector, the absolute file path is converted into a relative file path.


Open this file with Google Colab and run the cells consecutively by pressing the **Play** button or pushing **shift+enter**.

**Important notes:**
- Code is hidden in the background of Colab forms. To view or edit the code, **double-click** the cell or select **View → Show/hide code**.

**Credits:** This notebook was written by Marcus Burkhardt.

In [None]:
#@title Setup 1: Install and Load Required Libraries and Run Setup Procedures


# Import Libraries
import os
import shutil
import json
import pandas as pd
from google.colab import files


# Defining path variable for data path
# data_path = os.path.join("gdrive", "MyDrive", "Colab_Data", "Data", "Memespector")
#if not os.path.isdir(data_path):
#  os.makedirs(data_path)

# Initialize ...
# ...

In [None]:
#@title Setup 2: Definition of Core and Support Functions Used by the Tool(s)

def load_json(fi):
    """Read the JSON document stored at path *fi* and return the parsed data.

    The file is decoded as UTF-8 regardless of the platform's default
    encoding. The "utf-8-sig" codec is used so that a leading byte-order
    mark, if present, is skipped instead of making the JSON parser fail.

    Raises whatever ``open`` or ``json.load`` raise (e.g. ``OSError`` for an
    unreadable file, ``json.JSONDecodeError`` for malformed content).
    """
    with open(fi, 'r', encoding='utf-8-sig') as infile:
        return json.load(infile)

# Invocation-level settings that are copied into the per-image metadata when
# the invocation record contains them.
_INVOCATION_SETTINGS = (
    'Model',
    'DetectionMaxResults',
    'FlatteningMinScore',
    'FlatteningMinScores',
    'DetectionFeatureTypes',
)

# Dict-valued features whose key/value pairs are stored as one flat row.
_FLAT_DICT_FEATURES = ('SafeSearchAnnotation', 'adult', 'metadata', 'Status')


def _select_result_sets(dtype, api_response):
    """Return the result dicts of one API response as a list.

    For 'OpenSourceInvocation' the results are read from 'Results', for
    'ClarifaiInvocation' from the 'Data' entry of every element of 'Outputs';
    for every other invocation the response itself is the result. A list of
    results is returned element by element (not only when it holds exactly
    one element), and entries that are not dicts (e.g. null) are dropped
    because there is nothing to flatten in them.
    """
    if dtype == 'OpenSourceInvocation':
        raw = api_response['Results']
    elif dtype == 'ClarifaiInvocation':
        raw = api_response['Outputs']
        if isinstance(raw, list):
            raw = [output['Data'] for output in raw]
    else:
        raw = api_response

    candidates = raw if isinstance(raw, list) else [raw]
    return [candidate for candidate in candidates if isinstance(candidate, dict)]


def _flatten_feature(feature, value, fid, processed):
    """Turn one list- or dict-valued feature into a list of flat row dicts.

    Every row starts with 'File' (the image location) and 'Type' (the feature
    name). Returns ``None`` when *value* is a dict of a feature that is not
    handled, so the caller can report it.
    """
    base = {'File': fid, 'Type': feature}

    if isinstance(value, list):
        # One row per list entry; entries are expected to be dicts.
        return [{**base, **entry, 'Processed': processed} for entry in value]

    if feature == 'WebDetection':
        rows = []
        for subtype, entries in value.items():
            # A null subtype is treated like an empty list.
            for entry in entries or ():
                rows.append(
                    {**base, 'Subtype': subtype, **entry, 'Processed': processed}
                )
        return rows

    if feature == 'FullTextAnnotation':
        return [{**base, 'Text': value['Text'], 'Processed': processed}]

    if feature in _FLAT_DICT_FEATURES:
        return [{**base, **value}]

    if feature == 'description':
        rows = []
        for subtype, entries in value.items():
            for entry in entries or ():
                row = {**base, 'Subtype': subtype}
                if isinstance(entry, dict):
                    row.update(entry)
                else:
                    # Scalar entries are stored in a column named after the subtype.
                    row[subtype] = entry
                rows.append(row)
        return rows

    return None


def _write_table(outpath, stem, rows, always_csv=False):
    """Write *rows* to '<stem>.json' and, unless empty, to a tab-separated '<stem>.csv'."""
    table = pd.json_normalize(rows)
    if always_csv or len(table) > 0:
        table.to_csv(os.path.join(outpath, f"{stem}.csv"), sep='\t', index=False)

    with open(os.path.join(outpath, f"{stem}.json"), 'w', encoding='utf-8') as handle:
        json.dump(rows, handle, indent=4)


def postprocess_memespector_json(fname, base_path=""):
    """Split a combined Memespector GUI result file into per-API, per-feature files.

    The JSON file *fname* is loaded with ``load_json`` and is expected to hold
    a list of image records with the keys 'ImageLocation',
    'ImageLocationType', 'ExceptionRasied' (spelled like this in the input)
    and 'Completed', plus one '<Api>Invocation' entry per API that was run.

    For every image a metadata record is collected (location, location type,
    exception, completion flag, invocation settings, response status and
    string-valued response fields). Every list- or dict-valued feature of an
    API response is flattened into rows keyed by 'File' and 'Type'.

    Output is written to a directory named like *fname* without '.json':
    '_metadata.csv' / '_metadata.json' and '<Api>_<feature>.csv' /
    '<Api>_<feature>.json'. The CSV files are tab-separated; a feature without
    rows only gets the JSON file.

    Args:
        fname: Path of the Memespector JSON result file.
        base_path: If non-empty, this string is removed from the location of
            images whose location type is 'Local'. The stripped location is
            used as 'File' in the feature rows; the metadata 'location' keeps
            the original value.

    Returns:
        The path of the output directory.
    """
    data = load_json(fname)
    tmp = dict()

    for item in data:
        item_metadata = {
            'location': item['ImageLocation'],
            'location_type': item['ImageLocationType'],
            'exception': item['ExceptionRasied'],
            'completed': item['Completed']
        }

        if item_metadata['location_type'] == 'Local' and base_path != "":
            # NOTE: removes every occurrence of base_path, not only a leading one.
            item['ImageLocation'] = item['ImageLocation'].replace(base_path, '')
        fid = item['ImageLocation']

        for dtype in item:
            invocation = item[dtype]
            if 'Invocation' not in dtype or not invocation:
                continue

            api = dtype.replace('Invocation', '')
            processed = invocation['Processed']

            for setting in _INVOCATION_SETTINGS:
                if setting in invocation:
                    item_metadata[f'{dtype}_{setting}'] = invocation[setting]

            # Check for the response before touching it, so a missing (or
            # null) response is reported instead of raising.
            api_response = invocation.get('APIResponse')
            if api_response is None:
                print('Error C')
                continue

            if 'Status' in api_response:
                status = api_response['Status']
                if isinstance(status, dict):
                    for attr, value in status.items():
                        item_metadata[f'{dtype}_{attr}'] = value

            api_tables = tmp.setdefault(api, dict())

            for response_results in _select_result_sets(dtype, api_response):
                for k, v in response_results.items():
                    # The table is created for every key, so features without
                    # rows still produce an (empty) JSON file.
                    rows = api_tables.setdefault(k, [])

                    if isinstance(v, (list, dict)):
                        flattened = _flatten_feature(k, v, fid, processed)
                        if flattened is None:
                            print('Error A')
                            print(k, v)
                        else:
                            rows.extend(flattened)
                    elif isinstance(v, str):
                        item_metadata[f"{dtype}_{k}"] = v
                    elif v is None:
                        pass
                    else:
                        print('Error B')
                        print(k, v)

        tmp.setdefault('metadata', []).append(item_metadata)

    outpath = fname.replace('.json', '')
    os.makedirs(outpath, exist_ok=True)

    for api, api_data in tmp.items():
        if api == 'metadata':
            _write_table(outpath, f"_{api}", api_data, always_csv=True)
        else:
            for k, v in api_data.items():
                _write_table(outpath, f"{api}_{k}", v)

    return outpath


In [None]:
# @title Run to upload memespector results, convert to separate files

# Ask for the Memespector result file(s); the returned mapping is keyed by file name.
uploaded = files.upload()

# @markdown Add base_path to transform absolute locations of local images to relative path. Leave empty to keep absolute path.
base_path = "" #@param {type:"string"}

# Every input file, its zip archive and its output folder are recorded here so
# that they can be removed again later.
processed_items = []
for fname in uploaded:
    path = postprocess_memespector_json(fname, base_path)
    # Archive the output folder as "<folder>.zip".
    zipped = shutil.make_archive(path, 'zip', path)
    processed_items.extend((fname, zipped, path))


In [None]:
# @title Run to download results as zip
# Offer one archive per uploaded file: the file name with ".zip" in place of ".json".
for fname in uploaded:
    zipped = fname.replace('.json', '.zip')
    files.download(zipped)

In [None]:
#@title Optional: Run to cleanup files
# Delete everything recorded in processed_items: plain files are removed,
# directories are removed together with their content, missing paths are skipped.
for item in processed_items:
    if os.path.isfile(item):
        os.remove(item)
        continue
    if os.path.isdir(item):
        shutil.rmtree(item)

