## Zip all data files into a staging area for upload to Dataverse

In [None]:
import json
import os
import zipfile

# load the project configuration (a JSON object of paths) from the parent folder
# JSON is UTF-8 by specification, so do not rely on the platform default encoding
with open('../config.json', encoding='utf-8') as f:
    config = json.load(f)

In [None]:
# pair each models (input) folder with its staging (output) name, one entry
# per file format; both values are looked up in the loaded config
manifest = [
    {'input': config[models_key], 'output': config[staging_key]}
    for models_key, staging_key in (
        ('models_gpkg_path', 'staging_gpkg_path'),
        ('models_graphml_path', 'staging_graphml_path'),
        ('models_nelist_path', 'staging_nelist_path'),
    )
]
# display the manifest as the cell output
manifest

In [None]:
# zip a whole directory
def zip_dir(input_path, output_folder, output_file):
    """
    Compress every file found under input_path into a single zip archive.

    The archive is written to '<output_folder>/<output_file>.zip' using
    DEFLATE compression. output_folder is created if it does not exist.

    Parameters
    ----------
    input_path : str
        directory that is walked recursively; every file below it is added
    output_folder : str
        directory in which the zip archive is created
    output_file : str
        name of the archive, without the '.zip' extension

    Raises
    ------
    ValueError
        if a file's path contains none of '/shapefiles/', '/graphml/' or
        '/node_edge_lists/'
    """
    # folder markers, checked in this order, that anchor the archive-relative path
    patterns = ('/shapefiles/', '/graphml/', '/node_edge_lists/')

    # exist_ok avoids the race between a separate exists() check and makedirs()
    os.makedirs(output_folder, exist_ok=True)
    output_path = '{}/{}.zip'.format(output_folder, output_file)

    # the context manager closes the archive even if adding a file fails
    with zipfile.ZipFile(file=output_path, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(input_path):
            for file in files:
                input_file = os.path.join(root, file)

                # preserve the relative path below the state level in the zip archive
                pattern = next((p for p in patterns if p in input_file), None)
                if pattern is None:
                    raise ValueError(
                        'cannot determine archive path for {!r}: expected one of {} '
                        'in the path'.format(input_file, ', '.join(patterns))
                    )

                # drop everything up to and including the marker, plus 6 more characters
                # NOTE(review): the 6 presumably covers a 5-character state folder name
                # and its trailing separator -- confirm against the data layout
                arcname = input_file[input_file.find(pattern) + len(pattern) + 6:]
                zf.write(filename=input_file, arcname=arcname)

In [None]:
%%time
# for each manifest entry, zip every state folder found in its input folder
# into its own archive under the staging folder
for item in manifest:
    print(item['output'])

    # config is the dict returned by json.load, so keys must be subscripted;
    # attribute access (config.staging_folder) raises AttributeError
    output_folder = '{}/{}'.format(config['staging_folder'], item['output'])

    for state_folder in os.listdir(item['input']):

        input_path = '{}/{}'.format(item['input'], state_folder)
        # archive name: state folder name with underscores turned into hyphens,
        # followed by the staging name of this entry
        output_file = '{}-{}'.format(state_folder.replace('_', '-'), item['output'])
        zip_dir(input_path, output_folder, output_file)