## Zip all data files into a staging area for upload to Dataverse

In [1]:
import config
import os
import zipfile

In [2]:
# each entry pairs an input data folder (from config) with the name of the
# staging dataset its zipped contents are written to
manifest = [
    dict(input=config.cities_graphml_folder,
         output='cities-street_networks-graphml'),
    dict(input=config.cities_shapefile_folder,
         output='cities-street_networks-shapefiles'),
    dict(input=config.neighborhoods_graphml_folder,
         output='neighborhoods-street_networks-graphml'),
    dict(input=config.neighborhoods_shapefile_folder,
         output='neighborhoods-street_networks-shapefiles'),
    dict(input=config.urbanized_areas_graphml_folder,
         output='urbanized_areas-street_networks-graphml'),
    dict(input=config.urbanized_areas_shapefile_folder,
         output='urbanized_areas-street_networks-shapefiles'),
]

In [3]:
# zip a whole directory
def zip_dir(input_path, output_folder, output_file):
    """
    Zip every file under a directory into `<output_folder>/<output_file>.zip`.

    Parameters
    ----------
    input_path : str
        directory to archive; it is walked recursively
    output_folder : str
        folder the archive is written to; created if it does not exist
    output_file : str
        name of the archive, without the `.zip` extension

    Returns
    -------
    None
    """
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    output_path = '{}/{}.zip'.format(output_folder, output_file)

    # the context manager closes the archive even if a write fails part-way
    with zipfile.ZipFile(file=output_path, mode='w', compression=zipfile.ZIP_DEFLATED) as zf:
        for root, _, files in os.walk(input_path):
            for file in files:
                input_file = os.path.join(root, file)

                # store each file under its path relative to input_path, so the
                # archive preserves the folder structure below the zipped
                # directory regardless of how long that directory's name is or
                # which folders sit above it
                arcname = os.path.relpath(input_file, start=input_path)
                zf.write(filename=input_file, arcname=arcname)

In [4]:
%%time
# zip each state-level folder of every dataset in the manifest into its own
# archive inside that dataset's staging folder
for item in manifest:
    print(item['output'])

    # all of a dataset's archives share one staging folder, so build it once
    output_folder = '{}/{}'.format(config.staging_folder, item['output'])

    # sorted so the processing order is the same on every run
    for state_folder in sorted(os.listdir(item['input'])):

        input_path = '{}/{}'.format(item['input'], state_folder)

        # skip stray files in the input folder: only directories hold data to
        # zip, and zipping a plain file would just create an empty archive
        if not os.path.isdir(input_path):
            continue

        output_file = '{}-{}'.format(state_folder.replace('_', '-'), item['output'])
        zip_dir(input_path, output_folder, output_file)

cities-street_networks-graphml
cities-street_networks-shapefiles
neighborhoods-street_networks-graphml
neighborhoods-street_networks-shapefiles
urbanized_areas-street_networks-graphml
urbanized_areas-street_networks-shapefiles
Wall time: 2h 24min 35s
