In [1]:
# # Create config file (one-off; uncomment this cell to regenerate model_and_configs/model_configs.json)
# import json

# model_configs = dict(
    
#     HJDataset = dict(
#         faster_rcnn_R_50_FPN_3x = 'https://www.dropbox.com/s/j4yseny2u0hn22r/config.yml?dl=1',
#         mask_rcnn_R_50_FPN_3x =  'https://www.dropbox.com/s/4jmr3xanmxmjcf8/config.yml?dl=1',
#         retinanet_R_50_FPN_3x = 'https://www.dropbox.com/s/z8a8ywozuyc5c2x/config.yml?dl=1',
#     ),
#     PubLayNet = dict(
#         faster_rcnn_R_50_FPN_3x = 'https://www.dropbox.com/s/f3b12qc4hc0yh4m/config.yml?dl=1',
#         mask_rcnn_R_50_FPN_3x = 'https://www.dropbox.com/s/u9wbsfwz4y0ziki/config.yml?dl=1',
#         mask_rcnn_X_101_32x8d_FPN_3x = 'https://www.dropbox.com/s/nau5ut6zgthunil/config.yaml?dl=1',
#     ),
#     PrimaLayout = dict(
#         mask_rcnn_R_50_FPN_3x = 'https://www.dropbox.com/s/yc92x97k50abynt/config.yaml?dl=1',
#     ),
#     NewspaperNavigator = dict(
#         faster_rcnn_R_50_FPN_3x = 'https://www.dropbox.com/s/wnido8pk4oubyzr/config.yml?dl=1',
#     ),
#     TableBank = dict(
#         faster_rcnn_R_50_FPN_3x = 'https://www.dropbox.com/s/7cqle02do7ah7k4/config.yaml?dl=1',
#         faster_rcnn_R_101_FPN_3x = 'https://www.dropbox.com/s/h63n6nv51kfl923/config.yaml?dl=1',
#     ),
#     MFD_faster = dict(
#         faster_rcnn_R_50_FPN_3x = 'https://www.dropbox.com/s/ld9izb95f19369w/config.yaml?dl=1'
#     )
# )

# with open('model_and_configs/model_configs.json', 'w') as config_references:
#     json.dump(model_configs, config_references, indent=4)

In [2]:
import json
import os
import subprocess

import yaml

In [3]:
# Read the dataset -> model -> config-URL mapping stored as JSON
with open('model_and_configs/model_configs.json') as json_file:
    model_configs = json.loads(json_file.read())

In [4]:
def _wget(url: str, destination: str) -> None:
    """Download ``url`` to ``destination`` with wget, raising if the transfer fails."""
    # Write to a sibling ".part" file first: a failed or interrupted transfer then
    # never leaves a truncated file at ``destination`` that a later run would
    # mistake for a finished download.
    partial_path = destination + '.part'
    try:
        # Argument list instead of a shell string: characters such as '?' and '&'
        # in the URL reach wget verbatim instead of being interpreted by a shell.
        # check=True raises subprocess.CalledProcessError on a non-zero exit code.
        subprocess.run(['wget', url, '-O', partial_path], check=True)
        os.replace(partial_path, destination)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


def download_models(model_configs: dict, output_folder: str, target_datasets: list=None, overwrite: bool=False):
    """Download layout parser models.

    For every model, the config file is fetched from its URL, the weights URL is
    read from the config's ``MODEL.WEIGHTS`` entry, the weights are fetched, and
    the config is rewritten so that ``MODEL.WEIGHTS`` points at the local file.
    Files are stored as
    ``<output_folder>/<dataset>/<model_name>/{config.yml, model_final.pth}``.

    Args:
        model_configs: Mapping ``{dataset: {model_name: config_url}}``.
        output_folder: Root folder under which the model folders are created.
        target_datasets: Datasets to keep. ``None`` (or an empty list) keeps
            every dataset in ``model_configs``.
        overwrite: When False, a model whose config and weights files both
            already exist is skipped.

    Raises:
        subprocess.CalledProcessError: If wget exits with a non-zero status.
        FileNotFoundError: If the ``wget`` executable cannot be found.
    """

    # Keep only target models (all if target_datasets is None)
    if target_datasets:
        model_configs = {
            dataset: models
            for dataset, models in model_configs.items()
            if dataset in target_datasets
        }

    for dataset, models in model_configs.items():

        print(f'Dataset {dataset}:')

        for model_name, config_path in models.items():

            # Create model folder under output_folder
            model_folder = os.path.join(output_folder, dataset, model_name)
            os.makedirs(model_folder, exist_ok=True)

            # Set output paths for config and model files
            output_config_path = os.path.join(model_folder, 'config.yml')
            output_model_path = os.path.join(model_folder, 'model_final.pth')

            # Skip models that are already fully present unless overwriting
            already_downloaded = (
                os.path.exists(output_config_path)
                and os.path.exists(output_model_path)
            )
            if already_downloaded and not overwrite:
                continue

            # Print model
            print(f'• Downloading model {model_name}...')

            # Download config file
            _wget(config_path, output_config_path)

            # Extract online path to model weights
            # NOTE(security): yaml.unsafe_load can construct arbitrary Python
            # objects from the downloaded file, so only use trusted config URLs.
            # TODO: switch to yaml.safe_load if the configs are confirmed to be
            # plain YAML.
            with open(output_config_path) as config_file:
                config_content = yaml.unsafe_load(config_file)
            model_path = config_content['MODEL']['WEIGHTS']

            # Download model weights
            _wget(model_path, output_model_path)

            # Update model weights' path (only reached when both downloads succeeded)
            config_content['MODEL']['WEIGHTS'] = output_model_path
            with open(output_config_path, 'w') as config_file:
                yaml.dump(config_content, config_file)

        print('Downloaded!')
        print()

In [5]:
# Fetch the three listed datasets; with overwrite=False, models already on disk are skipped
download_models(
    model_configs=model_configs,
    output_folder='./model_and_configs',
    target_datasets=['PrimaLayout', 'PubLayNet', 'TableBank'],
    overwrite=False,
)

Dataset PubLayNet:
Downloaded!

Dataset PrimaLayout:
Downloaded!

Dataset TableBank:
Downloaded!

