# Geospatial metadata for Novel Urbanization Metrics and Poverty (MLI)

## Author
- Kamwoo Lee (klee16@worldbank.org)

## Contents
1. [Load packages and data](#load_packages_data)
2. [Metadata information](#metadata_information)
3. [General description](#general_description)
4. [Data quality info](#data_quality_info)
5. [Upload to NADA catalog](#upload)

# 1. Load packages and data <a name="load_packages_data"></a>

In [15]:
import os
import pynada as nada
import inspect
import pandas as pd
import numpy as np
import json
import datetime
from tqdm.notebook import tqdm

In [2]:
# Directory settings, all relative to the notebook's working directory.
# NOTE(review): DATA_DIR is not referenced by any cell visible in this notebook — confirm it is still needed.
DATA_DIR = "DRC_CPF/"
# Folder holding dataset_summary_v1.xlsx and the JPEG overview images read below.
SUMMARY_DIR = "04_Novel_Urbanization_MLI_data_exploration/"
# Folder that receives the generated metadata JSON file.
RESULT_DIR = "05_Novel_Urbanization_MLI_metadata/"

In [3]:
# Both sheets live in the same summary workbook.
workbook_path = SUMMARY_DIR + "dataset_summary_v1.xlsx"

# "dataset_info" is a two-column key/value sheet: column 0 holds the field
# names, so it becomes the index, and the transposed single record is turned
# into a plain {field name: value} dict. Everything is read as text.
# `index=` and `encoding=` are not read_excel parameters (current pandas
# rejects them with a TypeError), so they are no longer passed.
dataset_info = (
    pd.read_excel(workbook_path, sheet_name="dataset_info", dtype=str, header=None)
    .set_index(0)
    .transpose()
    .to_dict(orient='records')[0]
)

# One row per layer; missing cells are replaced by empty strings.
layer_summaries = pd.read_excel(workbook_path, sheet_name="layer_summaries").fillna('')

In [4]:
# Display the per-layer summary table loaded above as a visual check.
layer_summaries

Unnamed: 0,dir,layer_name,layer_label,description,data_type,crs_name,crs_code,num_dimensions,raster_width,raster_cell_width,raster_height,raster_cell_height,raster_nodata_value,min_lon,max_lon,min_lat,max_lat,source_name,source_url,data_process_summary
0,,mli_adm.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,-200,-12.746095,4.390086,10.154247,25.005682,,,
1,,mli_cpo15.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,-200,-12.746095,4.390086,10.154247,25.005682,,,
2,,mli_cpo15_urban.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,0,-12.746095,4.390086,10.154247,25.005682,,,
3,,mli_cpo15_urban_hd.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,0,-12.746095,4.390086,10.154247,25.005682,,,
4,,mli_cpo20.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,-200,-12.746095,4.390086,10.154247,25.005682,,,
5,,mli_cpo20_urban.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,0,-12.746095,4.390086,10.154247,25.005682,,,
6,,mli_cpo20_urban_hd.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,0,-12.746095,4.390086,10.154247,25.005682,,,
7,,mli_des.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,-200,-12.746095,4.390086,10.154247,25.005682,,,
8,,mli_ele.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,-200,-12.746095,4.390086,10.154247,25.005682,,,
9,,mli_gbu.tif,,,RasterDataset,World_Mollweide,54009,1,6464,250,7204,250,-200,-12.746095,4.390086,10.154247,25.005682,,,


# 2. Metadata information <a name="metadata_information"></a>

In [5]:
# Catalog-level settings that are later handed to the NADA upload call.
dataset_id = dataset_info['dataset ID']
repository_id = "central"  # Collection ID that owns the document
published = 1  # Status: 0=draft, 1=published
overwrite = "yes"  # Overwrite document if already exists? Valid values "yes" "no"

# Record describing the metadata document itself (title, ID, producer, date).
metadata_information = dict(
    title=dataset_info['dataset title'],
    idno=dataset_id,
    producers=[
        dict(name=dataset_info['owner name'], abbr="DECAT"),
    ],
    production_date=dataset_info['creation date'],
    version="1.0",
)

# NOTE(review): every text field below still holds the literal placeholder
# "string"; these values are sent to the catalog as-is — confirm whether real
# provenance values should be filled in.
provenance = [
    dict(
        origin_description=dict(
            harvest_date="string",
            altered=True,
            base_url="string",
            identifier="string",
            date_stamp="string",
            metadata_namespace="string",
        ),
    ),
]

# One {"tag": ...} record per catalog tag.
tags = [dict(tag=label) for label in ("MLI", "NUMP")]

# Optional sections, left empty for this dataset.
lda_topics, embeddings, additional = [], [], {}

# 3. General description <a name="general_description"></a>

In [6]:
# spatialRepresentationInfo


In [7]:
# referenceSystemInfo
# Distinct CRS codes across all layers. Blank cells (empty strings left by
# fillna('')) are skipped. Codes are converted to int *before* de-duplication
# so that equal codes stored with different types collapse into one entry,
# and the result is sorted so the generated metadata is the same on every run.
epsg_codes = sorted({int(code) for code in layer_summaries['crs_code'] if code != ''})

# NOTE(review): the summary table lists World_Mollweide with code 54009, which
# is usually published under the ESRI authority rather than EPSG — confirm
# that "EPSG" is the intended codeSpace.
reference_systems = [
    {
        "code": str(code),
        "codeSpace": "EPSG"
    }
    for code in epsg_codes
]

In [8]:
# geographicBoundingBox
# The dataset extent is taken from the row(s) describing the "mli_adm.tif" layer.
is_adm_layer = layer_summaries['layer_name'] == "mli_adm.tif"
OCHA_adm0 = layer_summaries.loc[is_adm_layer]

lat_low, lat_high = OCHA_adm0['min_lat'], OCHA_adm0['max_lat']
lon_low, lon_high = OCHA_adm0['min_lon'], OCHA_adm0['max_lon']

southBoundLatitude, northBoundLatitude = min(lat_low), max(lat_high)
westBoundLongitude, eastBoundLongitude = min(lon_low), max(lon_high)

In [9]:
def _owner_party():
    """Build a fresh responsible-party record for the dataset owner.

    A new dict is returned on every call so the places that embed the owner
    (contact, cited party, point of contact, distributor) do not share one
    mutable object.
    """
    return {
        "organisationName": dataset_info['owner name'],
        "contactInfo": {
            "address": {
                # NOTE(review): key spelling reproduced exactly as written in
                # this notebook (the commented schema template in the
                # data-quality cell spells it the same way) — confirm against
                # the catalog schema before "correcting" it.
                "elctronicMailAddress": dataset_info['owner email']
            }
        },
        "role": "owner"
    }


# Short layer keys; each expands to one "raster_<key>_MLI.jpg" overview entry.
# NOTE(review): the layer files in the summary table are named "cpo15"/"cpo20"
# while the images here use "cop15"/"cop20" — confirm the image file names.
overview_keys = [
    "adm", "cop15", "cop20", "des", "ele", "gbu",
    "gpo", "gsmod", "pop", "slo", "upo15", "wat",
]
graphic_overview = [
    {
        "fileName": f"raster_{key}_MLI.jpg",
        "fileDescription": f"{key} overview",
        "fileType": "image/jpeg"
    }
    for key in overview_keys
]

# Citation of the dataset: title, key dates, edition, identifier and owner.
citation = {
    "title": dataset_info['dataset title'],
    "date": [
        {"date": dataset_info['creation date'], "type": "creation"},
        {"date": dataset_info['release date'], "type": "released"}
    ],
    "edition": "v.1",
    "editionDate": dataset_info['release date'],
    "identifier": {"code": dataset_id},
    "citedResponsibleParty": [_owner_party()],
    "presentationForm": ["mapDigital"],
    "series": {"name": "Novel Urbanization Metrics and Poverty"}
}

# Identification section: what the dataset is, who to contact, its format,
# keywords, usage constraints and geographic extent.
identification = {
    "citation": citation,
    "abstract": dataset_info['abstract'],
    "purpose": dataset_info['purpose'],
    "pointOfContact": [_owner_party()],
    "resourceMaintenance": [
        {"maintenanceAndUpdateFrequency": "asNeeded"}
    ],
    "graphicOverview": graphic_overview,
    "resourceFormat": [
        {"name": "image/tiff", "specification": "GeoTIFF"}
    ],
    "descriptiveKeywords": [
        {
            "type": "theme",
            "keyword": "Inclusive Growth (131)",
            "thesaurusName": "World Bank Theme Taxonomy and Definitions"
        },
        {
            "type": "place",
            "keyword": dataset_info['country name'],
            "thesaurusName": "ISO 3166-1"
        }
    ],
    "resourceConstraints": [
        {
            # All three constraint lists carry the single value "unrestricted".
            "legalConstraints": {
                constraint: ["unrestricted"]
                for constraint in ("useLimitation", "accessConstraints", "useConstraints")
            }
        }
    ],
    "extent": {
        "geographicElement": [
            {
                "geographicBoundingBox": {
                    "southBoundLatitude": southBoundLatitude,
                    "westBoundLongitude": westBoundLongitude,
                    "northBoundLatitude": northBoundLatitude,
                    "eastBoundLongitude": eastBoundLongitude
                },
                "geographicDescription": dataset_info['country name']
            }
        ]
    },
    "spatialRepresentationType": "grid",
    "language": ["English"],
    # NOTE(review): spelled "utf8" here but "utf-8" at the top level of the
    # description; both are kept exactly as they were — confirm which is intended.
    "characterSet": [
        {"codeListValue": "utf8"}
    ],
    "topicCategory": ["society"],
    "supplementalInformation": ""
}

# Top-level description record sent to the catalog.
description = {
    "idno": dataset_id,
    "language": "ENG",
    "characterSet": {"codeListValue": "utf-8"},
    "hierarchyLevel": ["dataset"],
    "contact": [_owner_party()],
    "dateStamp": dataset_info['release date'],
    "metadataStandardName": "ISO 19115-1, ISO 19110, ISO/TS 19139",
    "dataSetURI": "http://microdatalibqa.worldbank.org/index.php",
    # Left empty: no spatial-representation entries are supplied for this dataset.
    "spatialRepresentationInfo": [],
    "referenceSystemInfo": reference_systems,
    "identificationInfo": [identification],
    "distributionInfo": {
        "distributionFormat": [
            {"name": "image/tiff", "specification": "GeoTIFF"}
        ],
        "distributor": [_owner_party()]
    }
}

# 4. Data quality info <a name="data_quality_info"></a>

In [10]:
# Data-quality section of the description. It is an empty list for this
# dataset: the only content between the brackets is the commented-out
# template below, kept as a reference for the shape of a lineage entry
# (statement, process steps with processor contacts, and cited sources).
dataQualityInfo = [
    # {
    #     "scope": "dataset",
    #     "lineage": {
    #         "statement": "string",
    #         "processStep": [
    #             {
    #                 "description": "string",
    #                 "rationale": "string",
    #                 "dateTime": "string",
    #                 "processor": [
    #                     {
    #                         "individualName": "string",
    #                         "organisationName": "string",
    #                         "positionName": "string",
    #                         "contactInfo": {
    #                             "phone": {
    #                                 "voice": "string",
    #                                 "facsimile": "string"
    #                             },
    #                             "address": {
    #                                 "deliveryPoint": "string",
    #                                 "city": "string",
    #                                 "postalCode": "string",
    #                                 "country": "string",
    #                                 "elctronicMailAddress": "string"
    #                             },
    #                             "onlineResource": {
    #                                 "linkage": "string",
    #                                 "name": "string",
    #                                 "description": "string",
    #                                 "protocol": "string",
    #                                 "function": "string"
    #                             }
    #                         },
    #                         "role": "string"
    #                     }
    #                 ],
    #                 "source": [
    #                     {
    #                         "description": "string",
    #                         "sourceCitation": {
    #                             "title": "string",
    #                             "alternateTitle": "string",
    #                             "date": [
    #                                 {
    #                                     "date": "string",
    #                                     "type": "string"
    #                                 }
    #                             ],
    #                             "edition": "string",
    #                             "editionDate": "string",
    #                             "identifier": {
    #                                 "authority": "string",
    #                                 "code": None
    #                             },
    #                             "citedResponsibleParty": [
    #                                 {
    #                                     "individualName": "string",
    #                                     "organisationName": "string",
    #                                     "positionName": "string",
    #                                     "contactInfo": {
    #                                         "phone": {
    #                                             "voice": "string",
    #                                             "facsimile": "string"
    #                                         },
    #                                         "address": {
    #                                             "deliveryPoint": "string",
    #                                             "city": "string",
    #                                             "postalCode": "string",
    #                                             "country": "string",
    #                                             "elctronicMailAddress": "string"
    #                                         },
    #                                         "onlineResource": {
    #                                             "linkage": "string",
    #                                             "name": "string",
    #                                             "description": "string",
    #                                             "protocol": "string",
    #                                             "function": "string"
    #                                         }
    #                                     },
    #                                     "role": "string"
    #                                 }
    #                             ],
    #                             "presentationForm": [
    #                                 "string"
    #                             ],
    #                             "series": {
    #                                 "name": "string",
    #                                 "issueIdentification": "string",
    #                                 "page": "string"
    #                             },
    #                             "otherCitationDetails": "string",
    #                             "collectiveTitle": "string",
    #                             "ISBN": "string",
    #                             "ISSN": "string"
    #                         }
    #                     }
    #                 ]
    #             }
    #         ]
    #     }
    # }
]

In [13]:
# Attach the data-quality section to the description record (mutates `description` in place).
# NOTE(review): this cell's execution count (13) is later than the JSON export
# cell that follows it (11) — confirm with a fresh top-to-bottom run that the
# exported file includes this key.
description['dataQualityInfo'] = dataQualityInfo

# 5. Upload to NADA catalog <a name="upload"></a>

In [11]:
# Bundle every metadata section into a single record. The keys mirror the
# keyword arguments later passed to nada.create_geospatial_dataset.
metadata = {
    'metadata_information': metadata_information,
    'description': description,
    'provenance': provenance,
    'tags': tags,
    'lda_topics': lda_topics,
    'embeddings': embeddings,
    'additional': additional
}

# Create the output folder if needed so a fresh checkout does not fail on open().
os.makedirs(RESULT_DIR, exist_ok=True)

# Keep a local JSON copy of exactly what is sent to the catalog. The encoding
# is pinned so the file does not depend on the platform default.
with open(RESULT_DIR + 'MLI_NUMP_metadata.json', 'w', encoding='utf-8') as fp:
    json.dump(metadata, fp, indent=4)

In [12]:
# Catalog connection settings are kept outside the notebook in a header-less
# CSV: one row per catalog, URL in column 0 and API key in column 1.
API_INFO_ROW = 4  # zero-based row of the catalog used for this upload

api_info = pd.read_csv('../API_info.csv', header=None)
api_url, api_key = api_info.iloc[API_INFO_ROW, 0], api_info.iloc[API_INFO_ROW, 1]

nada.set_api_url(api_url)
nada.set_api_key(api_key)

In [13]:
# Collect the upload arguments first so the call itself stays short:
# catalog settings followed by each metadata section.
upload_kwargs = dict(
    dataset_id=dataset_id,
    repository_id=repository_id,
    published=published,
    overwrite=overwrite,
    metadata_information=metadata_information,
    description=description,
    provenance=provenance,
    tags=tags,
    lda_topics=lda_topics,
    embeddings=embeddings,
    additional=additional,
)

# Create (or overwrite, per `overwrite`) the geospatial entry and show the reply.
response = nada.create_geospatial_dataset(**upload_kwargs)
print(response)

Geospatial dataset successfully added to the catalog.
                                                                  0
id                                                            14588
repositoryid                                                central
type                                                     geospatial
idno                                        MLI_2022_NUMP_GEO_v01_M
title             Geospatial Dataset for Novel Urbanization Metr...
year_start                                                     2020
year_end                                                       2020
nation                                                         Mali
authoring_entity                                                   
published                                                         1
created                                                  1650429942
changed                                                  1650429942
varcount                                                      

In [19]:
# Every file in the summary folder whose name ends in "jpg" is attached to the
# dataset as a map resource, in the order os.listdir returns them.
jpg_names = [name for name in os.listdir(SUMMARY_DIR) if name.endswith("jpg")]

for jpg_name in jpg_names:
    nada.add_resource(
        dataset_id=dataset_id,
        dctype="map",
        dcformat="image/jpeg",
        title='[Visualization] ' + jpg_name,
        filename=SUMMARY_DIR + jpg_name,
        overwrite='yes'
    )

You provided a resource file. Processing...
Uploading the file...
File successfully uploaded.
Resource successfully added to the dataset.
You provided a resource file. Processing...
Uploading the file...
File successfully uploaded.
Resource successfully added to the dataset.
You provided a resource file. Processing...
Uploading the file...
File successfully uploaded.
Resource successfully added to the dataset.
You provided a resource file. Processing...
Uploading the file...
File successfully uploaded.
Resource successfully added to the dataset.
You provided a resource file. Processing...
Uploading the file...
File successfully uploaded.
Resource successfully added to the dataset.
You provided a resource file. Processing...
Uploading the file...
File successfully uploaded.
Resource successfully added to the dataset.
You provided a resource file. Processing...
Uploading the file...
File successfully uploaded.
Resource successfully added to the dataset.
You provided a resource file. Proc

In [20]:
# Upload the "ele" overview image from the summary folder as this dataset's thumbnail.
nada.upload_thumbnail(dataset_id, SUMMARY_DIR + "raster_ele_MLI.jpg")

Uploading thumbnail...
Thumbnail successfully uploaded.
