# Indicator Self Registration

This notebook demonstrates how to register an indicator, given an indicator dataset in the CauseMos-compliant format and its associated metadata in a schema-compliant JSON format.

First, make sure the appropriate environment variables are set (in the terminal, before running this notebook):

```
export AWS_ACCESS_KEY=KEY_GOES_HERE
export AWS_SECRET_KEY=SECRET_KEY_GOES_HERE
export DOJO_USER=DOJO_USERNAME_GOES_HERE
export DOJO_PWD=DOJO_PASSWORD_GOES_HERE
```

**Requirements**:

```
boto3==1.16.41
pandas==1.2.4
requests==2.25.1
pydantic==1.8.2
```

**Contact**: please reach out to dojo@jataware.com with any questions or issues.

In [1]:
import os
import requests
from requests.auth import HTTPBasicAuth
import json
import sys
import pandas as pd
import boto3

In order to validate your indicator metadata against the `indicator` schema you should run the following:

In [2]:
# Put the directory holding the `IndicatorSchema` module at the front of the
# import search path so the import below resolves.
# Update path accordingly to `dojo/api/src/validation`:
sys.path.insert(0, "../api/validation")
from IndicatorSchema import IndicatorMetadataSchema

In [3]:
def validate_metadata(metadata_path):
    """Load an indicator metadata JSON file and check it against the schema.

    Args:
        metadata_path: Path to the metadata JSON file.

    Returns:
        The metadata as decoded from the JSON file.

    Raises:
        Exception: If constructing ``IndicatorMetadataSchema`` from the
            metadata fails; the underlying error text is embedded in the
            message.

    Errors raised while opening the file or decoding its JSON are not wrapped
    and propagate unchanged.
    """
    with open(metadata_path, 'r') as metadata_file:
        metadata = json.load(metadata_file)

    # Building the schema object is the validation step; the instance itself
    # is not needed afterwards.
    try:
        IndicatorMetadataSchema(**metadata)
    except Exception as err:
        raise Exception(f"Metadata is not schema compliant: {err}")

    return metadata

In [4]:
def validate_indicator(indicator_path):
    """Check that an indicator parquet file has every required column.

    Args:
        indicator_path: Path to the indicator parquet file.

    Returns:
        None when all required columns are present.

    Raises:
        Exception: If one or more required columns are absent. The missing
            columns are listed in the same order as ``required`` so the
            message is stable from run to run.
    """
    required = ['timestamp','country','admin1','admin2','admin3','lat','lng','feature','value']
    df = pd.read_parquet(indicator_path)

    # Preserve the order of `required` instead of relying on set iteration
    # order, which would make the error message vary between runs.
    present = set(df.columns)
    missing = [column for column in required if column not in present]
    if missing:
        raise Exception(f"Indicator is missing required column(s): {missing}")

In [5]:
def register_indicator(indicator_path, metadata_path):
    """Validate an indicator, upload its dataset to S3 and register it with Dojo.

    Credentials are read from the environment: ``AWS_ACCESS_KEY`` and
    ``AWS_SECRET_KEY`` for S3, ``DOJO_USER`` and ``DOJO_PWD`` for Dojo.

    Args:
        indicator_path: Path to the indicator parquet file to upload.
        metadata_path: Path to the indicator metadata JSON file. Its ``id``
            value is used to name the uploaded S3 object.

    Returns:
        None. Progress messages and the body of the Dojo response are printed.

    Raises:
        Exception: If the Dojo credentials are not set, or if metadata or
            indicator validation fails.
    """
    s3_accessKey = os.getenv("AWS_ACCESS_KEY")
    s3_secretKey = os.getenv("AWS_SECRET_KEY")
    dojo_username = os.getenv("DOJO_USER")
    dojo_password = os.getenv("DOJO_PWD")

    # Fail before anything is uploaded publicly if registration cannot be
    # authenticated afterwards.
    if not dojo_username or not dojo_password:
        raise Exception("DOJO_USER and DOJO_PWD environment variables must be set")

    bucket_name = "jataware-world-modelers"
    url = "http://dojo-test.com/indicators/"
    header = {"content-type": "application/json"}

    # Validate data and metadata
    ind_meta = validate_metadata(metadata_path)
    validate_indicator(indicator_path)

    # Store data to S3
    print("Uploading dataset to S3...")
    session = boto3.Session(aws_access_key_id=s3_accessKey, aws_secret_access_key=s3_secretKey)
    s3_client = session.client("s3")
    s3_key = f"indicators/causemosified/{ind_meta['id']}.parquet.gzip"
    s3_client.upload_file(indicator_path,
                          bucket_name,
                          s3_key,
                          ExtraArgs={'ACL':'public-read'}) # here we make the file public
    # Build the public URL from the same bucket and key used for the upload so
    # the two can never drift apart.
    s3_url = f"https://{bucket_name}.s3.amazonaws.com/{s3_key}"
    ind_meta["data_paths"] = [s3_url]
    print(f"Uploaded to {s3_url}")

    # Register metadata
    print("Registering to Dojo...")
    response = requests.post(url,
                             data=json.dumps(ind_meta),
                             headers=header,
                             auth=HTTPBasicAuth(dojo_username, dojo_password))
    print(response.text)

In [6]:
# Validate, upload and register the example indicator. Note: this uploads the
# dataset to S3 with a public-read ACL and posts the metadata to Dojo.
register_indicator('example.parquet.gzip','example.json')

Uploading dataset to S3...
Uploaded to https://jataware-world-modelers.s3.amazonaws.com/indicators/causemosified/ETH-CENSUS.parquet.gzip
Registering to Dojo...
Created indicator with id = ETH-CENSUS
