# Publish Report
* * * 

In [None]:
# Get the data to be processed and compute the overall stats.
# NOTE: when running on mybinder you cannot query the dev or test endpoints
# (those are the tiers you would reach by changing BASE_URL).
import requests
import json
from datetime import date
import pandas as pd
from pprint import pprint

# Get the publish report for a WAF.
# The public endpoint does not require auth.

BASE_URL = "https://data.noaa.gov/waf"

filename = "PublishReport.txt"  # leave PublishReport.txt if you want to grab a file and save it manually from a WAF
                                # set to '' if you want to be prompted or use the API primarily; ignored if using the API
record_group = 'NOAA/NESDIS/ncei/oer/'  # set to '' if you want to be prompted to select;
                                        # set to your record group/WAF name if you have only one WAF
                                        # ignored if you use file

request_type = "file"   # file retrieves the file named in the parameter filename - ignores variable record_group;
                        # api will find a record group WAF at data.noaa.gov/waf/ - ignores variable filename

# An empty request_type means "ask the user".
if not request_type:
    request_type = input("Enter 'file' to manually download a file to disk as PublishReport.txt. Or, enter API to query API")

# # Testing a file with more variety -- notebook not fully functional until OSIM is on PROD
# filename = "PublishReport.txt"
# record_group = ''

if request_type == "file":
    filename = "PublishReport.txt"
elif not record_group:
    # API mode without a configured record group: prompt for one.
    record_group = input("Enter d record group name")
    filename = record_group

# Trim surrounding slashes so a record group written with a trailing '/'
# (like the default above) does not produce a '//' inside the URL path.
WAF_URL = f"{BASE_URL}/{record_group.strip('/')}/iso/reports/PublishReport.txt"
# NOTE: this rebinds the name `date` from the imported datetime.date class to
# today's date instance for the remainder of the session.
date = date.today()

def process_file(resp):
    """Yield one record dict for every recognised line of a publish report.

    Args:
        resp: iterable of text lines (for example an open text file).

    Yields:
        dict with the keys ``process_type``, ``uuid``, ``filename`` and
        ``message``, in the order the matching lines appear:

        * lines starting with the OSIM search-details prefix become a
          ``"success"`` record; the text after the prefix is split on four
          spaces into uuid and filename;
        * lines starting with ``"Error"`` become a ``"Bad UUID"``,
          ``"Service Unavailable"`` or ``"500 Server Error"`` record.

        All other lines are ignored, so input without any recognised line
        yields nothing.

    Raises:
        IndexError: if a search-details line has no four-space separator
            between uuid and filename.
    """
    # TODO: are both the registry and the search message needed? Will one show
    # up without the other when there is a problem?
    osim_registery = "OSIM Details: https://data.dev.ncei.noaa.gov/onestop/api/registry/metadata/collection/comet/"
    osim_search = "OSIM Details: https://data.dev.ncei.noaa.gov/onestop/collections/details/"
    error_uuid = "...Register Lookup Error 204:uuid already in use "
    error_osim = "...OSIM registry web service is down..."
    error_prefix = "Error processing "

    for line in resp:
        # Only the search message is matched for now; match on "OSIM Details"
        # instead if both the registry and the search message are wanted.
        if line.startswith(osim_search):
            is_registry = line.startswith(osim_registery)
            search_term = osim_registery if is_registry else osim_search

            details = line.strip('\n').split(search_term)[1]
            log_item = details.split("    ")
            yield {"process_type": "Register" if is_registry else "Search",
                   "uuid": log_item[0],
                   "filename": log_item[1],
                   "message": "success"}
        elif line.startswith("Error"):
            is_bad_uuid = error_uuid in line
            if is_bad_uuid:
                search_term = error_uuid
                process_type = "UUID already in use"
                osim_error = "Bad UUID"
            else:
                search_term = error_osim
                process_type = "Error OSIM"
                if "503 Service Unavailable" in line:
                    osim_error = "Service Unavailable"
                else:
                    osim_error = "500 Server Error"

            text = line.strip('\n')
            # Remove the literal prefix only. str.strip(chars) treats its
            # argument as a set of characters and would also eat matching
            # characters from the start of the filename and the end of the uuid.
            if text.startswith(error_prefix):
                text = text[len(error_prefix):]
            log_item = text.split(search_term)

            # Emit the record unconditionally so that an error appearing
            # before any other record is not lost.
            yield {"process_type": process_type,
                   "uuid": log_item[1] if is_bad_uuid else "",
                   "filename": log_item[0],
                   "message": osim_error}

def getPulishData(request_type):
    """Read the publish report from disk and return its parsed records.

    ``request_type`` is accepted but not used by the current body: the report
    is always read from the module-level ``filename`` and every line is handed
    to ``process_file``. Returns the resulting records as a list.
    """
    # Fetching the report from WAF_URL is disabled for now; the local file is
    # used instead. The intended download step is kept here for reference:
    # print(WAF_URL)
    # resp = requests.get(WAF_URL, allow_redirects=True)
    # if resp.status_code == 200:
    #    with open(filename, 'wb') as f:
    #        f.write(resp.content)

    with open(filename) as report_file:
        return list(process_file(report_file))

# Parse the report and load the records into a DataFrame.
raw_data = list(getPulishData(request_type))
publish_results = pd.DataFrame.from_dict(raw_data)

# Index by the second-to-last column (expected to be 'filename' for the
# records built by process_file), then order the rows by filename.
publish_results = publish_results.set_index(publish_results.columns[-2])
publish_results = publish_results.sort_values(by='filename', ascending=True)

print("Total Records Processed:", len(publish_results))
print("____________________________________________")

# Number of records per outcome message, in order of first appearance.
stats = (
    publish_results
    .groupby(['message'], sort=False)
    .size()
    .reset_index(name='Count')
)
print(stats.to_string())

## Bad UUIDs
* * *

In [None]:
# Bad UUIDs: records that did not register because the UUID already exists.
bad_uuid = publish_results.query('message == "Bad UUID"')

# Table CSS: left-align header and data cells, and stop the data cell that is
# the second child of its row from wrapping.
styles = [
    {'selector': 'th', 'props': [('text-align', 'left')]},
    {'selector': 'td', 'props': [('text-align', 'left')]},
    {'selector': 'td:nth-child(2)', 'props': [('white-space', 'nowrap')]},
]

print("Total Bad UUIDs", len(bad_uuid))
print("____________________________________________")

html = bad_uuid.style.set_table_styles(styles)
display(html)

## 500 Server Error -- OSIM unknown error
* * *

In [None]:
# Records whose message is "500 Server Error" (OSIM unknown error).
server_error = publish_results.query('message == "500 Server Error"')

# Table CSS: left-align header and data cells, and stop the data cell that is
# the second child of its row from wrapping.
styles = [
    {'selector': 'th', 'props': [('text-align', 'left')]},
    {'selector': 'td', 'props': [('text-align', 'left')]},
    {'selector': 'td:nth-child(2)', 'props': [('white-space', 'nowrap')]},
]

print("Total 500 Server Error:", len(server_error))
print("____________________________________________")

html = server_error.style.set_table_styles(styles)
display(html)

## OSIM Service Unavailable
* * *

In [None]:
# Records whose message is "Service Unavailable" (OSIM could not be reached).
server_down = publish_results.query('message == "Service Unavailable"')

# Table CSS: left-align header and data cells, and stop the data cell that is
# the second child of its row from wrapping.
styles = [
    {'selector': 'th', 'props': [('text-align', 'left')]},
    {'selector': 'td', 'props': [('text-align', 'left')]},
    {'selector': 'td:nth-child(2)', 'props': [('white-space', 'nowrap')]},
]

print("Total OSIM Service Unavailable:", len(server_down))
print("____________________________________________")

html = server_down.style.set_table_styles(styles)
display(html)

## Success
* * *

In [None]:
# Success results: records whose message is "success".
# Only `success` is assigned here; the earlier chained assignment also
# overwrote `bad_uuid` with the success rows.
success = publish_results.query('message == "success"')

# Table CSS: left-align header and data cells, and stop the data cell that is
# the second child of its row from wrapping.
styles = [
    {'selector': 'th', 'props': [('text-align', 'left')]},
    {'selector': 'td', 'props': [('text-align', 'left')]},
    {'selector': 'td:nth-child(2)', 'props': [('white-space', 'nowrap')]},
]

print("Total Success:", len(success))
print("____________________________________________")

html = success.style.set_table_styles(styles)
display(html)