In [183]:
# Step 1: Read the Fermi LAT notice text
import requests
from bs4 import BeautifulSoup
import urllib.request

# Step 2: Create the json example
import re
import json

# Step 3: Create the json schema
import genson

# Step 4: Validate example against schema
import jsonschema

In [184]:
## Step 1: Read Fermi LAT GCN notice from the url, ignore the first line with "////..."
# To read text from a URL using Python, requests module is used.
url = 'https://gcn.gsfc.nasa.gov/fermi_lat_mon_trans.html'

html = requests.get(url).text
soup = BeautifulSoup(html, 'html.parser')

table = soup.find('table')
rows = table.find_all('tr')

ref_nums = []
full_url = []
for row in rows:
    cols = row.find_all('a')
    if cols:
        ref_number = cols[0].text.strip()
        ref_nums.append(ref_number)
        
        # URL of each Fermi LAT notice
        full_url.append("https://gcn.gsfc.nasa.gov/other/"+str(ref_number)+"_fermi.txt")
    
        
url_ = full_url[0]

match_slash = []
with urllib.request.urlopen(url_) as f:
    text = f.read().decode('utf-8')
    lines = text.split("\n")
    filtered_lines = [line for line in lines if line != "//////////////////////////////////////////////////////////////////////"]
    new_text = "\n".join(filtered_lines)

print(new_text)

TITLE:           GCN/FERMI NOTICE
NOTICE_DATE:     Thu 23 Feb 23 15:49:32 UT
NOTICE_TYPE:     Fermi-LAT Monitor
SOURCE_OBJ:      BLLac_86400.png
REF_NUM:         1677167372
RA:              330.680d {+22h 02m 43s} (J2000),
                 330.919d {+22h 03m 41s} (current),
                 330.164d {+22h 00m 39s} (1950)
DEC:             +42.278d {+42d 16' 40"} (J2000),
                 +42.390d {+42d 23' 25"} (current),
                 +42.036d {+42d 02' 09"} (1950)
CURR_FLUX:       1.70e-06 +- 6.50e-08 [ph/cm2/sec]
BASE_FLUX:       6.99e-07 +- 1.59e-07 [ph/cm2/sec]
SIGNIFICANCE:    5.59 [sigma]
TIME_SCALE:      0  {0=1day, 1=1week}
ENERGY_BAND:     0.1 - 300.0 [GeV]
OUTBURST_DATE:   19997 TJD;    53 DOY;   23/02/22 (yy/mm/dd)
OUTBURST_TIME:   43200.00 SOD {12:00:00.00} UT
SOLN_STATUS:     0x0
LC_URL:          http://fermi.gsfc.nasa.gov/FTP/glast/data/lat/catalogs/asp/current/lightcurves/BLLac_86400.png
SUN_POSTN:       336.62d {+22h 26m 28s}   -9.76d {-09d 45' 42"}
SUN_DIST:        

In [185]:
## Step 2: Convert the Fermi LAT GCN notice into json example file; fermi_lat_example.json
# extract key-value pairs using regex
pattern = r'([A-Z_]+):\s+(.*)'
matches = re.findall(pattern, new_text)

# create dictionary
d = {}
for match in matches:
    key = match[0]
    value = match[1]
    d[key] = value

# convert dictionary to JSON and write to file
with open('fermi_lat_example.json', 'w') as f:
    json.dump(d, f, indent=4)
    
# load the JSON example file
with open('fermi_lat_example.json') as f:
    example_data = json.load(f)

print(example_data)
print('\n \n')

## Step 3: Generating json schema using genson, for the json example file
# Create a Genson schema object
schema = genson.Schema()

# Add the data to the schema object
schema.add_object(example_data)

# Add $id and $schema properties to the schema object
schema.add_schema({"$id": "http://example.com/schema", "$schema": "http://json-schema.org/draft-07/schema#"})

# Generate the JSON schema
json_schema = schema.to_json(indent=4)

# Write the JSON schema to a file
with open("fermi_lat_schema.json", "w") as f:
    json.dump(json.loads(json_schema), f, indent=4)
    
# load the JSON schema file
with open('fermi_lat_schema.json') as f:
    schema_data = json.load(f)

print(schema_data)

{'TITLE': 'GCN/FERMI NOTICE', 'NOTICE_DATE': 'Thu 23 Feb 23 15:49:32 UT', 'NOTICE_TYPE': 'Fermi-LAT Monitor', 'SOURCE_OBJ': 'BLLac_86400.png', 'REF_NUM': '1677167372', 'RA': '330.680d {+22h 02m 43s} (J2000),', 'DEC': '+42.278d {+42d 16\' 40"} (J2000),', 'CURR_FLUX': '1.70e-06 +- 6.50e-08 [ph/cm2/sec]', 'BASE_FLUX': '6.99e-07 +- 1.59e-07 [ph/cm2/sec]', 'SIGNIFICANCE': '5.59 [sigma]', 'TIME_SCALE': '0  {0=1day, 1=1week}', 'ENERGY_BAND': '0.1 - 300.0 [GeV]', 'OUTBURST_DATE': '19997 TJD;    53 DOY;   23/02/22 (yy/mm/dd)', 'OUTBURST_TIME': '43200.00 SOD {12:00:00.00} UT', 'SOLN_STATUS': '0x0', 'LC_URL': 'http://fermi.gsfc.nasa.gov/FTP/glast/data/lat/catalogs/asp/current/lightcurves/BLLac_86400.png', 'SUN_POSTN': '336.62d {+22h 26m 28s}   -9.76d {-09d 45\' 42"}', 'SUN_DIST': '52.41 [deg]   Sun_angle= 0.4 [hr] (West of Sun)', 'MOON_POSTN': '19.50d {+01h 17m 59s}   +6.71d {+06d 42\' 18"}', 'MOON_DIST': '55.67 [deg]', 'MOON_ILLUM': '15 [%]', 'GAL_COORDS': '92.59,-10.44 [deg] galactic lon,lat of

In [186]:
## Step 4: validate the schema example against the json schema
from jsonschema.validators import Draft7Validator

def validate_json(json_file_path, schema_file_path):
    with open(schema_file_path) as f:
        schema = json.load(f)

    with open(json_file_path) as f:
        json_data = json.load(f)

    # Create a JSON schema validator from the schema data
    schema_validator = Draft7Validator(schema_data)
    
    # Validate the example data against the schema
    try:
        schema_validator.validate(example_data)
        print("JSON example data is valid against the schema!")
    except jsonschema.exceptions.ValidationError as e:
        print("JSON example data is not valid against the schema:")
        print(e)

if __name__ == "__main__":
    json_file_path = "fermi_lat_example.json"
    schema_file_path = "fermi_lat_schema.json"
    validate_json(json_file_path, schema_file_path)



JSON example data is valid against the schema!
