In [None]:
import os
import requests
from pymongo import MongoClient
import pandas as pd
import random
from IPython.display import display

# Directory the notebook was started from; used below to locate custom.css,
# fspc.json and car.xml.
cde_pth = os.getcwd()
print(f"Code Path ::{cde_pth}")

# Base URL handed to BaseRequest when querying the DailyMed v2 services.
DAILYMED_URL = "https://dailymed.nlm.nih.gov/dailymed/services/v2"

# Defaults used by MongoDBClient when no explicit host/port/db name is given.
DB_HOST, DB_PORT = "localhost", 27017
DB_NAME = "dailymed"

# Collection names looked up inside DB_NAME.
NDC_COLXN, NDC_COLXN_NEXT = "spls", "spls_next"


# Styling notebook
from IPython.core.display import HTML
# Styling notebook
def css_styling():
    """Apply the rules in ``custom.css`` (next to the notebook) to the notebook.

    The previous implementation formatted the file content into an empty
    string and discarded the resulting ``HTML`` object, so nothing was ever
    applied. The stylesheet is now pushed to the front end explicitly with
    ``display`` so the call works from anywhere in a cell.

    Returns:
        None. The styling is a display side effect.

    Raises:
        OSError: if ``custom.css`` cannot be opened.
    """
    with open(cde_pth + "/custom.css", "r") as css_file:
        styles = css_file.read()

    # NOTE(review): it is not visible from here whether custom.css already
    # carries its own <style> wrapper, so only add one when it is missing.
    if "<style" not in styles.lower():
        styles = "<style>\n{}\n</style>".format(styles)

    display(HTML(styles))
css_styling()

# Colour scheme for the renderjson tree produced by RenderJSON.
# A bare HTML(...) expression is only rendered when it is the last expression
# of a cell; this one is followed by more code, so it is displayed explicitly.
display(HTML("""
<style>
.renderjson a              { text-decoration: none !important; }
.renderjson .disclosure    { color: crimson;
                             font-size: 100%; }
.renderjson .syntax        { color: grey; }
.renderjson .string        { color: red; }
.renderjson .number        { color: cyan; }
.renderjson .boolean       { color: plum; }
.renderjson .key           { color: lightblue; }
.renderjson .keyword       { color: lightgoldenrodyellow; }
.renderjson .object.syntax { color: lightseagreen; }
.renderjson .array.syntax  { color: lightsalmon; }
/* unvisited link */

.renderjson a:link {
  color: blue;
}

/* visited link */
.renderjson a:visited {
  color: green;
}

/* mouse over link */
.renderjson a:hover {
  color: hotpink;
}

/* selected link */
.renderjson a:active {
  color: blue;
}
</style>
"""))
#----------------------------------------------------------    
# Class used to render JSON as a collapsible tree inside the notebook.
# Based on:
#   https://stackoverflow.com/questions/18873066/pretty-json-formatting-in-ipython-notebook
#   https://github.com/caldwell/renderjson
import uuid
from IPython.display import display_javascript, display_html, display
import json

class RenderJSON(object):
    """Notebook display wrapper that renders JSON with the renderjson library.

    Args:
        json_data: either an already serialised JSON string, which is used
            as-is, or any object ``json.dumps`` can serialise (dict, list, ...).

    Raises:
        TypeError: if ``json_data`` is not a string and cannot be serialised
            by ``json.dumps``.
    """

    def __init__(self, json_data):
        # The original stored the ``json`` module itself for every non-dict
        # input, which produced broken JavaScript when displayed.
        if isinstance(json_data, str):
            self.json_str = json_data
        else:
            self.json_str = json.dumps(json_data)
        # Unique id tying the placeholder <div> to the script that fills it.
        self.uuid = str(uuid.uuid4())

    def _ipython_display_(self):
        """Emit a placeholder div, then JavaScript that fills it with the tree."""
        display_html('<div id="{}" style="height: 250px; width:100%;" class="renderjson"></div>'.format(self.uuid),
            raw=True
        )
        # NOTE(review): rawgit.com was announced as discontinued; presumably
        # this URL needs to move to another CDN - confirm it still resolves.
        display_javascript("""
        require(["https://rawgit.com/caldwell/renderjson/master/renderjson.js"], function() {
          renderjson.set_icons("+", "-");
          renderjson.set_show_to_level(1);
          document.getElementById('%s').appendChild(renderjson(%s))
        });
        """ % (self.uuid, self.json_str), raw=True) 


import json
from datetime import datetime
from typing import Any

from bson import ObjectId


class MongoJSONEncoder(json.JSONEncoder):
    """JSON encoder that serialises ``ObjectId`` and ``datetime`` via ``str()``."""

    def default(self, o: Any) -> Any:
        """Return ``str(o)`` for the supported extra types, else defer to the base class.

        The base implementation raises ``TypeError`` for anything it cannot
        serialise.
        """
        for stringified_type in (ObjectId, datetime):
            if isinstance(o, stringified_type):
                return str(o)
        return super().default(o)
    
class objdict(dict):
    """``dict`` whose items can also be read, written and deleted as attributes."""

    def __getattr__(self, name):
        # Only reached when normal attribute lookup fails, so real dict
        # methods keep working.
        if name not in self:
            raise AttributeError("No such attribute: " + name)
        return self[name]

    def __setattr__(self, name, value):
        # Every attribute assignment is stored as a dictionary item.
        self.__setitem__(name, value)

    def __delattr__(self, name):
        if name not in self:
            raise AttributeError("No such attribute: " + name)
        del self[name]
            

class MongoDBClient:
    """Small convenience wrapper around a ``pymongo.MongoClient``.

    Args:
        db_hostname: MongoDB host; defaults to ``DB_HOST``.
        db_port: MongoDB port; defaults to ``DB_PORT``.
        db_name: default database name; defaults to ``DB_NAME``.

    Attributes are ``hostname``, ``port``, ``dbName`` and ``client``. The
    client stays ``None`` until a connection is first needed.
    """

    def __init__(self, db_hostname=None,db_port=None, db_name=None):
        self.hostname = DB_HOST if db_hostname is None else db_hostname
        self.port = DB_PORT if db_port is None else db_port
        self.dbName = DB_NAME if db_name is None else db_name
        # Must exist before any getter runs: the getters test it against None
        # and used to fail with AttributeError when create_client() had not
        # been called first.
        self.client = None

    def create_client(self):
        """Create a new ``MongoClient``, remember it on the instance and return it."""
        client = MongoClient(self.hostname,self.port)
        self.client = client
        return client

    def _get_client(self):
        """Return the remembered client, creating it on first use."""
        if self.client is None:
            self.create_client()
        return self.client

    def get_list_databases(self):
        """Return the result of ``list_databases()`` on the client."""
        return self._get_client().list_databases()

    def get_dailymed_db(self,dbName=None):
        """Return the database handle for ``dbName`` (default: ``self.dbName``).

        Reuses the existing client instead of opening a fresh connection on
        every call.
        """
        client = self._get_client()
        return client[self.dbName if dbName is None else dbName]

    def get_list_collection(self, dbName=None):
        """Return the collection names of ``dbName`` (default: ``self.dbName``)."""
        return self.get_dailymed_db(dbName).list_collection_names()
            
class BaseRequest:
    """Minimal helper for issuing GET requests against a JSON API.

    Args:
        api_url: default base URL used when ``get_request`` is not given one.
    """

    def __init__(self,api_url=None):
        self.api_url = api_url

    def get_request(self,api_url=None,path_suffix=None, params=None, timeout=30):
        """GET ``api_url[/path_suffix]`` and return the decoded JSON body.

        Args:
            api_url: base URL; falls back to the URL given to the constructor.
            path_suffix: optional path appended after a ``/``.
            params: optional query parameters passed to ``requests.get``.
            timeout: seconds passed to ``requests.get`` so that a stalled
                server cannot block the caller indefinitely.

        Returns:
            The response body parsed by ``Response.json()``.

        Raises:
            ValueError: if no URL was supplied here or to the constructor
                (previously an opaque ``TypeError`` from string concatenation).
        """
        if api_url is None:
            api_url = self.api_url
        if api_url is None:
            raise ValueError("api_url must be given to BaseRequest() or get_request()")

        if path_suffix is not None:
            api_url = api_url+"/" + path_suffix
        print("Fetching URL : "+api_url)
        # NOTE(review): the HTTP status is not checked; an error response
        # is still handed to .json() as before.
        r = requests.get(url = api_url, params = params, timeout = timeout)
        return r.json()
    


In [None]:
def testClasses():
    """Smoke-test BaseRequest and MongoDBClient against the live services.

    Fetches ``spls.json`` from ``DAILYMED_URL``, opens a MongoDB client,
    prints the target collection handle and normalises the string ``"null"``
    in the two ``previous_page*`` metadata fields to ``None``.

    Returns:
        The decoded response dictionary (with the normalised metadata).

    Raises:
        KeyError: if the response has no ``metadata`` entry or the metadata
            lacks ``previous_page`` / ``previous_page_url``.
    """
    print("~~~~~~~~~~~ Test Run Classes START ~~~~~~~~~~~~~~~")
    br = BaseRequest(api_url=DAILYMED_URL)
    respo = br.get_request(None,"spls.json",{})

    mClient = MongoDBClient()
    # create_client() returns the MongoClient, not a database handle.
    client = mClient.create_client()
    print(client[DB_NAME][NDC_COLXN])

    metadata = respo["metadata"]
    for field in ("previous_page", "previous_page_url"):
        if metadata[field] == "null":
            metadata[field] = None

    # Persisting the result is intentionally left disabled:
    #client[DB_NAME][NDC_COLXN].insert_many(respo["data"])
    #client[DB_NAME][NDC_COLXN_NEXT].insert_one(metadata)

    # Printed before returning; it used to sit after the return statement
    # and was never reached.
    print("~~~~~~~~~~~ Test Run Classes END ~~~~~~~~~~~~~~~")
    return respo

respo = testClasses()
# Only the last expression of a cell is rendered automatically, so the JSON
# tree has to be displayed explicitly.
display(RenderJSON(respo))
df = pd.DataFrame.from_dict(respo["data"])
df.head(10)

In [None]:
from bs4 import BeautifulSoup
import json

# Load the XML document; the handle is closed as soon as it has been parsed.
with open('car.xml') as xml_file:
    xml_parser = BeautifulSoup(xml_file, 'xml')

# NOTE(review): fspec is not used by any code visible here; it is kept
# because other cells may still read it.
with open('fspc.json') as fspec_file:
    fspec = BeautifulSoup(fspec_file, 'html.parser')
#fspec_json=json.loads(fspec.text)

# Parsed content of fspc.json, used as the schema dictionary further down.
with open(cde_pth+"/fspc.json","r") as f:
    jsn = json.load(f)
RenderJSON(jsn)


In [None]:
#print(xml_parser.prettify())

In [None]:
class JsonSchemaUtil:
    """Helpers for inspecting a schema dictionary (``type`` / ``properties`` / ``items``).

    Args:
        file_location: directory of the schema file; defaults to ``os.getcwd()``.
        file_name: schema file name; only read when ``schema_json`` is None.
        schema_json: an already loaded schema dictionary.
    """

    def __init__(self , file_location=None,file_name=None , schema_json=None):
        if file_location is None:
            file_location = os.getcwd()
        self.file_location = file_location
        self.file_name = file_name

        # Read the file only when the caller did not supply a schema.
        if schema_json is None:
            schema_json = self.load_json(self.file_location, self.file_name)
        self.schema_json = schema_json

        for banner_line in (
            "------ JsonSchemaUtil initialized with",
            self.file_location,
            self.file_name,
            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~",
        ):
            print(banner_line)

    def load_json(self,file_location=None,file_name=None):
        """Parse ``file_location/file_name`` as JSON and return the result."""
        with open(file_location+"/"+file_name,"r") as handle:
            return json.load(handle)

    def get_object_type(self,schema_json=None):
        """Return "O", "A" or "S" for an object/array/string node, else None."""
        node = self.schema_json if schema_json is None else schema_json
        if "type" not in node:
            return None
        declared = node["type"]
        for type_name, code in (("object", "O"), ("array", "A"), ("string", "S")):
            if declared == type_name:
                return code
        return None

    def get_object_attr_schema(self,obj=None):
        """Return ``obj["properties"]``, else ``obj["items"]``, else ``obj`` itself.

        Returns None when ``obj`` is None.
        """
        if obj is None:
            return None
        for container_key in ("properties", "items"):
            if container_key in obj:
                return obj[container_key]
        return obj

    def get_key_value(self,schema_json=None, key=None) :
        """Return ``schema_json[key]`` (default schema if None), or None if absent."""
        source = self.schema_json if schema_json is None else schema_json
        if source is None or key not in source:
            return None
        return source[key]

    def get_object_schema(self,schema_json=None, key=None):
        """Return the child schema of ``key`` when it is an object or array node.

        Any other node type, or a missing key, yields None.
        """
        if schema_json is None:
            schema_json = self.schema_json

        node = self.get_key_value(schema_json,key)
        if node is not None and self.get_object_type(node) in ("O", "A"):
            return self.get_object_attr_schema(node)
        return None

    def is_only_one_attr(self,obj=None):
        """True when ``obj`` holds "type" plus exactly one other key.

        A third key is tolerated only if it is "required". Returns None when
        ``obj`` is None.
        """
        if obj is None:
            return None
        size = len(obj)
        if size == 2:
            return "type" in obj
        if size == 3:
            return "type" in obj and "required" in obj
        return False

    def is_only_one_attr_not_obj(self,obj=None):
        """True when ``obj`` declares a type other than "object" or "array"."""
        if obj is None or "type" not in obj:
            return False
        return obj["type"] not in ("object", "array")

    def is_array_node(self,schema_json=None):
        """True when the node's type is "array"."""
        return "A" == self.get_object_type(schema_json)

    def is_object_node(self,schema_json=None):
        """True when the node's type is "object"."""
        return "O" == self.get_object_type(schema_json)

    def is_single_node_no_child(self,schema_json=None):
        """Alias of ``is_only_one_attr_not_obj``."""
        return self.is_only_one_attr_not_obj(schema_json)
    
# Ad-hoc checks of JsonSchemaUtil against fspc.json and a tiny inline schema.
tmp = {"text": {"type": "string"}}
# schema_json=None makes the constructor load cde_pth/fspc.json itself.
ijs = JsonSchemaUtil(cde_pth,"fspc.json",None)
# Passing None as the schema makes the lookup use the schema held by ijs.
title1 = ijs.get_object_schema(None,"title")
print(title1)
# jsn is the fspc.json content parsed in an earlier cell.
title2 = ijs.get_object_schema(jsn,"component")
print("is_only_one_attr : ",ijs.is_only_one_attr(title2))
print("is_array_node : ",ijs.is_array_node(ijs.get_key_value(None,"component")))
print("is_object_node : ",ijs.is_object_node(ijs.get_key_value(None,"title")))
print("is_single_node_no_child : ",ijs.is_single_node_no_child(tmp["text"]))
print(ijs.is_only_one_attr_not_obj(tmp["text"]))


class XmlUtil:
    """Lazy loader and lookup helper for one XML document.

    Args:
        file_location: directory of the XML file; defaults to ``os.getcwd()``.
        file_name: name of the XML file.
    """

    def __init__(self, file_location=None,file_name=None):
        # Parsed document, filled on the first get_xml_parsed() call.
        self.bs_xml = None
        self.file_location = os.getcwd() if file_location is None  else file_location
        self.file_name = file_name

    def parse_xml_file(self,file_location=None,file_name=None):
        """Parse ``file_location/file_name`` (instance defaults if None) as XML.

        The file handle is closed once BeautifulSoup has consumed it.
        """
        if file_location is None:
            file_location = self.file_location
        if file_name is None:
            file_name = self.file_name

        with open(file_location+'/'+file_name) as xml_file:
            return BeautifulSoup(xml_file, 'xml')

    def get_xml_parsed(self,file_location=None,file_name=None):
        """Return the parsed document, parsing it on first use.

        The result is cached, so the arguments only matter on the first call.
        """
        if self.bs_xml is None:
            self.bs_xml = self.parse_xml_file(file_location,file_name)
        return self.bs_xml

    def find_attribute_by_key(self,  key, bs_xml=None):
        """Return ``bs_xml.find(key)``.

        ``bs_xml`` defaults to this instance's own parsed document. The
        previous version ignored both and searched the notebook-global
        ``xml_parser`` instead.
        """
        if bs_xml is None:
            bs_xml = self.get_xml_parsed()
        return bs_xml.find(key)

    def is_attribute_data_key(self,key):
        """True when ``key`` contains an underscore.

        The original also tested for "__", which is implied by "_".
        """
        return "_" in key
    
# Module-level helper for car.xml; file_location=None makes XmlUtil fall back
# to os.getcwd(). Nothing is parsed at construction time.
xmlutil = XmlUtil(None, "car.xml")

In [None]:
class SchemaToJsonSample:
    """Walk a schema dictionary and build a sample JSON skeleton from it.

    Args:
        file_location: directory of the schema file; defaults to ``os.getcwd()``.
        file_name: schema file name, read when ``schema_json`` is None.
        xml_file_location: directory of the XML file; defaults to ``os.getcwd()``.
        xml_file_path: name of the XML file.
        schema_json: an already loaded schema dictionary.
    """

    def __init__(self, file_location=None,
                 file_name=None, 
                 xml_file_location=None, 
                 xml_file_path=None, schema_json=None):
        self.file_location = os.getcwd() if file_location is None  else file_location
        self.file_name = file_name

        # JsonSchemaUtil loads the file itself when schema_json is None.
        self.json_util = JsonSchemaUtil(self.file_location,self.file_name,schema_json)
        self.schema_json = schema_json if schema_json is not None else self.json_util.schema_json

        self.xml_file_location = xml_file_location if xml_file_location is not None else os.getcwd()
        self.xml_file_path = xml_file_path
        self.xml_util = XmlUtil(self.xml_file_location,self.xml_file_path)

        # The banner used to name JsonSchemaUtil, duplicating that class's own
        # banner and hiding which object was being created.
        print("------ SchemaToJsonSample initialized with")
        print(self.file_location)
        print(self.file_name)        
        print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")

    def parse_json_schema(self, file_location=None,file_name=None):
        """Load and return the schema file (instance defaults if None)."""
        file_location = file_location if file_location is not None else self.file_location
        file_name = file_name if file_name is not None else self.file_name

        # The second argument used to be file_location again.
        return self.json_util.load_json(file_location,file_name)

    def get_schema_root(self, schema_json = None):
        """Return the properties/items of ``schema_json`` (default: own schema)."""
        if schema_json is None:
            schema_json = self.schema_json
        return self.json_util.get_object_attr_schema(schema_json)

    def is_object(self, schema_json = None):
        """True when the node's type is "object"."""
        if schema_json is None:
            schema_json = self.schema_json
        return self.json_util.get_object_type(schema_json) == "O"

    def is_array(self, schema_json = None):
        """True when the node's type is "array"."""
        if schema_json is None:
            schema_json = self.schema_json
        return self.json_util.get_object_type(schema_json) == "A"

    def iterate_object_properties(self,value_json=None, schema_json=None, bs_xml = None, key=None):
        """Take an object schema and mirror its container structure.

        Object properties become nested dicts (filled recursively) and array
        properties become empty lists.

        Args:
            value_json: dictionary to fill; a new one is created when None.
            schema_json: schema node to walk (default: own schema).
            bs_xml: passed through to recursive calls; not otherwise used.
            key: when given, ``schema_json[key]`` is walked and the result is
                stored under ``value_json[key]``.

        Returns:
            ``value_json``.
        """
        if value_json is None:
            value_json = {}
        if schema_json is None:
            schema_json = self.schema_json

        target = value_json
        if key is not None:
            schema_json = schema_json[key]
            if self.is_array(schema_json):
                value_json[key] = []
                return value_json
            if not self.is_object(schema_json):
                # TODO(review): the original never decided what a leaf should
                # hold (it looked the XML tag up and discarded it).
                return value_json
            value_json[key] = {}
            target = value_json[key]

        properties = self.json_util.get_object_attr_schema(schema_json)
        if not isinstance(properties, dict):
            return value_json

        for jkey, current_json in properties.items():
            # Schema keywords such as "type": "string" are not child nodes.
            if not isinstance(current_json, dict):
                continue
            if self.is_object(current_json):
                target[jkey] = self.iterate_object_properties({}, current_json, bs_xml)
            elif self.is_array(current_json):
                target[jkey] = []
            # TODO(review): leaf properties are not filled in yet.
        return value_json

    def travel_schema(self,schema_json = None, sample_json = None):
        """Print each root property, its kind and whether the XML has such a tag.

        ``sample_json`` is accepted for interface compatibility and is not used.

        Returns:
            The root property mapping that was walked.
        """
        json_root = self.get_schema_root(schema_json)
        for jkey in json_root:
            print(jkey)
            if self.is_object(json_root[jkey]):
                print("is Object")
            elif self.is_array(json_root[jkey]):
                print("is Array")
            xml_tag = self.xml_util.find_attribute_by_key(jkey)
            if xml_tag:
                print(jkey , "exit ")

        return json_root

    def sample_json_from_schema(self,schema_json = None ):
        """Run ``travel_schema`` on ``schema_json`` (default: own schema)."""
        if schema_json is None:
            schema_json = self.schema_json

        return self.travel_schema(schema_json, {})
    
    
    
# Schema and XML both live in the notebook directory; schema_json=None makes
# the helper load fspc.json itself.
sjs = SchemaToJsonSample(cde_pth,"fspc.json",cde_pth,"car.xml",None)
# None selects the schema held by sjs.
sample_json = sjs.sample_json_from_schema(None)
RenderJSON(sample_json)

In [None]:
# Output dictionary; it is passed to assign_dict_values() further down as outJson.
finalJ ={}
# The string literal below is an earlier draft of the conversion loop. It is
# kept for reference only and is never executed.
'''for key in jsn:
    print(key)
    if key == "id":        
        xp1 = xml_parser.find(key)
        if "type" in jsn[key]:
            if jsn[key]["type"] =="object":
                finalJ[key]={}
                for k1 in jsn[key]["properties"]:
                    if "_" in k1:
                        key1 = k1[1:]
                        print(":"+key1)
                        finalJ[key][key1] = xp1[key1]
print(finalJ)
'''
# NOTE(review): assign_dict_values() has its own `counter` parameter
# (default 0); this module-level value is not passed to it by the call below.
counter = 0 
def _strip_schema_key_prefix(pkey, trace_label=None):
    """Return *pkey* with its underscore marker removed.

    Keys containing "__" lose every "__"; other keys containing "_" lose their
    first character; anything else is returned unchanged. When *trace_label*
    is given, the same debug lines as before are printed.
    """
    if "__" in pkey:
        return pkey.replace("__" ,"")
    if "_" in pkey:
        rkey = pkey[1:]
        if trace_label is not None:
            print(trace_label+rkey)
        return rkey
    if trace_label is not None:
        print(pkey)
    return pkey


def assign_dict_values(schema_json=None, xml_parse=None,outJson=None, counter=0):
    """Fill *outJson* with values looked up in *xml_parse*, driven by *schema_json*.

    For every key of *schema_json* the element ``xml_parse.find(key)`` is
    looked up. If it has XML attributes, an "object" schema copies the
    attribute values named by its properties and an "array" schema yields an
    empty list. Otherwise "title" stores the element's contents, and other
    keys get a nested dict that is filled recursively.

    Fixes over the previous version: the ``xml_parse`` and ``schema_json``
    parameters are used instead of the notebook globals ``xml_parser`` and
    ``jsn``; a missing element no longer crashes the "title" case; an
    unreachable ``len(xp1.attrs) == 0`` branch is gone; the function works
    when ``outJson`` is omitted.

    Args:
        schema_json: mapping of element name to schema node.
        xml_parse: object with a ``find(name)`` method returning an element
            (with ``attrs`` and ``contents``) or None.
        outJson: dictionary to fill in place; a new one is created when None.
        counter: recursion depth, used only in the trace output.

    Returns:
        ``outJson``.
    """
    print("Running Loop ~~~~~~~~~~~~ "+str(counter))
    counter = counter + 1
    if outJson is None:
        outJson = {}
    if not isinstance(schema_json, dict) or xml_parse is None:
        return outJson

    for schema_key in schema_json:
        print("Running for :: --->"+schema_key)
        xp1 = xml_parse.find(schema_key)
        print(type(xp1))

        node_schema = schema_json[schema_key]
        # Values such as "type": "string" are schema keywords, not nodes.
        has_type = isinstance(node_schema, dict) and "type" in node_schema
        node_type = node_schema["type"] if has_type else None

        if xp1 is not None and len(xp1.attrs) > 0:
            if node_type == "object":
                outJson[schema_key] = {}
                for pkey in node_schema.get("properties", {}):
                    rkey = _strip_schema_key_prefix(pkey)
                    # None when the XML element lacks this attribute.
                    outJson[schema_key][rkey] = xp1.attrs.get(rkey)
            elif node_type == "array":
                outJson[schema_key] =[]
            continue

        print("Empty Attr :"+schema_key)
        if schema_key == "title":
            outJson[schema_key] = {"title": None if xp1 is None else xp1.contents}
            continue

        outJson[schema_key]={}
        print("000000000000 :"+ schema_key)
        if not has_type:
            continue

        if node_type =="object":
            if "properties" not in node_schema:
                print(" DING DING")
                continue
            properties = node_schema["properties"]
            for pkey in properties:
                rkey = _strip_schema_key_prefix(pkey, "Going For 1 :>")
                print("-------")
                print(pkey)
                outJson[schema_key][rkey]={}
                assign_dict_values(properties[pkey],
                                   xp1,outJson[schema_key][rkey],counter)
        elif node_type =="array":
            items = node_schema.get("items", {})
            if not isinstance(items, dict):
                continue
            # NOTE(review): the original indexed ["properties"][pkey] here
            # while iterating ["items"], which raises KeyError for a plain
            # array schema; a missing child schema is now simply skipped.
            array_properties = node_schema.get("properties", {})
            for pkey in items:
                rkey = _strip_schema_key_prefix(pkey, "Going For 2 :>")
                print(xp1)
                outJson[schema_key][rkey]={}
                assign_dict_values(array_properties.get(pkey),
                                   xp1,outJson[schema_key][rkey],counter)
        else:
            print("FFF DING DING")

    return outJson
    
    
# Fill finalJ from the parsed schema (jsn) and the parsed XML (xml_parser);
# the function returns the dictionary it was given as outJson.
value_json = assign_dict_values(jsn,xml_parser,finalJ)
print(value_json) 
#RenderJSON(value_json)

In [None]:
# finalJ is the dictionary that was passed to assign_dict_values() as outJson.
print(finalJ)