# Generate one schema automatically from one example

In [1]:
# example data has been anonymized
data = {"@context": {
  "mls": "http://www.w3.org/ns/mls#",
  "dct": "http://purl.org/dc/terms/",
  "schema": "http://schema.org/",
  "iot": "http://iotschema.org/",
  "sosa" : "http://www.w3.org/ns/sosa",
  "ssn" : "http://www.w3.org/ns/ssn/",
  "td": "http://www.w3.org/ns/td#",
  "adiiot" : "http://www.amlog.com/schema/adiiot#"
  },

  "schema:description" : "Sensing/Audio & inference(Anomoly detection & recognition for an electric motor)",
  "dct:license" : "Company/Restricted",
  
  "sosa:Platform": {
	"td:id": "urn:uuid1:ccd39f72-0000-0000-0000-000000000000",
	"iot:Microphone" : "urn:uuid5:5c5ef83f-0000-0000-0000-000000000000"
	},
  
  "mls:Dataset" : {
	   
    "schema:AudioObject" : {   
	   "@type": "AudioObject",
	   "@id" : "5c5ef83f-8c49-0000-0000-0000-000000000000_00000000",
	   "dct:creator" : "5c5ef83f-8c49-0000-0000-0000-000000000000",
	   "schema:description" : "Audio data captured using Microphone, in LPCM/16-bit encoding",
		 
	   "mls:DataCharacteristic" : "audio/L16;width=16bit;channel=1;rate=44100",
	   "schema:encodingFormat": "audio/wav",
	   "schema:contentUrl" : "http://some_url/data/0000-0000-0000-000000000000.wav"
      }
	},
		
	"adiiot:inferenceData" : {
	  "mls:Dataset" : "5c5ef83f-0000-0000-000000000000_00000000",
	  "dct:source" : "5c5ef83f-0000-0000-000000000000_00000000",
	  "mls:Run" : "URN:UUID4:734a4667-0000-0000-000000000000_00000000",
	  "mls:ModelCharacteristic" : "classical",
	  
	"sosa:Result":[
	  { 
	   "@type" : "Featurizer vectors",
	   "dct:description" : "feature vectors generated by Model",
	   "dct:publisher" : "account_dev",
       "schema:dateCreated" : "2021-08-02T20:31",
	   "dc:accessRights" : "Company",
	   
	   "adiiot:numberOfFeaturesVectors" : "32",
  	   "adiiot:FeaturesVectors" : [
                138.964,
                290.935,
                702.526,
                762.162,
                936.691,
                985.553,
                1528.44,
                969.775,
                1078.67,
                998.349,
                1119.36,
                1510.06,
                1006.12,
                953.424,
                989.735,
                986.928,
                1144.12,
                1016.29,
                1000.56,
                998.099,
                1046.79,
                1025.35,
                1007.77,
                1039.77,
                1076.97,
                1238.56,
                1055.45,
                1033.42,
                1125.65,
                1288.81,
                1366.08,
                95841.9
        ]
    },
	
	{
	"dct:description" : "outlier model output record",
	"dct:source" : "5c5ef83f-0000-0000-000000000000_00000000",	 
    "dct:publisher" : "account_dev",		 
	
	"mls:Model" : "OutlierModel",
	"@id" : "5e3868e00000000000000",
	"mls:Implementation" : "5c1a00000000000000000",
      "mls:Run" : "URN:UUID4:0000000000-0000-0000-000000000000000",
	"adiiot:distance" : 204311,
	"adiiot:closest_centroid" : 82,
	"adiiot:outlier_score" : 90.9653,
	"adiiot:centroid_rareness" : 4.26592,
	"adiiot:is_outlier" : 1
	},
	{
	  "dct:description" : "category recognition model output",
	  "dct:source" : "0000000-0000-0000-000000000000000", 
	  "dct:publisher" : "account_dev",		
	  "@id" : "5e3868eb00000000000000",		
	  
	  "mls:Model" : "CentroidSmoothing",        
      "mls:Run" : "URN:UUID4:734a4667-0000-0000-000000000000000",
	  "adiiot:categoryTrigger" : {
        "adiiot:name" : "1",
        "adiiot:probability" : 1
      }
      
      }
      ]
 }
}

In [2]:
# create a json file from that example
import json
with open('data.txt', 'w') as f:
  json.dump(data, f, ensure_ascii=False)

In [3]:
# install genson: pip install genson
from genson import SchemaBuilder

builder = SchemaBuilder()
filename = 'data.txt'
with open(filename, 'r') as f:
    datastore = json.load(f)
    builder.add_object(datastore )

# create a json schema from the example file
my_schema = builder.to_schema()

## View the schema generated automatically

In [4]:
my_schema

{'$schema': 'http://json-schema.org/schema#',
 'type': 'object',
 'properties': {'@context': {'type': 'object',
   'properties': {'mls': {'type': 'string'},
    'dct': {'type': 'string'},
    'schema': {'type': 'string'},
    'iot': {'type': 'string'},
    'sosa': {'type': 'string'},
    'ssn': {'type': 'string'},
    'td': {'type': 'string'},
    'adiiot': {'type': 'string'}},
   'required': ['adiiot', 'dct', 'iot', 'mls', 'schema', 'sosa', 'ssn', 'td']},
  'schema:description': {'type': 'string'},
  'dct:license': {'type': 'string'},
  'sosa:Platform': {'type': 'object',
   'properties': {'td:id': {'type': 'string'},
    'iot:Microphone': {'type': 'string'}},
   'required': ['iot:Microphone', 'td:id']},
  'mls:Dataset': {'type': 'object',
   'properties': {'schema:AudioObject': {'type': 'object',
     'properties': {'@type': {'type': 'string'},
      '@id': {'type': 'string'},
      'dct:creator': {'type': 'string'},
      'schema:description': {'type': 'string'},
      'mls:DataChar

## Generate data automatically from a schema

In [5]:
# install jsf: pip install jsf
from jsf import JSF

In [6]:
faker = JSF(my_schema)
faker.generate()

{'@context': {'mls': 'dolor sit magnam, repellendus nobis quas libero',
  'dct': 'amet exercitationem ipsum accusantium placeat sit',
  'schema': 'accusantium exercitationem accusantium ipsum esse',
  'iot': 'culpa! sit repellendus consectetur illum nobis',
  'sosa': 'architecto modi adipisicing esse possimus ipsum,',
  'ssn': 'possimus culpa! amet illum odit illum modi',
  'td': 'exercitationem ipsum, placeat ipsum esse magnam,',
  'adiiot': 'architecto modi quas architecto'},
 'schema:description': 'reiciendis odit reiciendis exercitationem',
 'dct:license': 'esse quas magnam, amet Hic modi officiis placeat',
 'sosa:Platform': {'td:id': 'possimus officiis possimus magnam, exercitationem',
  'iot:Microphone': 'odit repellendus magnam, modi modi veniam placeat'},
 'mls:Dataset': {'schema:AudioObject': {'@type': 'molestias, magnam, consectetur repellendus sit',
   '@id': 'illum odit libero sit ipsum elit. Lorem',
   'dct:creator': 'placeat ipsum nobis consectetur amet consectetur',
   '

# Inject the data to mongodb

In [7]:
from pymongo import MongoClient
client = MongoClient()

In [8]:
db = client.test_database

In [9]:
collection = db.test_collection

In [10]:
collection

Collection(Database(MongoClient(host=['localhost:27017'], document_class=dict, tz_aware=False, connect=True), 'test_database'), 'test_collection')

In [11]:
outputs = db.outputs
output_id = outputs.insert_one(data).inserted_id
output_id

ObjectId('6176b747b8da892bdc94a36f')

In [12]:
db.list_collection_names()

['outputs']

In [13]:
import pprint
pprint.pprint(outputs.find_one())

{'@context': {'adiiot': 'http://www.amlog.com/schema/adiiot#',
              'dct': 'http://purl.org/dc/terms/',
              'iot': 'http://iotschema.org/',
              'mls': 'http://www.w3.org/ns/mls#',
              'schema': 'http://schema.org/',
              'sosa': 'http://www.w3.org/ns/sosa',
              'ssn': 'http://www.w3.org/ns/ssn/',
              'td': 'http://www.w3.org/ns/td#'},
 '_id': ObjectId('6176afcd72faed858c46f90a'),
 'adiiot:inferenceData': {'dct:source': '5c5ef83f-0000-0000-000000000000_00000000',
                          'mls:Dataset': '5c5ef83f-0000-0000-000000000000_00000000',
                          'mls:ModelCharacteristic': 'classical',
                          'mls:Run': 'URN:UUID4:734a4667-0000-0000-000000000000_00000000',
                          'sosa:Result': [{'@type': 'Featurizer vectors',
                                           'adiiot:FeaturesVectors': [138.964,
                                                                      

In [14]:
print(db.list_collection_names())

['outputs']


## Inject several

In [15]:
outputs_list = [faker.generate() for _ in range(3)]

In [16]:
result = outputs.insert_many(outputs_list)
result.inserted_ids

[ObjectId('6176b74cb8da892bdc94a370'),
 ObjectId('6176b74cb8da892bdc94a371'),
 ObjectId('6176b74cb8da892bdc94a372')]