Skip to content

Commit

Permalink
Example and test for ICPSR dimension with frequency counts.
Browse files Browse the repository at this point in the history
  • Loading branch information
agbeltran committed Mar 15, 2018
1 parent 04c62b4 commit e7d91e9
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 14 deletions.
75 changes: 62 additions & 13 deletions dats/dats_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,24 @@
# Directory holding the DATS JSON schemas, resolved relative to this module.
DATS_schemasPath = os.path.join(os.path.dirname(__file__), "../json-schemas")
# Directory holding the JSON-LD context files that accompany the schemas.
DATS_contextsPath = os.path.join(os.path.dirname(__file__), "../json-schemas/contexts")

def validate_instance(path, filename, schema_filename, error_printing):
    """Validate a JSON instance file against one of the DATS JSON schemas.

    Parameters:
        path: directory containing the instance file.
        filename: name of the JSON instance file to validate.
        schema_filename: name of the schema file inside DATS_schemasPath.
        error_printing: 0 -> print each top-level error message;
                        1 -> print each sub-error (schema path + message);
                        any other value -> use validator.validate() and log
                        the first exception raised.

    Returns:
        True when the instance validates against the schema, False otherwise.
    """
    # `with` guarantees both files are closed even when json.load raises,
    # replacing the original nested try/finally blocks.
    with open(join(DATS_schemasPath, schema_filename)) as schema_file:
        schema = json.load(schema_file)

    # The resolver lets $ref entries in the schema resolve against sibling
    # schema files in DATS_schemasPath.
    resolver = RefResolver('file://' + DATS_schemasPath + '/' + schema_filename, schema)
    validator = Draft4Validator(schema, resolver=resolver)
    logger.info("Validating %s against %s ", filename, schema_filename)

    with open(join(path, filename)) as instance_file:
        instance = json.load(instance_file)

    if error_printing == 0:
        errors = sorted(validator.iter_errors(instance), key=lambda e: e.path)
        for error in errors:
            print(error.message)
        return len(errors) == 0

    elif error_printing == 1:
        errors = sorted(validator.iter_errors(instance), key=lambda e: e.path)
        for error in errors:
            for suberror in sorted(error.context, key=lambda e: e.schema_path):
                print(list(suberror.schema_path), suberror.message, sep=", ")
        if len(errors) == 0:
            logger.info("...done")
            return True
        return False

    else:
        try:
            validator.validate(instance, schema)
            logger.info("...done")
            return True
        except Exception as e:
            logger.error(e)
            return False


def validate_dataset(path, filename, error_printing):
    """Validate a DATS dataset JSON file against the top-level dataset schema.

    Parameters:
        path: directory containing the dataset instance file.
        filename: name of the dataset JSON file to validate.
        error_printing: error-reporting mode, forwarded unchanged to
            validate_instance (0 = plain messages, 1 = sub-errors,
            other = exception-based validation).

    Returns:
        True when the dataset validates, False otherwise.
    """
    # The original body duplicated validate_instance wholesale (and the
    # commented-out delegation dropped the return value); delegate instead so
    # there is a single validation code path to maintain.
    return validate_instance(path, filename, "dataset_schema.json", error_printing)


Expand Down
56 changes: 56 additions & 0 deletions json-instances/ICPSR-Dimensions/Dimension-33581-0001-AQ5.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"identifier": {
"identifier": "AQ5",
"identifierSource": ""
},
"name": {
"valueIRI": "",
"value": "Current marital status"
},
"values": [
{
"value" :"1",
"label": "Married",
"unweightedFrequency": 35,
"%": 6.2
},
{
"value" :"2",
"label": "Widowed",
"unweightedFrequency": 11,
"%": 2.0
},
{
"value" :"3",
"label": "Divorced",
"unweightedFrequency": 82,
"%": 14.6
},
{
"value" :"4",
"label": "Separated",
"unweightedFrequency": 56,
"%": 9.9
},
{
"value" :"5",
"label": "Never married",
"unweightedFrequency": 379,
"%": 67.3
}
],
"extraProperties": [
{
"category": "landingPage",
"values": [
"http://www.icpsr.umich.edu/icpsrweb/ICPSR/ssvd/studies/33581/datasets/0001/variables/AQ5"
]
}
]
}
6 changes: 5 additions & 1 deletion tests/test_dats_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_validate_dats_contexts(self):
self.assertTrue(dats_model.validate_dats_contexts())

def test_validate_dataset_1(self):
    """SBGrid-179.json must validate; mode 0 prints plain error messages."""
    # The diff residue kept both the pre-change (mode 1) and post-change
    # (mode 0) assertions; the committed version uses mode 0.
    self.assertTrue(dats_model.validate_dataset(self.path, "SBGrid-179.json", 0))

def test_validate_dataset_2(self):
    """ClinicalTrials.gov-NCT00001372.json must validate (sub-error mode)."""
    is_valid = dats_model.validate_dataset(self.path, "ClinicalTrials.gov-NCT00001372.json", 1)
    self.assertTrue(is_valid)
Expand Down Expand Up @@ -47,6 +47,10 @@ def test_validate_dataset_8(self):
def test_validate_dataset_9(self):
    """NYU-10040-dats.json must validate (sub-error mode)."""
    is_valid = dats_model.validate_dataset(self.path, "NYU-10040-dats.json", 1)
    self.assertTrue(is_valid)

def test_validate_instance(self):
    """An ICPSR dimension instance must validate against dimension_schema.json."""
    dimensions_dir = os.path.join(self.path, "ICPSR-Dimensions")
    is_valid = dats_model.validate_instance(
        dimensions_dir, "Dimension-33581-0001-AQ5.json", "dimension_schema.json", 0
    )
    self.assertTrue(is_valid)




Expand Down

0 comments on commit e7d91e9

Please sign in to comment.