## Converting CSV of Karen's example into ShEx

Links
* https://github.com/dcmi/dcap/blob/master/painting/profile2Instance1.ipynb - this notebook
* https://github.com/dcmi/dcap/blob/master/painting/profile2Instance1.csv - Karen's profile (the CSV input converted to ShEx below)

In [65]:
import csv

# Read the CSV lines eagerly so the file handle is closed right away instead of
# leaking for the life of the kernel. csv.DictReader accepts any iterable of
# strings, so `reader` still yields rows lazily for the cells that follow.
# 'utf-8-sig' drops a leading byte-order mark; newline='' is what the csv
# module asks for so that it can handle line endings itself.
with open('profile2Instance1.csv', newline='', encoding='utf-8-sig') as csv_file:
    reader = csv.DictReader(list(csv_file))
reader.fieldnames

['Entity_name',
 'Entity_label',
 'Property',
 'Property_label',
 'Cardinality',
 'Value',
 'Value_type',
 'Annotation']

In [66]:
# Materialise every remaining record from `reader` as a plain dict,
# one dict per CSV row, keyed by the column names.
all_rows = list(map(dict, reader))
all_rows

[{'Entity_name': 'book',
  'Entity_label': 'Book',
  'Property': 'dct:creator',
  'Property_label': 'Author',
  'Cardinality': '0..-1',
  'Value': 'person',
  'Value_type': 'entity',
  'Annotation': 'Author is not required; no limit on the number'},
 {'Entity_name': '',
  'Entity_label': '',
  'Property': 'dct:title',
  'Property_label': 'Title',
  'Cardinality': '1..1',
  'Value': '',
  'Value_type': 'literal',
  'Annotation': 'Each book must have a title'},
 {'Entity_name': '',
  'Entity_label': '',
  'Property': 'dct:date',
  'Property_label': 'Year of publication',
  'Cardinality': '1..1',
  'Value': '',
  'Value_type': 'xsd:year',
  'Annotation': 'Only the year, 9999'},
 {'Entity_name': 'person',
  'Entity_label': 'Person',
  'Property': 'foaf:Name',
  'Property_label': 'Name',
  'Cardinality': '1..1',
  'Value': '',
  'Value_type': 'literal',
  'Annotation': 'Each person has one name'},
 {'Entity_name': '',
  'Entity_label': '',
  'Property': 'foaf:mbox',
  'Property_label': 'Ema

In [67]:
# Namespace declarations printed at the top of the generated ShEx schema,
# one PREFIX line per vocabulary used by the profile.
prefixes = "\n".join([
    "PREFIX dct: <http://purl.org/dc/terms/>",
    "PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>",
    "PREFIX foaf: <http://xmlns.com/foaf/0.1/>",
])

In [68]:
# Closing brace printed after the last shape of the generated schema.
end_matter = "}"

In [69]:
# Which value types occur in the profile, in row order?
[row['Value_type'] for row in all_rows]

['entity', 'literal', 'xsd:year', 'literal', 'URI', 'xsd:year']

In [70]:
# Distinct cardinality strings the converter has to understand.
{row['Cardinality'] for row in all_rows}

{'0..-1', '0..1', '1..1'}

In [71]:
import pandas as pd
# Show the same CSV as a table, for a visual check of the rows parsed above.
pd.read_csv('profile2Instance1.csv')

Unnamed: 0,Entity_name,Entity_label,Property,Property_label,Cardinality,Value,Value_type,Annotation
0,book,Book,dct:creator,Author,0..-1,person,entity,Author is not required; no limit on the number
1,,,dct:title,Title,1..1,,literal,Each book must have a title
2,,,dct:date,Year of publication,1..1,,xsd:year,"Only the year, 9999"
3,person,Person,foaf:Name,Name,1..1,,literal,Each person has one name
4,,,foaf:mbox,Email,0..1,,URI,Email is optional but only one allowed
5,,,dct:date,Birth year,0..1,,xsd:year,"Only the year, 9999"


In [72]:
# Convert the profile rows in `all_rows` into ShExC text.
#
# A row with a non-empty 'Entity_name' opens a new shape; every row with a
# 'Property' adds one triple constraint to the shape that is currently open.
# The first entity encountered also becomes the schema's start shape.


def _shex_cardinality(cardinality):
    """Translate a 'min..max' cardinality string into a ShExC cardinality suffix.

    A max of -1 means "unbounded". A blank (or missing) cell yields '' which,
    like '1..1', leaves the ShEx default of exactly one in place.

    Returns '', '?', '*' or '+' for the common cases and an explicit repeat
    range ('{n}', '{min,}' or '{min,max}') for everything else.

    Raises ValueError when the text is not of the form 'min..max' or the
    bounds are inconsistent, rather than silently emitting "exactly one".
    """
    text = (cardinality or '').strip()
    if not text:
        return ''
    low_text, _, high_text = text.partition('..')
    try:
        low, high = int(low_text), int(high_text)
    except ValueError:
        raise ValueError(
            f"Unrecognised cardinality {cardinality!r}; expected 'min..max'"
        ) from None
    if low < 0 or high < -1 or (high != -1 and high < low):
        raise ValueError(f"Inconsistent cardinality bounds {cardinality!r}")
    shorthand = {(1, 1): '', (0, 1): '?', (0, -1): '*', (1, -1): '+'}
    if (low, high) in shorthand:
        return shorthand[(low, high)]
    if high == -1:
        return f"{{{low},}}"
    if high == low:
        return f"{{{low}}}"
    return f"{{{low},{high}}}"


def _triple_constraint(prop, value_type, value, card):
    """Format one indented ShExC triple-constraint line, newline included.

    The spacing of each variant is kept exactly as this notebook has always
    printed it, so previously recorded output stays valid.
    """
    if value_type == "xsd:year":
        # NOTE(review): the datatype is emitted as written in the CSV; XML
        # Schema's year datatype is xsd:gYear - confirm which one is intended.
        return f"    {prop} xsd:year{card} ;\n"
    if value_type == "literal":
        return f"    {prop} xsd:string {card} ;\n"
    if value_type == "entity":
        # 'Value' names another shape in the same schema.
        return f"    {prop} @<{value}>{card} ;\n"
    if not value:
        # Any other value type without a value (e.g. 'URI') is left unconstrained.
        # NOTE(review): ShEx's `IRI` node kind would be a tighter fit for 'URI'.
        return f"    {prop} . {card} ;\n"
    # Any other value type with a value: emit the value as a one-item value set.
    return f"    {prop} [{value}] {card} ;\n"


schema = []
start = ''
started = False
for row in all_rows:
    entity_name = row['Entity_name']
    prop = row['Property']  # named `prop` so the builtin `property` is not shadowed
    property_label = row['Property_label']
    annotation = row['Annotation']

    if entity_name:
        if not start:
            # The first entity in the CSV becomes the start shape.
            start = f"start = @<{entity_name}>"
            schema.append(f"{start}\n\n")
        if started:
            # Close the previous shape before opening the next one.
            schema.append("}\n\n")
        schema.append(f"<{entity_name}> {{\n")
        started = True

    if property_label:
        schema.append(f"    # {repr(property_label)} {annotation}\n")

    if not prop:
        # Nothing to constrain on this row; skip it instead of emitting a
        # malformed constraint with an empty predicate.
        continue

    card = _shex_cardinality(row['Cardinality'])
    schema.append(_triple_constraint(prop, row['Value_type'], row['Value'], card))

print(prefixes, "\n")
print("".join(schema), end="")
if started:
    # Only close a shape if one was actually opened (e.g. not for an empty CSV).
    print("\n", end_matter)

PREFIX dct: <http://purl.org/dc/terms/>
PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/> 

start = @<book>

<book> {
    # 'Author' Author is not required; no limit on the number
    dct:creator @<person>* ;
    # 'Title' Each book must have a title
    dct:title xsd:string  ;
    # 'Year of publication' Only the year, 9999
    dct:date xsd:year ;
}

<person> {
    # 'Name' Each person has one name
    foaf:Name xsd:string  ;
    # 'Email' Email is optional but only one allowed
    foaf:mbox . ? ;
    # 'Birth year' Only the year, 9999
    dct:date xsd:year? ;

 }
