## Set working directory for imports

In [1]:
import json
import requests
import os

#Set working directory to src on your machine
os.chdir('/Users/justinniestroy-admin/Documents/Round2/Fairscape_Repos/mds_python/src')


## Setup jinja env with function to autolabl links

In [4]:
from jinja2 import Environment, FileSystemLoader
from pydantic import BaseModel, Field
from typing import List, Optional
from rdflib import Graph, URIRef, Literal, Namespace
from rdflib.namespace import RDF, XSD

from fairscape_mds.mds.models.user import User
from fairscape_mds.mds.models.fairscape_base import *

# Set up the Jinja2 environment and load the template
env = Environment(loader=FileSystemLoader('../'))

import re


FAIRSCAPE_URL = '"https://fairscape.pods.uvarc.io/'
def add_link(value):
    """For values that match ark or look like urls add a hyperlink"""
    url_pattern = r'^(http|https)://[^\s]+'
    if re.match(IdentifierPattern, value):
        return f'<a href={FAIRSCAPE_URL}{value}">{value}</a>'
    elif re.match(url_pattern, value):
        return f'<a href="{value}">{value}</a>'
    return value

env.filters['add_link'] = add_link

def convert_to_rdf(json_data):
    # Sample dataset in JSON format

    # Convert JSON string to Python dictionary
    data = json.loads(json_data)

    # Create an RDF graph
    g = Graph()

    # Define a namespace
    ex = Namespace("https://w3id.org/EVI#")

    # Convert JSON data to RDF triples
    for key, value in data.items():
        subject = URIRef(ex["dataset"])
        predicate = URIRef(ex[key])
        if isinstance(value, str):
            # Simple conversion of string values to literals
            obj = Literal(value, datatype=XSD.string)
        elif isinstance(value, int):
            # Example of converting integer values to literals
            obj = Literal(value, datatype=XSD.integer)
        else:
            # Add more conditions for other data types as needed
            obj = Literal(str(value), datatype=XSD.string)
        g.add((subject, predicate, obj))

    # Serialize the graph to RDF/XML and Turtle formats
    rdf_xml_data = g.serialize(format='application/rdf+xml')
    turtle_data = g.serialize(format='turtle')

    return rdf_xml_data, turtle_data



## User Template

In [5]:
# Create an instance of the User class with some data
user = User(
    guid = "ark:99999/fake-user",
    name = 'fake user',
    email="user@example.com",
    password="password123",
    organizations=["Org1", "Org2"],
    projects=["Project1", "Project2"],
    datasets=['ark:99999/fake-dataset', "Dataset2"],
    rocrates=["ROCrate1", "ROCrate2"],
    software=['ark:99999/fake-software', "Software2"],
    computations=["Computation1", "Computation2"]
)
json_data = json.dumps(user.dict(by_alias=True), default=str, indent=2)
rdf, turtle = convert_to_rdf(json_data)
template = env.get_template('./development notebooks/vizualization/user_template.html')

# Render the template with the user data
rendered_html = template.render(user=user, 
                                json = json_data,
                                rdf_xml = rdf,
                                turtle = turtle, 
                                type = 'User')

# Print or save the rendered HTML
with open('../development notebooks/vizualization/jinja_created/user_example.html', 'w') as f:
    f.write(rendered_html)

## Software Example

In [57]:
from fairscape_mds.mds.models.software import Software
sample_software = Software(
    guid = 'ark:99999/fake-software',
    name = 'Software Example',
    description = 'Fake Software to show template working.',
    owner="John Doe",
    distribution=["https://example.com/software/download", "https://example.com/software/mirror"],
    usedBy=['ark:99999/fake-software','ark:99999/fake-computation'],
    sourceOrganization='ark:99999/fake-org',
    includedInDataCatalog="ark:99999/fake-catalog"
)

json_data = json.dumps(sample_software.dict(by_alias=True), default=str, indent=2)
rdf, turtle = convert_to_rdf(json_data)
template = env.get_template('./development notebooks/vizualization/software_template.html')

# Render the template with the user data
rendered_html = template.render(software=sample_software, 
                                json = json_data,
                                rdf_xml = rdf,
                                turtle = turtle, 
                                type = 'Software')

# Print or save the rendered HTML
with open('../development notebooks/vizualization/jinja_created/software_example.html', 'w') as f:
    f.write(rendered_html)


## Dataset Example

In [58]:
from fairscape_mds.mds.models.dataset import Dataset
sample_dataset = Dataset(
    guid = 'ark:99999/fake-dataset',
    metadataType = 'EVI:Dataset',
    name = 'Fake Input Dataset Example',
    description = 'Fake Input Dataset to show template working.',
    owner="John Doe",
    author='Roy Kniestroy',
    distribution=["https://example.com/data/download", "https://example.com/data/mirror"],
    usedBy=['ark:99999/fake-software','ark:99999/fake-computation'],
    sourceOrganization='ark:99999/fake-org',
    includedInDataCatalog="ark:99999/fake-catalog"
)

json_data = json.dumps(sample_dataset.dict(by_alias=True), default=str, indent=2)
rdf, turtle = convert_to_rdf(json_data)

template = env.get_template('./development notebooks/vizualization/dataset_template.html')


# Render the template with the user data
rendered_html = template.render(
                dataset=sample_dataset,
                json = json_data,
                rdf_xml=rdf_xml_data,
                turtle=turtle_data,
                type='Dataset'
            )

# Print or save the rendered HTML
with open('../development notebooks/vizualization/jinja_created/dataset_example.html', 'w') as f:
    f.write(rendered_html)


## Computation Example

In [72]:
import datetime
from fairscape_mds.mds.models.computation import Computation
sample_compuation = Computation(
    guid = 'ark:99999/fake-compuattion',
    metadataType = 'evi:Computation',
    name = 'Fake Input Compuation Example',
    description = 'Fake Input Compuation to show template working.',
    owner="ark:99999/A-Person",
    author='Roy Kniestroy',
    usedDataset = 'ark:99999/fake-dataset',
    usedSoftware='ark:99999/fake-software',
    sourceOrganization='ark:99999/fake-org',    
    sourceProject='ark:99999/fake-project',
    generated = ['ark:99999/an-output'],
    command = "python3 test.py",
    container = 'Container',
    containerId = 'ark:99999/a-container',
    dateFinished = datetime.datetime(2023,1,1),
    dateCreated = datetime.datetime(2023,1,1)
)

json_data = json.dumps(sample_compuation.dict(by_alias=True), default=str, indent=2)
rdf, turtle = convert_to_rdf(json_data)

template = env.get_template('./development notebooks/vizualization/computation_template.html')


# Render the template with the user data
rendered_html = template.render(
                computation=sample_compuation,
                json = json_data,
                rdf_xml=rdf,
                turtle=turtle,
                type='Dataset'
            )

# Print or save the rendered HTML
with open('../development notebooks/vizualization/jinja_created/computation_example.html', 'w') as f:
    f.write(rendered_html)


## Evidence Graph Example

In [74]:
import datetime
from fairscape_mds.mds.models.evidencegraph import EvidenceGraph
sample_evidencegraph = EvidenceGraph(
    guid = 'ark:99999/fake-evidencegraph',
    metadataType = 'evi:Computation',
    name = 'Fake Input Compuation Example',
    description = 'Fake Input Compuation to show template working.',
    owner="ark:99999/A-Person"
)

json_data = json.dumps(sample_evidencegraph.dict(by_alias=True), default=str, indent=2)
rdf, turtle = convert_to_rdf(json_data)

template = env.get_template('./development notebooks/vizualization/evidencegraph_template.html')


# Render the template with the user data
rendered_html = template.render(
                evidencegraph=sample_evidencegraph,
                json = json_data,
                rdf_xml=rdf,
                turtle=turtle,
                type='Dataset'
            )

# Print or save the rendered HTML
with open('../development notebooks/vizualization/jinja_created/evidencegraph_example.html', 'w') as f:
    f.write(rendered_html)


## ROCrate Example

## Schema Example

In [7]:
schema = {
    "@context": {
        "@vocab": "https://schema.org/",
        "EVI": "https://w3,org/EVI#"
    },
    "@id": "ark:59852/schema-apms-music-embedding-izNjXSs",
    "@type": "evi:Schema",
    "name": "APMS Embedding Schema",    
    "description": "Tabular format for APMS music embeddings from PPI networks from the music pipeline from the B2AI Cellmaps for AI project",    
    "properties": {    
    "Experiment Identifier": {    
        "description": "Identifier for the APMS experiment responsible for generating the raw PPI used to create this embedding vector",    
        "index": 0,                                 
        "type": "string",    
        "pattern": "^APMS_[0-9]*$" 
    },                                 
    "Gene Symbol": {                                             
        "description": "Gene Symbol for the APMS bait protien",    
        "index": 1,    
        "valueURL": "http://edamontology.org/data_1026",    
        "type": "string",          
        "pattern": "^[A-Za-z0-9\-]*$"    
    },                                                                          
    "MUSIC APMS Embedding": {                                                                
        "description": "Embedding Vector values for genes determined by running node2vec on APMS PPI networks. Vector has 1024 values for each bait protien",    
        "index": "2::",                                                            
        "type": "array",    
        "maxItems": 1024,                               
        "minItems": 1024,                                    
        "uniqueItems": False,                                        
        "items": {
            "type": "number"
        }                                                              
    }},                        
    "type": "object",                                   
    "additionalProperties": True,                                               
    "required": ["Experiment Identifier", "Gene Symbol", "MUSIC APMS Embedding"],    
    "seperator": ",",                         
    "header": False,    
    "examples": []    
}

def validate_type(value):
    valid_types = {'integer', 'number', 'string', 'array','boolean'}
    if value is not None:
        if value not in valid_types:
            raise ValueError(f"Type must be one of {valid_types}")
    return value

class Item(BaseModel):
    type: str = Field(...)
    _validate_type = validator('type', allow_reuse=True)(validate_type)

class Property(BaseModel, extra = Extra.allow):
    description: str = Field(...)
    index: Union[str, int] = Field(...)
    type: str = Field(...)
    value_url: Optional[str] = Field(default = None, alias = 'value-url')
    pattern: Optional[str] = Field(default = None)
    items: Optional[Item] = Field(default = None, alias = 'items')
    min_items: Optional[int] = Field(default = None, alias = 'min-items')
    max_items: Optional[int] = Field(default = None, alias = 'max-items')
    unique_items: Optional[bool] = Field(default = None, alias = 'unique-items')

    @validator('index')
    def validate_index(cls, value):
        if isinstance(value, str):
            # Allow something like int::int for index. Raise error if else
            pattern = r'^\d+$|^-?\d+::|^-?\d+::-?\d+$|^::-?\d+'
            if not re.match(pattern, value):
                raise ValueError("Index must match the pattern 'int::int'")
        return value

    _validate_type = validator('type', allow_reuse=True)(validate_type)

    @validator('pattern')
    def validate_pattern(cls, value):
        if value is not None:
            try:
                re.compile(value)
            except re.error:
                raise ValueError("Pattern must be a valid regular expression")
        return value

class Schema(FairscapeBaseModel, extra=Extra.allow):
    context: dict = Field( 
        default= {"@vocab": "https://schema.org/", "evi": "https://w3id.org/EVI#"},
        alias="@context" 
    )
    metadataType: str = Field(alias="@type", default= "evi:Schema")
    properties: Dict[str, Property]
    type: Optional[str] = Field(default="object")
    additionalProperties: Optional[bool] = Field(default=True)
    required: Optional[List[str]] = []  
    separator: Optional[str] = Field(default=",")
    header: Optional[bool] = Field(default=True)
    examples: Optional[List[Dict]] = []  
    
sample_schema = Schema(**schema)

/var/folders/rm/9q6pfp8n2cgg5dnwfyctrmq40000gq/T/ipykernel_30694/1348593386.py:52: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  _validate_type = validator('type', allow_reuse=True)(validate_type)
/var/folders/rm/9q6pfp8n2cgg5dnwfyctrmq40000gq/T/ipykernel_30694/1348593386.py:54: PydanticDeprecatedSince20: `pydantic.config.Extra` is deprecated, use literal values instead (e.g. `extra='allow'`). Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.6/migration/
  class Property(BaseModel, extra = Extra.allow):
/var/folders/rm/9q6pfp8n2cgg5dnwfyctrmq40000gq/T/ipykernel_30694/1348593386.py:65: PydanticDeprecatedSince20: Pydantic V1 style `@validator` 

In [20]:
json_data = json.dumps(sample_schema.dict(by_alias=True), default=str, indent=2)
rdf, turtle = convert_to_rdf(json_data)

template = env.get_template('./development notebooks/vizualization/schema_template.html')


# Render the template with the user data
rendered_html = template.render(
                schema=sample_schema,
                json = json_data,
                rdf_xml=rdf,
                turtle=turtle,
                type='Dataset'
            )

# Print or save the rendered HTML
with open('../development notebooks/vizualization/jinja_created/schema_example.html', 'w') as f:
    f.write(rendered_html)