In [None]:
%pip install cdisc_library_client
%pip install pandas
%pip install numpy
%pip install markdown

In [1]:
import os
import pandas as pd
import random
import json
import markdown
from string import Template

from cdisc_library_client import CDISCLibraryClient

To set a permanent environment variable in user scope using PowerShell (this notebook reads the API key from `CDISC_LIBRARY_API_KEY`):
`[System.Environment]::SetEnvironmentVariable("MY_VAR", "HelloWorld", "User")`

In [2]:
# Read the CDISC Library API key from the environment.
# os.environ.get returns None (rather than raising) when the variable is not set.
api_key = os.environ.get("CDISC_LIBRARY_API_KEY")
# Client object used for every CDISC Library request made later in this notebook.
client = CDISCLibraryClient(api_key=api_key)

In [3]:
# API version string passed to each client call in this notebook.
cosmos_api_version = "v2"
# Fetch the catalogue of SDTM dataset specializations. A later cell picks one entry
# at random and reads its "href" key.
# NOTE(review): per the method name these are the latest versions — confirm against the client docs.
all_dss = client.get_sdtm_latest_sdtm_datasetspecializations(cosmos_api_version)

In [None]:
def generate_introduction_narrative(data):
    """
    Generates a markdown narrative for the introduction section of a CDISC SDTM Dataset Specialization.

    Args:
        data (dict): A dictionary containing information about the dataset specialization.
            The keys ``shortName``, ``datasetSpecializationId``, ``domain``, ``source``,
            ``sdtmigStartVersion`` and ``sdtmigEndVersion`` are required.

    Returns:
        str: A markdown formatted string containing the introduction narrative. The text
            ends with the "- Variable Details:" bullet so that the variables narrative
            can be appended directly after it.

    Raises:
        KeyError: If any of the required keys is missing from ``data``.
    """

    template = Template("""
# CDISC Biomedical Concepts
## SDTM Dataset Specialization for $shortName

The data provided represents a detailed structure of variables related to the `$shortName` dataset specialization, denoted by the ID: `$dssId`.
It corresponds to the `$domain` domain in the Study Data Tabulation Model (SDTM).

Here is a detailed breakdown of the key elements of this dataset specialization:

- High-Level Information:
	 - This dataset is specialized for `$shortName`, identified with the ID of `$dssId`.
	 - It corresponds to the `$domain` domain and draws its source from `$source`.
	 - The dataset is designed to be compliant with the SDTMIG versions `$sdtmigStartVersion` to `$sdtmigEndVersion`.

- Variable Details:
""")

    # Fill the template. The result is bound to `narrative` rather than `markdown`
    # so the local name does not shadow the imported `markdown` module.
    narrative = template.substitute(
        shortName=data['shortName'],
        dssId=data['datasetSpecializationId'],
        domain=data['domain'],
        source=data['source'],
        sdtmigStartVersion=data['sdtmigStartVersion'],
        sdtmigEndVersion=data['sdtmigEndVersion'],
    )

    return narrative

In [None]:
def generate_variable_narrative(data):
	"""
	Generates a markdown narrative for the variables section of a CDISC SDTM Dataset Specialization.

	One entry is rendered per item in ``data['variables']``. Optional attributes that
	are missing from a variable (including an absent or empty ``valueList``) are
	rendered as ``N/A``.

	Args:
		data (dict): A dictionary containing information about the dataset specialization.
			It must hold a ``variables`` list; each variable must have a ``name``, all
			other attributes are optional.

	Returns:
		str: A markdown formatted string containing the variables narrative (an empty
			string when ``data['variables']`` is empty).

	Raises:
		KeyError: If ``variables`` or a variable's ``name`` is missing, or if a
			``relationship`` lacks ``subject``, ``linkingPhrase``, ``predicateTerm``
			or ``object``.
	"""

	def format_relationship(relationship):
		# Render the relationship as "<subject> <phrase> (<predicate>) <object>".
		return f"{relationship['subject']} {relationship['linkingPhrase']} ({relationship['predicateTerm']}) {relationship['object']}"

	template = Template("""
	1. **$name**
		- Data element definition: `$dataElementConceptId`
		- Variable is non-standard: `$isNonStandard`
		- Role: `$role`
		- Variable must be present: `$mandatoryVariable`
		- Variable must be populated: `$mandatoryValue`

		- Data Type: `$dataType`
		- Length: `$length`
		- Origin Type: `$originType`
		- Origin Source: `$originSource`
		- Variable is a target: `$vlmTarget`

		- Assigned Term: `$assignedTerm`

		- Codelist Concept ID: `$codelistConceptId`
		- Submission Value: `$codelistSubmissionValue`
		- Concept URL: `$codelistHref`
		- Value List: `$valueList`

		- Relationship: `$relationship`
""")

	narrative = []

	for variable in data['variables']:
		# `or` also covers keys that are present but explicitly set to None.
		codelist = variable.get('codelist') or {}
		value_list = variable.get('valueList') or []

		variable_info = template.substitute(
			name=variable['name'],
			dataElementConceptId=variable.get('dataElementConceptId', 'N/A'),
			isNonStandard=variable.get('isNonStandard', 'N/A'),
			role=variable.get('role', 'N/A'),
			mandatoryVariable=variable.get('mandatoryVariable', 'N/A'),
			mandatoryValue=variable.get('mandatoryValue', 'N/A'),
			assignedTerm=variable.get('assignedTerm', 'N/A'),
			relationship=format_relationship(variable['relationship']) if 'relationship' in variable else 'N/A',
			dataType=variable.get('dataType', 'N/A'),
			length=variable.get('length', 'N/A'),
			originType=variable.get('originType', 'N/A'),
			originSource=variable.get('originSource', 'N/A'),
			vlmTarget=variable.get('vlmTarget', 'N/A'),
			codelistConceptId=codelist.get('conceptId', 'N/A'),
			codelistSubmissionValue=codelist.get('submissionValue', 'N/A'),
			codelistHref=codelist.get('href', 'N/A'),
			# str() each entry so non-string values do not break the join, and show
			# 'N/A' (like every other optional field) when there is no value list.
			valueList=', '.join(map(str, value_list)) if value_list else 'N/A',
		)

		narrative.append(variable_info)

	return "\n".join(narrative)

Rerun the following cell to generate a narrative for another randomly selected dataset specialization.

In [9]:
# Select a random dataset specialization from the list of all dataset specializations
selected_dss = random.choice(all_dss)

# Extract the dataset specialization name from the href (its last path segment)
dss_name = selected_dss["href"].split("/")[-1]

# Retrieve the detailed information for the selected dataset specialization using CDISC Library API
dss = client.get_sdtm_latest_sdtm_datasetspecialization(cosmos_api_version, dss_name)

# Generate the markdown narrative for the introduction and variables sections
narrative = generate_introduction_narrative(dss) + generate_variable_narrative(dss)

# Write the generated narrative to a markdown file named after the dataset specialization.
# The encoding is set explicitly: without it, open() uses the platform's locale encoding
# (for example cp1252 on Windows), which can raise UnicodeEncodeError on non-ASCII text.
with open(f"{dss_name}.md", "w", encoding="utf-8") as file:
	file.write(narrative)