### Introduction to Pre and Post Simulation Tasks on Energy Exemplar Cloud. 
Pre and Post Simulation tasks enable you to run custom logic before or after a simulation on the cloud. Python scripts are supported by default, but you also have the ability to run native code compiled for Linux and the x64 architecture. All scripts or artifacts needed for your tasks must be uploaded to Energy Exemplar Datahub, which will be accessible during the run on the cloud. Datahub provides a seamless storage mechanism and offers versioning and account control capabilities. Multiple tasks can be configured to run before or after a run. Task output can be manually uploaded to Datahub, or you can retrieve data manually via the CLI. 

### Covered in this Example
1. Python libraries, environment variables and SDK setup
2. Setting SDK/CLI Environment
3. Authentication (SSO)
4. Authentication (Service Principal)
5. Configure Datahub Sync directory
6. Sync local content with Datahub
7. List Datahub content
8. Configure Simulation and Tasks
9. Enqueue Simulation


#### Instantiate the Python SDK client, Setup variables. <a class="anchor" id="environment-setup"></a>

In [None]:
import os, sys, utilities
from pathlib import Path
from eecloud.cloudsdk import CloudSDK, SDKBase
from eecloud.models import *

import getpass

# Name of the repository folder; also used as the top-level folder on Datahub.
repo_name = "JupyterSamples"
# HOME is not defined on every platform (e.g. Windows), and os.path.join
# raises TypeError on None, so fall back to the home directory from pathlib.
root_path = os.environ.get("HOME") or str(Path.home())
# JUPYTERHUB_USER may be unset; fall back to the OS login name so the
# Datahub path never ends in the literal string "None".
jupyter_user = os.environ.get("JUPYTERHUB_USER") or getpass.getuser()
project_root = os.path.join(root_path, repo_name, "PreAndPostTasks")

# Local folder that is mapped to (and synced with) the per-user Datahub folder.
local_path = os.path.join(project_root, "Scripts") 
datahub_path = f"{repo_name}/{jupyter_user}"
# JSON payload passed to enqueue_simulation.
simulation_body = os.path.join(local_path, "simulation_body.json")

solution_download_directory = os.path.join(project_root, "Downloads")
cli_path = os.environ.get("cloud_cli_path")
environment: str = "NA"

# Reject both None and an empty string: either means no environment was chosen.
if not environment:
    raise Exception("environment value must be configured")

# Setting an environment variable named "cloud_cli_path" with full path to CLI prevented need to configure SDK manually
pxc = CloudSDK(cli_path)

#### Set Environment

In [None]:
# Select the cloud environment for the current user and report which one is active.
try:
    env_response: list[CommandResponse[Contracts_EnvironmentResponse]] = pxc.environment.set_user_environment(environment)
    # The final response is a CommandResponse wrapper; the environment
    # details are read from its EventData attribute below.
    env_data: CommandResponse[Contracts_EnvironmentResponse] = pxc.environment.get_final_response(env_response)
    if env_data is None or env_data.EventData is None:
        # Without this guard the failure surfaces as an opaque AttributeError.
        print(f"Unable to set environment: {environment}")
    else:
        print(f"{env_data.EventData.Environment} selected, please authenticate")
except Exception as ex:
    print(ex)

#### Login to PLEXOS Cloud using SSO - Use this option on Windows desktop. Not for Hub or automations.

In [None]:
# Example for local notebook use: uncomment the lines below to log in with SSO.
#try:
#    login_response: list[CommandResponse[Contracts_LoginResponse]] = pxc.auth.login()
#    login_data: Contracts_LoginResponse = SDKBase.get_response_data(login_response)
#    print(f"Tenant: {login_data.TenantName}, User: {login_data.UserName}")
#except Exception as ex:
#    print(ex)

#### Login to PLEXOS Cloud using Client credentials

In [None]:
# Log in with a service principal (client credentials).
# The credentials are read from environment variables so that no secret is
# stored in the notebook. Set EE_TENANT_ID, EE_CLIENT_ID and EE_CLIENT_SECRET
# before running this cell.
# NOTE(review): any secret that was previously committed in this notebook
# should be treated as exposed and rotated.
try:
    ee_tenant_id = os.environ.get("EE_TENANT_ID")
    ee_client_id = os.environ.get("EE_CLIENT_ID")
    ee_client_secret = os.environ.get("EE_CLIENT_SECRET")

    # Fail early with a clear message instead of sending empty credentials.
    required_settings = {
        "EE_TENANT_ID": ee_tenant_id,
        "EE_CLIENT_ID": ee_client_id,
        "EE_CLIENT_SECRET": ee_client_secret,
    }
    missing_settings = [name for name, value in required_settings.items() if not value]
    if missing_settings:
        raise Exception(f"Missing required environment variable(s): {', '.join(missing_settings)}")

    command_responses = pxc.auth.login_client_credentials(tenant_id=ee_tenant_id, client_id=ee_client_id, client_secret=ee_client_secret, use_client_credentials=True, print_message=True)
    last_command_response: CommandResponse[Contracts_LoginResponse] = pxc.auth.get_final_response(command_responses)
    if last_command_response is not None and last_command_response.Status == "Success":
        data: Contracts_LoginResponse = last_command_response.EventData
        print(f"{data.TenantName} logged into {data.Environment}")
    else:
        # Report an unsuccessful login explicitly rather than finishing silently.
        login_status = getattr(last_command_response, "Status", None)
        print(f"Login did not succeed (status: {login_status})")

except Exception as ex:
    print(ex)

#### Setup Datahub to local folder sync

In [None]:
# Map the local scripts folder to the per-user Datahub folder and report the outcome.
try:
    print(f"Attempting to map local path: {local_path} to {datahub_path} on Datahub")
    map_response: list[CommandResponse[Contracts_DatahubMapResponse]] = pxc.datahub.map_folder(
        local_path,
        datahub_path,
        print_message=False,
    )
    map_data: Contracts_DatahubMapResponse = SDKBase.get_response_data(map_response)

    # No response data is reported as "mapping already exists".
    if map_data is None:
        print("Mapping already exists!")
    else:
        print(
            f"Map success: {map_data.Success}, "
            f"Local Path: {map_data.LocalPath}, "
            f"Remote Path: {map_data.RemotePath}, "
            f"Patterns: {map_data.Patterns}"
        )
except Exception as ex:
    print(ex)

#### Sync data between the mapped local folder and Datahub

In [None]:
# Run a sync for the mapped local folder and print the resulting status.
try:
    sync_response: list[CommandResponse[Contracts_DatahubCommandResponse]] = pxc.datahub.sync(
        local_path_to_sync=local_path,
        print_message=True,
    )
    sync_data: Contracts_DatahubCommandResponse = SDKBase.get_response_data(sync_response)
    sync_status = sync_data.DatahubCommandStatus.value
    print(f"Sync Status: {sync_status}")
except Exception as ex:
    print(ex)

#### List Datahub files

In [None]:
# List every non-deleted file under the user's Datahub folder with its version info.
try:
    glob_pattern_1: str = f"{datahub_path}/**" # this is a wildcard search based on previously provided path

    search_response: list[CommandResponse[Contracts_DatahubSearchResponse]] = pxc.datahub.search([glob_pattern_1], print_message=False)
    # The final response is a CommandResponse wrapper; the search results
    # are read from its EventData attribute below.
    search_data: CommandResponse[Contracts_DatahubSearchResponse] = pxc.datahub.get_final_response(search_response)

    # A missing response or payload is reported the same way as missing results.
    search_results = None
    if search_data is not None and search_data.EventData is not None:
        search_results = search_data.EventData.DatahubSearchResults

    if search_results is None:
        raise Exception("Failure listing datahub files") 

    for item in search_results:
        if not item.IsDeleted:
            print(f"{item.RelativePath} - Total Versions: {len(item.Versions)} : Latest Version: {item.LatestServerVersion}")

except Exception as ex:
    print(ex)

#### Define Tasks in Simulation Payload <a class="anchor" id="define-tasks"></a>

Manually update the [simulation_body.json](Scripts/simulation_body.json) located in the Scripts directory. The following must be updated:
- StudyId
- ChangesetId
- Models
- SimulationData
- SimulationEngine
- The tasks under SimulationTasks must be updated to include your Datahub path names. Documentation here: [Task Documentation](https://portal.energyexemplar.com/unified-help/plexos-cloud/#t=Simulations%2FPre-_and_Post-Simulation_Tasks.htm&rhsearch=pre%20and%20post&ux=search) 
  - TaskType: Pre or Post
  - Files: Datahub Path and Version
  - Arguments: If python script needs to be executed, the arguments must contain "python3 yourscript.py"

**Note:** If new scripts are added, the sync process must be run again. 


```json
...
{
    "Name": "TASK NAME",
    "TaskType": "Pre",
    "Files": [
        {
            "Path": "JupyterSamples/<user.name>/query_write_memberships.py",
            "Version": null
        },
        {
            "Path": "JupyterSamples/<user.name>/requirements.txt",
            "Version": 1
        }
    ],
    "Arguments": "python3 query_write_memberships.py",
    "ContinueOnError": true,
    "ExecutionOrder": 1
}
...

```

#### Enqueue Simulation, Wait for Completion. Download Results

In [None]:
# Enqueue the simulation described by simulation_body, wait for it to finish,
# and download the solution data into a per-simulation folder on success.
try:
    enqueue_response : list[CommandResponse[Contracts_EnqueueSimulationResponse]] = pxc.simulation.enqueue_simulation(simulation_body, print_message=False)
    enqueue_data: Contracts_EnqueueSimulationResponse = SDKBase.get_response_data(enqueue_response)

    # Guard against an empty response so the failure is reported clearly
    # instead of as an AttributeError/IndexError on the lines below.
    if enqueue_data is None or not enqueue_data.SimulationStarted:
        raise Exception(f"No simulation was started; check the payload in {simulation_body}")

    simulation = enqueue_data.SimulationStarted[0]
    simulation_id = simulation.Id.Value
    execution_id = simulation.ExecutionId.Value

    print(f"Simulation: {simulation_id} enqueued")

    utilities.wait_simulation_finish(pxc, simulation_id)
    simulation_result = utilities.get_simulation(pxc, simulation_id=simulation_id)

    #optionally if multiple simulations were enqueued we can wait for all executions to finish 
    #execution_result = utilities.get_executions(pxc, execution_id=simulation_result.ExecutionId.Value)
    #execution_result = utilities.wait_execution_finish(pxc, execution_id=simulation_result.ExecutionId.Value)

    if simulation_result.Status == 'CompletedSuccess':
        solution_id = simulation_result.ModelIdentifiers[0].Id
        print(f"Simulation complete. {simulation_result.Status} Downloading artifacts for solution: {solution_id}")  
        # Each simulation gets its own sub-folder under the download directory.
        sim_directory = f"{solution_download_directory}/{simulation_id}"
        Path(sim_directory).mkdir(parents=True, exist_ok=True)
        utilities.download_solution_data(pxc, solution_id , sim_directory)
        # Report the folder the files were actually written to.
        print(f"Solution data downloaded to {sim_directory}")
    else:
        print(f"Simulation finished: {simulation_result.Status} - Possible failure")
except Exception as ex:
    print(ex)
    

### Bonus. Querying downloaded artifacts for further analysis

In [None]:
import duckdb
import matplotlib.pyplot as plt

# Export the downloaded solution Parquet files to CSV, then aggregate and
# plot total generation per month.
with duckdb.connect() as con:
    # Glob matching the solution files in every per-simulation sub-folder.
    parquet_glob = f"{solution_download_directory}/**/solution_data*.parquet"
    csv_path = f"{solution_download_directory}/solution_data.csv"

    #convert Parquet to csv
    con.execute(f"COPY (select * from '{parquet_glob}') TO '{csv_path}' (DELIMITER ',');")
    
    #aggregate Generation by Date
    # ORDER BY is required: GROUP BY gives no ordering guarantee, and an
    # unordered result makes the line plot zig-zag between months.
    query: str = f"""
    SELECT date_trunc('month', StartDate) as Month, sum(TotalValue) as TotalGeneration 
    FROM '{parquet_glob}' 
    where BandId = 1 
    group by date_trunc('month', StartDate)
    order by Month;"""
    
    #show tabular results
    monthly_generation = con.sql(query)
    monthly_generation.show() 

    result = monthly_generation.to_df()
    #plot generation by month   
    plt.figure(figsize=(10, 6), dpi=100)
    # Plot generation against month explicitly; passing the whole DataFrame
    # would plot every column against the row index.
    plt.plot(result["Month"], result["TotalGeneration"])
    plt.title("Generation by Month")
    plt.xlabel("Month")
    plt.ylabel("TotalGeneration")    