# Creating a conda package from a Modelica Model
This example illustrates how to develop, run, and test (and repeat) a Modelica model in a local pandas/pyspark environment, so that it can be deployed to a cluster as a conda package for large-scale analytics.
- Build and compile the model once
- Make it a conda package
- Call the compiled model with different input data
- Use it at scale. Change parameters only at runtime.

See https://openmodelica.org/doc/OpenModelicaUsersGuide/v1.11.0/ompython.html

In [None]:
import pandas as  pd
import os
import sys
import json
import tempfile
import importlib

In [None]:
# Put the repository's src directory first on the import path so that the
# packages are found in the repo instead of in an installed copy.
# NOTE: the path is relative to the current working directory.
sys.path.insert(0, '../../src')

In [None]:
import DyMat

## Parametric simulation without OMC
https://openmodelica.org/doc/OpenModelicaUsersGuide/latest/scripting_api.html#simulation-parameter-sweep

In [None]:
from ModelicaModels import EDrives

In [None]:
# Instantiate the model through its wrapper module and look up a few of its
# parameters by name.
mod = EDrives.instantiatemodel()
mod.getParameters(['dMax', 'dMin', 'tauNominal', 'dcpmData'])

In [None]:
# Collect all model parameters in a Series; values that cannot be parsed as
# numbers become NaN (errors='coerce').
p = pd.to_numeric(pd.Series(mod.getParameters()), errors='coerce')

In [None]:
p['dcpmData.VaNominal'] 

In [None]:
# Duty-cycle bounds: dMin is read from the model, dMax is derived from it.
dMin = float(mod.getParameters(['dMin'])[0])
dMax = 1.0 - dMin
# Nominal torque computed from the dcpmData record.
# NOTE: the model itself uses ViNominal.
tauNominal = (
    p['dcpmData.VaNominal'] * p['dcpmData.IaNominal'] / p['dcpmData.wNominal']
)

In [None]:
# Break points of the first input series, tagged with its input name.
ts_df1 = pd.DataFrame(
    [
        [0.0, 0.5],
        [3.0, 0.5],
        [4.0, dMax],
        [12.0, dMax],
        [13.0, dMin],
        [21.0, dMin],
        [22.0, 0.5],
        [24.0, 0.5],
    ],
    columns=['time', 'value'],
).assign(input='dutyCycle_series')

# Break points of the second input series, tagged with its input name.
ts_df2 = pd.DataFrame(
    [
        [0.0, 0.0],
        [6.0, 0.0],
        [7.0, -tauNominal],
        [9.0, -tauNominal],
        [10.0, tauNominal],
        [15.0, tauNominal],
        [16.0, -tauNominal],
        [18.0, -tauNominal],
        [19.0, 0.0],
        [24.0, 0.0],
    ],
    columns=['time', 'value'],
).assign(input='loadTorque_Series')

# Stack both series, spread them into one column per input on a common time
# axis, and forward-fill the gaps left where only one series has a point.
ts_df = (
    pd.concat([ts_df1, ts_df2], ignore_index=True)
    .pivot(index='time', columns='input', values='value')
    .ffill()
    .reset_index()
)
ts_df['run_key'] = 'r1'

In [None]:
# One row per run: the run identifier and its variable overrides, stored as a
# JSON string.
parameters_var_df = pd.DataFrame(
    [
        ('r1', json.dumps({'e': 0.7})),
        ('r2', json.dumps({'e': 0.5})),
        ('r3', json.dumps({'e': 0.9})),
    ],
    columns=['run_key', 'modifiers'],
)
parameters_var_df

### Sequential execution

In [None]:
mod.getContinuous()  # list of states

In [None]:
def dymat2pandas(dm, block, names) -> pd.DataFrame:
    """Convert selected variables of a result object into a DataFrame.

    Args:
        dm: Result object providing ``getVarArray(names)``.
        block: Not used by this function; kept so existing callers keep working.
        names: Names of the variables to extract. Any iterable of strings is
            accepted (list, tuple, ...).

    Returns:
        DataFrame with the column ``time`` followed by one column per name.
    """
    # Normalise to a list so that the column concatenation below also works
    # for tuples and other sequences.
    names = list(names)
    # getVarArray yields one row per series (presumably the time axis first,
    # then the variables in the requested order); transpose to get columns.
    ts_df = pd.DataFrame(dm.getVarArray(names)).T
    ts_df.columns = ['time'] + names
    return ts_df


def run_sim_parametric(pdf, modelwrapper_name=None, res_vars=None, use_local=True) -> pd.DataFrame:
    """Simulate a single run and return the selected result series.

    The unique run identifier is taken from the first row of the column
    ``run_key``. It names the temporary input and result files and is written
    to the ``run_key`` column of the returned frame.

    Args:
        pdf: Data of one run. Must contain the columns ``run_key`` and
            ``time`` plus one column per model input. An optional column
            ``modifiers`` holds a JSON-encoded dict of variable overrides
            (only the first row is read).
        modelwrapper_name: Importable name of the model wrapper module. Only
            used when ``use_local`` is true.
        res_vars: Names of the result variables to record and return.
        use_local: When true, import the wrapper module by name; otherwise
            call an ``instantiatemodel`` function that must already exist in
            the global namespace.

    Returns:
        DataFrame with the columns ``time``, the result variables and
        ``run_key``. If the result file cannot be read, a single placeholder
        row (time -1.0, all variables 0.0) is returned instead.

    Raises:
        ValueError: If ``res_vars`` is None, or if a required input series is
            missing from ``pdf``.
    """
    # Fail fast, before the model is instantiated and files are written.
    if res_vars is None:
        raise ValueError('res_vars must list the result variables to record.')
    res_vars = list(res_vars)

    if use_local:
        modelwrapper = importlib.import_module(modelwrapper_name)
        mod = modelwrapper.instantiatemodel()
    else:
        mod = instantiatemodel(use_local=False)
    print(pdf)
    temp_dir = tempfile.gettempdir()
    grp = pdf['run_key'].iloc[0]
    resfilepathname = os.path.join(temp_dir, grp + '.mat')
    # mod.setParameters(pdf['modifiers'].iloc[0])  # Seems to be not implemented in OMPython
    # We might need to encode the dictionary as json when using pyspark
    if 'modifiers' in pdf.columns:
        overridevariables = json.loads(pdf['modifiers'].iloc[0])
        if overridevariables:
            mod.overridevariables = overridevariables
        # Work on a copy so the caller's frame is left untouched.
        pdf = pdf.drop(columns='modifiers')

    input_names = list(mod.getInputs().keys())
    inputs_required = set(input_names)
    inputs_available = set(pdf.columns)
    if not inputs_required.issubset(inputs_available):
        raise ValueError(
            'Required input series not provided in the DataFrame.\nRequired: {0}. Provided:{1}'.format(
                str(inputs_required), str(inputs_available)
            )
        )
    print(str(['time'] + input_names))
    pdf = pdf[['time'] + input_names]

    # Write csv file and tell the model class
    mod.csvFile = os.path.join(temp_dir, grp + '_inputs.csv')
    try:
        pdf.to_csv(mod.csvFile, sep=',', lineterminator=',\n')
    except TypeError:
        # Older pandas releases only know the previous keyword spelling.
        pdf.to_csv(mod.csvFile, sep=',', line_terminator=',\n')
    mod.inputFlag = True

    # Run the simulation; always clean up the temporary input file.
    try:
        log_str = str(mod.simulate(
            resultfile=resfilepathname,
            simflags=None,
            overrideaux='variableFilter="' + '|'.join(res_vars) + '"'
        ))
    finally:
        os.remove(mod.csvFile)
    print(log_str)

    # Collect results
    try:
        # TODO: The result file can be very big because all parameters are recorded.
        # see https://www.openmodelica.org/forum/default-topic/1666-saving-and-loading-states
        # and use simulate(Model, variableFilter="x1|x2|x3");
        dm = DyMat.DyMatFile(resfilepathname)
        ts_df = dymat2pandas(dm, 2, res_vars)
    except Exception as e:
        # Deliberate best effort: a failed run stays in the output as one
        # placeholder row, sized to the number of requested variables.
        print('Could not read results of run {0}: {1!r}'.format(grp, e))
        ts_df = pd.DataFrame(
            columns=['time'] + res_vars,
            data=[[-1.0] + [0.0] * len(res_vars)],
        )
    ts_df.columns = ['time'] + res_vars
    ts_df['run_key'] = grp
    return ts_df


def get_sim_dist_func(modelwrapper, run_fun=run_sim_parametric, res_vars=None, use_local=True):
    """Return the pandas (udf) function to simulate a set of runs.

    Args:
        modelwrapper: Model wrapper module; only its ``__name__`` is kept.
        run_fun: Function that simulates one run. It is called with the
            group's DataFrame and the keyword arguments ``modelwrapper_name``,
            ``res_vars`` and ``use_local``.
        res_vars: Names of the result variables, forwarded to ``run_fun``.
        use_local: Forwarded to ``run_fun``.

    Returns:
        A function that takes one DataFrame and returns the result of
        ``run_fun`` for it.
    """
    # Capture only the module name, not the module object itself (presumably
    # so the closure can be shipped to Spark workers; verify).
    modelwrapper_name = modelwrapper.__name__

    def run_sim_dist(pdf) -> pd.DataFrame:
        return run_fun(
            pdf,
            modelwrapper_name=modelwrapper_name,
            res_vars=res_vars,
            use_local=use_local,
        )

    return run_sim_dist

In [None]:
%%time
# Result variables to record and return for each run.
res_pars = ['dcpm.inertiaRotor.w', 'torque.tau', 'currentSensor.p.i']
# Apply the simulation function to each run_key group of the input frame.
ts_all_df = ts_df.groupby(['run_key']).apply(
        get_sim_dist_func(EDrives, res_vars=res_pars)
    )

In [None]:
ts_sim_df = ts_all_df

In [None]:
%matplotlib inline

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot the result series dcpm.inertiaRotor.w over time, one line per run.
for run_key in ts_df['run_key'].unique():
    ts_run_sim_pdf = ts_sim_df[ts_sim_df['run_key'] == run_key]
    plt.plot(ts_run_sim_pdf['time'], ts_run_sim_pdf['dcpm.inertiaRotor.w'])
plt.show()

### Parallel execution with Spark

In [None]:
# Turn the pandas parameter table into a Spark DataFrame and display it.
# NOTE(review): `spark` is not created in the visible cells; presumably the
# kernel provides a SparkSession under that name. Confirm.
parameters_var_sdf = spark.createDataFrame(parameters_var_df)
parameters_var_sdf.show()

In [None]:
parameters_var_sdf.toPandas()['modifiers'].iloc[0]

### Parallel execution

In [None]:
from ModelicaRuntimeTools import addpymodules

In [None]:
sc = spark.sparkContext

In [None]:
# Register the repo copies of DyMat and OMPython with the Spark context as
# zip archives (presumably so the executors can import them; verify against
# ModelicaRuntimeTools.addpymodules).
addpymodules(['../../src/DyMat'], 'mdymat.zip', sc=sc, dironly=True)
addpymodules(['../../src/OMPython'], 'mOMPython.zip', sc=sc, dironly=True)

In [None]:
# Run the model wrapper module in the current namespace
# NOTE(review): BouncingBall is not imported in the visible cells (only EDrives
# is); presumably `from ModelicaModels import BouncingBall` is needed first.
# Confirm.
modelwrapper_pyfile = BouncingBall.__file__

In [None]:
%run -i $modelwrapper_pyfile

In [None]:
from pyspark.sql import types as T, functions as F
# Schema of the frames returned per run: three double columns followed by the
# run identifier as a string, all nullable.
res_schema = T.StructType(
    [T.StructField(name, T.DoubleType(), True) for name in ("time", "h", "v")]
    + [T.StructField("run_key", T.StringType(), True)]
)
# Running the parametric simulation: one call per run_key group, result cached.
ts_sim_sdf = (
    parameters_var_sdf
    .groupby(['run_key'])
    .applyInPandas(
        get_sim_dist_func(BouncingBall, res_vars=['h', 'v'], use_local=False),
        schema=res_schema,
    )
    .cache()
)

In [None]:
ts_sim_sdf.show()

In [None]:
# Plot the result series h over time, one line per run. Each iteration pulls
# the rows of one run from the Spark result into pandas.
for run_key in parameters_var_df['run_key'].unique():
    ts_run_sim_pdf = ts_sim_sdf.where(F.col('run_key') == run_key).toPandas()
    plt.plot(ts_run_sim_pdf['time'], ts_run_sim_pdf['h'])
plt.show()