In [0]:
""" This script is to setup a mock MODEL + TOML file. 
It is used to avoid training a model for each institution for testing purposes.
It copies an existing model from an existing institution called "institution_x" to the new institution's schema.

The notebook is designed to run within a Databricks environment as a job task, leveraging Databricks 
utilities for widget input, job task values, and Spark session management.

This is a POC notebook; it is advised to refactor it into a .py module and add tests before using it in production.

"""

import logging
import toml
import os
import mlflow
from databricks.connect import DatabricksSession
from databricks.sdk.runtime import dbutils
import pandas as pd


import student_success_tool.dataio as dataio
import student_success_tool.targets.pdp as targets
import student_success_tool.schemas.pdp as schemas
import student_success_tool.preprocessing.pdp as preprocessing

# Turn MLflow autologging off for this task (Databricks issues were seen with it
# during feature selection).
mlflow.autolog(disable=True)

# Log at INFO level overall, but only let WARNING and above through from the
# py4j logger (Databricks noise).
logging.basicConfig(level=logging.INFO)
_py4j_logger = logging.getLogger("py4j")
_py4j_logger.setLevel(logging.WARNING)

# Best-effort Spark session: default to None and only replace it when the
# Databricks session can actually be created. A failure is logged, not raised.
spark_session = None
try:
    spark_session = DatabricksSession.builder.getOrCreate()
except Exception:
    logging.warning("Unable to create Spark session; are you in a Databricks runtime?")

# --- Job parameters, all supplied through Databricks widgets ---

# The workspace identifier doubles as the catalog name in the model names built below.
DB_workspace = dbutils.widgets.get("DB_workspace")
catalog = DB_workspace

model_name = dbutils.widgets.get("model_name")
# NOTE(review): read but not used in this section; the source model version is
# pinned to 1 in the URI below -- confirm that is intended.
model_version = dbutils.widgets.get("version_id")

institution_name = dbutils.widgets.get("databricks_institution_name")
# NOTE(review): not used in this section -- confirm whether it is still needed.
db_run_id = dbutils.widgets.get("db_run_id")


# --- Copy the reference model into the new institution's schema ---

# Source: version 1 of the model registered under the "institution_x" bronze schema.
existing_model_uri = f"models:/{catalog}.institution_x_bronze.{model_name}/1"
# Destination: despite the variable name, this is the registered-model *name*
# (catalog.schema.model) in the new institution's gold schema, not a URI.
new_institution_model_uri = f"{catalog}.{institution_name}_gold.{model_name}"
mlflow.register_model(
    model_uri=existing_model_uri,
    name=new_institution_model_uri,
)

# Create the institution's configuration file by copying a template. This is a
# placeholder: in a real scenario the TOML file is built specifically for the
# institution.
# NOTE(review): the template is read from one user's workspace folder; the job
# will fail if that folder is moved or is not readable by the job's identity.
toml_template_path = "/Workspace/Users/pedro.melendez@datakind.org/repo-student-success-tool-develop/tests/institution_onboarding/template_enrollment_model_toml_file.toml"
# TOML documents are UTF-8, so do not depend on the locale's default encoding.
with open(toml_template_path, "r", encoding="utf-8") as f:
    toml_template = toml.load(f)

# The institution name is used for both the id and the display name.
toml_template["institution_id"] = institution_name
toml_template["institution_name"] = institution_name

# All other model-specific parameters stored in the toml file should be updated here


# Create directory on the volume
config_file_path = f"/Volumes/{catalog}/{institution_name}_gold/gold_volume/configuration_files/"
os.makedirs(config_file_path, exist_ok=True)

# config_file_path already ends with "/", so join the file name with
# os.path.join instead of inserting another separator (which produced "//").
config_file_name = f"{institution_name}_{model_name}_configuration_file.toml"
with open(os.path.join(config_file_path, config_file_name), "w", encoding="utf-8") as f:
    toml.dump(toml_template, f)


