In [0]:
from pyspark.sql.functions import col
from pyspark.sql.functions import lit
from pyspark.sql import functions as f 

In [0]:

# Prompt template for the LLM health-status assessment. The `{name}` placeholders
# are substituted with `str.format` inside `render_prompt`, so every placeholder
# needs a matching key in the mapping passed there; the placeholder names mirror
# `profile_columns` in `generate_json`.
# NOTE(review): the prompt text contains typos ("factor like", "visists",
# "did not have to any condition"). Left unchanged here because the string is
# sent to the model verbatim; confirm before rewording.
template = """You are an expert health advisor evaluating the health status of military pilots based on medical and readiness data. Given the following features and record for a pilot, assess their health status on a scale from 1 (unhealthy) to 10 (healthy). Consider factor like age, diagnosis, history, mental health status, prescriptions, lab results, and procedural compliance.
**Pilot Information**
- Age: {age}
- Gender: {gender}
- Rank: {rank}
- Flight Hours Total: {flight_hours_total}
- Flight Hours Last 12 Months: {flight_hours_last_12mo}
**Medical Records**
- Diagnosis: {diagnosis}
- Medications: {medication}
- Hospitalization reason: {hospitalization_reason}
- Dental Readiness: {dental_readiness}
- Lab results: {test_name} - {result_value} - {unit} ref low {ref_low} ref high {ref_high}
- Doctor Visits: {encounters_6mo}
**Mental Health**
- Mental Health Medication: {mental_health_medication}
- Therapy Sessions: {therapy_sessions}

Analyze the data and provide a health status score, considering the severity of the conditions, frequency of health visists, mental health management, and laboratory flags. If fields are left blank assume that the pilot did not have to any condition or procedure. Provide reasoning for your score in the following list format:
[score, reason]"""



@f.udf("string")
def render_prompt(m: dict) -> str:
    """Fill the module-level prompt ``template`` from one record's field map.

    Wrapped with ``f.udf("string")`` so it can be applied to a map column.

    Args:
        m: Mapping of placeholder name to value; it is unpacked as keyword
            arguments to ``template.format``, so it must contain a key for
            every placeholder used in the template.

    Returns:
        The formatted prompt text.
    """
    prompt_text = template.format(**m)
    return prompt_text

def generate_json(table_name: str = "avengers.default.all_pilots_data"):
    """Load pilot records and attach a JSON profile and a rendered LLM prompt.

    Args:
        table_name: Name of the table read through ``spark.table``. Defaults
            to the table this notebook has always used, so existing
            zero-argument calls keep working.

    Returns:
        The loaded DataFrame with two added columns:
        ``content`` - JSON string built from the profile columns, and
        ``prompt``  - the prompt template filled in by ``render_prompt``.
    """
    profile_columns = [
        "age", "gender", "rank", "flight_hours_total", "flight_hours_last_12mo",
        "diagnosis", "medication", "hospitalization_reason", "dental_readiness",
        "test_name", "result_value", "unit", "ref_low", "ref_high",
        "encounters_6mo", "mental_health_medication", "therapy_sessions",
    ]

    pilots = spark.table(table_name)

    # Struct of the profile fields; each field keeps its source column name.
    profile_struct = f.struct(*[f.col(name) for name in profile_columns])

    # create_map expects a flat key, value, key, value, ... sequence. Values are
    # cast to string and nulls are replaced by "" so str.format renders a blank
    # instead of "None". Built with a nested comprehension rather than
    # sum(list_of_lists, []), which copies the accumulator on every step.
    map_entries = [
        entry
        for name in profile_columns
        for entry in (
            f.lit(name),
            f.coalesce(f.col(name).cast("string"), f.lit("")),
        )
    ]
    record_map = f.create_map(*map_entries)

    return (
        pilots
        .withColumn("content", f.to_json(profile_struct))
        .withColumn("prompt", render_prompt(record_map))
    )

In [0]:
# Build the prompt-augmented DataFrame; `df` is reused by later cells.
df = generate_json()

In [0]:
from mlflow.deployments import get_deploy_client

# Connectivity check: send one fixed chat message to the serving endpoint and
# keep the raw response in `results` for inspection in the next cell.
model_name = "databricks-gpt-oss-120b"
client = get_deploy_client("databricks")

inputs = {
    "messages": [{"role": "user", "content": "What is Databricks?"}],
    "max_tokens": 128,
}

results = client.predict(endpoint=model_name, inputs=inputs)

In [0]:
# Display the raw response from `client.predict`. An empty subscript
# (`results[]`) is a SyntaxError, so the whole response object is shown instead.
# NOTE(review): index into the payload here once its shape is confirmed.
results

In [0]:
# Render the DataFrame returned by generate_json() in the cell output.
# NOTE(review): presumably `.display()` is the Databricks notebook helper; the
# output includes pilot medical fields, so consider selecting only the needed
# columns or limiting rows before sharing this notebook - confirm.
df.display()

In [0]:
# Prompt template used by `render_prompt` (filled via `str.format`).
# NOTE(review): this assignment repeats the `template` definition from the
# earlier cell of this notebook; keeping two copies risks them drifting apart.
# Consider keeping a single definition - confirm which cell is authoritative.
template = """You are an expert health advisor evaluating the health status of military pilots based on medical and readiness data. Given the following features and record for a pilot, assess their health status on a scale from 1 (unhealthy) to 10 (healthy). Consider factor like age, diagnosis, history, mental health status, prescriptions, lab results, and procedural compliance.
**Pilot Information**
- Age: {age}
- Gender: {gender}
- Rank: {rank}
- Flight Hours Total: {flight_hours_total}
- Flight Hours Last 12 Months: {flight_hours_last_12mo}
**Medical Records**
- Diagnosis: {diagnosis}
- Medications: {medication}
- Hospitalization reason: {hospitalization_reason}
- Dental Readiness: {dental_readiness}
- Lab results: {test_name} - {result_value} - {unit} ref low {ref_low} ref high {ref_high}
- Doctor Visits: {encounters_6mo}
**Mental Health**
- Mental Health Medication: {mental_health_medication}
- Therapy Sessions: {therapy_sessions}

Analyze the data and provide a health status score, considering the severity of the conditions, frequency of health visists, mental health management, and laboratory flags. If fields are left blank assume that the pilot did not have to any condition or procedure. Provide reasoning for your score in the following list format:
[score, reason]"""


In [0]:
# `pyspark.sql.functions` is imported as lowercase `f` in this notebook, so the
# decorator must use `f.udf`; an uppercase `F` is not defined on a fresh kernel.
@f.udf("string")
def render_prompt(m: dict) -> str:
    """Fill the module-level prompt ``template`` from one record's field map.

    Args:
        m: Mapping of placeholder name to value; it is unpacked as keyword
            arguments to ``template.format``, so it must contain a key for
            every placeholder used in the template.

    Returns:
        The formatted prompt text.
    """
    return template.format(**m)