In [0]:
%pip install databricks-vectorsearch databricks-sdk
%restart_python

In [0]:


import re
import pandas as pd
from databricks.vector_search.client import VectorSearchClient
from pyspark.sql import SparkSession
from IPython.display import Markdown, display
from vector_search_utils import create_endpoint, get_index, create_index

In [0]:

# --- Input data: CSV of test claim lines ---
test_data_path = "/Workspace/Users/benjamin.wynn@peraton.com/GlobalEditsModel/testing/testing_data.csv"

# --- Retrieval settings passed to similarity_search ---
n_results = 20          # num_results per query
search_type = "HYBRID"  # query_type per query

# --- Vector Search resources ---
endpoint_name = "global-edits-policy-search"
index_name = "sandbox_catalog.default.gem_text_index"
embedding_endpoint = "global-edits-embeddings"

# --- Index schema ---
primary_key = "chunk_id"
source_column = "text"
indexed_columns = [
    "chunk_id",
    "text",
    "doc_title",
    "title",
    "chapter_title",
    "hcpcs_codes",
    "cpt",
    "icd10",
    "modifiers",
    "hcpcs_descriptions",
    "cpt_descriptions",
    "icd10_descriptions",
    "modifier_descriptions",
]


In [0]:
# Open a Vector Search client and pass it, with the configured endpoint and
# index settings, to the project helpers imported from vector_search_utils.
# NOTE(review): presumably the helpers create or reuse the endpoint/index —
# confirm in vector_search_utils.
client = VectorSearchClient(disable_notice=True)
create_endpoint(client, endpoint_name)

gem_text_table = "sandbox_catalog.default.gem_text"
index = create_index(
    client, endpoint_name, index_name, gem_text_table,
    primary_key, source_column, indexed_columns, embedding_endpoint,
)

In [0]:
def remove_empty_cols(row, columns):
    """Drop the empty fields of one claim row.

    A field is considered empty when its value is NaN/None or the empty
    string.

    Args:
        row: pandas Series holding a single claim line.
        columns: Column labels (pandas Index or list) that are candidates
            for the query text.

    Returns:
        A tuple ``(cleaned_row, remaining_columns)``. ``cleaned_row`` is
        ``row`` without its empty fields; ``remaining_columns`` is a pandas
        Index with the entries of ``columns`` that were not dropped, in
        their original order.
    """
    is_empty = row.isna() | (row == "")
    columns_to_drop = row[is_empty].index
    # Filter the columns the caller passed in (not a notebook global) and
    # keep their order; Index.difference would return them sorted.
    candidate_columns = pd.Index(columns)
    remaining_columns = candidate_columns[~candidate_columns.isin(columns_to_drop)]
    return row.drop(columns_to_drop), remaining_columns


def construct_query(row, columns):
    """Build the search query text for one claim row.

    Each non-empty field listed in ``columns`` contributes
    ``"<column> <value>, "``; afterwards every character that is neither a
    word character nor whitespace is removed.

    Args:
        row: pandas Series holding a single claim line.
        columns: Column labels to include, in output order.

    Returns:
        The query string. Note that the punctuation stripping also removes
        characters inside values, such as decimal points and hyphens.

    Raises:
        KeyError: If a non-empty column in ``columns`` is missing from ``row``.
    """
    updated_row, updated_columns = remove_empty_cols(row, columns)

    # Use the cleaned row/columns so empty fields never reach the query.
    ret_str = "".join(
        f"{column} {updated_row[column]}, " for column in updated_columns
    )
    return re.sub(r'[^\w\s]', '', ret_str)


# query_text = construct_query(row, cols)

In [0]:
# Read the test claims; the first CSV column is used as the index.
test_df = pd.read_csv(test_data_path, index_col=0)

# Separate the "policy" column from the remaining claim fields.
policy = test_df.loc[:, "policy"]
claim_lines = test_df.drop("policy", axis=1)

# Number of claim lines (progress denominator) and the accumulator that the
# search loop fills with one dict per retrieved hit.
total = len(claim_lines)
results_list = list()

In [0]:
# Run a similarity search for each claim line and collect one record per hit.

# Start from an empty accumulator so that re-running this cell does not
# append duplicates of the hits gathered by a previous run.
results_list = []
cols = claim_lines.columns
max_claims = 3  # development cap: only the first three claim lines are searched
last_percent = -1

# Track the row position explicitly: the index labels come from the CSV's
# first column and are not guaranteed to be 0..n-1 (or even numeric).
for position, (_label, row) in enumerate(claim_lines.iterrows()):
    if position >= max_claims:
        break
    percent = int((position / total) * 100)
    if percent != last_percent and percent % 5 == 0:
        print(f"Progress: {percent}%")
        last_percent = percent

    query_text = construct_query(row, cols)
    response = index.similarity_search(
        query_text=query_text,
        columns=indexed_columns,
        num_results=n_results,
        query_type=search_type,
        disable_notice=True
    )

    manifest = response.get("manifest", {}).get("columns", [])
    # Guard against a missing or null data_array so the slice below cannot fail.
    raw_results = response.get("result", {}).get("data_array") or []

    results = raw_results[:n_results]
    columns = [col.get("name") for col in manifest]
    df = pd.DataFrame(results, columns=columns)

    # Tag every hit with the claim it was retrieved for.
    results_list.extend(
        {"claim_id": row["claim_id"], **match_row.to_dict()}
        for _, match_row in df.iterrows()
    )


In [0]:
# One row per (claim, retrieved hit).
results_df = pd.DataFrame(results_list)
# Only the last expression of a cell is rendered, so a bare head() here would
# be evaluated and thrown away; show the preview explicitly.
display(results_df.head())
# Collapse to one row per claim_id; every other column becomes the list of
# that claim's hit values.
grouped = results_df.groupby("claim_id").agg(list)

In [0]:
# Preview the first rows of the loaded test data.
test_df.head()

In [0]:
# Attach each claim's grouped search hits to its test row. The inner join on
# claim_id keeps only claims that appear on both sides.
joined_df = pd.merge(
    left=test_df,
    right=grouped,
    how="inner",
    on="claim_id",
)
joined_df.head()

In [0]:
# Take the first joined claim and format its retrieved passages, each headed
# by its rank and similarity score, into a single text block.
claim_line = joined_df.iloc[0]
docs = claim_line["text"]
similarity_score = claim_line["score"]

document_sections = [
    f"Document {i+1} (Similarity: {similarity_score[i]}):\n{doc} "
    for i, doc in enumerate(docs)
]
joined_docs = "\n\n---\n\n".join(document_sections)

In [0]:
def _read_utf8(path):
    """Return the complete text of the UTF-8 encoded file at ``path``."""
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.read()


# Policy documents used by the later LLM calls.
fps88_policy = _read_utf8('/Workspace/Users/benjamin.wynn@peraton.com/GlobalEditsModel/testing/test_policy/fps88_policy.md')
fps17_policy = _read_utf8('/Workspace/Users/benjamin.wynn@peraton.com/GlobalEditsModel/testing/test_policy/fps17_policy.md')

In [0]:
# Copy the "_x"-suffixed modifier fields back to their plain names so that
# construct_query can find them under the names listed in `cols`.
# NOTE(review): presumably the "_x" suffix was added by the merge for columns
# present on both sides — confirm.
for suffixed_name, plain_name in (
    ('modifier_descriptions_x', 'modifier_descriptions'),
    ('modifiers_x', 'modifiers'),
):
    claim_line[plain_name] = claim_line[suffixed_name]

claim_line_query = construct_query(claim_line, cols)
print(claim_line_query)

In [0]:
from openai import OpenAI
import os

# Read the API token from the running notebook's context.
# Alternative: DATABRICKS_TOKEN = os.environ.get('DATABRICKS_TOKEN')
notebook_context = dbutils.notebook.entry_point.getDbutils().notebook().getContext()
DATABRICKS_TOKEN = notebook_context.apiToken().get()

# OpenAI client pointed at the workspace's serving-endpoints URL.
# NOTE: this rebinds `client`, which previously held the VectorSearchClient.
client = OpenAI(
    base_url="https://cms-fps-dev-sandbox.cloud.databricks.com/serving-endpoints",
    api_key=DATABRICKS_TOKEN,
)
# System prompt for the summarization call: sets the model's role and goal and
# lists the ordered steps it should follow on the supplied policy documents.
# NOTE(review): the "Extract decision rules" step contains an unbalanced "**",
# and the final item ("Highlight exceptions & frequency limits") has no
# description, unlike the other steps — confirm the prompt is complete.
summarizer_prompt = """
    Role: You are a medical-policy claim summarizer.
    Goal: Given one or more policy documents (PDF, DOCX, or plain text), produce a concise, structured summary that a claims-processing system can ingest.
    Scope of work (follow in order):

    Pre-clean the text – remove headers/footers, boiler-plate disclaimers, revision histories, and marketing language.

    Locate policy requirements – isolate all statements that (a) set coverage conditions, (b) list exclusions, or (c) reference billing codes/modifiers. Ignore narrative background and clinical rationale unless it changes adjudication logic.

    Extract decision rules – for every distinct service, supply, or scenario, determine whether a claim **must be ACCEPTED or DENIED, and specify the exact conditions that trigger that outcome.

    Capture billing details – whenever a CPT, HCPCS, ICD-10-CM/PCS code, or modifier is mentioned, embed it inline with its rule and describe when it applies (e.g., “Use modifier 26 when…”).

    Highlight exceptions & frequency limits"""

# Conversation for the summarizer: the system prompt, then the fps88 policy
# offered as a guide, then the retrieved documents to be summarized.
summarizer_messages = [
    {"role": "system", "content": summarizer_prompt},
    {
        "role": "user",
        "content": f"Use the following policy summarization as a guide. This is a separate policy do not include any information from this policy when summarizing the provided policy: {fps88_policy}",
    },
    {
        "role": "user",
        "content": f"Read the following documents and produce a strict list of requirements and rules for processing claims: {joined_docs}",
    },
]

chat_completion = client.chat.completions.create(
    model="summarizationTesting",
    messages=summarizer_messages,
)

# Keep the generated summary for the evaluation step and show it.
output_policy = chat_completion.choices[0].message.content
print(output_policy)

In [0]:
# Print every retrieved chunk id with its score for the first joined claim.
first_claim = joined_df.iloc[0]
chunk_ids = first_claim['chunk_id']
scores = first_claim['score']
# Walk the hits that were actually returned instead of assuming exactly 20,
# which raised IndexError whenever the search produced fewer results.
for chunk_id, score in zip(chunk_ids, scores):
    print(f"Chunk_id {chunk_id}")
    print(f"Score {score}")

In [0]:
# Show the list of chunk ids retrieved for the first joined claim.
joined_df.iloc[0]['chunk_id']

In [0]:
import matplotlib.pyplot as plt
# NOTE(review): fps_17_lines is not used below — the histogram is built from
# every row of joined_df. Confirm which of the two was intended.
fps_17_lines = joined_df[joined_df['edit'] == 17]

# Flatten the per-claim lists of chunk ids into a single list.
chunks = []
for chunk_list in joined_df['chunk_id']:
    chunks.extend(chunk_list)

# Distribution of the retrieved chunk ids across 100 buckets.
fig, ax = plt.subplots(figsize=(8, 5))
ax.hist(chunks, bins=100, edgecolor='black')
ax.set_title("Histogram of Chunk IDs Found")
ax.set_xlabel("Chunk ID Buckets")
ax.set_ylabel("Frequency")
ax.grid(True)
plt.show()

In [0]:
%sql
-- Inspect the gem_text row(s) whose chunk_id equals the hard-coded value 1775.0.
SELECT * FROM sandbox_catalog.default.gem_text WHERE chunk_id=1775.0

In [0]:
# Show the query text built for the selected claim line.
claim_line_query

In [0]:
# System prompt for the evaluation call: the model compares a candidate
# summary against a reference summary, judges whether the candidate suffices
# to adjudicate the given claim line, and must answer in the strict JSON
# shape spelled out at the end of the prompt.
comparison_prompt = """
Role: Policy-summary compliance & claim-readiness evaluator

Inputs (always supplied):

reference_summary – authoritative policy summary (ground truth).

candidate_summary – summary generated by another model.

summarization_prompt – original prompt used to create candidate_summary.

claim_line – the claim (service code + modifiers + context) that must be adjudicated with the summary.

Objectives (in priority order):

Content fidelity check – confirm that candidate_summary reproduces all substantive facts in reference_summary (coverage rules, exclusions, billing codes/modifiers, frequency limits, exceptions, and accept/deny criteria) and introduces no new or conflicting facts.

Claim readiness check – decide whether the information in candidate_summary is sufficient to adjudicate claim_line. Do not use the fidelity check. Analyze the claim_line using both candidate_summary and reference_summary to determine whether the candidate_summary is just as sufficient to adjudicate the claim as the reference_summary

Gap analysis – list any extra data elements required to process claim_line that are missing from candidate_summary.

Prompt improvement – suggest one concrete rewrite of summarization_prompt that would have prevented the detected issues.

Ignore completely:
• Formatting/style differences (tables vs bullets, section order, headings, line breaks).
• Wording changes that leave meaning intact.

Evaluation procedure:

Read reference_summary and create a checklist of every discrete factual assertion (codes, conditions, limits, exceptions, outcomes).

Examine candidate_summary and note where each checklist item is present, missing, or mis-stated.

Flag any hallucinated facts present in candidate_summary but absent from reference_summary.

Using the accepted/denied logic and billing details in candidate_summary, simulate adjudication of claim_line:
• If all required variables are supplied by the policy, mark it processable. 
• Otherwise, list what the policy lacks in terms of details to process claim_line. If claim_line is missing data see if the policy addresses the missing data.

Draft a single tightened version of summarization_prompt that would likely have produced a fully accurate, claim-ready summary.

Output (strict JSON, no extra keys):

{
  "verdict": "PASS" | "FAIL",
  "missing_facts": ["<brief description>", "..."],
  "incorrect_facts": ["<brief description>", "..."],
  "hallucinated_facts": ["<brief description>", "..."],
  "claim_processable": true | false,
  "additional_info_required": ["<data element>", "..."],
  "comments": "<≤100-word free-text note>",
  "improved_prompt": "<single revised prompt>"
}
Set verdict to PASS only when there are no missing, incorrect, or hallucinated facts and claim_processable is true.
"""
def _user_message(content):
    """Wrap ``content`` as a chat message with the "user" role."""
    return {"role": "user", "content": content}


# Conversation for the evaluator: the system prompt followed by one user
# message per labelled input (reference, candidate, original prompt, claim).
comparison_messages = [
    {"role": "system", "content": comparison_prompt},
    _user_message(f"reference_summary: {fps17_policy}"),
    _user_message(f"candidate_summary: {output_policy}"),
    _user_message(f"summarization_prompt: {summarizer_prompt}"),
    _user_message(f"\nclaim_line: {claim_line_query}\n"),
]

chat_completion = client.chat.completions.create(
    model="summarizationTesting",
    messages=comparison_messages,
)

print(chat_completion.choices[0].message.content)

In [0]:
# Show the selected claim line, including the modifier fields copied above.
claim_line