In [2]:
# ------------------------------------------------------------------
# Install required libraries if not already installed (silent install)
# ------------------------------------------------------------------
# Uncomment if needed:
# !pip install sentence_transformers --quiet

# ------------------------------------------------------------------
# Imports
# ------------------------------------------------------------------
import json
import numpy as np
import torch
from IPython.display import HTML, display
import html
import os
import nltk
from transformers import AutoTokenizer
from transformer_lens import HookedTransformer
from sentence_transformers import SentenceTransformer, util

# Download necessary NLTK data (for tokenization, if needed)
nltk.download('punkt')

# ------------------------------------------------------------------
# 1. GLOBAL MODEL LOADING
# ------------------------------------------------------------------
def load_model(model_name="gemma-2-2b"):
    """
    Instantiate a HookedTransformer for `model_name` so it can be reused.

    CUDA is selected when torch reports it as available, otherwise the CPU.
    Returns a (model, tokenizer, device) tuple.
    """
    print(f"Loading model: {model_name}")
    if torch.cuda.is_available():
        target_device = torch.device("cuda")
    else:
        target_device = torch.device("cpu")
    hooked = HookedTransformer.from_pretrained(model_name, device=target_device)
    return hooked, hooked.tokenizer, target_device

# Load the model once at module level; validate_example_unrelated reads these
# globals directly and visualize_concept_on_text binds them as default arguments.
global_model, global_tokenizer, global_device = load_model()

# ------------------------------------------------------------------
# 2. PROCESS EXAMPLE
# ------------------------------------------------------------------
def process_example(model, tokenizer, hook_name, probe, text, device):
    """
    Score every token of `text` with a probe applied to one cached activation.

    The text is encoded with `tokenizer`, run through `model` while caching only
    `hook_name`, and the cached activations of the first batch element are passed
    to `probe.predict_proba` in a single batched call.

    Args:
        model: Object exposing `run_with_cache(tokens, names_filter=...)` that
            returns `(output, cache)`; `cache[hook_name]` is indexed as
            [batch, position, ...features].
        tokenizer: Object exposing `encode(text, return_tensors="pt")` and
            `decode(token_id)`.
        hook_name: Name of the activation to cache and read back.
        probe: Classifier exposing `predict_proba`; column 1 (class=1) is the score.
        text: Input string to analyse.
        device: Device the encoded token tensor is moved to.

    Returns:
        (list_of_token_strings, list_of_scores): decoded tokens with '▁' replaced
        by a space, and one float probability of class=1 per sequence position.
    """
    tokens = tokenizer.encode(text, return_tensors="pt").to(device)
    token_strs = [tokenizer.decode(t).replace('▁', ' ') for t in tokens[0]]

    with torch.no_grad():
        _, cache = model.run_with_cache(tokens, names_filter=[hook_name])
        activations = cache[hook_name]

    # Nothing to score; avoid handing the probe an empty feature matrix.
    if activations.shape[1] == 0:
        return token_strs, []

    # Cast to float32 first: NumPy has no bfloat16, so `.numpy()` would fail for
    # models loaded in reduced precision.
    features = activations[0].detach().to(torch.float32).cpu().numpy()
    # One row per position, flattening any trailing feature dimensions.
    features = features.reshape(features.shape[0], -1)

    # A single batched call replaces one predict_proba call per token.
    class_one_probs = probe.predict_proba(features)[:, 1]
    scores = [float(p) for p in class_one_probs]

    return token_strs, scores

# ------------------------------------------------------------------
# 3. SENTENCE-TRANSFORMER FOR SEMANTIC SIMILARITY
# ------------------------------------------------------------------
# Sentence-embedding model used by compute_semantic_similarity; created once at
# module level so every similarity call reuses the same instance.
semantic_model = SentenceTransformer('all-MiniLM-L6-v2')

def compute_semantic_similarity(phrase, concept_text):
    """
    Return the cosine similarity (a Python float) between the Sentence-BERT
    embeddings of 'phrase' and 'concept_text'.
    """
    # Embed the phrase first, then the concept, each as a tensor.
    phrase_vec, concept_vec = (
        semantic_model.encode(text, convert_to_tensor=True)
        for text in (phrase, concept_text)
    )
    cosine = util.cos_sim(phrase_vec, concept_vec)
    return cosine.item()

# ------------------------------------------------------------------
# 4. ACTIVATION VALIDATION (DETAILED)
# ------------------------------------------------------------------
def compute_activation_validation_details(token_strs, scores, concept_text):
    """
    Splits tokens into groups that end at a '.' or ',' token (any trailing tokens
    form a final group). For each group, computes the semantic similarity of the
    joined group text to 'concept_text'. Then for each token in that group:
        activation_validation = activation * abs(similarity)

    Returns a list of dicts, one per token in input order, with keys
    "token", "activation", "group", "concept", "similarity" and
    "activation_validation". "token" holds the token string itself (it was
    previously a None placeholder that nothing ever filled in).
    """
    # Collect (token, score) pairs per punctuation-delimited group.
    groups = []
    pending = []
    for token, score in zip(token_strs, scores):
        pending.append((token, score))
        if token.strip() in ('.', ','):
            groups.append(pending)
            pending = []
    if pending:
        groups.append(pending)

    details = []
    for members in groups:
        group_string = "".join(token for token, _ in members).strip()
        # One embedding comparison per group; every member shares the result.
        similarity = compute_semantic_similarity(group_string, concept_text)
        for token, activation in members:
            details.append({
                "token": token,
                "activation": activation,
                "group": group_string,
                "concept": concept_text,
                "similarity": similarity,
                # Absolute similarity: negative similarity still scales the score up.
                "activation_validation": activation * abs(similarity)
            })
    return details

def compute_activation_validation_score(token_strs, scores, concept_text):
    """
    Aggregate score for a text: the sum of every token's activation-validation
    value as produced by compute_activation_validation_details.
    """
    running_total = 0
    for entry in compute_activation_validation_details(token_strs, scores, concept_text):
        running_total += entry["activation_validation"]
    return running_total

# ------------------------------------------------------------------
# 5. PRINT TOKEN-LEVEL DETAILS AS TEXT BLOCK
# ------------------------------------------------------------------
def match_tokens_to_groups(token_strs, scores, concept_text):
    """
    Build one row per token, grouping tokens at '.' or ',' boundaries.

    Each row records the token's position, the token itself, the text of the
    group it belongs to, its activation, the concept, the group's semantic
    similarity to the concept, and activation * abs(similarity).
    """
    delimiters = ('.', ',')
    rows = []
    pending = []  # (index, token, score) triples of the group being collected

    def emit_pending():
        # Turn the collected group into rows, then start a fresh group.
        phrase = "".join(tok for _, tok, _ in pending).strip()
        sim = compute_semantic_similarity(phrase, concept_text)
        rows.extend(
            {
                "index": position,
                "token": tok,
                "group": phrase,
                "activation": activation,
                "concept": concept_text,
                "similarity": sim,
                "activation_validation": activation * abs(sim),
            }
            for position, tok, activation in pending
        )
        pending.clear()

    for position, (tok, activation) in enumerate(zip(token_strs, scores)):
        pending.append((position, tok, activation))
        if tok.strip() in delimiters:
            emit_pending()

    # Tokens after the last delimiter form a final group.
    if pending:
        emit_pending()
    return rows

def _single_line(text):
    """Return `text` with carriage returns, newlines and tabs shown as escapes."""
    return text.replace("\r", "\\r").replace("\n", "\\n").replace("\t", "\\t")


def print_token_contributions_table(rows, label="Example Text"):
    """
    Print a fixed-width table of tokens with non-zero activation.
    Columns: Index, Token, Group, Activation, Similarity, ActVal

    Args:
        rows: Iterable of dicts with keys "index", "token", "group",
            "activation", "similarity" and "activation_validation".
        label: Name of the text, shown in the table title.

    Rows whose absolute activation is at most 0.001 are skipped. Newline,
    carriage-return and tab characters in tokens/groups are printed as escapes
    so that every row stays on exactly one line and the columns stay aligned.
    """
    threshold = 0.001
    filtered = [r for r in rows if abs(r["activation"]) > threshold]

    if not filtered:
        print(f"\nNo non-zero activations found for {label}.")
        return

    # Define column widths & alignment
    # Index: 5 chars (left)
    # Token: 15 chars (left)
    # Group: 75 chars (left)
    # Activation, Similarity, ActVal: 12 chars each (right), with 4 decimals
    header_format = "{:<5} | {:<15} | {:<75} | {:>12} | {:>12} | {:>12}"
    line_width = 5 + 3 + 15 + 3 + 75 + 3 + 12 + 3 + 12 + 3 + 12

    header = header_format.format("Idx", "Token", "Group", "Activation", "Similarity", "ActVal")
    separator = "-" * line_width

    table_lines = [f"\n--- Token-Level Contributions ({label}) ---", header, separator]

    for r in filtered:
        # Escape before truncating so the visible width never exceeds the column.
        token_str = _single_line(r["token"])
        group_str = _single_line(r["group"])

        table_lines.append(header_format.format(
            f"{r['index']}",
            token_str[:15],  # Truncate token if it's longer than 15
            group_str[:75],  # Truncate group if it's longer than 75
            f"{r['activation']:.4f}",
            f"{r['similarity']:.4f}",
            f"{r['activation_validation']:.4f}"
        ))

    print("\n".join(table_lines))

# ------------------------------------------------------------------
# 6. VALIDATION FUNCTION (MODIFIED TO PRINT DETAILS)
# ------------------------------------------------------------------
def validate_example_unrelated(example_path, unrelated_path, concept_key, concept_string=None, layer=22):
    """
    Compare probe activations on an example text against an unrelated text.

    1) Load the probe for the concept from probes/<concept_key>/ (probe.joblib is
       preferred, probe.pkl is the fallback). If neither exists, a message is
       printed and the function returns None without doing anything else.
    2) Compute token-level activations for example and unrelated texts.
    3) Sum activation validation (using abs semantic similarity) and print overall difference.
    4) Print a detailed table (text block) of non-zero tokens for both texts.

    Args:
        example_path: Path of the UTF-8 text file expected to express the concept.
        unrelated_path: Path of the UTF-8 text file used as the contrast.
        concept_key: Probe directory name; underscores become spaces for the
            default concept string.
        concept_string: Optional concept text; a "concept" entry in
            probes/<concept_key>/config.json overrides it when present.
        layer: Block index used to build the hook name
            "blocks.<layer>.hook_resid_post".
    """
    if concept_string is None:
        concept_string = concept_key.replace("_", " ")

    probe_dir = os.path.join("probes", concept_key)
    joblib_path = os.path.join(probe_dir, "probe.joblib")
    pkl_path = os.path.join(probe_dir, "probe.pkl")
    config_path = os.path.join(probe_dir, "config.json")

    # NOTE: joblib.load / pickle.load can execute arbitrary code; only load probe
    # files that come from a trusted source.
    if os.path.exists(joblib_path):
        import joblib
        probe = joblib.load(joblib_path)
        print(f"Loaded probe from {joblib_path}")
    elif os.path.exists(pkl_path):
        import pickle
        with open(pkl_path, 'rb') as f:
            probe = pickle.load(f)
        print(f"Loaded probe from {pkl_path}")
    else:
        print(f"Probe not found at {joblib_path} or {pkl_path}")
        return

    # The config is optional here; when present its "concept" entry wins.
    if os.path.exists(config_path):
        with open(config_path, "r", encoding="utf-8") as f:
            config = json.load(f)
        concept_string = config.get("concept", concept_string)

    hook_name = f"blocks.{layer}.hook_resid_post"

    def score_file(path):
        # Read one text, score its tokens, and build the per-token rows once.
        # The total is the sum over those rows, so each group is embedded a
        # single time instead of once for the score and again for the table.
        with open(path, "r", encoding="utf-8") as f:
            text = f.read().strip()
        tokens, token_scores = process_example(
            global_model, global_tokenizer, hook_name, probe, text, global_device
        )
        rows = match_tokens_to_groups(tokens, token_scores, concept_string)
        total = sum(row["activation_validation"] for row in rows)
        return rows, total

    example_rows, example_activation_validation = score_file(example_path)
    unrelated_rows, unrelated_activation_validation = score_file(unrelated_path)

    overall_difference = example_activation_validation - unrelated_activation_validation

    # Print summary results
    print("\n=== Activation Validation Results ===")
    print(f"Concept Key: {concept_key}")
    print(f"Concept String: '{concept_string}'")
    print(f"Example Text Score:   {example_activation_validation:.4f}")
    print(f"Unrelated Text Score: {unrelated_activation_validation:.4f}")
    print(f"Overall Difference:   {overall_difference:.4f}")

    # Print the detailed tables for each text
    print_token_contributions_table(example_rows, label="Example Text")
    print_token_contributions_table(unrelated_rows, label="Unrelated Text")

# ------------------------------------------------------------------
# 7. LIST AVAILABLE CONCEPTS
# ------------------------------------------------------------------
def list_available_concepts(json_file_path):
    """
    Read the "concepts" list from a JSON file and return it as concept keys.

    The file is read as UTF-8 (the same encoding used for the other input files)
    rather than the platform default. Each concept has its spaces replaced by
    underscores. Raises KeyError if the JSON has no "concepts" entry.
    """
    with open(json_file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)
    return [concept.replace(" ", "_") for concept in data['concepts']]

# ------------------------------------------------------------------
# 8. VISUALIZATION FUNCTION
# ------------------------------------------------------------------
def visualize_concept_on_text(text, concept_key, model=global_model, tokenizer=global_tokenizer, layer=22):
    """
    Render per-token probe activations for `text` as colour-coded HTML.

    The probe is loaded from probes/<concept_key>/ (probe.joblib preferred,
    probe.pkl as fallback) and probes/<concept_key>/config.json supplies the
    display name of the concept. Each token becomes a <span> whose background
    goes from white (score 0) to green (score 1), with a tooltip showing the
    token, its position and its activation.

    Args:
        text: Text to visualise.
        concept_key: Probe directory name.
        model, tokenizer: Passed to process_example; default to the globally
            loaded model and tokenizer.
        layer: Block index used to build the hook name
            "blocks.<layer>.hook_resid_post".

    Returns:
        An IPython HTML object, or None (after printing a message) when the
        probe or the config file is missing.
    """
    probe_dir = os.path.join("probes", concept_key)
    joblib_path = os.path.join(probe_dir, "probe.joblib")
    pkl_path = os.path.join(probe_dir, "probe.pkl")
    config_path = os.path.join(probe_dir, "config.json")

    # NOTE: joblib.load / pickle.load can execute arbitrary code; only load probe
    # files that come from a trusted source.
    if os.path.exists(joblib_path):
        import joblib
        probe = joblib.load(joblib_path)
        print(f"Loaded probe from {joblib_path}")
    elif os.path.exists(pkl_path):
        import pickle
        with open(pkl_path, 'rb') as f:
            probe = pickle.load(f)
        print(f"Loaded probe from {pkl_path}")
    else:
        print(f"Probe not found at {joblib_path} or {pkl_path}")
        return None

    if not os.path.exists(config_path):
        print(f"Config not found at {config_path}")
        return None

    with open(config_path, "r", encoding="utf-8") as f:
        config = json.load(f)

    concept = config.get("concept", concept_key.replace("_", " "))
    hook_name = f"blocks.{layer}.hook_resid_post"
    tokens, scores = process_example(model, tokenizer, hook_name, probe, text, global_device)

    # Collect the fragments and join once instead of growing a string with +=.
    # The concept name comes from a file, so escape it before embedding in markup.
    parts = [
        f"<h2>Activation visualization for concept: '{html.escape(str(concept))}'</h2>",
        "<div style='line-height: 2.5; font-family: monospace; font-size: 14px;'>",
    ]

    for i, (token, score) in enumerate(zip(tokens, scores)):
        # html.escape also escapes quotes, keeping the title attribute intact.
        escaped_token = html.escape(token)
        green_intensity = 255
        # Red and blue fade as the score rises, leaving pure green at score 1.
        other_intensity = int(255 * (1 - score))
        color = f"rgb({other_intensity}, {green_intensity}, {other_intensity})"
        parts.append(
            f"<span title='Token: \"{escaped_token}\"\n"
            f"Position: #{i}\n"
            f"Activation: {score:.4f}' style='background-color: {color}; "
            f"padding: 3px; border-radius: 3px; margin: 1px;'>{escaped_token}</span>"
        )

    parts.append("</div>")
    return HTML("".join(parts))

# ------------------------------------------------------------------
# 9. MAIN USAGE EXAMPLE
# ------------------------------------------------------------------
# Read the example and unrelated texts once; they are passed as strings to the
# visualisation calls below (validate_example_unrelated re-reads them by path).
with open("inputs/example.txt", "r", encoding="utf-8") as f:
    example_text = f.read()

with open("inputs/unrelated.txt", "r", encoding="utf-8") as f:
    unrelated_text = f.read()

# Load the concept keys and, for each one, show both visualisations followed
# by the validation summary and token tables.
concepts = list_available_concepts("inputs/concepts_copy.json")
for concept in concepts:
    print("\n============================================")
    print(f"Concept: {concept}")
    
    print("\n--- Visualizing Example Text ---")
    # visualize_concept_on_text returns None when the probe or config is
    # missing, in which case display() is handed None.
    display(visualize_concept_on_text(example_text, concept))

    print("\n--- Visualizing Unrelated Text ---")
    display(visualize_concept_on_text(unrelated_text, concept))

    print("\n--- Validation Results ---")
    validate_example_unrelated("inputs/example.txt", "inputs/unrelated.txt", concept)



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Loading model: gemma-2-2b


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]



Loaded pretrained model gemma-2-2b into HookedTransformer

Concept: heavy_alcohol_use

--- Visualizing Example Text ---
Loaded probe from probes/heavy_alcohol_use/probe.joblib



--- Visualizing Unrelated Text ---
Loaded probe from probes/heavy_alcohol_use/probe.joblib



--- Validation Results ---
Loaded probe from probes/heavy_alcohol_use/probe.joblib

=== Activation Validation Results ===
Concept Key: heavy_alcohol_use
Concept String: 'heavy alcohol use'
Example Text Score:   1.2905
Unrelated Text Score: 0.0000
Overall Difference:   1.2905

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
126   .               or depressive symptoms.                  0.0219       0.1948       0.0043      
164   ).              though BP remains mildly elevated (14... 0.9126       0.2222       0.2028      
247   .               with a BMI of 28.                        0.0762       0.1379       0.0105      
252   ).              7 (overweight). Physical exam is unre... 0.9389       0.2145       0.2014      
354   ).              6% (near prediabetic range). Given hi..


--- Visualizing Unrelated Text ---
Loaded probe from probes/elevated_LDL_cholesterol/probe.joblib



--- Validation Results ---
Loaded probe from probes/elevated_LDL_cholesterol/probe.joblib

=== Activation Validation Results ===
Concept Key: elevated_LDL_cholesterol
Concept String: 'elevated LDL cholesterol'
Example Text Score:   4.2895
Unrelated Text Score: 0.0259
Overall Difference:   4.2636

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
150   ,               His medical history includes hyperten... 1.0000       0.2418       0.2418      
164   ).              though BP remains mildly elevated (14... 0.9615       0.3267       0.3141      
252   ).              7 (overweight). Physical exam is unre... 0.0014       0.1992       0.0003      
361   ,               6% (near prediabetic range). Given hi... 0.9994       0.4665       0.4662      
371   ,               and high total cho


--- Visualizing Unrelated Text ---
Loaded probe from probes/low_HDL_cholesterol/probe.joblib



--- Validation Results ---
Loaded probe from probes/low_HDL_cholesterol/probe.joblib

=== Activation Validation Results ===
Concept Key: low_HDL_cholesterol
Concept String: 'low HDL cholesterol'
Example Text Score:   3.3940
Unrelated Text Score: 0.0000
Overall Difference:   3.3940

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
138   ),              His medical history includes hyperten... 0.9994       0.2219       0.2218      
150   ,               His medical history includes hyperten... 1.0000       0.2219       0.2219      
164   ).              though BP remains mildly elevated (14... 1.0000       0.3097       0.3097      
166    has            though BP remains mildly elevated (14... 0.0048       0.3097       0.0015      
209   .               and a family history of hypertens


--- Visualizing Unrelated Text ---
Loaded probe from probes/high_total_cholesterol/probe.joblib



--- Validation Results ---
Loaded probe from probes/high_total_cholesterol/probe.joblib

=== Activation Validation Results ===
Concept Key: high_total_cholesterol
Concept String: 'high total cholesterol'
Example Text Score:   2.6662
Unrelated Text Score: 0.0000
Overall Difference:   2.6662

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
146                   His medical history includes hyperten... 1.0000       0.2366       0.2366      
252   ).              7 (overweight). Physical exam is unre... 0.1907       0.2701       0.0515      
278                   Laboratory investigations reveal dysl... 0.9993       0.5871       0.5868      
361   ,               6% (near prediabetic range). Given hi... 1.0000       0.3385       0.3385      
371   ,               and high total cholester


--- Visualizing Unrelated Text ---
Loaded probe from probes/not_previously_on_statin/probe.joblib



--- Validation Results ---
Loaded probe from probes/not_previously_on_statin/probe.joblib

=== Activation Validation Results ===
Concept Key: not_previously_on_statin
Concept String: 'not previously on statin'
Example Text Score:   2.4315
Unrelated Text Score: 0.3240
Overall Difference:   2.1076

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
126   .               or depressive symptoms.                  0.0092       0.0823       0.0008      
209   .               and a family history of hypertension ... 1.0000       0.1417       0.1417      
252   ).              7 (overweight). Physical exam is unre... 1.0000       0.1395       0.1395      
266   .               with neurological and cardiovascular ... 0.0093       0.1762       0.0016      
329   ).              Laboratory investi


--- Visualizing Unrelated Text ---
Loaded probe from probes/dyslipidemia/probe.joblib



--- Validation Results ---
Loaded probe from probes/dyslipidemia/probe.joblib

=== Activation Validation Results ===
Concept Key: dyslipidemia
Concept String: 'dyslipidemia'
Example Text Score:   24.7655
Unrelated Text Score: 0.0231
Overall Difference:   24.7424

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
138   ),              His medical history includes hyperten... 0.0099       0.2054       0.0020      
150   ,               His medical history includes hyperten... 0.9853       0.2054       0.2023      
164   ).              though BP remains mildly elevated (14... 0.9999       0.2383       0.2383      
199    hyper          and a family history of hypertension ... 0.8009       0.3664       0.2934      
200   lip             and a family history of hypertension ... 1.0000     


--- Visualizing Unrelated Text ---
Loaded probe from probes/atorvastatin/probe.joblib



--- Validation Results ---
Loaded probe from probes/atorvastatin/probe.joblib

=== Activation Validation Results ===
Concept Key: atorvastatin
Concept String: 'atorvastatin'
Example Text Score:   14.2757
Unrelated Text Score: 0.0000
Overall Difference:   14.2757

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
132    (              His medical history includes hyperten... 0.9996       0.3396       0.3395      
141    with           His medical history includes hyperten... 1.0000       0.3396       0.3396      
146                   His medical history includes hyperten... 0.9998       0.3396       0.3396      
150   ,               His medical history includes hyperten... 1.0000       0.3396       0.3396      
164   ).              though BP remains mildly elevated (14... 0.9999     


--- Visualizing Unrelated Text ---
Loaded probe from probes/acute_liver_disease/probe.joblib



--- Validation Results ---
Loaded probe from probes/acute_liver_disease/probe.joblib

=== Activation Validation Results ===
Concept Key: acute_liver_disease
Concept String: 'acute liver disease'
Example Text Score:   1.6485
Unrelated Text Score: 0.3769
Overall Difference:   1.2716

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
330    Liver          Laboratory investigations reveal dysl... 0.0212       0.3690       0.0078      
410    persistent     persistent liver enzyme elevation,       0.0018       0.5169       0.0009      
411    liver          persistent liver enzyme elevation,       0.9993       0.5169       0.5165      
412    enzyme         persistent liver enzyme elevation,       1.0000       0.5169       0.5169      
506   .               The total cholesterol was 230 whi


--- Visualizing Unrelated Text ---
Loaded probe from probes/elevated_liver_enzymes/probe.joblib



--- Validation Results ---
Loaded probe from probes/elevated_liver_enzymes/probe.joblib

=== Activation Validation Results ===
Concept Key: elevated_liver_enzymes
Concept String: 'elevated liver enzymes'
Example Text Score:   1.7963
Unrelated Text Score: 0.2302
Overall Difference:   1.5661

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
21     with           presents for a routine check-up with ... 0.3549       0.0939       0.0333      
31    .               presents for a routine check-up with ... 0.9992       0.0939       0.0939      
74    .               worsening by end of the workday but r... 0.9886       0.0800       0.0791      
93    .               or neurological deficits.                0.9220       0.1835       0.1692      
95     is             Fatigue is described as 


--- Visualizing Unrelated Text ---
Loaded probe from probes/pregnancy/probe.joblib



--- Validation Results ---
Loaded probe from probes/pregnancy/probe.joblib

=== Activation Validation Results ===
Concept Key: pregnancy
Concept String: 'pregnancy'
Example Text Score:   0.0989
Unrelated Text Score: 0.0000
Overall Difference:   0.0989

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
150   ,               His medical history includes hyperten... 0.0021       0.0012       0.0000      
164   ).              though BP remains mildly elevated (14... 0.7130       -0.0064      0.0046      
209   .               and a family history of hypertension ... 0.0051       0.1717       0.0009      
252   ).              7 (overweight). Physical exam is unre... 0.9777       0.0952       0.0931      
454   .               should be considered before initiation.  0.0016       0.0414   


--- Visualizing Unrelated Text ---
Loaded probe from probes/renal_impairment/probe.joblib



--- Validation Results ---
Loaded probe from probes/renal_impairment/probe.joblib

=== Activation Validation Results ===
Concept Key: renal_impairment
Concept String: 'renal impairment'
Example Text Score:   0.6775
Unrelated Text Score: 0.3572
Overall Difference:   0.3203

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
126   .               or depressive symptoms.                  0.0418       0.2018       0.0084      
146                   His medical history includes hyperten... 1.0000       0.2943       0.2943      
164   ).              though BP remains mildly elevated (14... 1.0000       0.2191       0.2191      
209   .               and a family history of hypertension ... 0.5339       0.2220       0.1185      
252   ).              7 (overweight). Physical exam is unre... 0


--- Visualizing Unrelated Text ---
Loaded probe from probes/hypothyroidism/probe.joblib



--- Validation Results ---
Loaded probe from probes/hypothyroidism/probe.joblib

=== Activation Validation Results ===
Concept Key: hypothyroidism
Concept String: 'hypothyroidism'
Example Text Score:   0.9122
Unrelated Text Score: 0.0000
Overall Difference:   0.9122

--- Token-Level Contributions (Example Text) ---
Idx   Token           Group                                    Activation   Similarity   ActVal      
-----------------------------------------------------------------------------------------------------
164   ).              though BP remains mildly elevated (14... 1.0000       0.2085       0.2085      
252   ).              7 (overweight). Physical exam is unre... 0.2201       0.1634       0.0360      
271    reveal         Laboratory investigations reveal dysl... 0.0261       0.1443       0.0038      
275   :               Laboratory investigations reveal dysl... 0.0011       0.1443       0.0002      
329   ).              Laboratory investigations reveal dysl... 0.8072 