In [3]:
import pandas as pd
from langchain_ollama import ChatOllama
from sklearn.metrics import accuracy_score, classification_report

# Initialize Llama3 with optimized settings
llm = ChatOllama(
    model="mistral:latest",
    temperature=0.3,  # Slight creativity for medical reasoning
    top_p=0.9,
    repeat_penalty=1.1
)

def load_data(json_path):
    """Load and prepare data from JSON"""
    df = pd.read_json(json_path)
    
    # Convert all numerical fields and handle missing values
    num_cols = ['age', 'sysBP', 'diaBP', 'totChol', 'BMI', 'currentSmoker', 'diabetes']
    df[num_cols] = df[num_cols].apply(pd.to_numeric, errors='coerce')
    df.fillna(df.median(numeric_only=True), inplace=True)
    
    return df

def create_one_shot_example(df):
    """Select and format the most informative example"""
    # Find a positive case (TenYearCHD=1) with clear risk factors
    positive_example = df[df['TenYearCHD'] == 1].iloc[0]
    
    example_str = (
        f"Example Patient:\n"
        f"- Age: {int(positive_example['age'])}\n"
        f"- BP: {positive_example['sysBP']:.0f}/{positive_example['diaBP']:.0f}\n"
        f"- Cholesterol: {positive_example['totChol']:.0f}\n"
        f"- Smoker: {'Yes' if positive_example['currentSmoker'] else 'No'}\n"
        f"- BMI: {positive_example['BMI']:.1f}\n"
        f"- Diabetes: {'Yes' if positive_example['diabetes'] else 'No'}\n"
        f"Risk Assessment: 1 (High Risk)\n\n"
        f"Reason: This patient has multiple risk factors including "
        f"{'hypertension ' if positive_example['sysBP'] > 140 else ''}"
        f"{'high cholesterol ' if positive_example['totChol'] > 240 else ''}"
        f"{'smoking ' if positive_example['currentSmoker'] else ''}"
        f"{'diabetes ' if positive_example['diabetes'] else ''}"
        f"{'obesity ' if positive_example['BMI'] > 30 else ''}"
    )
    return example_str

def predict_with_one_shot(row, example):
    """Make prediction using one-shot learning"""
    prompt = f"""As a cardiology AI, assess 10-year CHD risk using this example:

{example}

Now evaluate this new patient:
- Age: {int(row['age'])}
- BP: {row['sysBP']:.0f}/{row['diaBP']:.0f}
- Cholesterol: {row['totChol']:.0f}
- Smoker: {'Yes' if row['currentSmoker'] else 'No'}
- BMI: {row['BMI']:.1f}
- Diabetes: {'Yes' if row['diabetes'] else 'No'}

Instructions:
1. Compare to the example
2. Analyze risk factors
3. Return ONLY 0 (low risk) or 1 (high risk)

Your prediction:"""
    
    response = llm.invoke([
        {"role": "system", "content": "You are a cardiac risk assessment AI."},
        {"role": "user", "content": prompt}
    ])
    
    # Robust output parsing
    output = response.content.strip()
    if output.startswith(('0', '1')):
        return int(output[0])
    return 0  # Default to low risk if unclear

# Main execution
if __name__ == "__main__":
    # Load data
    df = load_data('framingham.json')
    
    # Create one-shot example
    example = create_one_shot_example(df)
    
    # Prepare test set (excluding our example case)
    test_df = df.iloc[1:21].copy()
    true_labels = test_df['TenYearCHD'].astype(int)
    
    # Get predictions
    predictions = test_df.apply(
        lambda x: predict_with_one_shot(x, example), 
        axis=1
    ).astype(int)
    
    # Evaluate
    print(f"Accuracy: {accuracy_score(true_labels, predictions):.1%}")
    print("\nClassification Report:")
    print(classification_report(true_labels, predictions))
    
    # Compare predictions
    print("\nDetailed Comparison:")
    results = pd.DataFrame({
        'Actual': true_labels,
        'Predicted': predictions,
        'Match': true_labels == predictions
    })
    print(results)

Accuracy: 80.0%

Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.94      0.88        16
           1       0.50      0.25      0.33         4

    accuracy                           0.80        20
   macro avg       0.67      0.59      0.61        20
weighted avg       0.77      0.80      0.77        20


Detailed Comparison:
    Actual  Predicted  Match
1        0          0   True
2        0          0   True
3        1          1   True
4        0          0   True
5        0          0   True
6        1          0  False
7        0          0   True
8        0          0   True
9        0          0   True
10       0          0   True
11       0          0   True
12       0          1  False
13       0          0   True
14       0          0   True
15       1          0  False
16       0          0   True
17       1          0  False
18       0          0   True
19       0          0   True
20       0          0   True
