In [2]:
import json
import logging
import re

import pandas as pd
from langchain_ollama import ChatOllama

In [3]:
# Initialize the chat model served through Ollama (the "mistral:latest" tag)
llm = ChatOllama(model="mistral:latest")

In [4]:
def load_and_prepare_data(json_path):
    """Load patient records from a JSON file into a cleaned DataFrame.

    The JSON document is flattened with ``pd.json_normalize``. Every column
    listed in ``numeric_cols`` that is present is coerced to a numeric dtype
    (unparseable values become NaN) and its missing values are replaced by
    that column's median. Columns not in the list are left untouched.

    Args:
        json_path: Path to the JSON file to read.

    Returns:
        pandas.DataFrame with the known numeric columns coerced and imputed.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, encoding="utf-8") as f:
        data = json.load(f)

    df = pd.json_normalize(data)

    numeric_cols = ['age', 'sysBP', 'diaBP', 'totChol', 'BMI', 'glucose',
                    'currentSmoker', 'diabetes', 'male', 'BPMeds']
    for col in numeric_cols:
        if col not in df.columns:
            continue
        values = pd.to_numeric(df[col], errors='coerce')
        # Assign the filled column back instead of calling
        # ``df[col].fillna(..., inplace=True)``: the in-place call acts on a
        # temporary selection and is a no-op under pandas Copy-on-Write.
        # Median imputation is used rather than 0; a column that is entirely
        # NaN has a NaN median and therefore stays NaN.
        df[col] = values.fillna(values.median())

    return df

In [10]:
# First standalone 0 or 1 in a reply: not part of a longer number ("10")
# and not the integer part of a decimal ("0.5").
_BINARY_LABEL_RE = re.compile(r"(?<![\d.])[01](?!\.?\d)")


def _build_chd_prompt(row):
    """Render the risk-assessment prompt text for one patient record."""
    features = {
        'age': int(row['age']),
        'bp': f"{int(row['sysBP'])}/{int(row['diaBP'])}",
        'chol': int(row['totChol']),
        'smoker': bool(row['currentSmoker']),
        'bmi': float(row['BMI']),
        'diabetes': bool(row['diabetes'])
    }

    # NOTE(review): the point scheme below is a simplified heuristic written
    # into the prompt, not the published Framingham tables -- confirm that
    # this is intended.
    return f"""As a cardiology specialist, evaluate this patient's 10-year CHD risk:

Patient Data:
- Age: {features['age']}
- Blood Pressure: {features['bp']} mmHg
- Cholesterol: {features['chol']} mg/dL
- Smoker: {'Yes' if features['smoker'] else 'No'}
- BMI: {features['bmi']:.1f}
- Diabetes: {'Yes' if features['diabetes'] else 'No'}

Analysis Guidelines:
1. Calculate Framingham Risk Score:
   - Age >50 (1 point)
   - SBP >140 or DBP >90 (1 point)
   - Cholesterol >240 (1 point)
   - Smoking (1 point)
   - BMI >30 (1 point)
   - Diabetes (2 points)
2. Score ≥3 indicates high risk

Decision:
- Only respond with '0' (low risk) or '1' (high risk)
- No explanations needed

Your prediction:"""


def _parse_binary_label(text):
    """Return the first standalone 0/1 found in ``text`` as an int, else None."""
    match = _BINARY_LABEL_RE.search(text)
    return int(match.group()) if match else None


def enhanced_predict(row, model=None):
    """Ask the chat model for a binary 10-year CHD risk label for one patient.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing 'age',
            'sysBP', 'diaBP', 'totChol', 'currentSmoker', 'BMI' and
            'diabetes'.
        model: Optional object exposing ``invoke(messages)`` and returning a
            response with a ``content`` attribute. Defaults to the
            notebook-level ``llm``.

    Returns:
        1 if the model answers high risk, 0 if it answers low risk. Also 0
        when the call fails or the reply contains no standalone 0/1; those
        cases are logged as warnings so the fallback is visible.

    Raises:
        KeyError, ValueError, TypeError: If ``row`` lacks a required field or
            a field cannot be converted (raised while building the prompt,
            before the model is called).
    """
    prompt = _build_chd_prompt(row)
    client = llm if model is None else model
    log = logging.getLogger(__name__)

    try:
        response = client.invoke([
            {"role": "system", "content": "You are a cardiac risk assessment AI. Follow the instructions precisely."},
            {"role": "user", "content": prompt}
        ])
        reply = str(response.content)
    except Exception as exc:  # best-effort: one failed call must not abort the batch
        log.warning("CHD prediction call failed (%s); falling back to 0", exc)
        return 0

    label = _parse_binary_label(reply)
    if label is None:
        log.warning("Unparseable CHD prediction %r; falling back to 0", reply)
        return 0
    return label


In [11]:
# Evaluation driver: score the LLM classifier against recorded outcomes
if __name__ == "__main__":
    # Read and clean the full dataset
    df = load_and_prepare_data('framingham.json')

    # Evaluate on the leading 20 records only
    test_df = df.head(20)
    true_labels = test_df['TenYearCHD']
    features = test_df.drop(columns=['TenYearCHD'])

    # One model call per row; both sides are cast to int before comparing
    predictions = features.apply(enhanced_predict, axis=1).astype(int)
    true_labels = true_labels.astype(int)

    # Share of rows where the prediction equals the recorded label
    accuracy = (predictions == true_labels).mean()
    print(f"Accuracy: {accuracy:.2%}")

    # Side-by-side view of the first ten rows for quick inspection
    print(
        "\nSample predictions:",
        pd.DataFrame({
            'Actual': true_labels,
            'Predicted': predictions,
            'Match': true_labels == predictions
        }).iloc[:10],
        sep="\n",
    )

Accuracy: 45.00%

Sample predictions:
   Actual  Predicted  Match
0       0          0   True
1       0          1  False
2       0          1  False
3       1          1   True
4       0          1  False
5       0          1  False
6       1          1   True
7       0          1  False
8       0          0   True
9       0          1  False
