In [9]:
pip install openai==0.28



In [10]:
pip install faiss-cpu



In [11]:
import json
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import faiss
from sklearn.preprocessing import normalize

In [18]:
# Load the enhanced summaries
input_file = '/content/drive/Shareddrives/DATA298B/Readmission/final_enhanced_patient_summaries.json'
output_file = '/content/drive/Shareddrives/DATA298B/Readmission/final_patient_summaries_with_embeddings.json'

In [19]:
# Set up OpenAI API key
import openai

openai.api_key = "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"

In [20]:
# Read the enhanced summaries
df = pd.read_json(input_file, lines=True)

In [21]:
# Function to generate embeddings
def generate_embeddings(df):
    embeddings = []
    for index, summary in enumerate(df['enhanced_summary']):
        try:
            response = openai.Embedding.create(
                input=summary,
                model="text-embedding-ada-002"
            )
            embedding = response['data'][0]['embedding']
            embeddings.append(embedding)
            # Log progress
            if (index + 1) % 50 == 0:
                print(f"Processed {index + 1}/{len(df)} embeddings.")
        except Exception as e:
            print(f"Error generating embedding for index {index}: {e}")
            embeddings.append(None)  # Handle errors gracefully

    return embeddings

# Generate embeddings for the enhanced summaries
df['embeddings'] = generate_embeddings(df)

# Filter out rows without embeddings
df = df[df['embeddings'].notna()]

# Save the DataFrame with embeddings
df.to_json(output_file, orient='records', lines=True)
print(f"Embeddings saved successfully to: {output_file}")

Error generating embedding for index 0: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error generating embedding for index 1: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error generating embedding for index 2: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error generating embedding for index 3: You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.
Error generating embedding for index 4: You exceeded your current quota,

Create FAISS Index

In [None]:
# Convert embeddings into a numpy array
embedding_matrix = np.array(df['embeddings'].tolist(), dtype='float32')

# Check the shape of the embedding matrix
print(f"Embedding matrix shape: {embedding_matrix.shape}")

# Determine the embedding dimension (e.g., 1536 for text-embedding-ada-002)
embedding_dim = embedding_matrix.shape[1]

# Step 1: Initialize a FAISS index
index = faiss.IndexFlatL2(embedding_dim)

# Step 2: Add embeddings to the FAISS index
index.add(embedding_matrix)
print(f"Number of embeddings added to the index: {index.ntotal}")

# Step 3: Save the FAISS index to a file
index_file_path = 'patient_embeddings.index'
faiss.write_index(index, index_file_path)
print(f"FAISS index has been successfully saved to: {index_file_path}")

Verification

In [None]:
# Load the FAISS index
loaded_index = faiss.read_index(index_file_path)
print("FAISS index loaded successfully.")

# Example query to test the index
query_summary = "Patient is a 65-year-old female with a history of diabetes, admitted for chest pain."
try:
    # Generate the embedding for the query
    query_embedding = openai.Embedding.create(
        input=query_summary,
        model="text-embedding-ada-002"
    )['data'][0]['embedding']

    # Search the FAISS index for top 5 similar cases
    distances, indices = loaded_index.search(np.array([query_embedding], dtype='float32'), k=5)

    # Display the top 5 similar patient summaries
    print("Top 5 most similar patient summaries:")
    for i, idx in enumerate(indices[0]):
        print(f"Rank {i + 1}: Summary Index {idx}, Distance: {distances[0][i]}")
        print(df.iloc[idx]['enhanced_summary'])
        print("-" * 50)

except Exception as e:
    print(f"Error during search: {e}")


Integrate with the RAG System - Prediction

In [None]:
# Load the FAISS index and the DataFrame
index_file_path = 'patient_embeddings.index'
data_file_path = 'final_patient_summaries_with_embeddings.json'

In [None]:
# Load the FAISS index
index = faiss.read_index(index_file_path)
print("FAISS index loaded successfully.")


FAISS index loaded successfully.


In [None]:
# Load the DataFrame
df = pd.read_json(data_file_path, lines=True)
print("Data loaded successfully.")

Data loaded successfully.


In [None]:
# Function to generate embeddings for a query
def generate_query_embedding(query):
    try:
        response = openai.Embedding.create(
            input=query,
            model="text-embedding-ada-002"
        )
        return response['data'][0]['embedding']
    except Exception as e:
        print(f"Error generating query embedding: {e}")
        return None

# Function to retrieve similar patient summaries using FAISS
def retrieve_similar_summaries(query_embedding, index, df, k=5):
    try:
        # Search for the top K most similar summaries
        distances, indices = index.search(np.array([query_embedding], dtype='float32'), k)
        similar_summaries = [df.iloc[idx]['enhanced_summary'] for idx in indices[0]]
        return similar_summaries
    except Exception as e:
        print(f"Error during retrieval: {e}")
        return []

# Function to make readmission prediction using GPT-4
def predict_readmission(query_summary, index, df):
    try:
        # Step 1: Generate query embedding
        query_embedding = generate_query_embedding(query_summary)
        if query_embedding is None:
            return "Error generating query embedding."

        # Step 2: Retrieve similar patient summaries
        similar_summaries = retrieve_similar_summaries(query_embedding, index, df, k=5)
        if not similar_summaries:
            return "No similar patient cases found."

        # Step 3: Construct the prompt for GPT-4
        context = "\n\n".join(similar_summaries)
        prompt = f"{context}\n\nBased on the above patient cases, predict if the new patient will be readmitted within 30 days. Answer Yes or No."

        # Step 4: Query GPT-4 for the prediction
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=100,
            temperature=0.7
        )
        prediction = response['choices'][0]['message']['content'].strip()
        return prediction
    except Exception as e:
        print(f"Error during prediction: {e}")
        return "Error during prediction."

# Example usage
query_summary = "Patient is a 68-year-old male, diagnosed with pneumonia, discharged after a 5-day stay."
prediction = predict_readmission(query_summary, index, df)
print(f"Readmission Prediction: {prediction}")

Readmission Prediction: No


Evaluation

In [None]:
# Step 1: Load the JSON Data (Line-by-Line for NDJSON Format)
file_path = "final_patient_summaries_with_embeddings.json"
data = []

print("Loading the JSON file...")
with open(file_path, "r") as f:
    for line in f:
        try:
            record = json.loads(line.strip())
            data.append(record)
        except json.JSONDecodeError as e:
            print(f"Error decoding JSON on line: {line.strip()}")
            print(f"Exception: {e}")

print(f"Successfully loaded {len(data)} records.")

# Step 2: Convert the List of Records to a DataFrame
df = pd.DataFrame(data)

Loading the JSON file...
Successfully loaded 1224 records.


In [None]:
# Step 3: Data Preprocessing
# Convert 'readmission_status' to numerical format
df['readmission_status'] = df['readmission_status'].map({'yes': 1, 'no': 0})

# Drop rows with missing values in 'readmission_status'
df.dropna(subset=['readmission_status'], inplace=True)

# Check if the DataFrame is not empty
if df.empty:
    raise ValueError("The DataFrame is empty after filtering. Check the readmission_status values.")

# Step 4: Extract Embeddings and Labels
embeddings = np.array(df['embeddings'].tolist())
labels = df['readmission_status'].values

# Validate embeddings and labels
if len(embeddings) == 0 or len(labels) == 0:
    raise ValueError("No valid embeddings or labels found. Please check the data.")

# Step 5: Split the Data into Training and Test Sets
print("Splitting the data into training and test sets...")
train_df, test_df = train_test_split(
    df, test_size=0.2, random_state=42, stratify=df['readmission_status']
)

train_embeddings = np.array(train_df['embeddings'].tolist())
test_embeddings = np.array(test_df['embeddings'].tolist())
test_labels = test_df['readmission_status'].values

# Step 6: Build FAISS Index
embedding_dim = len(train_embeddings[0])
print(f"Building FAISS index with embedding dimension: {embedding_dim}")
index = faiss.IndexFlatL2(embedding_dim)
index.add(train_embeddings)

# Step 7: Evaluate the Model Using Top-k Nearest Neighbors
k = 3  # Top-k neighbors
predictions = []

print("Evaluating the model...")
for test_embedding in test_embeddings:
    # Retrieve top-k nearest neighbors
    D, I = index.search(np.array([test_embedding]), k)
    # Get the readmission status of the nearest neighbors
    neighbor_labels = np.array(train_df.iloc[I[0]]['readmission_status'].tolist(), dtype=int)
    # Predict based on the majority vote of neighbors
    predicted_label = int(np.round(np.mean(neighbor_labels)))
    predictions.append(predicted_label)

# Step 8: Calculate Evaluation Metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

# Step 9: Display the Results
print("\nEvaluation Results:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Splitting the data into training and test sets...
Building FAISS index with embedding dimension: 1536
Evaluating the model...

Evaluation Results:
Accuracy: 0.78
Precision: 0.76
Recall: 0.82
F1-Score: 0.79


In [None]:
# Step 3: Data Preprocessing
df['readmission_status'] = df['readmission_status'].map({'yes': 1, 'no': 0})
df.dropna(subset=['readmission_status'], inplace=True)

if df.empty:
    raise ValueError("The DataFrame is empty after filtering. Check the readmission_status values.")

# Step 4: Extract and Normalize Embeddings
embeddings = np.array(df['embeddings'].tolist())
labels = df['readmission_status'].values
embeddings = normalize(embeddings)

# Step 5: Split the Data into Training and Test Sets
print("Splitting the data into training and test sets...")
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['readmission_status'])

train_embeddings = normalize(np.array(train_df['embeddings'].tolist()))
test_embeddings = normalize(np.array(test_df['embeddings'].tolist()))
test_labels = test_df['readmission_status'].values

# Step 6: Build FAISS Index
embedding_dim = train_embeddings.shape[1]
print(f"Building FAISS index with embedding dimension: {embedding_dim}")
index = faiss.IndexFlatL2(embedding_dim)
index.add(train_embeddings)

# Step 7: Enhanced Evaluation Using Hybrid Prediction
k = 5  # Top-k neighbors
predictions = []

print("Evaluating the model with hybrid RAG predictions...")
for i, test_embedding in enumerate(test_embeddings):
    # Retrieve top-k nearest neighbors
    D, I = index.search(np.array([test_embedding]), k)

    # Get the readmission status of the nearest neighbors
    neighbor_labels = np.array(train_df.iloc[I[0]]['readmission_status'].tolist(), dtype=int)
    weights = 1 / (D[0] + 1e-5)  # Inverse distance weighting

    # k-NN Weighted Majority Vote Prediction
    knn_prediction = np.dot(weights, neighbor_labels) / weights.sum()
    knn_prediction = int(np.round(knn_prediction))

    # Enhanced Prompting to GPT-4 (Simulated with knn_prediction for demonstration)
    retrieved_summaries = " ".join(train_df.iloc[I[0]]['enhanced_summary'].tolist())
    prompt = (
        f"The patient's current summary is: {test_df.iloc[i]['enhanced_summary']}.\n"
        f"Based on similar patient records: {retrieved_summaries}.\n"
        "What is the likelihood of the patient being readmitted within 30 days? Provide a yes or no answer."
    )

    # Placeholder GPT-4 prediction (replace with actual GPT-4 call if integrated)
    gpt_prediction = knn_prediction  # Simulating GPT-4 prediction with k-NN result for now

    # Hybrid Prediction (Average of k-NN and GPT-4 Predictions)
    final_prediction = int(np.round((knn_prediction + gpt_prediction) / 2))
    predictions.append(final_prediction)

# Step 8: Calculate Evaluation Metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

# Step 9: Display the Results
print("\nEnhanced RAG Model Evaluation Results:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")

Splitting the data into training and test sets...
Building FAISS index with embedding dimension: 1536
Evaluating the model with hybrid RAG predictions...

Enhanced RAG Model Evaluation Results:
Accuracy: 0.80
Precision: 0.78
Recall: 0.83
F1-Score: 0.80


In [None]:
import json
import numpy as np
import pandas as pd
import openai
import faiss
import time
from tqdm import tqdm
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Step 1: Load and Preprocess Data
file_path = "final_patient_summaries_with_embeddings.json"
data = [json.loads(line) for line in open(file_path, "r")]
df = pd.DataFrame(data)
df['readmission_status'] = df['readmission_status'].map({'yes': 1, 'no': 0})
df.dropna(subset=['readmission_status'], inplace=True)

# Step 2: Normalize Embeddings
embeddings = normalize(np.array(df['embeddings'].tolist()))
labels = df['readmission_status'].values

# Step 3: Split Data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['readmission_status'])
train_embeddings = normalize(np.array(train_df['embeddings'].tolist()))
test_embeddings = normalize(np.array(test_df['embeddings'].tolist()))
test_labels = test_df['readmission_status'].values

# Step 4: Build FAISS Index
embedding_dim = train_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(train_embeddings)

# Function for GPT-4-Turbo Prediction with Caching
def gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries):
    prompt = (
        f"The patient's summary is: {patient_summary}\n"
        f"Based on similar patient cases: {retrieved_summaries}\n"
        "Will the patient be readmitted within 30 days? Answer 'yes' or 'no'."
    )
    prompt_hash = hash_prompt(prompt)

    # Check if the response is already cached
    if prompt_hash in response_cache:
        return response_cache[prompt_hash]

    max_retries = 5
    retries = 0
    while retries < max_retries:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[{"role": "user", "content": prompt}]
            )
            answer = response['choices'][0]['message']['content'].strip().lower()
            prediction = 1 if "yes" in answer else 0

            # Cache the response
            response_cache[prompt_hash] = prediction
            return prediction
        except openai.error.RateLimitError:
            wait_time = 2 ** (retries + 1)
            print(f"Rate limit reached. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            retries += 1
        except Exception as e:
            print(f"Error: {e}")
            return None
    return None

# Function for Processing a Single Sample
def process_sample(i, test_embedding, test_df, train_df, index, k):
    D, I = index.search(np.array([test_embedding]), k)
    neighbor_labels = np.array(train_df.iloc[I[0]]['readmission_status'].tolist(), dtype=int)
    weights = 1 / (D[0] + 1e-5)
    knn_prediction = np.dot(weights, neighbor_labels) / weights.sum()
    knn_prediction = int(np.round(knn_prediction))

    # Retrieve summaries for GPT-3.5 context
    retrieved_summaries = " ".join(train_df.iloc[I[0]]['enhanced_summary'].tolist())
    patient_summary = test_df.iloc[i]['enhanced_summary']

    # Get GPT-3.5 prediction
    gpt_prediction = gpt3_predict(patient_summary, retrieved_summaries)

    # Hybrid Prediction
    if gpt_prediction is not None:
        return int(np.round((0.8 * knn_prediction + 0.2 * gpt_prediction)))
    return knn_prediction

# Evaluation with Progress Indicator
k = 5
predictions = []

print("Evaluating the model with simplified GPT-3.5-turbo predictions...")
for i, test_embedding in tqdm(enumerate(test_embeddings), total=len(test_embeddings)):
    prediction = process_sample(i, test_embedding, test_df, train_df, index, k)
    predictions.append(prediction)

# Calculate Evaluation Metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

# Display the Results
print("\nSimplified RAG Model Evaluation Results with GPT-3.5-turbo:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Evaluating the model with simplified GPT-3.5-turbo predictions...


  2%|▏         | 4/245 [00:01<02:03,  1.96it/s]

Rate limit reached. Retrying in 2 seconds...


 15%|█▍        | 36/245 [00:18<02:14,  1.56it/s]

Rate limit reached. Retrying in 2 seconds...


 18%|█▊        | 45/245 [00:22<01:34,  2.11it/s]

Rate limit reached. Retrying in 2 seconds...


 22%|██▏       | 55/245 [00:27<01:21,  2.32it/s]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 2 seconds...


 25%|██▌       | 62/245 [00:31<01:32,  1.97it/s]

Rate limit reached. Retrying in 4 seconds...
Rate limit reached. Retrying in 4 seconds...


 30%|███       | 74/245 [00:36<01:28,  1.94it/s]

Rate limit reached. Retrying in 8 seconds...


 31%|███▏      | 77/245 [00:38<01:12,  2.32it/s]

Rate limit reached. Retrying in 2 seconds...


 33%|███▎      | 82/245 [00:40<01:19,  2.05it/s]

Rate limit reached. Retrying in 2 seconds...


 35%|███▍      | 85/245 [00:42<01:37,  1.63it/s]

Rate limit reached. Retrying in 4 seconds...


 35%|███▌      | 86/245 [00:43<01:36,  1.64it/s]

Rate limit reached. Retrying in 2 seconds...


 37%|███▋      | 91/245 [00:44<00:52,  2.94it/s]

Rate limit reached. Retrying in 4 seconds...


 39%|███▉      | 95/245 [00:47<01:22,  1.83it/s]

Rate limit reached. Retrying in 4 seconds...


 40%|████      | 99/245 [00:48<01:00,  2.42it/s]

Rate limit reached. Retrying in 8 seconds...


 41%|████      | 101/245 [00:49<01:09,  2.06it/s]

Rate limit reached. Retrying in 2 seconds...


 42%|████▏     | 104/245 [00:50<00:56,  2.50it/s]

Rate limit reached. Retrying in 8 seconds...


 44%|████▍     | 109/245 [00:53<01:10,  1.94it/s]

Rate limit reached. Retrying in 4 seconds...


 48%|████▊     | 117/245 [00:58<01:24,  1.51it/s]

Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 4 seconds...


 51%|█████▏    | 126/245 [01:16<01:27,  1.37it/s]

Rate limit reached. Retrying in 8 seconds...


 53%|█████▎    | 129/245 [01:19<01:23,  1.39it/s]

Rate limit reached. Retrying in 32 seconds...


 59%|█████▉    | 145/245 [01:26<00:43,  2.30it/s]

Rate limit reached. Retrying in 16 seconds...


 60%|██████    | 148/245 [01:27<00:45,  2.14it/s]

Rate limit reached. Retrying in 32 seconds...


 76%|███████▋  | 187/245 [01:45<00:23,  2.52it/s]

Rate limit reached. Retrying in 2 seconds...


 80%|████████  | 196/245 [01:49<00:20,  2.43it/s]

Rate limit reached. Retrying in 4 seconds...


 82%|████████▏ | 202/245 [01:52<00:23,  1.85it/s]

Rate limit reached. Retrying in 2 seconds...


 84%|████████▎ | 205/245 [01:55<00:30,  1.29it/s]

Rate limit reached. Retrying in 8 seconds...


 84%|████████▍ | 206/245 [01:56<00:32,  1.21it/s]

Rate limit reached. Retrying in 4 seconds...


 88%|████████▊ | 216/245 [02:02<00:18,  1.61it/s]

Rate limit reached. Retrying in 8 seconds...


 90%|█████████ | 221/245 [02:05<00:12,  1.85it/s]

Rate limit reached. Retrying in 16 seconds...


 96%|█████████▋| 236/245 [02:12<00:03,  2.65it/s]

Rate limit reached. Retrying in 2 seconds...


 97%|█████████▋| 238/245 [02:13<00:02,  2.42it/s]

Rate limit reached. Retrying in 16 seconds...


100%|█████████▉| 244/245 [02:15<00:00,  2.18it/s]

Rate limit reached. Retrying in 4 seconds...


100%|██████████| 245/245 [02:16<00:00,  1.80it/s]


Simplified RAG Model Evaluation Results with GPT-3.5-turbo:
Accuracy: 0.80
Precision: 0.78
Recall: 0.83
F1-Score: 0.80





In [None]:
import json
import numpy as np
import pandas as pd
import openai
import faiss
import time
from tqdm import tqdm
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


# Step 1: Load and Preprocess Data
file_path = "final_patient_summaries_with_embeddings.json"
data = [json.loads(line) for line in open(file_path, "r")]
df = pd.DataFrame(data)
df['readmission_status'] = df['readmission_status'].map({'yes': 1, 'no': 0})
df.dropna(subset=['readmission_status'], inplace=True)

# Step 2: Normalize Embeddings
embeddings = normalize(np.array(df['embeddings'].tolist()))
labels = df['readmission_status'].values

# Step 3: Split Data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['readmission_status'])
train_embeddings = normalize(np.array(train_df['embeddings'].tolist()))
test_embeddings = normalize(np.array(test_df['embeddings'].tolist()))
test_labels = test_df['readmission_status'].values

# Step 4: Build Optimized FAISS Index (HNSW)
embedding_dim = train_embeddings.shape[1]
index = faiss.IndexHNSWFlat(embedding_dim, 32)  # HNSW Index with 32 neighbors (M parameter)
index.hnsw.efSearch = 100  # Increase search quality
index.add(train_embeddings)

# Step 5: Define Caching for GPT-4-Turbo Predictions
response_cache = {}

def hash_prompt(prompt):
    return hash(prompt)

# Function for GPT-4-Turbo Prediction with Caching
def gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries):
    prompt = (
        f"Patient's summary:\n{patient_summary}\n"
        f"Similar patient cases summaries:\n{retrieved_summaries}\n"
        "Will the patient likely be readmitted within 30 days? Answer 'yes' or 'no' and provide a brief reasoning."
    )
    prompt_hash = hash_prompt(prompt)

    # Check if the response is already cached
    if prompt_hash in response_cache:
        return response_cache[prompt_hash]

    max_retries = 5
    retries = 0
    while retries < max_retries:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2  # Lower temperature for deterministic output
            )
            answer = response['choices'][0]['message']['content'].strip().lower()
            prediction = 1 if "yes" in answer else 0

            # Cache the response
            response_cache[prompt_hash] = prediction
            return prediction
        except openai.error.RateLimitError:
            wait_time = 2 ** (retries + 1)
            print(f"Rate limit reached. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            retries += 1
        except Exception as e:
            print(f"Error: {e}")
            return None
    return None

# Function for Processing a Single Sample with Adaptive Weighting
def process_sample(i, test_embedding, test_df, train_df, index, k):
    D, I = index.search(np.array([test_embedding]), k)
    neighbor_labels = np.array(train_df.iloc[I[0]]['readmission_status'].tolist(), dtype=int)

    # Use Softmax Weighting for k-NN Prediction
    weights = np.exp(-D[0])
    knn_prediction = np.dot(weights, neighbor_labels) / weights.sum()
    knn_prediction = int(np.round(knn_prediction))

    # Retrieve summaries for GPT-4-turbo context
    retrieved_summaries = " ".join(train_df.iloc[I[0]]['enhanced_summary'].tolist())
    patient_summary = test_df.iloc[i]['enhanced_summary']

    # Get GPT-4-turbo prediction
    gpt_prediction = gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries)

    # Adaptive Hybrid Prediction
    knn_confidence = 1 / (np.std(D[0]) + 1e-5)
    gpt_confidence = 0.8  # Fixed confidence for GPT-4-turbo (can be adjusted based on metrics)
    total_confidence = knn_confidence + gpt_confidence

    if gpt_prediction is not None:
        hybrid_prediction = int(np.round(
            (knn_confidence / total_confidence) * knn_prediction +
            (gpt_confidence / total_confidence) * gpt_prediction
        ))
        return hybrid_prediction
    return knn_prediction

# Evaluation with Progress Indicator
k = 5
predictions = []

print("Evaluating the enhanced RAG model with GPT-4-turbo predictions...")
for i, test_embedding in tqdm(enumerate(test_embeddings), total=len(test_embeddings)):
    prediction = process_sample(i, test_embedding, test_df, train_df, index, k)
    predictions.append(prediction)

# Calculate Evaluation Metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

# Display the Results
print("\nEnhanced RAG Model Evaluation Results with GPT-4-turbo:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Evaluating the enhanced RAG model with GPT-4-turbo predictions...


  0%|          | 0/245 [00:00<?, ?it/s]

Rate limit reached. Retrying in 4 seconds...
Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 16 seconds...


  1%|          | 2/245 [00:15<30:44,  7.59s/it]

Rate limit reached. Retrying in 2 seconds...


  1%|          | 3/245 [00:20<25:32,  6.33s/it]

Rate limit reached. Retrying in 32 seconds...
Rate limit reached. Retrying in 32 seconds...
Rate limit reached. Retrying in 2 seconds...


  2%|▏         | 4/245 [00:28<28:00,  6.97s/it]

Rate limit reached. Retrying in 2 seconds...


  2%|▏         | 5/245 [00:32<23:37,  5.91s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 4 seconds...


  2%|▏         | 6/245 [00:45<33:11,  8.33s/it]

Rate limit reached. Retrying in 8 seconds...


  3%|▎         | 8/245 [00:55<26:31,  6.71s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 4 seconds...


  4%|▎         | 9/245 [01:01<25:28,  6.48s/it]

Rate limit reached. Retrying in 8 seconds...


  4%|▍         | 10/245 [01:06<23:56,  6.11s/it]

Rate limit reached. Retrying in 2 seconds...


  4%|▍         | 11/245 [01:11<22:31,  5.78s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 32 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 4 seconds...


  5%|▌         | 13/245 [01:27<25:23,  6.57s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 4 seconds...
Rate limit reached. Retrying in 32 seconds...
Rate limit reached. Retrying in 2 seconds...


  6%|▌         | 14/245 [01:44<37:05,  9.64s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 4 seconds...
Rate limit reached. Retrying in 2 seconds...


  6%|▌         | 15/245 [01:54<37:37,  9.82s/it]

Rate limit reached. Retrying in 4 seconds...
Rate limit reached. Retrying in 8 seconds...


  7%|▋         | 16/245 [01:58<30:35,  8.01s/it]

Rate limit reached. Retrying in 8 seconds...


  7%|▋         | 17/245 [02:02<25:52,  6.81s/it]

Rate limit reached. Retrying in 2 seconds...


  7%|▋         | 18/245 [02:07<23:22,  6.18s/it]

Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 4 seconds...


  8%|▊         | 20/245 [02:14<18:37,  4.97s/it]

Rate limit reached. Retrying in 8 seconds...


  9%|▊         | 21/245 [02:20<19:12,  5.15s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 16 seconds...


  9%|▉         | 22/245 [02:27<21:30,  5.79s/it]

Rate limit reached. Retrying in 32 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 32 seconds...


 10%|▉         | 24/245 [02:54<32:15,  8.76s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 4 seconds...


 10%|█         | 25/245 [03:07<36:46, 10.03s/it]

Rate limit reached. Retrying in 2 seconds...


 11%|█         | 26/245 [03:12<30:51,  8.46s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 4 seconds...
Rate limit reached. Retrying in 2 seconds...


 11%|█         | 27/245 [03:21<31:09,  8.58s/it]

Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 2 seconds...


 11%|█▏        | 28/245 [03:25<25:58,  7.18s/it]

Rate limit reached. Retrying in 2 seconds...


 12%|█▏        | 29/245 [03:31<24:34,  6.82s/it]

Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 32 seconds...
Rate limit reached. Retrying in 4 seconds...


 12%|█▏        | 30/245 [03:47<34:38,  9.67s/it]

Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 32 seconds...
Rate limit reached. Retrying in 2 seconds...


 13%|█▎        | 31/245 [03:58<35:43, 10.02s/it]

Rate limit reached. Retrying in 16 seconds...


 14%|█▍        | 34/245 [04:15<24:30,  6.97s/it]

Rate limit reached. Retrying in 32 seconds...


 15%|█▍        | 36/245 [04:21<17:24,  5.00s/it]

Rate limit reached. Retrying in 2 seconds...


 15%|█▌        | 37/245 [04:25<15:50,  4.57s/it]

Rate limit reached. Retrying in 2 seconds...
Rate limit reached. Retrying in 4 seconds...
Rate limit reached. Retrying in 4 seconds...


 16%|█▌        | 38/245 [04:30<16:12,  4.70s/it]

Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 16 seconds...
Rate limit reached. Retrying in 2 seconds...


 16%|█▌        | 39/245 [04:51<32:43,  9.53s/it]

Rate limit reached. Retrying in 4 seconds...


 16%|█▋        | 40/245 [04:55<26:47,  7.84s/it]

Rate limit reached. Retrying in 8 seconds...
Rate limit reached. Retrying in 2 seconds...


 17%|█▋        | 41/245 [05:03<26:54,  7.92s/it]

Rate limit reached. Retrying in 32 seconds...


 18%|█▊        | 43/245 [05:10<19:27,  5.78s/it]

Rate limit reached. Retrying in 2 seconds...


100%|██████████| 245/245 [24:17<00:00,  5.95s/it]


Enhanced RAG Model Evaluation Results with GPT-4-turbo:
Accuracy: 0.80
Precision: 0.79
Recall: 0.82
F1-Score: 0.80





In [None]:
import json
import numpy as np
import pandas as pd
import openai
import faiss
import time
from tqdm import tqdm
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Step 1: Load and Preprocess Data
file_path = "final_patient_summaries_with_embeddings.json"
data = [json.loads(line) for line in open(file_path, "r")]
df = pd.DataFrame(data)
df['readmission_status'] = df['readmission_status'].map({'yes': 1, 'no': 0})
df.dropna(subset=['readmission_status'], inplace=True)

# Step 2: Normalize Embeddings
embeddings = normalize(np.array(df['embeddings'].tolist()))
labels = df['readmission_status'].values

# Step 3: Split Data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['readmission_status'])
train_embeddings = normalize(np.array(train_df['embeddings'].tolist()))
test_embeddings = normalize(np.array(test_df['embeddings'].tolist()))
test_labels = test_df['readmission_status'].values

# Step 4: Build FAISS Index (HNSW)
embedding_dim = train_embeddings.shape[1]
index = faiss.IndexHNSWFlat(embedding_dim, 32)
index.hnsw.efSearch = 100
index.add(train_embeddings)

# Step 5: Train Logistic Regression Model
log_reg = LogisticRegression(random_state=42, max_iter=1000)
log_reg.fit(train_embeddings, train_df['readmission_status'])

# Step 6: Define Caching for GPT-4-Turbo Predictions
response_cache = {}

def hash_prompt(prompt):
    return hash(prompt)

# Function for GPT-4-Turbo Prediction with Enhanced Prompt
def gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries):
    prompt = (
        f"Patient's detailed summary:\n{patient_summary}\n"
        f"Retrieved similar patient cases:\n{retrieved_summaries}\n"
        "Based on medical history, risk factors, and retrieved cases, will the patient likely be readmitted within 30 days? "
        "Answer 'yes' or 'no' and explain your reasoning."
    )
    prompt_hash = hash_prompt(prompt)

    # Check if the response is already cached
    if prompt_hash in response_cache:
        return response_cache[prompt_hash]

    max_retries = 5
    retries = 0
    while retries < max_retries:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2
            )
            answer = response['choices'][0]['message']['content'].strip().lower()
            prediction = 1 if "yes" in answer else 0

            # Cache the response
            response_cache[prompt_hash] = prediction
            return prediction
        except openai.error.RateLimitError:
            wait_time = 2 ** (retries + 1)
            print(f"Rate limit reached. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            retries += 1
        except Exception as e:
            print(f"Error: {e}")
            return None
    return None

# Function for Ensemble Prediction
def process_sample(i, test_embedding, test_df, train_df, index, k):
    D, I = index.search(np.array([test_embedding]), k)
    neighbor_labels = np.array(train_df.iloc[I[0]]['readmission_status'].tolist(), dtype=int)
    weights = np.exp(-D[0])
    knn_prediction = np.dot(weights, neighbor_labels) / weights.sum()
    knn_prediction = int(np.round(knn_prediction))

    log_reg_prediction = log_reg.predict([test_embedding])[0]

    retrieved_summaries = " ".join(train_df.iloc[I[0]]['enhanced_summary'].tolist())
    patient_summary = test_df.iloc[i]['enhanced_summary']
    gpt_prediction = gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries)

    # Adaptive Hybrid Ensemble
    if gpt_prediction is not None:
        ensemble_prediction = int(np.round(
            0.5 * knn_prediction + 0.3 * gpt_prediction + 0.2 * log_reg_prediction
        ))
        return ensemble_prediction
    return knn_prediction

# Evaluation
k = 5
predictions = []

print("Evaluating the enhanced ensemble model with GPT-4-turbo predictions...")
for i, test_embedding in tqdm(enumerate(test_embeddings), total=len(test_embeddings)):
    prediction = process_sample(i, test_embedding, test_df, train_df, index, k)
    predictions.append(prediction)

# Calculate Evaluation Metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

# Display the Results
print("\nEnhanced Ensemble Model Evaluation Results with GPT-4-turbo:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Evaluating the enhanced ensemble model with GPT-4-turbo predictions...


100%|██████████| 245/245 [36:17<00:00,  8.89s/it]


Enhanced Ensemble Model Evaluation Results with GPT-4-turbo:
Accuracy: 0.85
Precision: 0.87
Recall: 0.82
F1-Score: 0.84





In [None]:
import json
import numpy as np
import pandas as pd
import openai
import faiss
import time
from tqdm import tqdm
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Step 1: Load and Preprocess Data
file_path = "final_patient_summaries_with_embeddings.json"
data = [json.loads(line) for line in open(file_path, "r")]
df = pd.DataFrame(data)
df['readmission_status'] = df['readmission_status'].map({'yes': 1, 'no': 0})
df.dropna(subset=['readmission_status'], inplace=True)

# Step 2: Normalize Embeddings
embeddings = normalize(np.array(df['embeddings'].tolist()))
labels = df['readmission_status'].values

# Step 3: Split Data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['readmission_status'])
train_embeddings = normalize(np.array(train_df['embeddings'].tolist()))
test_embeddings = normalize(np.array(test_df['embeddings'].tolist()))
test_labels = test_df['readmission_status'].values

# Step 3.1: Use Only a 20% Subset of the Test Data
subset_size = int(0.2 * len(test_embeddings))
test_embeddings = test_embeddings[:subset_size]
test_labels = test_labels[:subset_size]

print(f"Evaluating on a subset of size: {subset_size}")

# Step 4: Build FAISS Index (HNSW)
embedding_dim = train_embeddings.shape[1]
index = faiss.IndexHNSWFlat(embedding_dim, 32)
index.hnsw.efSearch = 100
index.add(train_embeddings)

# Step 5: Train Logistic Regression Model
log_reg = LogisticRegression(random_state=42, max_iter=1000)
log_reg.fit(train_embeddings, train_df['readmission_status'])

# Step 6: Define Caching for GPT-4-Turbo Predictions
response_cache = {}

def hash_prompt(prompt):
    return hash(prompt)

# Function for GPT-4-Turbo Prediction with Caching
def gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries):
    prompt = (
        f"Patient's detailed summary:\n{patient_summary}\n"
        f"Retrieved similar patient cases:\n{retrieved_summaries}\n"
        "Based on medical history, risk factors, and retrieved cases, will the patient likely be readmitted within 30 days? "
        "Answer 'yes' or 'no' and explain your reasoning."
    )
    prompt_hash = hash_prompt(prompt)

    # Check if the response is already cached
    if prompt_hash in response_cache:
        return response_cache[prompt_hash]

    max_retries = 5
    retries = 0
    while retries < max_retries:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2
            )
            answer = response['choices'][0]['message']['content'].strip().lower()
            prediction = 1 if "yes" in answer else 0

            # Cache the response
            response_cache[prompt_hash] = prediction
            return prediction
        except openai.error.RateLimitError:
            wait_time = 2 ** (retries + 1)
            print(f"Rate limit reached. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            retries += 1
        except Exception as e:
            print(f"Error: {e}")
            return None
    return None

# Function for Ensemble Prediction
def process_sample(i, test_embedding, test_df, train_df, index, k):
    D, I = index.search(np.array([test_embedding]), k)
    neighbor_labels = np.array(train_df.iloc[I[0]]['readmission_status'].tolist(), dtype=int)
    weights = np.exp(-D[0])
    knn_prediction = np.dot(weights, neighbor_labels) / weights.sum()
    knn_prediction = int(np.round(knn_prediction))

    # Logistic Regression Prediction
    log_reg_prediction = log_reg.predict([test_embedding])[0]

    # GPT-4-turbo Prediction
    retrieved_summaries = " ".join(train_df.iloc[I[0]]['enhanced_summary'].tolist())
    patient_summary = test_df.iloc[i]['enhanced_summary']
    gpt_prediction = gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries)

    # Adaptive Hybrid Ensemble
    if gpt_prediction is not None:
        ensemble_prediction = int(np.round(
            0.5 * knn_prediction + 0.3 * gpt_prediction + 0.2 * log_reg_prediction
        ))
        return ensemble_prediction
    return knn_prediction

# Evaluation with Progress Indicator
k = 5
predictions = []

print("Evaluating the enhanced ensemble model with GPT-4-turbo predictions on a subset...")
for i, test_embedding in tqdm(enumerate(test_embeddings), total=len(test_embeddings)):
    prediction = process_sample(i, test_embedding, test_df, train_df, index, k)
    predictions.append(prediction)

# Calculate Evaluation Metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

# Display the Results
print("\nSubset Evaluation Results with GPT-4-turbo:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Evaluating on a subset of size: 49
Evaluating the enhanced ensemble model with GPT-4-turbo predictions on a subset...


100%|██████████| 49/49 [06:57<00:00,  8.53s/it]


Subset Evaluation Results with GPT-4-turbo:
Accuracy: 0.80
Precision: 0.83
Recall: 0.68
F1-Score: 0.75





In [None]:
import json
import numpy as np
import pandas as pd
import openai
import faiss
import time
from tqdm import tqdm
from sklearn.preprocessing import normalize
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Step 1: Load and Preprocess Data
file_path = "final_patient_summaries_with_embeddings.json"
data = [json.loads(line) for line in open(file_path, "r")]
df = pd.DataFrame(data)
df['readmission_status'] = df['readmission_status'].map({'yes': 1, 'no': 0})
df.dropna(subset=['readmission_status'], inplace=True)

# Step 2: Normalize Embeddings
embeddings = normalize(np.array(df['embeddings'].tolist()))
labels = df['readmission_status'].values

# Step 3: Split Data
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df['readmission_status'])
train_embeddings = normalize(np.array(train_df['embeddings'].tolist()))
test_embeddings = normalize(np.array(test_df['embeddings'].tolist()))
test_labels = test_df['readmission_status'].values

# Step 3.1: Use Only a 20% Subset of the Test Data
subset_size = int(0.2 * len(test_embeddings))
test_embeddings = test_embeddings[:subset_size]
test_labels = test_labels[:subset_size]

print(f"Evaluating on a subset of size: {subset_size}")

# Step 4: Build FAISS Index (HNSW)
embedding_dim = train_embeddings.shape[1]
index = faiss.IndexHNSWFlat(embedding_dim, 32)
index.hnsw.efSearch = 100
index.add(train_embeddings)

# Step 5: Train Logistic Regression Model
log_reg = LogisticRegression(random_state=42, max_iter=1000)
log_reg.fit(train_embeddings, train_df['readmission_status'])

# Step 6: Define Caching for GPT-4-Turbo Predictions
response_cache = {}

def hash_prompt(prompt):
    return hash(prompt)

# Function for GPT-4-Turbo Prediction with Caching
def gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries):
    prompt = (
        f"Patient's detailed summary:\n{patient_summary}\n"
        f"Retrieved similar patient cases:\n{retrieved_summaries}\n"
        "Based on medical history, risk factors, and retrieved cases, will the patient likely be readmitted within 30 days? "
        "Answer 'yes' or 'no' and explain your reasoning."
    )
    prompt_hash = hash_prompt(prompt)

    # Check if the response is already cached
    if prompt_hash in response_cache:
        return response_cache[prompt_hash]

    max_retries = 5
    retries = 0
    while retries < max_retries:
        try:
            response = openai.ChatCompletion.create(
                model="gpt-4-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.2
            )
            answer = response['choices'][0]['message']['content'].strip().lower()
            prediction = 1 if "yes" in answer else 0

            # Cache the response
            response_cache[prompt_hash] = prediction
            return prediction
        except openai.error.RateLimitError:
            wait_time = 2 ** (retries + 1)
            print(f"Rate limit reached. Retrying in {wait_time} seconds...")
            time.sleep(wait_time)
            retries += 1
        except Exception as e:
            print(f"Error: {e}")
            return None
    return None

# Function for Ensemble Prediction
def process_sample(i, test_embedding, test_df, train_df, index, k):
    D, I = index.search(np.array([test_embedding]), k)
    neighbor_labels = np.array(train_df.iloc[I[0]]['readmission_status'].tolist(), dtype=int)
    weights = np.exp(-D[0])
    knn_prediction = np.dot(weights, neighbor_labels) / weights.sum()
    knn_prediction = int(np.round(knn_prediction))

    # Logistic Regression Prediction
    log_reg_prediction = log_reg.predict([test_embedding])[0]

    # GPT-4-turbo Prediction
    retrieved_summaries = " ".join(train_df.iloc[I[0]]['enhanced_summary'].tolist())
    patient_summary = test_df.iloc[i]['enhanced_summary']
    gpt_prediction = gpt4_turbo_predict_with_cache(patient_summary, retrieved_summaries)

    # Adaptive Hybrid Ensemble
    if gpt_prediction is not None:
        ensemble_prediction = int(np.round(
        0.3 * knn_prediction + 0.5 * gpt_prediction + 0.2 * log_reg_prediction
        ))

        return ensemble_prediction
    return knn_prediction

# Evaluation with Progress Indicator
k = 5
predictions = []

print("Evaluating the enhanced ensemble model with GPT-4-turbo predictions on a subset...")
for i, test_embedding in tqdm(enumerate(test_embeddings), total=len(test_embeddings)):
    prediction = process_sample(i, test_embedding, test_df, train_df, index, k)
    predictions.append(prediction)

# Calculate Evaluation Metrics
accuracy = accuracy_score(test_labels, predictions)
precision = precision_score(test_labels, predictions)
recall = recall_score(test_labels, predictions)
f1 = f1_score(test_labels, predictions)

# Display the Results
print("\nSubset Evaluation Results with GPT-4-turbo:")
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-Score: {f1:.2f}")


Evaluating on a subset of size: 49
Evaluating the enhanced ensemble model with GPT-4-turbo predictions on a subset...


100%|██████████| 49/49 [07:08<00:00,  8.74s/it]


Subset Evaluation Results with GPT-4-turbo:
Accuracy: 0.90
Precision: 0.95
Recall: 0.82
F1-Score: 0.88





FINAL

In [None]:
import openai

openai.api_key = "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"
print("API Key Set:", openai.api_key)


API Key Set: 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


In [None]:
import json
import numpy as np
import pandas as pd
import openai
import faiss
import time
from tqdm import tqdm
from sklearn.preprocessing import normalize
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import concurrent.futures

class EnhancedRAGModel:
    def __init__(self, file_path):
        self.file_path = file_path
        self.response_cache = {}
        self._load_data()
        self._initialize_models()

    def _load_data(self):
        print("Loading and preprocessing data...")
        data = [json.loads(line) for line in open(self.file_path, "r")]
        self.df = pd.DataFrame(data)
        self.df['readmission_status'] = self.df['readmission_status'].map({'yes': 1, 'no': 0})
        self.df.dropna(subset=['readmission_status'], inplace=True)

        # Normalize embeddings
        self.embeddings = normalize(np.array(self.df['embeddings'].tolist()))
        self.labels = self.df['readmission_status'].values
        self.summaries = self.df['enhanced_summary'].values

    def _initialize_models(self):
        print("Initializing models...")
        train_embeddings = self.embeddings
        self.embedding_dim = train_embeddings.shape[1]

        # Build FAISS HNSW Index
        self.index = faiss.IndexHNSWFlat(self.embedding_dim, 32)
        self.index.hnsw.efSearch = 100
        self.index.add(train_embeddings)

        # Train Logistic Regression Model
        self.log_reg = LogisticRegression(random_state=42, max_iter=1000)
        self.log_reg.fit(train_embeddings, self.labels)

    def hash_prompt(self, prompt):
        return hash(prompt)

    def gpt4_turbo_predict(self, patient_summary, retrieved_summaries):
        prompt = (
            f"Patient's detailed summary:\n{patient_summary}\n"
            f"Retrieved similar patient cases:\n{retrieved_summaries}\n"
            "Based on medical history, risk factors, and retrieved cases, will the patient likely be readmitted within 30 days? "
            "Answer 'yes' or 'no' and explain your reasoning."
        )
        prompt_hash = self.hash_prompt(prompt)

        # Check cache
        if prompt_hash in self.response_cache:
            return self.response_cache[prompt_hash]

        # API call with retries
        max_retries = 5
        retries = 0
        while retries < max_retries:
            try:
                response = openai.ChatCompletion.create(
                    model="gpt-4-turbo",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.2
                )
                answer = response['choices'][0]['message']['content'].strip().lower()
                prediction = 1 if "yes" in answer else 0

                # Cache response
                self.response_cache[prompt_hash] = prediction
                return prediction
            except openai.error.RateLimitError:
                wait_time = 2 ** (retries + 1)
                print(f"Rate limit reached. Retrying in {wait_time} seconds...")
                time.sleep(wait_time)
                retries += 1
            except Exception as e:
                print(f"Error: {e}")
                return None
        return None

    def process_sample(self, i):
        test_embedding = self.embeddings[i]
        patient_summary = self.summaries[i]

        # Precompute FAISS Neighbors
        D, I = self.index.search(np.array([test_embedding]), k=5)
        neighbor_labels = np.array([self.labels[idx] for idx in I[0]])
        weights = np.exp(-D[0])
        knn_prediction = int(np.round(np.dot(weights, neighbor_labels) / weights.sum()))

        # Logistic Regression Prediction
        log_reg_prediction = self.log_reg.predict([test_embedding])[0]

        # GPT-4-Turbo Prediction
        retrieved_summaries = " ".join(self.summaries[idx] for idx in I[0])
        gpt_prediction = self.gpt4_turbo_predict(patient_summary, retrieved_summaries)

        # Ensemble Prediction
        if gpt_prediction is not None:
            final_prediction = int(np.round(
                0.3 * knn_prediction + 0.5 * gpt_prediction + 0.2 * log_reg_prediction
            ))
            return final_prediction
        return knn_prediction

    def evaluate_model(self):
        predictions = []
        print("Evaluating the model with parallel processing...")

        # Use ThreadPoolExecutor for parallel processing
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(self.process_sample, i) for i in range(len(self.embeddings))]
            for future in tqdm(concurrent.futures.as_completed(futures), total=len(self.embeddings)):
                predictions.append(future.result())

        # Calculate metrics
        accuracy = accuracy_score(self.labels, predictions)
        precision = precision_score(self.labels, predictions)
        recall = recall_score(self.labels, predictions)
        f1 = f1_score(self.labels, predictions)

        print("\nOptimized Model Evaluation Results:")
        print(f"Accuracy: {accuracy:.2f}")
        print(f"Precision: {precision:.2f}")
        print(f"Recall: {recall:.2f}")
        print(f"F1-Score: {f1:.2f}")

# Usage Example
if __name__ == "__main__":
    file_path = "final_patient_summaries_with_embeddings.json"
    model = EnhancedRAGModel(file_path)
    model.evaluate_model()


Loading and preprocessing data...
Initializing models...
Evaluating the model with parallel processing...


100%|██████████| 1224/1224 [30:57<00:00,  1.52s/it]


Optimized Model Evaluation Results:
Accuracy: 0.91
Precision: 0.92
Recall: 0.90
F1-Score: 0.91





Save the Model Artifacts

In [None]:
import pickle

# Save the trained model, FAISS index, and response cache
model_artifacts = {
    "log_reg": model.log_reg,
    "faiss_index": model.index,
    "response_cache": model.response_cache,
    "embeddings": model.embeddings,
    "labels": model.labels,
    "summaries": model.summaries
}

# Save the model artifacts to a file
save_path = "enhanced_rag_model_artifacts.pkl"
with open(save_path, "wb") as f:
    pickle.dump(model_artifacts, f)

print(f"Model artifacts saved to {save_path}")


Model artifacts saved to enhanced_rag_model_artifacts.pkl


In [None]:
import pickle

# Load the model artifacts
with open("enhanced_rag_model_artifacts.pkl", "rb") as f:
    loaded_artifacts = pickle.load(f)

# Access components
log_reg = loaded_artifacts["log_reg"]
faiss_index = loaded_artifacts["faiss_index"]
response_cache = loaded_artifacts["response_cache"]
embeddings = loaded_artifacts["embeddings"]
labels = loaded_artifacts["labels"]
summaries = loaded_artifacts["summaries"]

print("Model artifacts loaded successfully.")


Model artifacts loaded successfully.
