# Before Fine-Tuning

In [19]:
import pandas as pd
import google.generativeai as genai
import nltk
from nltk.corpus import stopwords
from typing import Dict
import time

class DiagnosisEvaluator:
    """Ask a Gemini model for a diagnosis and score it against a reference.

    Scoring is a keyword overlap: the reference diagnosis and the first
    sentence of the model output are reduced to sets of lower-cased,
    punctuation-free, non-stop-word tokens. A case counts as correct when
    at least 50% of the reference keywords appear in the model output.
    """

    # Cached English stop-word set; filled lazily on first use so the NLTK
    # corpus is read once per evaluator instead of once per case.
    _stop_words = None

    def __init__(self, api_key: str, model_name: str = "gemini-pro"):
        """Configure the Gemini client and build the model handle.

        Args:
            api_key: Google Generative AI API key.
            model_name: Model identifier passed to ``genai.GenerativeModel``.
        """
        self.setup_nlp()
        genai.configure(api_key=api_key)
        self.model = genai.GenerativeModel(model_name)
        self.instruction = "You are a medical diagnosis assistant. Given the following condition, provide only the most probable diagnosis. Start with 'Diagnosis:'"

    @staticmethod
    def setup_nlp():
        """Download the NLTK stop-word corpus if it is not available locally."""
        try:
            nltk.data.find('corpora/stopwords')
        except LookupError:
            nltk.download('stopwords', quiet=True)

    @staticmethod
    def _tokenize(text) -> set:
        """Return the set of lower-cased alphanumeric tokens in ``text``.

        Every non-alphanumeric character is treated as a separator, so
        ``"Sepsis,"`` and ``"sepsis"`` yield the same token. Non-string input
        (for example NaN from an empty spreadsheet cell) yields an empty set.
        """
        if not isinstance(text, str):
            return set()
        normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
        return set(normalized.split())

    def _get_stop_words(self):
        """Return the cached English stop-word set, loading it on first use."""
        if self._stop_words is None:
            self._stop_words = frozenset(stopwords.words('english'))
        return self._stop_words

    def evaluate_case(self, condition: str, correct_diagnosis: str) -> Dict:
        """Query the model for one case and score its answer.

        Args:
            condition: Free-text patient condition appended to the instruction.
            correct_diagnosis: Reference diagnosis to compare against.

        Returns:
            A dict with ``'diagnosis'`` (text of the model output up to the
            first ``'.'``, stripped) and ``'is_correct'`` (True when at least
            50% of the reference keywords occur in that text).
        """
        response = self.model.generate_content(f"{self.instruction} {condition}")
        try:
            model_output = " ".join(part.text for part in response.parts)
        except ValueError:
            # NOTE(review): the SDK's `parts` accessor is expected to raise
            # ValueError when the response carries no candidate (e.g. a
            # safety-blocked prompt) - confirm against the installed version.
            # Such a case is scored as wrong instead of aborting the run.
            model_output = ""
        first_diagnosis = model_output.split('.')[0].strip()  # Only take first sentence

        # Compare with the reference diagnosis using keyword matching.
        stop_words = self._get_stop_words()
        correct_keywords = self._tokenize(correct_diagnosis) - stop_words
        output_keywords = self._tokenize(first_diagnosis) - stop_words
        matched = len(correct_keywords & output_keywords)
        accuracy = (matched / len(correct_keywords)) * 100 if correct_keywords else 0

        return {
            'diagnosis': first_diagnosis,
            'is_correct': accuracy >= 50
        }

def evaluate_diagnoses(file_path: str, api_key: str, delay_seconds: float = 3):
    """Evaluate every case in an Excel sheet and print per-case and overall results.

    The sheet must contain the columns ``'Patient Conditions'`` and
    ``'Correct Diagnosis'``.

    Args:
        file_path: Path to the Excel file with the test cases.
        api_key: API key handed to ``DiagnosisEvaluator``.
        delay_seconds: Pause between consecutive model calls.
    """
    evaluator = DiagnosisEvaluator(api_key)
    test_data = pd.read_excel(file_path)
    total_cases = len(test_data)
    correct_count = 0

    print("\nEvaluating diagnoses...\n")
    # Number cases by position so the labels stay 1..N regardless of what
    # the DataFrame index happens to contain.
    for case_number, (_, row) in enumerate(test_data.iterrows(), start=1):
        result = evaluator.evaluate_case(row['Patient Conditions'], row['Correct Diagnosis'])

        print(f"Case {case_number}:")
        print(f"Correct: {row['Correct Diagnosis']}")
        print(f"Model: {result['diagnosis']}")
        print(f"Result: {'Correct' if result['is_correct'] else 'Wrong'}\n")

        if result['is_correct']:
            correct_count += 1
        # No need to wait once the final request has been made.
        if case_number < total_cases:
            time.sleep(delay_seconds)

    # An empty sheet reports 0% instead of raising ZeroDivisionError.
    accuracy = (correct_count / total_cases) * 100 if total_cases else 0.0
    print("=" * 40)
    print(f"Overall Accuracy: {accuracy:.2f}%")
    print(f"Correct Cases: {correct_count}/{total_cases}")

if __name__ == "__main__":
    import os

    # The API key is read from the environment so that it is never stored in
    # the notebook. A key that was previously committed in plain text should
    # be treated as compromised and revoked.
    API_KEY = os.environ.get("GOOGLE_API_KEY")
    if not API_KEY:
        raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running this cell.")
    FILE_PATH = "Test_Data.xlsx"
    evaluate_diagnoses(FILE_PATH, API_KEY)


Evaluating diagnoses...

Case 1:
Correct: Cerebral Malaria
Model: Diagnosis: Malaria
Result: Correct

Case 2:
Correct: Sepsis
Model: Diagnosis: Dysphagia secondary to esophageal adenocarcinoma
Result: Wrong

Case 3:
Correct: type I necrotizing fasciitis, sepsis 
Model: Diagnosis: Perforated appendicitis with abscess
Result: Wrong

Case 4:
Correct: Septic arthritis, impending avascular necrosis, brucella infection 
Model: Diagnosis: Septic arthritis
Result: Wrong

Case 5:
Correct: sepsis, cellulitis, aortitis, tricuspid insufficiency, septic vegetation, endocarditis 
Model: Diagnosis: Necrotizing fasciitis
Result: Wrong

Case 6:
Correct: salmonella minor infection, sepsis, mycotic aortic aneurysm, aortitis, abscess, drug allergy 
Model: Diagnosis: Salmonella mycotic aneurysm
Result: Wrong

Case 7:
Correct:  indigestion 
Model: Diagnosis: Vasovagal syncope
Result: Wrong

Case 8:
Correct: urosepsis
Model: Diagnosis: Fournier's gangrene
Result: Wrong

Case 9:
Correct: Septic arthritis
Mod

# Setting 1 

In [11]:
import pandas as pd
import google.generativeai as genai
import nltk
from nltk.corpus import stopwords
from typing import Dict
import time

class DiagnosisEvaluator:
    """Ask the Setting 1 fine-tuned model for a diagnosis and score it.

    Scoring is a keyword overlap: the reference diagnosis and the first
    sentence of the model output are reduced to sets of lower-cased,
    punctuation-free, non-stop-word tokens. A case counts as correct when
    at least 50% of the reference keywords appear in the model output.
    """

    # Cached English stop-word set; filled lazily on first use so the NLTK
    # corpus is read once per evaluator instead of once per case.
    _stop_words = None

    def __init__(
        self,
        api_key: str,
        model_name: str = "tunedModels/finetuningdata-setting1-vfpsv26d5h48",
    ):
        """Configure the Gemini client and build the model handle.

        Args:
            api_key: Google Generative AI API key.
            model_name: Model identifier passed to ``genai.GenerativeModel``;
                defaults to the Setting 1 fine-tuned model.
        """
        self.setup_nlp()
        genai.configure(api_key=api_key)
        # Uses the Setting 1 fine-tuned model by default.
        self.model = genai.GenerativeModel(model_name)
        self.instruction = "You are a medical diagnosis assistant. Given the following condition, provide only the most probable diagnosis. Start with 'Diagnosis:'"

    @staticmethod
    def setup_nlp():
        """Download the NLTK stop-word corpus if it is not available locally."""
        try:
            nltk.data.find('corpora/stopwords')
        except LookupError:
            nltk.download('stopwords', quiet=True)

    @staticmethod
    def _tokenize(text) -> set:
        """Return the set of lower-cased alphanumeric tokens in ``text``.

        Every non-alphanumeric character is treated as a separator, so
        ``"Sepsis,"`` and ``"sepsis"`` yield the same token. Non-string input
        (for example NaN from an empty spreadsheet cell) yields an empty set.
        """
        if not isinstance(text, str):
            return set()
        normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
        return set(normalized.split())

    def _get_stop_words(self):
        """Return the cached English stop-word set, loading it on first use."""
        if self._stop_words is None:
            self._stop_words = frozenset(stopwords.words('english'))
        return self._stop_words

    def evaluate_case(self, condition: str, correct_diagnosis: str) -> Dict:
        """Query the model for one case and score its answer.

        Args:
            condition: Free-text patient condition appended to the instruction.
            correct_diagnosis: Reference diagnosis to compare against.

        Returns:
            A dict with ``'diagnosis'`` (text of the model output up to the
            first ``'.'``, stripped) and ``'is_correct'`` (True when at least
            50% of the reference keywords occur in that text).
        """
        response = self.model.generate_content(f"{self.instruction} {condition}")
        try:
            model_output = " ".join(part.text for part in response.parts)
        except ValueError:
            # NOTE(review): the SDK's `parts` accessor is expected to raise
            # ValueError when the response carries no candidate (e.g. a
            # safety-blocked prompt) - confirm against the installed version.
            # Such a case is scored as wrong instead of aborting the run.
            model_output = ""
        first_diagnosis = model_output.split('.')[0].strip()  # Only take first sentence

        # Compare with the reference diagnosis using keyword matching.
        stop_words = self._get_stop_words()
        correct_keywords = self._tokenize(correct_diagnosis) - stop_words
        output_keywords = self._tokenize(first_diagnosis) - stop_words
        matched = len(correct_keywords & output_keywords)
        accuracy = (matched / len(correct_keywords)) * 100 if correct_keywords else 0

        return {
            'diagnosis': first_diagnosis,
            'is_correct': accuracy >= 50
        }

def evaluate_diagnoses(file_path: str, api_key: str, delay_seconds: float = 1):
    """Evaluate every case in an Excel sheet and print per-case and overall results.

    The sheet must contain the columns ``'Patient Conditions'`` and
    ``'Correct Diagnosis'``.

    Args:
        file_path: Path to the Excel file with the test cases.
        api_key: API key handed to ``DiagnosisEvaluator``.
        delay_seconds: Pause between consecutive model calls.
    """
    evaluator = DiagnosisEvaluator(api_key)
    test_data = pd.read_excel(file_path)
    total_cases = len(test_data)
    correct_count = 0

    print("\nEvaluating diagnoses...\n")
    # Number cases by position so the labels stay 1..N regardless of what
    # the DataFrame index happens to contain.
    for case_number, (_, row) in enumerate(test_data.iterrows(), start=1):
        result = evaluator.evaluate_case(row['Patient Conditions'], row['Correct Diagnosis'])

        print(f"Case {case_number}:")
        print(f"Correct: {row['Correct Diagnosis']}")
        print(f"Model: {result['diagnosis']}")
        print(f"Result: {'Correct' if result['is_correct'] else 'Wrong'}\n")

        if result['is_correct']:
            correct_count += 1
        # No need to wait once the final request has been made.
        if case_number < total_cases:
            time.sleep(delay_seconds)

    # An empty sheet reports 0% instead of raising ZeroDivisionError.
    accuracy = (correct_count / total_cases) * 100 if total_cases else 0.0
    print("=" * 40)
    print(f"Overall Accuracy: {accuracy:.2f}%")
    print(f"Correct Cases: {correct_count}/{total_cases}")

if __name__ == "__main__":
    import os

    # The API key is read from the environment so that it is never stored in
    # the notebook. A key that was previously committed in plain text should
    # be treated as compromised and revoked.
    API_KEY = os.environ.get("GOOGLE_API_KEY")
    if not API_KEY:
        raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running this cell.")
    FILE_PATH = "Test_Data.xlsx"
    evaluate_diagnoses(FILE_PATH, API_KEY)


Evaluating diagnoses...

Case 1:
Correct: Cerebral Malaria
Model: Diagnosis: Meningitis
Result: Wrong

Case 2:
Correct: Sepsis
Model: Diagnosis: Esophageal cancer
Result: Wrong

Case 3:
Correct: type I necrotizing fasciitis, sepsis 
Model: Diagnosis: Perforated Appendicitis with Retroperitoneal Abscess
Result: Wrong

Case 4:
Correct: Septic arthritis, impending avascular necrosis, brucella infection 
Model: Diagnosis: Septic Arthritis
Result: Wrong

Case 5:
Correct: sepsis, cellulitis, aortitis, tricuspid insufficiency, septic vegetation, endocarditis 
Model: Diagnosis: Alcohol-induced Septic Shock with Endocarditis
Result: Wrong

Case 6:
Correct: salmonella minor infection, sepsis, mycotic aortic aneurysm, aortitis, abscess, drug allergy 
Model: Diagnosis: Salmonella Mycotic Aneurysm
Result: Wrong

Case 7:
Correct:  indigestion 
Model: Diagnosis: Vasovagal Syncope
Result: Wrong

Case 8:
Correct: urosepsis
Model: Diagnosis: Septic Shock (complication of UTI)

This is a very serious di

# Setting 2

In [9]:
import pandas as pd
import google.generativeai as genai
import nltk
from nltk.corpus import stopwords
from typing import Dict
import time

class DiagnosisEvaluator:
    """Ask the Setting 2 fine-tuned model for a diagnosis and score it.

    Scoring is a keyword overlap: the reference diagnosis and the first
    sentence of the model output are reduced to sets of lower-cased,
    punctuation-free, non-stop-word tokens. A case counts as correct when
    at least 50% of the reference keywords appear in the model output.
    """

    # Cached English stop-word set; filled lazily on first use so the NLTK
    # corpus is read once per evaluator instead of once per case.
    _stop_words = None

    def __init__(
        self,
        api_key: str,
        model_name: str = "tunedModels/finetuningdata-setting2-1t2z30z27i4w",
    ):
        """Configure the Gemini client and build the model handle.

        Args:
            api_key: Google Generative AI API key.
            model_name: Model identifier passed to ``genai.GenerativeModel``;
                defaults to the Setting 2 fine-tuned model.
        """
        self.setup_nlp()
        genai.configure(api_key=api_key)
        # Uses the Setting 2 fine-tuned model by default.
        self.model = genai.GenerativeModel(model_name)
        self.instruction = "You are a medical diagnosis assistant. Given the following condition, provide only the most probable diagnosis. Start with 'Diagnosis:'"

    @staticmethod
    def setup_nlp():
        """Download the NLTK stop-word corpus if it is not available locally."""
        try:
            nltk.data.find('corpora/stopwords')
        except LookupError:
            nltk.download('stopwords', quiet=True)

    @staticmethod
    def _tokenize(text) -> set:
        """Return the set of lower-cased alphanumeric tokens in ``text``.

        Every non-alphanumeric character is treated as a separator, so
        ``"Sepsis,"`` and ``"sepsis"`` yield the same token. Non-string input
        (for example NaN from an empty spreadsheet cell) yields an empty set.
        """
        if not isinstance(text, str):
            return set()
        normalized = "".join(ch if ch.isalnum() else " " for ch in text.lower())
        return set(normalized.split())

    def _get_stop_words(self):
        """Return the cached English stop-word set, loading it on first use."""
        if self._stop_words is None:
            self._stop_words = frozenset(stopwords.words('english'))
        return self._stop_words

    def evaluate_case(self, condition: str, correct_diagnosis: str) -> Dict:
        """Query the model for one case and score its answer.

        Args:
            condition: Free-text patient condition appended to the instruction.
            correct_diagnosis: Reference diagnosis to compare against.

        Returns:
            A dict with ``'diagnosis'`` (text of the model output up to the
            first ``'.'``, stripped) and ``'is_correct'`` (True when at least
            50% of the reference keywords occur in that text).
        """
        response = self.model.generate_content(f"{self.instruction} {condition}")
        try:
            model_output = " ".join(part.text for part in response.parts)
        except ValueError:
            # NOTE(review): the SDK's `parts` accessor is expected to raise
            # ValueError when the response carries no candidate (e.g. a
            # safety-blocked prompt) - confirm against the installed version.
            # Such a case is scored as wrong instead of aborting the run.
            model_output = ""
        first_diagnosis = model_output.split('.')[0].strip()  # Only take first sentence

        # Compare with the reference diagnosis using keyword matching.
        stop_words = self._get_stop_words()
        correct_keywords = self._tokenize(correct_diagnosis) - stop_words
        output_keywords = self._tokenize(first_diagnosis) - stop_words
        matched = len(correct_keywords & output_keywords)
        accuracy = (matched / len(correct_keywords)) * 100 if correct_keywords else 0

        return {
            'diagnosis': first_diagnosis,
            'is_correct': accuracy >= 50
        }

def evaluate_diagnoses(file_path: str, api_key: str, delay_seconds: float = 1):
    """Evaluate every case in an Excel sheet and print per-case and overall results.

    The sheet must contain the columns ``'Patient Conditions'`` and
    ``'Correct Diagnosis'``.

    Args:
        file_path: Path to the Excel file with the test cases.
        api_key: API key handed to ``DiagnosisEvaluator``.
        delay_seconds: Pause between consecutive model calls.
    """
    evaluator = DiagnosisEvaluator(api_key)
    test_data = pd.read_excel(file_path)
    total_cases = len(test_data)
    correct_count = 0

    print("\nEvaluating diagnoses...\n")
    # Number cases by position so the labels stay 1..N regardless of what
    # the DataFrame index happens to contain.
    for case_number, (_, row) in enumerate(test_data.iterrows(), start=1):
        result = evaluator.evaluate_case(row['Patient Conditions'], row['Correct Diagnosis'])

        print(f"Case {case_number}:")
        print(f"Correct: {row['Correct Diagnosis']}")
        print(f"Model: {result['diagnosis']}")
        print(f"Result: {'Correct' if result['is_correct'] else 'Wrong'}\n")

        if result['is_correct']:
            correct_count += 1
        # No need to wait once the final request has been made.
        if case_number < total_cases:
            time.sleep(delay_seconds)

    # An empty sheet reports 0% instead of raising ZeroDivisionError.
    accuracy = (correct_count / total_cases) * 100 if total_cases else 0.0
    print("=" * 40)
    print(f"Overall Accuracy: {accuracy:.2f}%")
    print(f"Correct Cases: {correct_count}/{total_cases}")

if __name__ == "__main__":
    import os

    # The API key is read from the environment so that it is never stored in
    # the notebook. A key that was previously committed in plain text should
    # be treated as compromised and revoked.
    API_KEY = os.environ.get("GOOGLE_API_KEY")
    if not API_KEY:
        raise RuntimeError("Set the GOOGLE_API_KEY environment variable before running this cell.")
    FILE_PATH = "Test_Data.xlsx"
    evaluate_diagnoses(FILE_PATH, API_KEY)


Evaluating diagnoses...

Case 1:
Correct: Cerebral Malaria
Model: Diagnosis:

Chagas Disease

* Probability: High (Patient from an endemic area, travel history)
* Emergency: Immediate medical attention is needed
* Rationale:  The patient's recent travel to Central America and his current condition align with the clinical manifestations of Trypanosoma cruzi infection, which is prevalent in that region
Result: Wrong

Case 2:
Correct: Sepsis
Model: Diagnosis:

Sepsis with Multiorgan Dysfunction Syndrome

* Probability: High
* Emergency: High
* Rationale: The patient presented with hypothermia, tachycardia, leukocytosis, and organ dysfunction (hypotension, atrial fibrillation, and left femoral artery occlusion)
Result: Correct

Case 3:
Correct: type I necrotizing fasciitis, sepsis 
Model: Diagnosis:

Retroperitoneal Gas Gangrene
Result: Wrong

Case 4:
Correct: Septic arthritis, impending avascular necrosis, brucella infection 
Model: Diagnosis: Septic Arthritis

* Probability: High
* Emer