In [None]:
import logging
import os

from smolagents import Tool, CodeAgent, LiteLLMModel

In [None]:
# Import all tools
from reclaim_tools import *

In [None]:
# Instantiate one object per tool class. The classes are not defined in this
# notebook; presumably they come from the `reclaim_tools` wildcard import.
# The instances are handed to the agents created further down.

# 2D image tools: a training and an inference tool per architecture
# (ResNet, VGG16, InceptionV3, EfficientNetV2), plus a performance
# comparison tool and a fine-tuning tool.
pytorch_resnet_inference_tool = PyTorchResNetInferenceTool()
pytorch_resnet_training_tool = PyTorchResNetTrainingTool()
pytorch_vgg16_inference_tool = PyTorchVGG16InferenceTool()
pytorch_vgg16_training_tool = PyTorchVGG16TrainingTool()
pytorch_inceptionv3_inference_tool = PyTorchInceptionV3InferenceTool()
pytorch_inceptionv3_training_tool = PyTorchInceptionV3TrainingTool()
pytorch_efficientnet_training_tool = PyTorchEfficientNetV2TrainingTool()
pytorch_efficientnet_inference_tool = PyTorchEfficientNetV2InferenceTool()
performance_comparison_tool = ModelPerformanceComparisonTool()
fine_tuning_tool = PyTorchModelFineTuningTool()

# 3D image tools: training / inference for 3D ResNet and 3D DenseNet,
# plus a fine-tuning tool for 3D models.
pytorch_3d_resnet_training_tool = PyTorch3DResNetTrainingTool()
pytorch_3d_resnet_inference_tool = PyTorch3DResNetInferenceTool()
pytorch_3d_densenet_training_tool = PyTorch3DDenseNetTrainingTool()
pytorch_3d_densenet_inference_tool = PyTorch3DDenseNetInferenceTool()
pytorch_3d_fine_tuning_tool = PyTorch3DModelFineTuningTool()

In [None]:
# LLM backend shared by every agent below: OpenAI GPT-4.1 accessed through LiteLLM.
# The API key is read from the OPENAI_API_KEY environment variable so that no
# credential is ever stored in (or committed with) the notebook.
_openai_api_key = os.environ.get("OPENAI_API_KEY")
if not _openai_api_key:
    # Fail early with an actionable message instead of a late authentication error.
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this notebook."
    )

model = LiteLLMModel(model_id="openai/gpt-4.1", api_key=_openai_api_key)

In [None]:
def _build_code_agent(name, tools, max_steps, **extra_kwargs):
    """Create a ``CodeAgent`` with the settings shared by every agent here.

    Each agent is built with the module-level ``model``,
    ``add_base_tools=True`` and ``additional_authorized_imports=['os']``.

    Args:
        name: Value passed as ``name`` to ``CodeAgent``.
        tools: List of tool instances passed as ``tools``.
        max_steps: Value passed as ``max_steps``.
        **extra_kwargs: Further ``CodeAgent`` keyword arguments, forwarded
            unchanged (used below for ``description`` and ``managed_agents``).

    Returns:
        The constructed ``CodeAgent``.
    """
    return CodeAgent(
        tools=tools,
        model=model,
        add_base_tools=True,
        additional_authorized_imports=['os'],  # a fresh list for every agent
        name=name,
        max_steps=max_steps,
        **extra_kwargs,
    )


# Worker agent holding every 2D and 3D training / inference tool.
image_classification_agent = _build_code_agent(
    name="image_classification_agent",
    tools=[
        pytorch_resnet_training_tool, pytorch_resnet_inference_tool,
        pytorch_inceptionv3_training_tool, pytorch_inceptionv3_inference_tool,
        pytorch_vgg16_training_tool, pytorch_vgg16_inference_tool,
        pytorch_efficientnet_training_tool, pytorch_efficientnet_inference_tool,
        pytorch_3d_resnet_training_tool, pytorch_3d_resnet_inference_tool,
        pytorch_3d_densenet_training_tool, pytorch_3d_densenet_inference_tool,
    ],
    max_steps=15,
    description="Trains and deploys 2D and 3D image classification models",
)

# Worker agent holding only the performance comparison tool.
performance_comparison_agent = _build_code_agent(
    name="performance_comparison_agent",
    tools=[performance_comparison_tool],
    max_steps=15,
    description="Compares the inference results with the training test results",
)

# Worker agent holding the 2D and the 3D fine-tuning tools.
fine_tuning_agent = _build_code_agent(
    name="fine_tuning_agent",
    tools=[fine_tuning_tool, pytorch_3d_fine_tuning_tool],
    max_steps=15,
    description="Performs fine tuning of an existing 2D or 3D image classification model",
)

# Master agent: has no tools of its own and manages the three worker agents.
master_agent = _build_code_agent(
    name="master_agent",
    tools=[],
    max_steps=20,
    managed_agents=[
        image_classification_agent,
        performance_comparison_agent,
        fine_tuning_agent,
    ],
)

## Training prompts

In [None]:
# 3D NoduleMNIST (CT) pipeline with a 3D ResNet-50. The prompts are run in order:
#   1. train the model
#   2. run inference on the inference dataset
#   3. compare training-test vs. inference metrics (fine-tune if performance declined)
#   4. run inference with the fine-tuned model
#   5. compare training-test vs. fine-tuned inference metrics
# NOTE(review): steps 4 and 5 read the fine-tuned model, which step 3 only asks
# for when performance "has declined significantly" - confirm that the
# fine-tuned checkpoint always exists before these steps run.

nodule_training_prompt_resnet50 = """
Train a 3d resnet-50 model. 
The dataset is available here: "splitted_data/nodule_3d_ct/model_development/data.npz". 
Number of classes 2. 
Set patience to 10, number of epochs to 200 and batch size to 32. Use pretrained weights.
Handle class imbalance with weighted cross entropy loss and set augmentation level to moderate.
Output directory: "tests/model_development/nodule_3d_ct/3d_resnet50/training_output"
"""

nodule_inference_prompt_resnet50 = """
Use the 3D ResNet-50 model at: "tests/model_development/nodule_3d_ct/3d_resnet50/training_output/best_model.pt"
to perform inference on: "splitted_data/nodule_3d_ct/inference_dataset/data.npz".
Number of classes: 2. Use ground truth labels to compute performance metrics.
Save results to: "tests/model_development/nodule_3d_ct/3d_resnet50/inference_output"
"""

# The config-file path below used to end with a stray apostrophe
# (model_config.json'); it is now quoted like the model path next to it.
nodule_performance_prompt_resnet50 = """
Check if the performance of the model has declined. 
The training test metrics are in: "tests/model_development/nodule_3d_ct/3d_resnet50/training_output/test_metrics.json"
The inference evaluation metrics are in: "tests/model_development/nodule_3d_ct/3d_resnet50/inference_output/metrics.json"
Output folder: tests/compare_performance/nodule_3d_ct/3d_resnet50/
If the performance of the model has declined significantly use these data to fine tune it: "splitted_data/nodule_3d_ct/fine_tuning_dataset/data.npz"
Path to the model: "tests/model_development/nodule_3d_ct/3d_resnet50/training_output/best_model.pt". Path to the config file: "tests/model_development/nodule_3d_ct/3d_resnet50/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/nodule_3d_ct/3d_resnet50
"""

nodule_fine_tuned_inference_prompt_resnet50 = """
Use the 3d resnet-50 model available here: "tests/fine_tuned_models/nodule_3d_ct/3d_resnet50/best_finetuned_model.pt", 
to perform inference on: "splitted_data/nodule_3d_ct/inference_dataset/data.npz".
Number of classes: 2. Use ground truth labels to compute performance metrics.
Save the evaluation output in this directory: "tests/fine_tuned_models/nodule_3d_ct/3d_resnet50/inference_output". 
"""

nodule_fine_tuned_performance_prompt_resnet50 = """
Check if the performance of the model has declined. 
The training test metrics are in: "tests/model_development/nodule_3d_ct/3d_resnet50/training_output/test_metrics.json"
The inference evaluation metrics are in: "tests/fine_tuned_models/nodule_3d_ct/3d_resnet50/inference_output/metrics.json"
Output folder: tests/fine_tuned_models/nodule_3d_ct/3d_resnet50/compare_fine_tuned_with_training_results
"""


prompts_resnet50 = [
    nodule_training_prompt_resnet50,
    nodule_inference_prompt_resnet50,
    nodule_performance_prompt_resnet50,
    nodule_fine_tuned_inference_prompt_resnet50,
    nodule_fine_tuned_performance_prompt_resnet50
]

# Each prompt is submitted to the master agent as a separate run, in list order.
for prompt in prompts_resnet50:
    master_agent.run(prompt)

In [None]:
# 3D NoduleMNIST (CT) pipeline with a 3D DenseNet-121. The prompts are run in order:
#   1. train the model
#   2. run inference on the inference dataset
#   3. compare training-test vs. inference metrics
#   4. fine-tune the trained model (always requested here, unlike the ResNet-50 cell)
#   5. run inference with the fine-tuned model
#   6. compare training-test vs. fine-tuned inference metrics

nodule_training_prompt_densenet121 = """
Train a 3d Densenet-121 model. 
The dataset is available here: "splitted_data/nodule_3d_ct/model_development/data.npz". 
Number of classes 2. 
Set patience to 10, number of epochs to 100 and batch size to 32.
Handle class imbalance and set augmentation level to moderate. Use accuracy as the evaluation metric.
Output directory: "tests/model_development/nodule_3d_ct/3d_densenet121/training_output"
"""

nodule_inference_prompt_densenet121 = """
Use the 3D Densenet-121 model at: "tests/model_development/nodule_3d_ct/3d_densenet121/training_output/best_model.pt"
to perform inference on: "splitted_data/nodule_3d_ct/inference_dataset/data.npz".
Number of classes: 2. Use ground truth labels to compute performance metrics.
Save results to: "tests/model_development/nodule_3d_ct/3d_densenet121/inference_output"
"""

nodule_performance_prompt_densenet121 = """
Check if the performance of the model has declined. 
The training test metrics are in: "tests/model_development/nodule_3d_ct/3d_densenet121/training_output/test_metrics.json"
The inference evaluation metrics are in: "tests/model_development/nodule_3d_ct/3d_densenet121/inference_output/metrics.json"
Output folder: tests/compare_performance/nodule_3d_ct/3d_densenet121/
"""

# The config-file path below used to end with a stray apostrophe
# (model_config.json'); it is now quoted like the model path next to it.
nodule_fine_tune_prompt_densenet121 = """
Use these data to fine tune the model: "splitted_data/nodule_3d_ct/fine_tuning_dataset/data.npz". Set number of epochs to 100 and patience to 10. Use accuracy as the evaluation metric. 
Use heavy augmentation. 
Set batch size to 32 and oversampling to handle class imbalance.
Path to the model: "tests/model_development/nodule_3d_ct/3d_densenet121/training_output/best_model.pt". Path to the config file: "tests/model_development/nodule_3d_ct/3d_densenet121/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/nodule_3d_ct/3d_densenet121
"""

nodule_fine_tuned_inference_prompt_densenet121 = """
Use the 3d Densenet-121 model available here: "tests/fine_tuned_models/nodule_3d_ct/3d_densenet121/best_finetuned_model.pt", 
to perform inference on: "splitted_data/nodule_3d_ct/inference_dataset/data.npz".
Number of classes: 2. Use ground truth labels to compute performance metrics.
Save the evaluation output in this directory: "tests/fine_tuned_models/nodule_3d_ct/3d_densenet121/inference_output". 
"""

nodule_fine_tuned_performance_prompt_densenet121 = """
Check if the performance of the model has declined. 
The training test metrics are in: "tests/model_development/nodule_3d_ct/3d_densenet121/training_output/test_metrics.json"
The inference evaluation metrics are in: "tests/fine_tuned_models/nodule_3d_ct/3d_densenet121/inference_output/metrics.json"
Output folder: tests/fine_tuned_models/nodule_3d_ct/3d_densenet121/compare_fine_tuned_with_training_results
"""

prompts_densenet121 = [
    nodule_training_prompt_densenet121,
    nodule_inference_prompt_densenet121,
    nodule_performance_prompt_densenet121,
    nodule_fine_tune_prompt_densenet121,
    nodule_fine_tuned_inference_prompt_densenet121,
    nodule_fine_tuned_performance_prompt_densenet121
]

# Each prompt is submitted to the master agent as a separate run, in list order.
for prompt in prompts_densenet121:
    master_agent.run(prompt)

In [None]:
# Cardiomegaly (X-ray) training prompts, one per 2D architecture:
# EfficientNet, ResNet50, VGG16 and InceptionV3.
# All four use the same dataset folder, 2 classes, 100 epochs, patience 5,
# heavy augmentation and batch size 64; only the architecture name and the
# output directory differ.

cardiomegaly_dataset_prompt_1 = """
Train a classification efficientnet model. 
The train, validation and test datasets: "splitted_data/cardiomegaly_dataset/model_development". 
Number of classes 2. 
Set the number of epochs to 100 and patience to 5. Set augmentation level to heavy. Set batch size to 64.
Output directory: tests/model_development/cardiomegaly/efficientnet/training_output
"""
cardiomegaly_dataset_prompt_2 = """
Train a classification resnet50 model. 
The train, validation and test datasets: "splitted_data/cardiomegaly_dataset/model_development". 
Number of classes 2. 
Set the number of epochs to 100 and patience to 5. Set augmentation level to heavy. Set batch size to 64.
Output directory: tests/model_development/cardiomegaly/resnet50/training_output
"""
cardiomegaly_dataset_prompt_3 = """
Train a classification vgg16 model. 
The train, validation and test datasets: "splitted_data/cardiomegaly_dataset/model_development". 
Number of classes 2.
Set the number of epochs to 100 and patience to 5. Set augmentation level to heavy. Set batch size to 64.
Output directory: tests/model_development/cardiomegaly/vgg16/training_output
"""
cardiomegaly_dataset_prompt_4 = """
Train a classification inceptionv3 model. 
The train, validation and test datasets: "splitted_data/cardiomegaly_dataset/model_development". 
Number of classes 2.
Set the number of epochs to 100 and patience to 5. Set augmentation level to heavy. Set batch size to 64.
Output directory: tests/model_development/cardiomegaly/inceptionv3/training_output
"""

prompts_cardiomegaly_dataset = [
    cardiomegaly_dataset_prompt_1,
    cardiomegaly_dataset_prompt_2,
    cardiomegaly_dataset_prompt_3,
    cardiomegaly_dataset_prompt_4
]

# Submit the training prompts to the master agent one at a time, in list order.
for prompt in prompts_cardiomegaly_dataset:
    master_agent.run(prompt)

In [None]:
# Brain tumor (MRI) training prompts, one per 2D architecture:
# EfficientNet, ResNet50, VGG16 and InceptionV3.
# All four use 3 classes, patience 10, 100 epochs and moderate augmentation.
# Only the EfficientNet prompt sets an explicit batch size (4); the other
# prompts leave the batch size unspecified.

brain_tumor_new_dataset_training_prompt_1 = """
Train a classification efficientnet model. 
The train, validation and test datasets: "splitted_data/brain_tumor_new_dataset/model_development". 
Number of classes 3. Set batch size to 4. 
Set patience to 10 and the number of epochs to 100. Set augmentation level to moderate.
Output directory: tests/model_development/brain_tumor_new_dataset/efficientnet/training_output
"""
brain_tumor_new_dataset_training_prompt_2 = """
Train a classification resnet50 model. 
The train, validation and test datasets: "splitted_data/brain_tumor_new_dataset/model_development". 
Number of classes 3. 
Set patience to 10 and the number of epochs to 100. Set augmentation level to moderate.
Output directory: tests/model_development/brain_tumor_new_dataset/resnet50/training_output
"""
brain_tumor_new_dataset_training_prompt_3 = """
Train a classification vgg16 model. 
The train, validation and test datasets: "splitted_data/brain_tumor_new_dataset/model_development". 
Number of classes 3. 
Set patience to 10 and the number of epochs to 100. Set augmentation level to moderate.
Output directory: tests/model_development/brain_tumor_new_dataset/vgg16/training_output
"""
brain_tumor_new_dataset_training_prompt_4 = """
Train a classification inceptionv3 model. 
The train, validation and test datasets: "splitted_data/brain_tumor_new_dataset/model_development". 
Number of classes 3.
Set patience to 10 and the number of epochs to 100. Set augmentation level to moderate.
Output directory: tests/model_development/brain_tumor_new_dataset/inceptionv3/training_output
"""

prompts_brain_tumor_new_dataset = [
    brain_tumor_new_dataset_training_prompt_1,
    brain_tumor_new_dataset_training_prompt_2,
    brain_tumor_new_dataset_training_prompt_3,
    brain_tumor_new_dataset_training_prompt_4
]
# Submit the training prompts to the master agent one at a time, in list order.
for prompt in prompts_brain_tumor_new_dataset:
    master_agent.run(prompt)

In [None]:
# COVID (CT) training prompts, one per 2D architecture:
# EfficientNet, ResNet50, VGG16 and InceptionV3.
# All four use 2 classes and 100 epochs. The early-stopping wording differs per
# prompt: EfficientNet sets patience 5, ResNet50 and VGG16 only ask for early
# stopping, and InceptionV3 sets patience 3 and disables auxiliary logits.

covid_new_dataset_training_prompt_1 = """
Train a classification efficientnet model. 
The train, validation and test datasets: "splitted_data/covid_new_dataset/model_development". 
Number of classes 2. 
Set patience to 5 and the number of epochs to 100.
Output directory: tests/model_development/covid_new_dataset/efficientnet/training_output
"""
covid_new_dataset_training_prompt_2 = """
Train a classification resnet50 model. 
The train, validation and test datasets: "splitted_data/covid_new_dataset/model_development". 
Number of classes 2. 
Use early stopping and set number of epochs to 100.
Output directory: tests/model_development/covid_new_dataset/resnet50/training_output
"""
covid_new_dataset_training_prompt_3 = """
Train a classification vgg16 model. 
The train, validation and test datasets: "splitted_data/covid_new_dataset/model_development". 
Number of classes 2. 
Use early stopping and set number of epochs to 100.
Output directory: tests/model_development/covid_new_dataset/vgg16/training_output
"""
covid_new_dataset_training_prompt_4 = """
Train a classification inceptionv3 model. 
The train, validation and test datasets: "splitted_data/covid_new_dataset/model_development". 
Number of classes 2.
Use early stopping and set number of epochs to 100. Set patience to 3. Do not use auxiliary logits.
Output directory: tests/model_development/covid_new_dataset/inceptionv3/training_output
"""

prompts_covid_new_dataset = [
    covid_new_dataset_training_prompt_1,
    covid_new_dataset_training_prompt_2,
    covid_new_dataset_training_prompt_3,
    covid_new_dataset_training_prompt_4
]
# Run through the master agent, like every other prompt cell in this notebook.
# This cell previously called image_classification_agent.run() directly, which
# bypassed the master agent for this one dataset only.
for prompt in prompts_covid_new_dataset:
    master_agent.run(prompt)

In [None]:
# PneumoniaMNIST (28x28 variant, per the dataset folder name) training prompts,
# one per 2D architecture: EfficientNet, ResNet50, VGG16 and InceptionV3.
# All four use 2 classes, 150 epochs, patience 5, no pretrained weights and
# basic augmentation; only the architecture name and output directory differ.

pneumoniamnist_training_prompt_1 = """
Train a classification efficientnet model. 
The train, validation and test datasets: "splitted_data/pneumoniamnist_28/model_development". 
Number of classes 2. 
Set the number of epochs to 150 and patience to 5.
Do not use pretrained weights. Use basic augmentation.
Output directory: tests/model_development/pneumoniamnist_28/efficientnet/training_output
"""
pneumoniamnist_training_prompt_2 = """
Train a classification resnet50 model. 
The train, validation and test datasets: "splitted_data/pneumoniamnist_28/model_development". 
Number of classes 2. 
Set the number of epochs to 150 and patience to 5.
Do not use pretrained weights. Use basic augmentation.
Output directory: tests/model_development/pneumoniamnist_28/resnet50/training_output
"""
pneumoniamnist_training_prompt_3 = """
Train a classification vgg16 model. 
The train, validation and test datasets: "splitted_data/pneumoniamnist_28/model_development". 
Number of classes 2. 
Set the number of epochs to 150 and patience to 5.
Do not use pretrained weights. Use basic augmentation.
Output directory: tests/model_development/pneumoniamnist_28/vgg16/training_output
"""
pneumoniamnist_training_prompt_4 = """
Train a classification inceptionv3 model. 
The train, validation and test datasets: "splitted_data/pneumoniamnist_28/model_development". 
Number of classes 2. 
Set the number of epochs to 150 and patience to 5.
Do not use pretrained weights. Use basic augmentation.
Output directory: tests/model_development/pneumoniamnist_28/inceptionv3/training_output
"""

pneumoniamnist_training_prompts = [
    pneumoniamnist_training_prompt_1,
    pneumoniamnist_training_prompt_2,
    pneumoniamnist_training_prompt_3,
    pneumoniamnist_training_prompt_4
]

# Submit the training prompts to the master agent one at a time, in list order.
for prompt in pneumoniamnist_training_prompts:
    master_agent.run(prompt)

## Inference prompts

In [None]:
# Cardiomegaly inference prompts, one per trained architecture
# (EfficientNet, ResNet50, VGG16, InceptionV3). Each prompt points at the
# best_model.pt produced by the matching training prompt, classifies the
# inference_test folder, evaluates against inference_labels.csv and writes to
# the architecture's inference_output directory.

cardiomegaly_dataset_inference_prompt_1 = """
Use the efficientnet model available here: "tests/model_development/cardiomegaly/efficientnet/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/cardiomegaly_dataset/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/cardiomegaly_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/cardiomegaly/efficientnet/inference_output". 
"""

cardiomegaly_dataset_inference_prompt_2 = """
Use the resnet50 model available here: "tests/model_development/cardiomegaly/resnet50/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/cardiomegaly_dataset/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/cardiomegaly_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/cardiomegaly/resnet50/inference_output". 
"""

# This prompt used to contain two contradictory instructions ("Activate batch
# normalization." and "Do not use batch normalization."). They are replaced by
# a single instruction that defers to the model configuration file named in
# the prompt.
# NOTE(review): assumes model_config.json records whether the VGG16 model was
# built with batch normalization - confirm against the training tool's output.
cardiomegaly_dataset_inference_prompt_3 = """
Use the vgg16 model available here: "tests/model_development/cardiomegaly/vgg16/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/cardiomegaly_dataset/inference_dataset/inference_test".
The number of classes is 2. Model configuration file: tests/model_development/cardiomegaly/vgg16/training_output/model_config.json. Use the batch normalization setting stored in the model configuration file.
Evaluate the predictions using the ground truth labels available in:"splitted_data/cardiomegaly_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/cardiomegaly/vgg16/inference_output". 
"""

cardiomegaly_dataset_inference_prompt_4 = """
Use the inceptionv3 model available here: "tests/model_development/cardiomegaly/inceptionv3/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/cardiomegaly_dataset/inference_dataset/inference_test".
The number of classes is 2. Auxiliary logits set to true.
Evaluate the predictions using the ground truth labels available in:"splitted_data/cardiomegaly_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/cardiomegaly/inceptionv3/inference_output". 
"""

prompts_cardiomegaly_dataset_inference = [
    cardiomegaly_dataset_inference_prompt_1,
    cardiomegaly_dataset_inference_prompt_2,
    cardiomegaly_dataset_inference_prompt_3,
    cardiomegaly_dataset_inference_prompt_4
]

# Submit the inference prompts to the master agent one at a time, in list order.
for prompt in prompts_cardiomegaly_dataset_inference:
    master_agent.run(prompt)

In [None]:
# Brain tumor (MRI) inference prompts, one per trained architecture
# (EfficientNet, ResNet50, VGG16, InceptionV3). Each prompt points at the
# best_model.pt under the architecture's training_output directory, classifies
# the inference_test folder with 3 classes, evaluates against
# inference_labels.csv and writes to the architecture's inference_output
# directory. The VGG16 prompt additionally names the model configuration file;
# the InceptionV3 prompt sets auxiliary logits to true.

brain_tumor_new_dataset_inference_prompt_1 = """
Use the efficientnet model available here: "tests/model_development/brain_tumor_new_dataset/efficientnet/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/brain_tumor_new_dataset/inference_dataset/inference_test".
The number of classes is 3. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/brain_tumor_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/brain_tumor_new_dataset/efficientnet/inference_output". 
"""

brain_tumor_new_dataset_inference_prompt_2 = """
Use the resnet50 model available here: "tests/model_development/brain_tumor_new_dataset/resnet50/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/brain_tumor_new_dataset/inference_dataset/inference_test".
The number of classes is 3. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/brain_tumor_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/brain_tumor_new_dataset/resnet50/inference_output". 
"""

brain_tumor_new_dataset_inference_prompt_3 = """
Use the vgg16 model available here: "tests/model_development/brain_tumor_new_dataset/vgg16/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/brain_tumor_new_dataset/inference_dataset/inference_test".
The number of classes is 3. Model configuration file: tests/model_development/brain_tumor_new_dataset/vgg16/training_output/model_config.json
Evaluate the predictions using the ground truth labels available in:"splitted_data/brain_tumor_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/brain_tumor_new_dataset/vgg16/inference_output". 
"""

brain_tumor_new_dataset_inference_prompt_4 = """
Use the inceptionv3 model available here: "tests/model_development/brain_tumor_new_dataset/inceptionv3/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/brain_tumor_new_dataset/inference_dataset/inference_test".
The number of classes is 3. Auxiliary logits set to true.
Evaluate the predictions using the ground truth labels available in:"splitted_data/brain_tumor_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/brain_tumor_new_dataset/inceptionv3/inference_output". 
"""

prompts_brain_tumor_new_dataset_inference = [
    brain_tumor_new_dataset_inference_prompt_1,
    brain_tumor_new_dataset_inference_prompt_2,
    brain_tumor_new_dataset_inference_prompt_3,
    brain_tumor_new_dataset_inference_prompt_4
]

# Submit the inference prompts to the master agent one at a time, in list order.
for prompt in prompts_brain_tumor_new_dataset_inference:
    master_agent.run(prompt)

In [None]:
# COVID (CT) inference prompts, one per trained architecture
# (EfficientNet, ResNet50, VGG16, InceptionV3). Each prompt points at the
# best_model.pt under the architecture's training_output directory, classifies
# the inference_test folder with 2 classes, evaluates against
# inference_labels.csv and writes to the architecture's inference_output
# directory.

covid_new_dataset_inference_prompt_1 = """
Use the efficientnet model available here: "tests/model_development/covid_new_dataset/efficientnet/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/covid_new_dataset/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/covid_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/covid_new_dataset/efficientnet/inference_output". 
"""

covid_new_dataset_inference_prompt_2 = """
Use the resnet50 model available here: "tests/model_development/covid_new_dataset/resnet50/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/covid_new_dataset/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/covid_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/covid_new_dataset/resnet50/inference_output". 
"""

covid_new_dataset_inference_prompt_3 = """
Use the vgg16 model available here: "tests/model_development/covid_new_dataset/vgg16/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/covid_new_dataset/inference_dataset/inference_test". Activate batch normalization.
The number of classes is 2. Model configuration file: tests/model_development/covid_new_dataset/vgg16/training_output/model_config.json
Evaluate the predictions using the ground truth labels available in:"splitted_data/covid_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/covid_new_dataset/vgg16/inference_output". 
"""

# The COVID InceptionV3 training prompt in this notebook says "Do not use
# auxiliary logits", so the inference prompt now asks for the same setting
# (it previously said "Auxiliary logits set to true.").
# NOTE(review): assumes the inference tool must rebuild the network with the
# same auxiliary-logits setting as training for the checkpoint to load - confirm.
covid_new_dataset_inference_prompt_4 = """
Use the inceptionv3 model available here: "tests/model_development/covid_new_dataset/inceptionv3/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/covid_new_dataset/inference_dataset/inference_test".
The number of classes is 2. Auxiliary logits set to false.
Evaluate the predictions using the ground truth labels available in:"splitted_data/covid_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/covid_new_dataset/inceptionv3/inference_output". 
"""

prompts_covid_new_dataset_inference = [
    covid_new_dataset_inference_prompt_1,
    covid_new_dataset_inference_prompt_2,
    covid_new_dataset_inference_prompt_3,
    covid_new_dataset_inference_prompt_4
]

# Submit the inference prompts to the master agent one at a time, in list order.
for prompt in prompts_covid_new_dataset_inference:
    master_agent.run(prompt)

In [None]:
# PneumoniaMNIST inference prompts, one per trained architecture
# (EfficientNet, ResNet50, VGG16, InceptionV3). Each prompt points at the
# best_model.pt under the architecture's training_output directory, classifies
# the inference_test folder with 2 classes, evaluates against
# inference_labels.csv and writes to the architecture's inference_output
# directory.

pneumoniamnist_inference_prompt_1 = """
Use the efficientnet model available here: "tests/model_development/pneumoniamnist_28/efficientnet/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/pneumoniamnist_28/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/pneumoniamnist_28/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/pneumoniamnist_28/efficientnet/inference_output". 
"""

pneumoniamnist_inference_prompt_2 = """
Use the resnet50 model available here: "tests/model_development/pneumoniamnist_28/resnet50/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/pneumoniamnist_28/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/pneumoniamnist_28/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/pneumoniamnist_28/resnet50/inference_output". 
"""

pneumoniamnist_inference_prompt_3 = """
Use the vgg16 model available here: "tests/model_development/pneumoniamnist_28/vgg16/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/pneumoniamnist_28/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/pneumoniamnist_28/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/pneumoniamnist_28/vgg16/inference_output". 
"""

pneumoniamnist_inference_prompt_4 = """
Use the inceptionv3 model available here: "tests/model_development/pneumoniamnist_28/inceptionv3/training_output/best_model.pt", 
to classify the images in this folder: "splitted_data/pneumoniamnist_28/inference_dataset/inference_test".
The number of classes is 2. 
Evaluate the predictions using the ground truth labels available in:"splitted_data/pneumoniamnist_28/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/model_development/pneumoniamnist_28/inceptionv3/inference_output". 
"""

pneumoniamnist_inference_prompts = [
    pneumoniamnist_inference_prompt_1,
    pneumoniamnist_inference_prompt_2,
    pneumoniamnist_inference_prompt_3,
    pneumoniamnist_inference_prompt_4
]

# Submit the inference prompts to the master agent one at a time, in list order.
# (Loop body re-indented to 4 spaces to match the other cells.)
for prompt in pneumoniamnist_inference_prompts:
    master_agent.run(prompt)

## Performance checking and fine-tuning prompts

In [None]:
# Cardiomegaly performance-check prompts, one per architecture. Each prompt
# asks to compare the training test metrics with the inference metrics and,
# only if performance has declined significantly, to fine-tune the model on
# the fine_tuning_dataset folder and save it under tests/fine_tuned_models.
# Note the order here is EfficientNet, InceptionV3, ResNet50, VGG16, which
# differs from the training and inference cells.
# The config-file paths used to end with a stray apostrophe
# (model_config.json'); they are now quoted like the model paths next to them.

cardiomegaly_dataset_performance_prompt_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/cardiomegaly/efficientnet/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/cardiomegaly/efficientnet/inference_output/metrics.json
Output folder: tests/compare_performance/cardiomegaly/efficientnet/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/cardiomegaly_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/cardiomegaly/efficientnet/training_output/best_model.pt". Path to the config file: "tests/model_development/cardiomegaly/efficientnet/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/cardiomegaly/efficientnet
"""
cardiomegaly_dataset_performance_prompt_2 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/cardiomegaly/inceptionv3/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/cardiomegaly/inceptionv3/inference_output/metrics.json
Output folder: tests/compare_performance/cardiomegaly/inceptionv3/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/cardiomegaly_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/cardiomegaly/inceptionv3/training_output/best_model.pt". Path to the config file: "tests/model_development/cardiomegaly/inceptionv3/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/cardiomegaly/inceptionv3
"""
cardiomegaly_dataset_performance_prompt_3 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/cardiomegaly/resnet50/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/cardiomegaly/resnet50/inference_output/metrics.json
Output folder: tests/compare_performance/cardiomegaly/resnet50/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/cardiomegaly_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/cardiomegaly/resnet50/training_output/best_model.pt". Path to the config file: "tests/model_development/cardiomegaly/resnet50/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/cardiomegaly/resnet50
"""

cardiomegaly_dataset_performance_prompt_4 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/cardiomegaly/vgg16/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/cardiomegaly/vgg16/inference_output/metrics.json
Output folder: tests/compare_performance/cardiomegaly/vgg16/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/cardiomegaly_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/cardiomegaly/vgg16/training_output/best_model.pt". Path to the config file: "tests/model_development/cardiomegaly/vgg16/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/cardiomegaly/vgg16
"""


cardiomegaly_dataset_performance_prompts = [
    cardiomegaly_dataset_performance_prompt_1,
    cardiomegaly_dataset_performance_prompt_2,
    cardiomegaly_dataset_performance_prompt_3,
    cardiomegaly_dataset_performance_prompt_4
]

# Submit the performance prompts to the master agent one at a time, in list order.
for prompt in cardiomegaly_dataset_performance_prompts:
    master_agent.run(prompt)

In [None]:
# brain_tumor_new_dataset: performance check and conditional fine-tuning.
# Each prompt asks the agent to compare the training test metrics with the
# inference metrics of one model and, if performance declined significantly,
# to fine-tune that model on the fine-tuning split.
# Config-file paths are double-quoted like the model paths (the originals
# ended with a stray single quote that could be read as part of the path).

# EfficientNet (fine-tuning hyperparameters are pinned in the prompt).
brain_tumor_new_dataset_performance_prompt_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/brain_tumor_new_dataset/efficientnet/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/brain_tumor_new_dataset/efficientnet/inference_output/metrics.json
Output folder: tests/compare_performance/brain_tumor_new_dataset/efficientnet/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/brain_tumor_new_dataset/fine_tuning_dataset/
Fine tuning strategy: full. Augmentation level: moderate. Batch size: 4. Number of epochs: 100. Patience: 10.
Path to the model: "tests/model_development/brain_tumor_new_dataset/efficientnet/training_output/best_model.pt". Path to the config file: "tests/model_development/brain_tumor_new_dataset/efficientnet/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/brain_tumor_new_dataset/efficientnet
"""

# InceptionV3.
brain_tumor_new_dataset_performance_prompt_2 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/brain_tumor_new_dataset/inceptionv3/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/brain_tumor_new_dataset/inceptionv3/inference_output/metrics.json
Output folder: tests/compare_performance/brain_tumor_new_dataset/inceptionv3/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/brain_tumor_new_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/brain_tumor_new_dataset/inceptionv3/training_output/best_model.pt". Path to the config file: "tests/model_development/brain_tumor_new_dataset/inceptionv3/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/brain_tumor_new_dataset/inceptionv3
"""

# ResNet50 (fine-tuning hyperparameters are pinned in the prompt).
brain_tumor_new_dataset_performance_prompt_3 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/brain_tumor_new_dataset/resnet50/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/brain_tumor_new_dataset/resnet50/inference_output/metrics.json
Output folder: tests/compare_performance/brain_tumor_new_dataset/resnet50/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/brain_tumor_new_dataset/fine_tuning_dataset/
Fine tuning strategy: full. Augmentation level: moderate. Batch size: 4. Number of epochs: 100. Patience: 10.
Path to the model: "tests/model_development/brain_tumor_new_dataset/resnet50/training_output/best_model.pt". Path to the config file: "tests/model_development/brain_tumor_new_dataset/resnet50/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/brain_tumor_new_dataset/resnet50
"""

# VGG16.
brain_tumor_new_dataset_performance_prompt_4 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/brain_tumor_new_dataset/vgg16/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/brain_tumor_new_dataset/vgg16/inference_output/metrics.json
Output folder: tests/compare_performance/brain_tumor_new_dataset/vgg16/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/brain_tumor_new_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/brain_tumor_new_dataset/vgg16/training_output/best_model.pt". Path to the config file: "tests/model_development/brain_tumor_new_dataset/vgg16/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/brain_tumor_new_dataset/vgg16
"""


# All brain-tumor performance-check prompts, in the order they are submitted.
brain_tumor_new_dataset_performance_prompts = [
    brain_tumor_new_dataset_performance_prompt_1,
    brain_tumor_new_dataset_performance_prompt_2,
    brain_tumor_new_dataset_performance_prompt_3,
    brain_tumor_new_dataset_performance_prompt_4,
]

# Submit the prompts to the master agent one at a time.
for prompt in brain_tumor_new_dataset_performance_prompts:
    master_agent.run(prompt)

In [None]:
# covid_new_dataset: performance check and conditional fine-tuning.
# Each prompt asks the agent to compare the training test metrics with the
# inference metrics of one model and, if performance declined significantly,
# to fine-tune that model on the fine-tuning split.
# Config-file paths are double-quoted like the model paths (the originals
# ended with a stray single quote that could be read as part of the path).

# EfficientNet (full fine-tuning with patience 15 requested in the prompt;
# the original prompt misspelled "patience" as "patient").
covid_new_dataset_performance_prompt_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/covid_new_dataset/efficientnet/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/covid_new_dataset/efficientnet/inference_output/metrics.json
Output folder: tests/compare_performance/covid_new_dataset/efficientnet/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/covid_new_dataset/fine_tuning_dataset/
Fine tuning strategy: full and set patience to 15.
Path to the model: "tests/model_development/covid_new_dataset/efficientnet/training_output/best_model.pt". Path to the config file: "tests/model_development/covid_new_dataset/efficientnet/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/covid_new_dataset/efficientnet
"""

# InceptionV3.
covid_new_dataset_performance_prompt_2 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/covid_new_dataset/inceptionv3/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/covid_new_dataset/inceptionv3/inference_output/metrics.json
Output folder: tests/compare_performance/covid_new_dataset/inceptionv3/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/covid_new_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/covid_new_dataset/inceptionv3/training_output/best_model.pt". Path to the config file: "tests/model_development/covid_new_dataset/inceptionv3/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/covid_new_dataset/inceptionv3
"""

# ResNet50.
covid_new_dataset_performance_prompt_3 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/covid_new_dataset/resnet50/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/covid_new_dataset/resnet50/inference_output/metrics.json
Output folder: tests/compare_performance/covid_new_dataset/resnet50/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/covid_new_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/covid_new_dataset/resnet50/training_output/best_model.pt". Path to the config file: "tests/model_development/covid_new_dataset/resnet50/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/covid_new_dataset/resnet50
"""

# VGG16.
covid_new_dataset_performance_prompt_4 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/covid_new_dataset/vgg16/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/covid_new_dataset/vgg16/inference_output/metrics.json
Output folder: tests/compare_performance/covid_new_dataset/vgg16/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/covid_new_dataset/fine_tuning_dataset/
Path to the model: "tests/model_development/covid_new_dataset/vgg16/training_output/best_model.pt". Path to the config file: "tests/model_development/covid_new_dataset/vgg16/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/covid_new_dataset/vgg16
"""


# All covid performance-check prompts, in the order they are submitted.
covid_new_dataset_performance_prompts = [
    covid_new_dataset_performance_prompt_1,
    covid_new_dataset_performance_prompt_2,
    covid_new_dataset_performance_prompt_3,
    covid_new_dataset_performance_prompt_4,
]

# Submit the prompts to the master agent one at a time.
for prompt in covid_new_dataset_performance_prompts:
    master_agent.run(prompt)

In [None]:
# pneumoniamnist_28: performance check and conditional fine-tuning.
# Each prompt asks the agent to compare the training test metrics with the
# inference metrics of one model and, if performance declined significantly,
# to fine-tune that model on the fine-tuning split.
# Config-file paths are double-quoted like the model paths (the originals
# ended with a stray single quote that could be read as part of the path).

# EfficientNet.
pneumoniamnist_28_performance_prompt_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/pneumoniamnist_28/efficientnet/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/pneumoniamnist_28/efficientnet/inference_output/metrics.json
Save the comparison results in: tests/compare_performance/pneumoniamnist_28/efficientnet/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/pneumoniamnist_28/fine_tuning_dataset/
Path to the model: "tests/model_development/pneumoniamnist_28/efficientnet/training_output/best_model.pt". Path to the config file: "tests/model_development/pneumoniamnist_28/efficientnet/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/pneumoniamnist_28/efficientnet
"""

# InceptionV3.
pneumoniamnist_28_performance_prompt_2 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/pneumoniamnist_28/inceptionv3/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/pneumoniamnist_28/inceptionv3/inference_output/metrics.json
Save the comparison results in: tests/compare_performance/pneumoniamnist_28/inceptionv3/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/pneumoniamnist_28/fine_tuning_dataset/
Path to the model: "tests/model_development/pneumoniamnist_28/inceptionv3/training_output/best_model.pt". Path to the config file: "tests/model_development/pneumoniamnist_28/inceptionv3/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/pneumoniamnist_28/inceptionv3
"""

# ResNet50.
pneumoniamnist_28_performance_prompt_3 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/pneumoniamnist_28/resnet50/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/pneumoniamnist_28/resnet50/inference_output/metrics.json
Save the comparison results in: tests/compare_performance/pneumoniamnist_28/resnet50/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/pneumoniamnist_28/fine_tuning_dataset/
Path to the model: "tests/model_development/pneumoniamnist_28/resnet50/training_output/best_model.pt". Path to the config file: "tests/model_development/pneumoniamnist_28/resnet50/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/pneumoniamnist_28/resnet50
"""

# VGG16 (patience, epoch count, class-imbalance handling and the monitored
# metric are pinned in the prompt; the original misspelled "patience").
pneumoniamnist_28_performance_prompt_4 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/pneumoniamnist_28/vgg16/training_output/test_metrics.json
The inference evaluation metrics are in: tests/model_development/pneumoniamnist_28/vgg16/inference_output/metrics.json
Save the comparison results in: tests/compare_performance/pneumoniamnist_28/vgg16/
If the performance of the model has declined significantly use these data to fine tune it: splitted_data/pneumoniamnist_28/fine_tuning_dataset/
Set patience to 20 and number of epochs to 200. Do not handle class imbalance. Use accuracy as evaluation metric to monitor.
Path to the model: "tests/model_development/pneumoniamnist_28/vgg16/training_output/best_model.pt". Path to the config file: "tests/model_development/pneumoniamnist_28/vgg16/training_output/model_config.json"
Save the fine tuned model in: tests/fine_tuned_models/pneumoniamnist_28/vgg16
"""

# All pneumoniamnist_28 performance-check prompts, in the order they are submitted.
pneumoniamnist_28_performance_prompts = [
    pneumoniamnist_28_performance_prompt_1,
    pneumoniamnist_28_performance_prompt_2,
    pneumoniamnist_28_performance_prompt_3,
    pneumoniamnist_28_performance_prompt_4,
]

# Submit the prompts to the master agent one at a time.
for prompt in pneumoniamnist_28_performance_prompts:
    master_agent.run(prompt)

Inference with fine-tuned models

In [None]:
# Inference with the fine-tuned cardiomegaly models.
# Each prompt names the fine-tuned checkpoint, the inference image folder,
# the ground-truth labels file and the directory for the evaluation output.

# Fine-tuned InceptionV3 (prompt states auxiliary logits are enabled).
cardiomegaly_dataset_ft_inference_prompt_1 = """
Use the inceptionv3 model available here: "tests/fine_tuned_models/cardiomegaly/inceptionv3/best_finetuned_model.pt", 
to classify the images in this folder: "splitted_data/cardiomegaly_dataset/inference_dataset/inference_test".
The number of classes is 2. Auxiliary logits: True
Path to the ground truth labels:"splitted_data/cardiomegaly_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/fine_tuned_models/cardiomegaly/inceptionv3/inference_output". 
"""

# Fine-tuned VGG16 (prompt states batch normalization is enabled).
cardiomegaly_dataset_ft_inference_prompt_2 = """
Use the vgg16 model available here: "tests/fine_tuned_models/cardiomegaly/vgg16/best_finetuned_model.pt", 
to classify the images in this folder: "splitted_data/cardiomegaly_dataset/inference_dataset/inference_test".
The number of classes is 2. Batch normalization = True.
Path to the ground truth labels:"splitted_data/cardiomegaly_dataset/inference_dataset/inference_labels.csv". 
Save the evaluation output in this directory: "tests/fine_tuned_models/cardiomegaly/vgg16/inference_output". 
"""

# Collect the prompts first, then hand them to the master agent in order.
cardiomegaly_dataset_ft_inference_prompts = [
    cardiomegaly_dataset_ft_inference_prompt_1,
    cardiomegaly_dataset_ft_inference_prompt_2,
]
for prompt in cardiomegaly_dataset_ft_inference_prompts:
    master_agent.run(prompt)

In [None]:
# Inference with the fine-tuned brain_tumor_new_dataset models.
# Each prompt names the fine-tuned checkpoint, the inference image folder,
# the ground-truth labels file and the directory for the evaluation output.

# Fine-tuned InceptionV3 (prompt states auxiliary logits are enabled).
brain_tumor_new_dataset_ft_inference_prompt_1 = """
Use the inceptionv3 model available here: "tests/fine_tuned_models/brain_tumor_new_dataset/inceptionv3/best_finetuned_model.pt", 
to classify the images in this folder: "splitted_data/brain_tumor_new_dataset/inference_dataset/inference_test".
The number of classes is 3. Auxiliary logits: True
Path to the ground truth labels:"splitted_data/brain_tumor_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/fine_tuned_models/brain_tumor_new_dataset/inceptionv3/inference_output". 
"""

# Fine-tuned EfficientNet.
brain_tumor_new_dataset_ft_inference_prompt_2 = """
Use the efficientnet model available here: "tests/fine_tuned_models/brain_tumor_new_dataset/efficientnet/best_finetuned_model.pt", 
to classify the images in this folder: "splitted_data/brain_tumor_new_dataset/inference_dataset/inference_test".
The number of classes is 3.
Path to the ground truth labels:"splitted_data/brain_tumor_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/fine_tuned_models/brain_tumor_new_dataset/efficientnet/inference_output". 
"""

# Fine-tuned ResNet50.
brain_tumor_new_dataset_ft_inference_prompt_3 = """
Use the resnet50 model available here: "tests/fine_tuned_models/brain_tumor_new_dataset/resnet50/best_finetuned_model.pt", 
to classify the images in this folder: "splitted_data/brain_tumor_new_dataset/inference_dataset/inference_test".
The number of classes is 3.
Path to the ground truth labels:"splitted_data/brain_tumor_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/fine_tuned_models/brain_tumor_new_dataset/resnet50/inference_output". 
"""

# Collect the prompts first, then hand them to the master agent in order.
brain_tumor_new_dataset_ft_inference_prompts = [
    brain_tumor_new_dataset_ft_inference_prompt_1,
    brain_tumor_new_dataset_ft_inference_prompt_2,
    brain_tumor_new_dataset_ft_inference_prompt_3,
]
for prompt in brain_tumor_new_dataset_ft_inference_prompts:
    master_agent.run(prompt)

In [None]:
# Inference with the fine-tuned covid_new_dataset model.
# Only an InceptionV3 prompt is defined for this dataset; it names the
# fine-tuned checkpoint, the inference image folder, the ground-truth labels
# file and the directory for the evaluation output.
covid_new_dataset_ft_inference_prompt_1 = """
Use the inceptionv3 model available here: "tests/fine_tuned_models/covid_new_dataset/inceptionv3/best_finetuned_model.pt", 
to classify the images in this folder: "splitted_data/covid_new_dataset/inference_dataset/inference_test".
The number of classes is 2. Auxiliary logits: True
Path to the ground truth labels:"splitted_data/covid_new_dataset/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/fine_tuned_models/covid_new_dataset/inceptionv3/inference_output". 
"""

# Hand the single prompt to the master agent.
master_agent.run(
    covid_new_dataset_ft_inference_prompt_1
)

In [None]:
# Inference with the fine-tuned pneumoniamnist_28 model.
# Only a VGG16 prompt is defined for this dataset; it names the fine-tuned
# checkpoint, the inference image folder, the ground-truth labels file and
# the directory for the evaluation output.
pneumonia_ft_inference_prompt_1 = """
Use the vgg16 model available here: "tests/fine_tuned_models/pneumoniamnist_28/vgg16/best_finetuned_model.pt", 
to classify the images in this folder: "splitted_data/pneumoniamnist_28/inference_dataset/inference_test".
The number of classes is 2. 
Path to the ground truth labels:"splitted_data/pneumoniamnist_28/inference_dataset/inference_labels.csv".
Save the evaluation output in this directory: "tests/fine_tuned_models/pneumoniamnist_28/vgg16/inference_output". 
"""

# Hand the single prompt to the master agent.
master_agent.run(
    pneumonia_ft_inference_prompt_1
)

Performance comparison with fine-tuned models

In [None]:
# cardiomegaly_dataset: compare fine-tuned inference metrics with the
# original training test metrics.
# The output directory is fully double-quoted (the original prompts had only
# a closing quote, which could be read as part of the path).

# Fine-tuned InceptionV3.
cardiomegaly_dataset_performance_finetuned_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/cardiomegaly/inceptionv3/training_output/test_metrics.json
The inference evaluation metrics are in: tests/fine_tuned_models/cardiomegaly/inceptionv3/inference_output/metrics.json
Save the evaluation output in this directory: "tests/fine_tuned_models/cardiomegaly/inceptionv3/compare_fine_tuned_with_training_results". 
"""

# Fine-tuned VGG16.
cardiomegaly_dataset_performance_finetuned_2 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/cardiomegaly/vgg16/training_output/test_metrics.json
The inference evaluation metrics are in: tests/fine_tuned_models/cardiomegaly/vgg16/inference_output/metrics.json
Save the evaluation output in this directory: "tests/fine_tuned_models/cardiomegaly/vgg16/compare_fine_tuned_with_training_results". 
"""

# Run the comparison for every prompt defined in this cell. Previously only
# the VGG16 prompt was submitted and the InceptionV3 prompt was left unused.
cardiomegaly_dataset_performance_finetuned_prompts = [
    cardiomegaly_dataset_performance_finetuned_1,
    cardiomegaly_dataset_performance_finetuned_2,
]

for prompt in cardiomegaly_dataset_performance_finetuned_prompts:
    master_agent.run(prompt)

In [None]:
# brain_tumor_new_dataset: compare fine-tuned inference metrics with the
# original training test metrics.
# The output directory is fully double-quoted (the original prompts had only
# a closing quote, which could be read as part of the path).

# Fine-tuned EfficientNet.
brain_tumor_new_dataset_performance_finetuned_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/brain_tumor_new_dataset/efficientnet/training_output/test_metrics.json
The inference evaluation metrics are in: tests/fine_tuned_models/brain_tumor_new_dataset/efficientnet/inference_output/metrics.json
Save the evaluation output in this directory: "tests/fine_tuned_models/brain_tumor_new_dataset/efficientnet/compare_fine_tuned_with_training_results". 
"""

# Fine-tuned ResNet50.
brain_tumor_new_dataset_performance_finetuned_2 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/brain_tumor_new_dataset/resnet50/training_output/test_metrics.json
The inference evaluation metrics are in: tests/fine_tuned_models/brain_tumor_new_dataset/resnet50/inference_output/metrics.json
Save the evaluation output in this directory: "tests/fine_tuned_models/brain_tumor_new_dataset/resnet50/compare_fine_tuned_with_training_results". 
"""

# Fine-tuned InceptionV3.
brain_tumor_new_dataset_performance_finetuned_3 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/brain_tumor_new_dataset/inceptionv3/training_output/test_metrics.json
The inference evaluation metrics are in: tests/fine_tuned_models/brain_tumor_new_dataset/inceptionv3/inference_output/metrics.json
Save the evaluation output in this directory: "tests/fine_tuned_models/brain_tumor_new_dataset/inceptionv3/compare_fine_tuned_with_training_results". 
"""

# Submit the comparison prompts to the master agent one at a time.
for prompt in [
    brain_tumor_new_dataset_performance_finetuned_1,
    brain_tumor_new_dataset_performance_finetuned_2,
    brain_tumor_new_dataset_performance_finetuned_3,
]:
    master_agent.run(prompt)

In [None]:
# covid_new_dataset classification models (CT): compare fine-tuned inference
# metrics with the original training test metrics.
# The output directory is fully double-quoted (the original prompt had only
# a closing quote, which could be read as part of the path).

# Fine-tuned InceptionV3.
covid_new_dataset_performance_finetuned_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/covid_new_dataset/inceptionv3/training_output/test_metrics.json
The inference evaluation metrics are in: tests/fine_tuned_models/covid_new_dataset/inceptionv3/inference_output/metrics.json
Save the evaluation output in this directory: "tests/fine_tuned_models/covid_new_dataset/inceptionv3/compare_fine_tuned_with_training_results". 
"""

# Submit the single comparison prompt to the master agent.
master_agent.run(covid_new_dataset_performance_finetuned_1)

In [None]:
# Pneumonia classification models (x-ray): compare the fine-tuned VGG16
# inference metrics with the original training test metrics.
pneumonia_performance_finetuned_1 = """
Check if the performance of the model has declined. 
The training test metrics are in: tests/model_development/pneumoniamnist_28/vgg16/training_output/test_metrics.json
The inference evaluation metrics are in: tests/fine_tuned_models/pneumoniamnist_28/vgg16/inference_output/metrics.json
Save the evaluation output in this directory: "tests/fine_tuned_models/pneumoniamnist_28/vgg16/compare_fine_tuned_with_training_results". 
"""

# Hand the single comparison prompt to the master agent.
master_agent.run(
    pneumonia_performance_finetuned_1
)