In [None]:
import pandas as pd
import numpy as np

# Mock implementations of models for demonstration purposes
class Model:
    """Stand-in for a real model: each stage only logs and returns a label.

    No actual computation happens. Every stage prints one progress line and
    returns a descriptive string built from its inputs and the model name;
    evaluation returns randomly generated scores.
    """

    def __init__(self, name):
        """Store the display name used in log lines and stage labels."""
        self.name = name

    def preprocess(self, dataset):
        """Log the preprocessing step and return a label for the processed data."""
        print(f"Preprocessing dataset: {dataset} for {self.name}")
        processed_label = f"processed_{dataset}_{self.name}"
        return processed_label

    def pretrain(self, data):
        """Log the pretraining step and return a label for the pretrained result."""
        print(f"Pretraining {self.name} with data: {data}")
        pretrained_label = f"pretrained_{data}_{self.name}"
        return pretrained_label

    def fine_tune(self, data, task):
        """Log the fine-tuning step and return a label for the fine-tuned result."""
        print(f"Fine-tuning {self.name} on task: {task} with data: {data}")
        tuned_label = f"fine_tuned_{task}_{data}_{self.name}"
        return tuned_label

    def evaluate(self, dataset):
        """Log the evaluation step and return mock metrics.

        Returns:
            dict: ``"f1_score"`` and ``"accuracy"``, each drawn independently
            (in that order) from a uniform distribution between 0.8 and 1.0.
        """
        print(f"Evaluating {self.name} on dataset: {dataset}")
        # Mock scores: one independent draw per metric, f1_score first.
        return {
            metric: np.random.uniform(0.8, 1.0)
            for metric in ("f1_score", "accuracy")
        }

# Models that take part in a pipeline run; commented-out names are disabled.
models = [
    Model(model_name)
    for model_name in (
        "TOSICA",
        "ScMMT",
        # "ScBERT",
        # "XTrimoGene",
        # "ScGPT",
    )
]

def select_task():
    """Prompt the user to choose a task and return its name.

    The available tasks are printed as a 1-based menu. The prompt is repeated
    until the user enters a whole number that matches a menu entry, so
    non-numeric text and out-of-range numbers are rejected with a message
    instead of crashing or silently selecting the wrong task.

    Returns:
        str: The name of the selected task.

    Raises:
        EOFError: If input ends before a valid choice is entered (propagated
            from ``input``).
    """
    tasks = ["Cell Type Annotation", "Protein Prediction"]
    print("Available tasks:")
    for number, task in enumerate(tasks, start=1):
        print(f"{number}. {task}")

    while True:
        raw = input("Select the task (1 or 2): ")
        try:
            choice = int(raw)
        except ValueError:
            print(f"Invalid input {raw!r}: please enter a number.")
            continue
        # Explicit range check: indexing with choice - 1 alone would accept 0
        # and negative numbers through Python's negative indexing.
        if 1 <= choice <= len(tasks):
            return tasks[choice - 1]
        print(f"Invalid choice {choice}: enter a number between 1 and {len(tasks)}.")

def select_dataset():
    """Prompt the user to choose a dataset and return its name.

    The available datasets are printed as a 1-based menu. The prompt is
    repeated until the user enters a whole number that matches a menu entry,
    so non-numeric text and out-of-range numbers are rejected with a message
    instead of crashing or silently selecting the wrong dataset.

    Returns:
        str: The name of the selected dataset.

    Raises:
        EOFError: If input ends before a valid choice is entered (propagated
            from ``input``).
    """
    datasets = ["Dataset_A", "Dataset_B", "Dataset_C"]
    print("Available datasets:")
    for number, dataset in enumerate(datasets, start=1):
        print(f"{number}. {dataset}")

    while True:
        raw = input("Select the dataset (1, 2, or 3): ")
        try:
            choice = int(raw)
        except ValueError:
            print(f"Invalid input {raw!r}: please enter a number.")
            continue
        # Explicit range check: indexing with choice - 1 alone would accept 0
        # and negative numbers through Python's negative indexing.
        if 1 <= choice <= len(datasets):
            return datasets[choice - 1]
        print(f"Invalid choice {choice}: enter a number between 1 and {len(datasets)}.")

def run_pipeline(task, dataset):
    """Run every model in the module-level ``models`` list through all stages.

    For each model, in list order, the stages run as: preprocess the dataset,
    pretrain on the preprocessed output, fine-tune on the pretrained output
    for ``task``, then evaluate on ``dataset``.

    Args:
        task: Task name passed to fine-tuning and recorded in each result row.
        dataset: Dataset name passed to preprocessing and evaluation.

    Returns:
        list[dict]: One record per model with the keys ``"Dataset Name"``,
        ``"Classifier Used"``, ``"F1 Score"``, ``"Accuracy"`` and
        ``"Model Name"``.
    """

    def _run_single(model):
        # Executes the four stages for one model and builds its result row.
        print(f"\nRunning pipeline for model: {model.name}")

        staged = model.preprocess(dataset)
        staged = model.pretrain(staged)
        # The fine-tuning output is not consumed by any later stage.
        model.fine_tune(staged, task)
        scores = model.evaluate(dataset)

        return {
            "Dataset Name": dataset,
            "Classifier Used": task,
            "F1 Score": scores["f1_score"],
            "Accuracy": scores["accuracy"],
            "Model Name": model.name,
        }

    return [_run_single(current) for current in models]

def save_results_to_csv(results, output_file="pipeline_results.csv"):
    """Write the result records to a CSV file and print where it was saved.

    Args:
        results: Records to tabulate with ``pandas.DataFrame`` (the pipeline
            passes a list of dicts, one per model).
        output_file: Destination path of the CSV file; the row index is not
            written.
    """
    pd.DataFrame(results).to_csv(output_file, index=False)
    print(f"Results saved to {output_file}")

if __name__ == "__main__":
    # Ask for the task first, then the dataset (evaluated left to right).
    selected_task, selected_dataset = select_task(), select_dataset()

    # Execute every configured model on the chosen task/dataset pair.
    pipeline_results = run_pipeline(selected_task, selected_dataset)

    # Persist the collected metrics to the default CSV file.
    save_results_to_csv(pipeline_results)
