In [1]:
import argparse
import os
import torch
import csv
from scgpt import SCGPTModel, preprocess_scgpt_input
from tosica import TOSICAModel, preprocess_tosica_input

# Define a function to load the models
def load_models():
    """Load the scGPT and TOSICA models.

    Each model comes from its class's ``load_pretrained`` call, using the
    path strings hard-coded in this function.

    Returns:
        A ``(scgpt_model, tosica_model)`` tuple, in that order.
    """
    # Tuple elements are evaluated left to right, so scGPT is loaded first.
    return (
        SCGPTModel.load_pretrained("path_to_scgpt_model"),
        TOSICAModel.load_pretrained("path_to_tosica_model"),
    )

# Function to preprocess input for scGPT
def preprocess_input_scgpt(dataset_path):
    """Prepare a dataset for scGPT.

    Thin wrapper that forwards ``dataset_path`` unchanged to
    ``preprocess_scgpt_input``.

    Args:
        dataset_path: Value handed straight to the scGPT preprocessor.

    Returns:
        Whatever ``preprocess_scgpt_input`` returns for ``dataset_path``.
    """
    scgpt_ready = preprocess_scgpt_input(dataset_path)
    return scgpt_ready

# Function to preprocess input for TOSICA
def preprocess_input_tosica(dataset_path):
    """Prepare a dataset for TOSICA.

    Thin wrapper that forwards ``dataset_path`` unchanged to
    ``preprocess_tosica_input``.

    Args:
        dataset_path: Value handed straight to the TOSICA preprocessor.

    Returns:
        Whatever ``preprocess_tosica_input`` returns for ``dataset_path``.
    """
    tosica_ready = preprocess_tosica_input(dataset_path)
    return tosica_ready

# Function to save results to a CSV file
def save_results_to_csv(results, dataset, task, output_file="results.csv"):
    """Write the scGPT and TOSICA metrics to a CSV file.

    The file receives a header row followed by one row per classifier
    (scGPT first, then TOSICA). Any existing file at ``output_file`` is
    overwritten, and a confirmation message is printed afterwards.

    Args:
        results: Mapping with ``"scGPT_output"`` and ``"TOSICA_output"``
            entries, each a mapping holding ``"accuracy"`` and ``"f1_score"``.
        dataset: Value written to the "Dataset" column of every row.
        task: Value written to the "Task" column of every row.
        output_file: Destination path of the CSV file.

    Raises:
        KeyError: If ``results`` lacks a classifier entry or one of its
            metrics. The output file is left untouched in that case.
    """
    header = ["Dataset", "Task", "Classifier", "Accuracy", "F1 Score"]
    # (label written to the CSV, key under which the metrics live in results)
    classifiers = (("scGPT", "scGPT_output"), ("TOSICA", "TOSICA_output"))

    # Assemble every row BEFORE opening the file: opening in "w" mode truncates
    # it, so a missing key must surface while the previous contents are intact.
    rows = [
        [dataset, task, label, results[key]["accuracy"], results[key]["f1_score"]]
        for label, key in classifiers
    ]

    # newline="" lets the csv module control line endings; the explicit
    # encoding keeps non-ASCII dataset paths independent of the platform locale.
    with open(output_file, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(header)
        writer.writerows(rows)

    print(f"Results saved to {output_file}")

# Function to perform the pipeline
def run_pipeline(dataset, task):
    """Run scGPT and TOSICA on a dataset and collect their metrics.

    The task is validated first, so an unsupported task fails immediately
    instead of after the models have been loaded and the data preprocessed.
    Each model's raw output is printed as it is produced.

    Args:
        dataset: Dataset path, forwarded to both preprocessing functions.
        task: Name of the task to run; only ``"cell_type_annotation"`` is
            supported.

    Returns:
        A dict with ``"scGPT_output"`` and ``"TOSICA_output"`` entries, each
        a dict with ``"accuracy"`` and ``"f1_score"`` (``"N/A"`` when the
        model output does not provide the metric).

    Raises:
        ValueError: If ``task`` is not supported.
    """
    # Guard clause: reject unknown tasks before doing any expensive work.
    if task != "cell_type_annotation":
        raise ValueError(f"Task '{task}' is not supported by this pipeline.")

    # Load the models
    scgpt_model, tosica_model = load_models()

    # Preprocess the dataset separately for each model
    scgpt_input = preprocess_input_scgpt(dataset)
    tosica_input = preprocess_input_tosica(dataset)

    print("Running scGPT for cell type annotation...")
    scgpt_output = scgpt_model.annotate_cell_types(scgpt_input)
    print("scGPT output:", scgpt_output)

    print("\nRunning TOSICA for cell type annotation...")
    tosica_output = tosica_model.annotate_cell_types(tosica_input)
    print("TOSICA output:", tosica_output)

    # Combine and return the outputs
    return {
        "scGPT_output": _extract_metrics(scgpt_output),
        "TOSICA_output": _extract_metrics(tosica_output),
    }


def _extract_metrics(model_output):
    """Return the accuracy and F1 score from *model_output*, "N/A" if absent."""
    # NOTE(review): assumes the model output supports ``.get`` (dict-like), as
    # the lookups here require -- confirm against the model APIs.
    return {
        "accuracy": model_output.get("accuracy", "N/A"),
        "f1_score": model_output.get("f1_score", "N/A"),
    }

if __name__ == "__main__":
    # Command-line interface: dataset path, task name, optional CSV output path.
    parser = argparse.ArgumentParser(
        description="Pipeline for scGPT and TOSICA models.",
    )
    parser.add_argument(
        "--dataset",
        type=str,
        required=True,
        help="Path to the input dataset.",
    )
    parser.add_argument(
        "--task",
        type=str,
        required=True,
        choices=["cell_type_annotation"],
        help="Task to perform.",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="results.csv",
        help="Path to save the output CSV file.",
    )
    args = parser.parse_args()

    # Run both models, then persist their metrics to the requested CSV file.
    results = run_pipeline(args.dataset, args.task)
    save_results_to_csv(results, args.dataset, args.task, args.output)

    # Echo the collected metrics to the console as well.
    print("\nPipeline results:")
    print(results)


ModuleNotFoundError: No module named 'scgpt'