# This is a tutorial detailing a from-scratch implementation of CellAgent's batch effect removal

- We extracted the core code related to the batch correction process in CellAgent and simplified it to reproduce and demonstrate the entire workflow.
- The performance of CellAgent's batch correction is primarily evaluated by calculating the batch correction metrics with scIB (https://github.com/theislab/scib). In parallel, the Evaluator (GPT-4o) visually assesses the bio conservation metrics on the clustering images generated after batch correction. CellAgent selects the method with the highest combined score, computed as the scIB batch correction score plus the visual score multiplied by a weighting coefficient.


**In our experiments, we found that the best performance was achieved when simultaneously using SCIB (for Batch Correction metrics) and Visual Assessment (for Bio Conservation metrics), with the weighting coefficient set to $0.01$.**


### Initial Setup

In [1]:
import requests
import os
import base64
import os
import json
import re
import pandas as pd

In [2]:
def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as a Base64 string.

    The file is read in binary mode and the Base64 bytes are decoded as UTF-8
    text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")

def load_images_from_dataset(dataset_name, root_dir="analysis_results"):
    """
    Read every PNG image in a dataset folder and convert it to Base64.

    Parameters:
    - dataset_name: Dataset name, e.g. "processed_Lung_atlas_public"
    - root_dir: Root directory, default is "analysis_results"

    Returns:
    - image_files: List of image file names, sorted by name
    - base64_images: List of Base64 encodings, in the same order as image_files

    Both lists are empty when the dataset folder is missing, so callers can
    always unpack the result into two values.
    """
    dataset_path = os.path.join(root_dir, dataset_name)  # Dataset folder path

    if not os.path.isdir(dataset_path):
        print(f"Error: Directory {dataset_path} does not exist!")
        # Keep the same (names, encodings) shape as the success path; returning
        # a single list here made `a, b = load_images_from_dataset(...)` fail.
        return [], []

    # Sorting by name fixes the order in which images are handed to the evaluator
    image_files = sorted(f for f in os.listdir(dataset_path) if f.endswith(".png"))

    base64_images = []
    for image_file in image_files:
        image_path = os.path.join(dataset_path, image_file)
        base64_images.append(encode_image(image_path))
        print(f"Processed: {image_file}")  # Print progress

    return image_files, base64_images

def extract_json_code(content):
    """
    Parse a JSON value out of a model reply.

    The reply is first parsed as JSON directly. If that fails, each fenced
    code block (text between triple backticks) is tried in order, after
    removing an optional leading "json" language tag in any letter case.

    Parameters:
    - content: Text returned by the model

    Returns:
    - The first JSON value that parses, or {} when the reply is not JSON and
      contains no fenced code block

    Raises:
    - json.JSONDecodeError: fenced blocks exist but none contains valid JSON
    """
    # Attempt to parse JSON directly
    try:
        return json.loads(content)
    except ValueError:  # json.JSONDecodeError is a ValueError subclass
        pass

    code_blocks = re.findall(r"```(.*?)```", content, re.DOTALL)
    if not code_blocks:
        return {}

    last_error = None
    for block in code_blocks:
        # Strip only the tag itself; slicing a fixed five characters also ate
        # the opening brace when no newline followed "json".
        candidate = re.sub(r"^\s*json\b", "", block, count=1, flags=re.IGNORECASE)
        try:
            return json.loads(candidate)
        except ValueError as err:
            last_error = err
    raise last_error


In [None]:
def debatch_visions_eval(image_files,base64_images):
    """
    Ask the vision model to rate the batch-corrected clustering images.

    All images are sent in a single chat-completion request, followed by the
    scoring prompt. The reply is parsed with extract_json_code.

    Parameters:
    - image_files: Image file names; only printed for reference, so the reader
      can match them to the order in which the images were sent
    - base64_images: Base64-encoded images, sent in list order

    Returns:
    - The parsed reply; the prompt asks for a dict with the keys "rates" and "best"

    Raises:
    - requests.HTTPError: the API answered with an error status
    - requests.Timeout: the API did not answer within the timeout
    """
    # NOTE(review): the prompt is kept verbatim so new runs stay comparable with
    # the recorded scores. Its sample output leaves the "best" value unquoted.
    template_English='''
    As an expert in single-cell data analysis, you understand that an optimal batch correction algorithm in single-cell RNA sequencing data analysis should minimize technical variability or batch effects while conserving biological variability. For scoring, focus on:
    Cluster integrity: When using UMAP to visualize Leiden clustering, the visual evaluation criteria mainly include within-cluster compactness (points within the same cluster should be tightly packed and form a compact shape) and between-cluster separation (different clusters should have clear boundaries with noticeable gaps between them).
    Data structure conservation: Evaluate if the corrected data maintains true biological structure without adding non-biological noise or distortions.
    Your output should be formatted as a JSON, with two keys: "rates" for rating various batch correction algorithms based on these visual criteria, "best" for the name of the algorithm with the highest score in lowercase.

    For example:
    GPT: 
    {
        "rates": {
            "integrate_method_1": {int(your rate for this method)},
            "integrate_method_2": {int(your rate for this method)},
            "integrate_method_3": {int(your rate for this method)}
        },
        "best": {the best one name chosn from keys in rates.}
    }

    An example of output is:
    {
        "rates": {
            "integrate_method_1": 6,
            "integrate_method_2": 6,
            "integrate_method_3": 6
        },
        "best": integrate_method_3
    }
    '''

    # Prefer a key from the environment so the secret need not live in the
    # notebook; the placeholder remains the fallback.
    api_key = os.environ.get("OPENAI_API_KEY", "OpenAI API Key")
    api_base = "https://openai.arnotho.com/v1"

    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer {}".format(api_key),
    }

    content = []
    for base64_image in base64_images:
        # The PNG file signature always encodes to this Base64 prefix; label
        # such images as PNG rather than JPEG in the data URL.
        if base64_image.startswith("iVBORw0KGg"):
            mime_type = "image/png"
        else:
            mime_type = "image/jpeg"
        content.append({
            "type": "image_url",
            "image_url": {
                "url": "data:{};base64,{}".format(mime_type, base64_image)
            }
        })

    content.append({
        "type": "text",
        "text": template_English,
    })

    payload = {
        "model": "chatgpt-4o-latest",
        "messages": [
            {
            "role": "user",
            "content": content
            }
        ],
        "max_tokens": 1000,
        "temperature": 0,
        "seed":42,
    }
    # A timeout keeps a stalled connection from hanging the notebook forever
    response = requests.post(
        f"{api_base}/chat/completions", headers=headers, json=payload, timeout=300
    )
    print("The response:\n", response)
    # Fail with the HTTP error itself instead of a KeyError on "choices" below
    response.raise_for_status()
    response_json = response.json()
    result_text = response_json["choices"][0]["message"]["content"]
    result = extract_json_code(result_text)
    print("The evaluation result for debatch:\n", result)
    print("The evaluation methods for debatch:\n", image_files)
    return result

In [4]:
def update_score_info(score_info):
    """
    Rename the placeholder method keys in a rating result to embedding names.

    'integrate_method_1', 'integrate_method_2' and 'integrate_method_3' become
    'X_pca_harmony', 'X_pca_liger' and 'X_pca_scvi'. The 'best' entry is not
    carried over.

    Parameters:
    - score_info (dict): Rating result holding a 'rates' dict

    Returns:
    - dict: {'rates': {...}} keyed by embedding name
    """
    embedding_for_placeholder = {
        'integrate_method_1': 'X_pca_harmony',
        'integrate_method_2': 'X_pca_liger',
        'integrate_method_3': 'X_pca_scvi'
    }

    renamed_rates = {}
    for placeholder, rating in score_info['rates'].items():
        renamed_rates[embedding_for_placeholder[placeholder]] = rating

    # Only 'rates' is returned; 'best' is dropped on purpose
    return {'rates': renamed_rates}


In [5]:
# Rate the three batch-corrected UMAPs of the lung atlas dataset and
# store the ratings under their embedding names.
dataset_name = "processed_Lung_atlas_public"
image_files, base64_images = load_images_from_dataset(dataset_name)
processed_Lung_atlas_public_score = update_score_info(
    debatch_visions_eval(image_files, base64_images)
)

Processed: processed_Lung_atlas_public_X_pca_harmony_umap.png
Processed: processed_Lung_atlas_public_X_pca_liger_umap.png
Processed: processed_Lung_atlas_public_X_pca_scvi_umap.png
The response:
 <Response [200]>
The evaluation result for debatch:
 {'rates': {'integrate_method_1': 7, 'integrate_method_2': 6, 'integrate_method_3': 8}, 'best': 'integrate_method_3'}
The evaluation methods for debatch:
 ['processed_Lung_atlas_public_X_pca_harmony_umap.png', 'processed_Lung_atlas_public_X_pca_liger_umap.png', 'processed_Lung_atlas_public_X_pca_scvi_umap.png']


In [6]:
# Rate the three batch-corrected UMAPs of this dataset and store the
# ratings under their embedding names.
dataset_name = "processed_Perinhrinal_cortex"
image_files, base64_images = load_images_from_dataset(dataset_name)
processed_Perinhrinal_cortex_score = update_score_info(
    debatch_visions_eval(image_files, base64_images)
)

Processed: processed_Perinhrinal_cortex_X_pca_harmony_umap.png
Processed: processed_Perinhrinal_cortex_X_pca_liger_umap.png
Processed: processed_Perinhrinal_cortex_X_pca_scvi_umap.png
The response:
 <Response [200]>
The evaluation result for debatch:
 {'rates': {'integrate_method_1': 7, 'integrate_method_2': 5, 'integrate_method_3': 9}, 'best': 'integrate_method_3'}
The evaluation methods for debatch:
 ['processed_Perinhrinal_cortex_X_pca_harmony_umap.png', 'processed_Perinhrinal_cortex_X_pca_liger_umap.png', 'processed_Perinhrinal_cortex_X_pca_scvi_umap.png']


In [7]:
# Rate the three batch-corrected UMAPs of the pancreas dataset, store the
# ratings under their embedding names, and display them.
dataset_name = "processed_human_pancreas_norm_complexBatch"
image_files, base64_images = load_images_from_dataset(dataset_name)
processed_human_pancreas_norm_complexBatch_score = update_score_info(
    debatch_visions_eval(image_files, base64_images)
)
processed_human_pancreas_norm_complexBatch_score

Processed: processed_human_pancreas_norm_complexBatch_X_pca_harmony_umap.png
Processed: processed_human_pancreas_norm_complexBatch_X_pca_liger_umap.png
Processed: processed_human_pancreas_norm_complexBatch_X_pca_scvi_umap.png
The response:
 <Response [200]>
The evaluation result for debatch:
 {'rates': {'integrate_method_1': 8, 'integrate_method_2': 6, 'integrate_method_3': 9}, 'best': 'integrate_method_3'}
The evaluation methods for debatch:
 ['processed_human_pancreas_norm_complexBatch_X_pca_harmony_umap.png', 'processed_human_pancreas_norm_complexBatch_X_pca_liger_umap.png', 'processed_human_pancreas_norm_complexBatch_X_pca_scvi_umap.png']


{'rates': {'X_pca_harmony': 8, 'X_pca_liger': 6, 'X_pca_scvi': 9}}

In [8]:
# Rate the three batch-corrected UMAPs of the human immune dataset and
# store the ratings under their embedding names.
dataset_name = "processed_Immune_ALL_human"
image_files, base64_images = load_images_from_dataset(dataset_name)
processed_Immune_ALL_human_score = update_score_info(
    debatch_visions_eval(image_files, base64_images)
)

Processed: processed_Immune_ALL_human_X_pca_harmony_umap.png
Processed: processed_Immune_ALL_human_X_pca_liger_umap.png
Processed: processed_Immune_ALL_human_X_pca_scvi_umap.png
The response:
 <Response [200]>
The evaluation result for debatch:
 {'rates': {'integrate_method_1': 6, 'integrate_method_2': 7, 'integrate_method_3': 9}, 'best': 'integrate_method_3'}
The evaluation methods for debatch:
 ['processed_Immune_ALL_human_X_pca_harmony_umap.png', 'processed_Immune_ALL_human_X_pca_liger_umap.png', 'processed_Immune_ALL_human_X_pca_scvi_umap.png']


In [9]:
# Rate the three batch-corrected UMAPs of the human/mouse immune dataset
# and store the ratings under their embedding names.
dataset_name = "processed_Immune_ALL_hum_mou"
image_files, base64_images = load_images_from_dataset(dataset_name)
processed_Immune_ALL_hum_mou_score = update_score_info(
    debatch_visions_eval(image_files, base64_images)
)

Processed: processed_Immune_ALL_hum_mou_X_pca_harmony_umap.png
Processed: processed_Immune_ALL_hum_mou_X_pca_liger_umap.png
Processed: processed_Immune_ALL_hum_mou_X_pca_scvi_umap.png
The response:
 <Response [200]>
The evaluation result for debatch:
 {'rates': {'integrate_method_1': 8, 'integrate_method_2': 6, 'integrate_method_3': 9}, 'best': 'integrate_method_3'}
The evaluation methods for debatch:
 ['processed_Immune_ALL_hum_mou_X_pca_harmony_umap.png', 'processed_Immune_ALL_hum_mou_X_pca_liger_umap.png', 'processed_Immune_ALL_hum_mou_X_pca_scvi_umap.png']


In [10]:
# Collect the per-dataset rating dictionaries, keyed by dataset name
scores_dict = {
    'processed_Lung_atlas_public': processed_Lung_atlas_public_score,
    'processed_Perinhrinal_cortex': processed_Perinhrinal_cortex_score,
    'processed_human_pancreas_norm_complexBatch': processed_human_pancreas_norm_complexBatch_score,
    'processed_Immune_ALL_human': processed_Immune_ALL_human_score,
    'processed_Immune_ALL_hum_mou': processed_Immune_ALL_hum_mou_score
}

# Build the table column by column: one 'Dataset' column plus one column per
# embedding. A method without a rating yields None.
CellAgent_bio_data = {'Dataset': list(scores_dict)}
for embedding in ('X_pca_harmony', 'X_pca_liger', 'X_pca_scvi'):
    CellAgent_bio_data[embedding] = [
        entry['rates'].get(embedding, None) for entry in scores_dict.values()
    ]

# One row per dataset, one column per embedding
CellAgent_bio = pd.DataFrame(CellAgent_bio_data)

# Display the result
CellAgent_bio

Unnamed: 0,Dataset,X_pca_harmony,X_pca_liger,X_pca_scvi
0,processed_Lung_atlas_public,7,6,8
1,processed_Perinhrinal_cortex,7,5,9
2,processed_human_pancreas_norm_complexBatch,8,6,9
3,processed_Immune_ALL_human,6,7,9
4,processed_Immune_ALL_hum_mou,8,6,9


## Option 1: Batch Correction (SCIB) and Visual Assessment, with the weighting coefficient set to $0.01$.

In [11]:
# Step 2: Define a function to calculate the weighted score
def calculate_final_score(dataset_name, method_name, original_score, benchmark_results, gpt4o_weight=0.01):
    """
    Combine a visual rating with the benchmark's batch-correction score.

    Parameters:
    - dataset_name: Dataset name (not used in the calculation)
    - method_name: Embedding name to look up in the 'Embedding' row
    - original_score: Visual rating for this method
    - benchmark_results: Transposed benchmark table, with 'Embedding' and
      'Batch correction' as row labels
    - gpt4o_weight: Multiplier applied to original_score

    Returns:
    - (final_score, batch_correction_value), where final_score is
      original_score * gpt4o_weight + batch_correction_value
    """
    # Boolean mask over the columns whose 'Embedding' entry is this method
    embedding_row = benchmark_results.loc['Embedding']
    is_method = embedding_row == method_name

    # The first matching column supplies the value; non-numeric text becomes NaN
    matching_values = benchmark_results.loc['Batch correction', is_method]
    batch_correction_value = pd.to_numeric(matching_values.values[0], errors='coerce')

    final_score = original_score * gpt4o_weight + batch_correction_value
    return final_score, batch_correction_value

# Step 3: For each dataset, dynamically load the corresponding benchmark file and calculate weighted scores
# Step 3: combine each visual rating with the matching benchmark score
final_scores = []
for row_label, bio_row in CellAgent_bio.iterrows():
    dataset_name = bio_row['Dataset']

    # The benchmark file is expected at '<dataset>_benchmark_results.csv';
    # it is transposed so metric names become row labels.
    benchmark_results = pd.read_csv(f'{dataset_name}_benchmark_results.csv').T

    for method_name in ('X_pca_harmony', 'X_pca_liger', 'X_pca_scvi'):
        visual_score = bio_row[method_name]
        weighted_score, batch_value = calculate_final_score(
            dataset_name, method_name, visual_score, benchmark_results, gpt4o_weight=0.01
        )
        record = {
            'Dataset': dataset_name,
            'Method': method_name,
            'Original Score': visual_score,
            'Batch correction': batch_value,
            'Final Score': weighted_score,
        }
        final_scores.append(record)

# One row per (dataset, method) pair
final_scores_df = pd.DataFrame(final_scores)
final_scores_df

Unnamed: 0,Dataset,Method,Original Score,Batch correction,Final Score
0,processed_Lung_atlas_public,X_pca_harmony,7,0.379411,0.449411
1,processed_Lung_atlas_public,X_pca_liger,6,0.438996,0.498996
2,processed_Lung_atlas_public,X_pca_scvi,8,0.521022,0.601022
3,processed_Perinhrinal_cortex,X_pca_harmony,7,0.894673,0.964673
4,processed_Perinhrinal_cortex,X_pca_liger,5,0.883716,0.933716
5,processed_Perinhrinal_cortex,X_pca_scvi,9,0.894764,0.984764
6,processed_human_pancreas_norm_complexBatch,X_pca_harmony,8,0.660754,0.740754
7,processed_human_pancreas_norm_complexBatch,X_pca_liger,6,0.718173,0.778173
8,processed_human_pancreas_norm_complexBatch,X_pca_scvi,9,0.692652,0.782652
9,processed_Immune_ALL_human,X_pca_harmony,6,0.583921,0.643921


In [12]:
# Step 1: Get the highest Final Score and corresponding Method for each dataset
# Row label of the highest 'Final Score' within each dataset
best_row_labels = final_scores_df.groupby('Dataset')['Final Score'].idxmax()
best_methods_df = final_scores_df.loc[best_row_labels]

# Map each dataset's benchmark file name to the method chosen for it
cell_agent_selections = {
    f"{dataset}_benchmark_results.csv": method
    for dataset, method in zip(best_methods_df['Dataset'], best_methods_df['Method'])
}

# Display the result
cell_agent_selections

{'processed_Immune_ALL_hum_mou_benchmark_results.csv': 'X_pca_scvi',
 'processed_Immune_ALL_human_benchmark_results.csv': 'X_pca_liger',
 'processed_Lung_atlas_public_benchmark_results.csv': 'X_pca_scvi',
 'processed_Perinhrinal_cortex_benchmark_results.csv': 'X_pca_scvi',
 'processed_human_pancreas_norm_complexBatch_benchmark_results.csv': 'X_pca_scvi'}

In [13]:
# Read all files and merge data
# Load every benchmark table once; the same frames feed both the per-method
# rows and the "CellAgent" rows built below.
benchmark_frames = {file: pd.read_csv(file) for file in cell_agent_selections}
dfs = list(benchmark_frames.values())

# Merge all DataFrames
merged_df = pd.concat(dfs, ignore_index=True)

# Remove rows where "Embedding" equals "Metric Type"
merged_df = merged_df[merged_df["Embedding"] != "Metric Type"]

# For each dataset, copy the row of the selected method and relabel it "CellAgent"
selected_rows = []
for file, embedding in cell_agent_selections.items():
    file_df = benchmark_frames[file]
    selected_row = file_df[file_df["Embedding"] == embedding].copy()
    selected_row.loc[:, "Embedding"] = "CellAgent"  # Change Embedding to "CellAgent"
    selected_rows.append(selected_row)

# Merge selected rows into merged_df
cell_agent_df = pd.concat(selected_rows, ignore_index=True)
merged_df = pd.concat([merged_df, cell_agent_df], ignore_index=True)

# Keep only these methods (the list is used as a filter; row order is set by the sort below)
desired_order = [
    'CellAgent',
    'X_pca_combat',
    'X_pca_harmony',
    'X_pca_scanorama',
    'X_pca_scvi',
    'X_pca_liger',
]
# .copy() yields an independent frame, so the column assignments below do not
# write into a filtered slice.
merged_df = merged_df[merged_df["Embedding"].isin(desired_order)].copy()

# Select columns to calculate mean and standard deviation
columns_to_calculate = ["Batch correction", "Bio conservation", "Total"]

# Convert target columns to numeric, invalid values become NaN
for col in columns_to_calculate:
    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')

# Calculate mean and standard deviation
embedding_stats = merged_df.groupby("Embedding")[columns_to_calculate].agg(['mean', 'std'])

# Sort by mean of "Total"
embedding_stats_sorted_filtered = embedding_stats.sort_values(by=("Total", "mean"), ascending=False)
embedding_stats_sorted_filtered

Unnamed: 0_level_0,Batch correction,Batch correction,Bio conservation,Bio conservation,Total,Total
Unnamed: 0_level_1,mean,std,mean,std,mean,std
Embedding,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
CellAgent,0.671704,0.145095,0.662295,0.043075,0.666059,0.076575
X_pca_scvi,0.661987,0.14569,0.651484,0.038807,0.655685,0.075739
X_pca_liger,0.659296,0.166334,0.642375,0.109343,0.649144,0.114431
X_pca_scanorama,0.554489,0.209679,0.672965,0.040305,0.625574,0.08265
X_pca_harmony,0.596621,0.198612,0.606919,0.120546,0.6028,0.122499
X_pca_combat,0.55532,0.149011,0.582913,0.073709,0.571876,0.071487


## Option 2: Batch Correction (SCIB) and Visual Assessment, with the weighting coefficient set to $0$.
This means that CellAgent assesses the different batch-correction methods solely through the hard metrics (Batch Correction metrics) calculated by SCIB.

In [14]:
# Step 3: For each dataset, dynamically load the corresponding benchmark file and calculate weighted scores
# Step 3: combine each visual rating with the matching benchmark score
# (weight 0.0, so the final score equals the batch-correction score)
final_scores = []
for row_label, bio_row in CellAgent_bio.iterrows():
    dataset_name = bio_row['Dataset']

    # The benchmark file is expected at '<dataset>_benchmark_results.csv';
    # it is transposed so metric names become row labels.
    benchmark_results = pd.read_csv(f'{dataset_name}_benchmark_results.csv').T

    for method_name in ('X_pca_harmony', 'X_pca_liger', 'X_pca_scvi'):
        visual_score = bio_row[method_name]
        weighted_score, batch_value = calculate_final_score(
            dataset_name, method_name, visual_score, benchmark_results, gpt4o_weight=0.0
        )
        record = {
            'Dataset': dataset_name,
            'Method': method_name,
            'Original Score': visual_score,
            'Batch correction': batch_value,
            'Final Score': weighted_score,
        }
        final_scores.append(record)

# One row per (dataset, method) pair
final_scores_df = pd.DataFrame(final_scores)
final_scores_df

Unnamed: 0,Dataset,Method,Original Score,Batch correction,Final Score
0,processed_Lung_atlas_public,X_pca_harmony,7,0.379411,0.379411
1,processed_Lung_atlas_public,X_pca_liger,6,0.438996,0.438996
2,processed_Lung_atlas_public,X_pca_scvi,8,0.521022,0.521022
3,processed_Perinhrinal_cortex,X_pca_harmony,7,0.894673,0.894673
4,processed_Perinhrinal_cortex,X_pca_liger,5,0.883716,0.883716
5,processed_Perinhrinal_cortex,X_pca_scvi,9,0.894764,0.894764
6,processed_human_pancreas_norm_complexBatch,X_pca_harmony,8,0.660754,0.660754
7,processed_human_pancreas_norm_complexBatch,X_pca_liger,6,0.718173,0.718173
8,processed_human_pancreas_norm_complexBatch,X_pca_scvi,9,0.692652,0.692652
9,processed_Immune_ALL_human,X_pca_harmony,6,0.583921,0.583921


In [15]:
# Step 1: Get the highest Final Score and corresponding Method for each dataset
# Row label of the highest 'Final Score' within each dataset
best_row_labels = final_scores_df.groupby('Dataset')['Final Score'].idxmax()
best_methods_df = final_scores_df.loc[best_row_labels]

# Map each dataset's benchmark file name to the method chosen for it
cell_agent_selections = {
    f"{dataset}_benchmark_results.csv": method
    for dataset, method in zip(best_methods_df['Dataset'], best_methods_df['Method'])
}

# Display the result
cell_agent_selections

{'processed_Immune_ALL_hum_mou_benchmark_results.csv': 'X_pca_liger',
 'processed_Immune_ALL_human_benchmark_results.csv': 'X_pca_liger',
 'processed_Lung_atlas_public_benchmark_results.csv': 'X_pca_scvi',
 'processed_Perinhrinal_cortex_benchmark_results.csv': 'X_pca_scvi',
 'processed_human_pancreas_norm_complexBatch_benchmark_results.csv': 'X_pca_liger'}

In [16]:
# Read all files and merge data
# Load every benchmark table once; the same frames feed both the per-method
# rows and the "CellAgent" rows built below.
benchmark_frames = {file: pd.read_csv(file) for file in cell_agent_selections}
dfs = list(benchmark_frames.values())

# Merge all DataFrames
merged_df = pd.concat(dfs, ignore_index=True)

# Remove rows where "Embedding" equals "Metric Type"
merged_df = merged_df[merged_df["Embedding"] != "Metric Type"]

# For each dataset, copy the row of the selected method and relabel it "CellAgent"
selected_rows = []
for file, embedding in cell_agent_selections.items():
    file_df = benchmark_frames[file]
    selected_row = file_df[file_df["Embedding"] == embedding].copy()
    selected_row.loc[:, "Embedding"] = "CellAgent"  # Change Embedding to "CellAgent"
    selected_rows.append(selected_row)

# Merge selected rows into merged_df
cell_agent_df = pd.concat(selected_rows, ignore_index=True)
merged_df = pd.concat([merged_df, cell_agent_df], ignore_index=True)

# Keep only these methods (the list is used as a filter; row order is set by the sort below)
desired_order = [
    'CellAgent',
    'X_pca_combat',
    'X_pca_harmony',
    'X_pca_scanorama',
    'X_pca_scvi',
    'X_pca_liger',
]
# .copy() yields an independent frame, so the column assignments below do not
# write into a filtered slice.
merged_df = merged_df[merged_df["Embedding"].isin(desired_order)].copy()

# Select columns to calculate mean and standard deviation
columns_to_calculate = ["Batch correction", "Bio conservation", "Total"]

# Convert target columns to numeric, invalid values become NaN
for col in columns_to_calculate:
    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')

# Calculate mean and standard deviation
embedding_stats = merged_df.groupby("Embedding")[columns_to_calculate].agg(['mean', 'std'])

# Sort by mean of "Total"
embedding_stats_sorted_filtered = embedding_stats.sort_values(by=("Total", "mean"), ascending=False)
embedding_stats_sorted_filtered

Unnamed: 0_level_0,Batch correction,Batch correction,Bio conservation,Bio conservation,Total,Total
Unnamed: 0_level_1,mean,std,mean,std,mean,std
Embedding,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
X_pca_scvi,0.661987,0.14569,0.651484,0.038807,0.655685,0.075739
CellAgent,0.677911,0.145434,0.634356,0.102673,0.651778,0.103699
X_pca_liger,0.659296,0.166334,0.642375,0.109343,0.649144,0.114431
X_pca_scanorama,0.554489,0.209679,0.672965,0.040305,0.625574,0.08265
X_pca_harmony,0.596621,0.198612,0.606919,0.120546,0.6028,0.122499
X_pca_combat,0.55532,0.149011,0.582913,0.073709,0.571876,0.071487


## Option 3: Batch Correction (SCIB) and Visual Assessment, with the weighting coefficient set to $99$.
This means that CellAgent will almost exclusively use Visual Assessment to select the best method, while effectively ignoring the metrics calculated by SCIB (Batch Correction metrics).

In [17]:
# Step 3: For each dataset, dynamically load the corresponding benchmark file and calculate weighted scores
# Step 3: combine each visual rating with the matching benchmark score
# (weight 99, so the visual rating dominates the final score)
final_scores = []
for row_label, bio_row in CellAgent_bio.iterrows():
    dataset_name = bio_row['Dataset']

    # The benchmark file is expected at '<dataset>_benchmark_results.csv';
    # it is transposed so metric names become row labels.
    benchmark_results = pd.read_csv(f'{dataset_name}_benchmark_results.csv').T

    for method_name in ('X_pca_harmony', 'X_pca_liger', 'X_pca_scvi'):
        visual_score = bio_row[method_name]
        weighted_score, batch_value = calculate_final_score(
            dataset_name, method_name, visual_score, benchmark_results, gpt4o_weight=99
        )
        record = {
            'Dataset': dataset_name,
            'Method': method_name,
            'Original Score': visual_score,
            'Batch correction': batch_value,
            'Final Score': weighted_score,
        }
        final_scores.append(record)

# One row per (dataset, method) pair
final_scores_df = pd.DataFrame(final_scores)
final_scores_df

Unnamed: 0,Dataset,Method,Original Score,Batch correction,Final Score
0,processed_Lung_atlas_public,X_pca_harmony,7,0.379411,693.379411
1,processed_Lung_atlas_public,X_pca_liger,6,0.438996,594.438996
2,processed_Lung_atlas_public,X_pca_scvi,8,0.521022,792.521022
3,processed_Perinhrinal_cortex,X_pca_harmony,7,0.894673,693.894673
4,processed_Perinhrinal_cortex,X_pca_liger,5,0.883716,495.883716
5,processed_Perinhrinal_cortex,X_pca_scvi,9,0.894764,891.894764
6,processed_human_pancreas_norm_complexBatch,X_pca_harmony,8,0.660754,792.660754
7,processed_human_pancreas_norm_complexBatch,X_pca_liger,6,0.718173,594.718173
8,processed_human_pancreas_norm_complexBatch,X_pca_scvi,9,0.692652,891.692652
9,processed_Immune_ALL_human,X_pca_harmony,6,0.583921,594.583921


In [18]:
# Step 1: Get the highest Final Score and corresponding Method for each dataset
# Row label of the highest 'Final Score' within each dataset
best_row_labels = final_scores_df.groupby('Dataset')['Final Score'].idxmax()
best_methods_df = final_scores_df.loc[best_row_labels]

# Map each dataset's benchmark file name to the method chosen for it
cell_agent_selections = {
    f"{dataset}_benchmark_results.csv": method
    for dataset, method in zip(best_methods_df['Dataset'], best_methods_df['Method'])
}

# Display the result
cell_agent_selections

{'processed_Immune_ALL_hum_mou_benchmark_results.csv': 'X_pca_scvi',
 'processed_Immune_ALL_human_benchmark_results.csv': 'X_pca_scvi',
 'processed_Lung_atlas_public_benchmark_results.csv': 'X_pca_scvi',
 'processed_Perinhrinal_cortex_benchmark_results.csv': 'X_pca_scvi',
 'processed_human_pancreas_norm_complexBatch_benchmark_results.csv': 'X_pca_scvi'}

In [19]:
# Read all files and merge data
# Load every benchmark table once; the same frames feed both the per-method
# rows and the "CellAgent" rows built below.
benchmark_frames = {file: pd.read_csv(file) for file in cell_agent_selections}
dfs = list(benchmark_frames.values())

# Merge all DataFrames
merged_df = pd.concat(dfs, ignore_index=True)

# Remove rows where "Embedding" equals "Metric Type"
merged_df = merged_df[merged_df["Embedding"] != "Metric Type"]

# For each dataset, copy the row of the selected method and relabel it "CellAgent"
selected_rows = []
for file, embedding in cell_agent_selections.items():
    file_df = benchmark_frames[file]
    selected_row = file_df[file_df["Embedding"] == embedding].copy()
    selected_row.loc[:, "Embedding"] = "CellAgent"  # Change Embedding to "CellAgent"
    selected_rows.append(selected_row)

# Merge selected rows into merged_df
cell_agent_df = pd.concat(selected_rows, ignore_index=True)
merged_df = pd.concat([merged_df, cell_agent_df], ignore_index=True)

# Keep only these methods (the list is used as a filter; row order is set by the sort below)
desired_order = [
    'CellAgent',
    'X_pca_combat',
    'X_pca_harmony',
    'X_pca_scanorama',
    'X_pca_scvi',
    'X_pca_liger',
]
# .copy() yields an independent frame, so the column assignments below do not
# write into a filtered slice.
merged_df = merged_df[merged_df["Embedding"].isin(desired_order)].copy()

# Select columns to calculate mean and standard deviation
columns_to_calculate = ["Batch correction", "Bio conservation", "Total"]

# Convert target columns to numeric, invalid values become NaN
for col in columns_to_calculate:
    merged_df[col] = pd.to_numeric(merged_df[col], errors='coerce')

# Calculate mean and standard deviation
embedding_stats = merged_df.groupby("Embedding")[columns_to_calculate].agg(['mean', 'std'])

# Sort by mean of "Total"
embedding_stats_sorted_filtered = embedding_stats.sort_values(by=("Total", "mean"), ascending=False)
embedding_stats_sorted_filtered

Unnamed: 0_level_0,Batch correction,Batch correction,Bio conservation,Bio conservation,Total,Total
Unnamed: 0_level_1,mean,std,mean,std,mean,std
Embedding,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
CellAgent,0.661987,0.14569,0.651484,0.038807,0.655685,0.075739
X_pca_scvi,0.661987,0.14569,0.651484,0.038807,0.655685,0.075739
X_pca_liger,0.659296,0.166334,0.642375,0.109343,0.649144,0.114431
X_pca_scanorama,0.554489,0.209679,0.672965,0.040305,0.625574,0.08265
X_pca_harmony,0.596621,0.198612,0.606919,0.120546,0.6028,0.122499
X_pca_combat,0.55532,0.149011,0.582913,0.073709,0.571876,0.071487
