In [25]:
# Print the path of the Python interpreter that this kernel is running on.
import sys
print(sys.executable)

/opt/anaconda3/envs/groq/bin/python


In [26]:
# Smoke test for the Groq chat-completions API.
#
# The API key is read from the GROQ_API_KEY environment variable. Set it in the
# shell that launches Jupyter; never paste the key into the notebook. Any key
# that was ever pasted into a notebook should be treated as leaked and revoked.

import os
from groq import Groq

# Look the key up by the *name* of the environment variable. (Passing the key
# itself as the variable name makes the lookup return None.)
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))


def _ask_and_save(prompt, output_path="response_output.txt", model="llama-3.3-70b-versatile"):
    """Send one user prompt to Groq, print the reply and write it to a text file.

    Args:
        prompt: Text of the single user message.
        output_path: File that receives the reply (overwritten if it exists).
        model: Groq model identifier used for the completion.

    Returns:
        The reply text, or None when the request or the file write failed
        (the error is printed instead of raised so the cell keeps running).
    """
    try:
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": prompt}],
            model=model,
        )
        reply = chat_completion.choices[0].message.content
        print(reply)
        # Explicit encoding: replies may contain non-ASCII characters.
        with open(output_path, "w", encoding="utf-8") as file:
            file.write(reply)
        return reply
    except Exception as e:
        print(f"An error occurred: {e}")
        return None


response = _ask_and_save("Explain the differences between supervised and unsupervised learning.")

# Both requests write to the same default file, so this reply replaces the one above.
response = _ask_and_save(
    "Help me do cell-cell communication analysis by CellChat using data_input.csv and meta.csv in the current folder."
)

Supervised and unsupervised learning are two fundamental types of machine learning approaches. The primary difference between them lies in the way the model is trained and the type of data used.

**Supervised Learning:**

In supervised learning, the model is trained on labeled data, where each example is accompanied by a target or response variable. The goal is to learn a mapping between input data and the corresponding output labels, so the model can make predictions on new, unseen data. The model is trained to minimize the error between its predictions and the actual labels.

Key characteristics of supervised learning:

1. Labeled data: The training data is labeled, and the model learns from these labels.
2. Prediction: The model predicts a specific output for a given input.
3. Error measurement: The model is evaluated using metrics such as accuracy, precision, and recall.
4. Examples: Image classification, sentiment analysis, and regression tasks.

**Unsupervised Learning:**

In uns

In [27]:
# CellChat agent API setup.
#
# The Groq API key is read from the GROQ_API_KEY environment variable. Set it in
# the shell that launches Jupyter; never paste the key into the notebook. Any
# key that was ever pasted into a notebook should be treated as leaked and revoked.

# Importing required libraries
from groq import Groq
import os
import json
# Bind `ro` to the rpy2.robjects module: the cells below call `ro.r(...)`, which
# does not resolve when `ro` is the R evaluator object itself.
import rpy2.robjects as ro

api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment before starting the notebook."
    )
client = Groq(api_key=api_key)

# Model used by the tool-calling agent.
# NOTE(review): the smoke-test cell uses "llama-3.3-70b-versatile"; confirm that
# this model id is still served and that the two are meant to differ.
MODEL = "llama3-70b-8192"

In [28]:
# CellChat agent function definitions

In [29]:
###format checker
import rpy2.robjects as ro

def format_checker(input_file, meta_file):
    """Check that the expression matrix and metadata CSV files fit CellChat's input format.

    The check runs in the embedded R session and verifies that:
      * both CSV files can be read (first column used as row names),
      * the row names of the metadata equal the column names of the matrix (as sets),
      * the metadata has a 'labels' column,
      * the matrix can be converted to a sparse dgCMatrix.

    Any failure is caught on the R side and its message is printed; nothing is
    raised to Python and nothing is returned.

    Args:
        input_file: Path to the gene expression matrix CSV.
        meta_file: Path to the metadata CSV.
    """
    # The whole check sits inside one tryCatch so that the trailing error
    # handler has an expression to attach to (without it the snippet does not parse).
    ro.r(f"""
        tryCatch({{
            # Load required libraries
            library(Matrix)
            library(CellChat)
            # Read input files
            data.input <- tryCatch({{
                read.csv("{input_file}", row.names = 1)
            }}, error = function(e) {{
                stop(paste("Error in reading data_input.csv:", e$message))
            }})
            meta <- tryCatch({{
                read.csv("{meta_file}", row.names = 1)
            }}, error = function(e) {{
                stop(paste("Error in reading meta.csv:", e$message))
            }})
            # Check that the row names of meta and the column names of data.input are the same set
            if (!all(rownames(meta) %in% colnames(data.input)) || !all(colnames(data.input) %in% rownames(meta))) {{
                stop("Error: Row names of meta.csv do not match column names of data_input.csv.")
            }}
            # Check if 'labels' column exists in meta
            if (!"labels" %in% colnames(meta)) {{
                stop("Error: 'labels' column is missing in meta.csv.")
            }}
            # Convert data.input to sparse matrix (dgCMatrix); a failure here is reported by the handler
            data.input <- as(as.matrix(data.input), "dgCMatrix")
            # If all checks pass
            print("Format check passed: Files are correctly formatted for CellChat analysis.")
        }}, error = function(e) {{
            print(e$message)
        }})
    """)

In [30]:
def host_workflow(input_file, meta_file):
    """
    Interactive front end for the CellChat pipeline.

    Prints an overview of the eight workflow steps, asks (yes/no) which of the
    optional steps to run, always runs the format check on the two CSV paths,
    and then calls the selected step functions in pipeline order.
    """
    def wants(question):
        # Only the answer "yes" (any case, surrounding blanks ignored) opts in.
        return input(question).strip().lower() == "yes"

    step_overview = [
        ("\n🔍 **Step 1: Format Checking**",
         "   - Ensures the input CSV files are formatted correctly for CellChat analysis."),
        ("\n📚 **Step 2: Database Selection**",
         "   - Selects the appropriate species (human/mouse) and communication subsets."),
        ("\n📊 **Step 3: CellChat Analysis**",
         "   - Runs the full CellChat workflow, including preprocessing, communication inference, and visualization."),
        ("\n📈 **Step 4: Visualization of Signaling Pathways**",
         "   - Generates signaling pathway plots such as Hierarchy plot, Circle plot or Chord diagram."),
        ("\n📈 **Step 5: Visualization of Ligand-Receptor and Pathways**",
         "   - Visualizes cell-cell communication mediated by multiple ligand-receptors or signaling pathways."),
        ("\n📈 **Step 6: Visualization of Gene Expression**",
         "   - Displays signaling gene expression distribution using violin/dot plots."),
        ("\n📈 **Step 7: Visualization of Signaling Roles**",
         "   - Identifies signaling roles (e.g., dominant senders, receivers) of cell groups as well as the major contributing signaling."),
        ("\n💾 **Step 8: Save CellChat Object**",
         "   - Saves the final CellChat object to an RDS file for further analysis."),
    ]

    print("\n📢 **Welcome to the CellChat Analysis Pipeline!**")
    print("This workflow consists of multiple steps, each performing a critical function.")
    for heading, detail in step_overview:
        print(heading)
        print(detail)

    # Every yes/no question is asked up front, in pipeline order, before any step runs.
    run_database = wants("\nDo you want to run Database Selection? (yes/no): ")
    run_analysis = wants("\nDo you want to run CellChat Analysis? (yes/no): ")
    run_pathway_plots = wants("\nDo you want to run Visualization of Signaling Pathways? (yes/no): ")
    run_lr_plots = wants("\nDo you want to run Visualization of Ligand-Receptor and Pathways? (yes/no): ")
    run_gene_plots = wants("\nDo you want to plot the Signaling Gene Expression Distribution? (yes/no): ")
    run_roles = wants("\nDo you want to analyze Signaling Roles of Cell Groups? (yes/no): ")
    run_save = wants("\nDo you want to save the CellChat Object? (yes/no): ")

    print("\n✅ **Starting Analysis with Selected Steps...**\n")

    # Step 1 is mandatory.
    print("🔎 **Running Format Checker...**")
    format_checker(input_file, meta_file)
    print("✅ Format check completed.\n")

    # Step 2: species and database subsets are collected here and handed over.
    if run_database:
        print("📚 **Running Database Selection...**")
        species = input("Enter species ('human' or 'mouse'): ").strip().lower()
        raw_subsets = input("Enter subsets (comma-separated from 'Secreted Signaling', 'ECM-Receptor', 'Cell-Cell Contact', 'Non-protein Signaling'): ")
        subsets = [piece.strip() for piece in raw_subsets.split(',') if piece.strip()]
        database_identifier(species=species, subsets=subsets)
        print(f"✅ Database setup completed: Species = {species}, Subsets = {', '.join(subsets)}\n")

    # Step 3
    if run_analysis:
        print("🔬 **Running CellChat Analysis...**")
        cellchat_analysis(input_file, meta_file)
        print("✅ CellChat analysis completed.\n")

    # Step 4
    if run_pathway_plots:
        print("📈 **Running Visualization of Signaling Pathways...**")
        visual_signal_pathway()
        print("✅ Visualization completed.\n")

    # Step 5
    if run_lr_plots:
        print("📈 **Running Visualization of Ligand-Receptor and Pathways...**")
        visual_LR()
        print("✅ Visualization completed.\n")

    # Step 6
    if run_gene_plots:
        print("📈 **Running Visualization of Gene Expression...**")
        visual_gene()
        print("✅ Visualization completed.\n")

    # Step 7
    if run_roles:
        print("📈 **Running Analysis of Signaling Roles...**")
        identify_signaling_roles()
        print("✅ Analysis completed.\n")

    # Step 8: the destination path is asked for only when saving was selected.
    if run_save:
        print("💾 **Saving CellChat Object...**")
        save_path = input("Enter path to save CellChat RDS file (e.g., 'cellchat_object.rds'): ").strip()
        save_cellchat(output_file=save_path)
        print(f"✅ CellChat analysis and results saved at {save_path}\n")

    print("\n🎉 **Workflow Completed!** All selected CellChat analysis steps executed successfully.")


In [41]:
import rpy2.robjects as ro

def format_checker(input_file, meta_file):
    """
    Checks whether the format of data_input.csv and meta.csv meets the requirements for CellChat analysis.

    The R snippet reads both CSV files (first column as row names), verifies
    that the metadata row names and the matrix column names are the same set,
    that the metadata has a 'labels' column, and that the matrix converts to a
    sparse dgCMatrix.

    Any exception raised while the snippet runs is caught and printed; nothing
    is re-raised and nothing is returned.

    Args:
        input_file: Path to the gene expression matrix CSV.
        meta_file: Path to the metadata CSV.
    """
    try:
        ro.r(f"""
            library(Matrix)
            library(CellChat)
            # Read input files
            data.input <- tryCatch({{
                read.csv("{input_file}", row.names = 1)
            }}, error = function(e) {{
                stop(paste("Error in reading data_input.csv:", e$message))
            }})
            meta <- tryCatch({{
                read.csv("{meta_file}", row.names = 1)
            }}, error = function(e) {{
                stop(paste("Error in reading meta.csv:", e$message))
            }})
            # Check if row names in meta match column names in data.input
            if (!all(rownames(meta) %in% colnames(data.input)) || !all(colnames(data.input) %in% rownames(meta))) {{
                stop("Error: Row names of meta.csv do not match column names of data_input.csv.")
            }}
            # Check if 'labels' column exists in meta
            if (!"labels" %in% colnames(meta)) {{
                stop("Error: 'labels' column is missing in meta.csv.")
            }}
            # Convert data.input to sparse matrix
            data.input <- as(as.matrix(data.input), "dgCMatrix")
            # Success message
            print("Format check passed: Files are correctly formatted for CellChat analysis.")
        """)
    except Exception as e:
        # The validation stop() calls above arrive here too, so the message
        # must not claim that every failure is a parsing problem.
        print(f"Format check failed: {e}")


In [31]:
## Database identifier
def database_identifier(species: str = None, subsets: list = None, exclude: str = None):
    """Identify the species and subset of CellChatDB for use.

    Missing or invalid choices are requested interactively. The selection is
    then applied in the embedded R session, which leaves two R variables
    behind: `CellChatDB` (the full species database) and `CellChatDB.use`
    (the database restricted to the selected annotation categories).

    Args:
        species: "human" or "mouse" (case and surrounding blanks are ignored).
            Anything else triggers a prompt.
        subsets: Names of the CellChatDB annotation categories to keep. Unknown
            names are dropped; a comma-separated string is also accepted. If
            nothing valid remains, the user is prompted.
        exclude: Optional category to remove from `subsets`.
    """
    valid_species = ("human", "mouse")
    valid_subsets = ("Cell-Cell Contact", "ECM-Receptor", "Non-protein Signaling", "Secreted Signaling")

    def _keep_valid(candidates):
        # Trim, drop unknown or excluded names, and de-duplicate while keeping order.
        cleaned = (str(item).strip() for item in candidates)
        return list(dict.fromkeys(s for s in cleaned if s in valid_subsets and s != exclude))

    # Normalise before validating: the R code below compares the value with
    # "mouse" verbatim, so "Mouse" would otherwise silently select the human database.
    species = species.strip().lower() if isinstance(species, str) else ""
    while species not in valid_species:
        species = input("Please classify the species as 'human' or 'mouse': ").strip().lower()

    # Caller-supplied subsets get the same validation as prompted ones.
    if isinstance(subsets, str):
        subsets = subsets.split(",")
    subsets = _keep_valid(subsets or [])
    # Prompt again whenever validation or exclusion leaves nothing to search for.
    while not subsets:
        print("Please choose one or more database subsets from the following:")
        print(", ".join(valid_subsets))
        subset_input = input("Enter subsets as comma-separated values: ")
        subsets = _keep_valid(subset_input.split(","))

    print(f"Selected species: {species.capitalize()}")
    print(f"Selected subsets: {', '.join(subsets)}")
    # Define the R script dynamically based on user input
    ro.r(f'''
        library(CellChat)
        # Choose species
        if ("{species}" == "mouse") {{
            CellChatDB <- CellChatDB.mouse
        }} else {{
            CellChatDB <- CellChatDB.human
        }}
        # Choose subset of CellChatDB
        selected_categories <- c({', '.join([f'"{s}"' for s in subsets])})
        CellChatDB.use <- subsetDB(CellChatDB, search = selected_categories, key = "annotation")
        print("Database_identifier completed.")
    ''')

In [42]:
##whole process of cellchat analysis
def cellchat_analysis(input_file, meta_file):
    """Run the CellChat inference pipeline in the embedded R session.

    Steps: read both CSV files, build a CellChat object grouped by the
    'labels' metadata column, attach the ligand-receptor database, identify
    over-expressed genes and interactions, compute and filter communication
    probabilities, aggregate the network, and draw circle plots.

    Database: if the R session already holds `CellChatDB.use` (created by
    `database_identifier`), that subset is used; otherwise the full human
    database is used.

    Files written to the current working directory:
        net.csv, netP.csv, Number_of_interactions.pdf,
        Interaction_weights_strength.pdf, one Interaction_<group>.pdf per cell
        group, and cellchat_object.rds.

    Args:
        input_file: Path to the gene expression matrix CSV (first column = row names).
        meta_file: Path to the metadata CSV (first column = row names).
    """
    ro.r(f"""
        # Load required libraries
        library(CellChat)
        library(Matrix)
        library(ggplot2)
        library(future)
        # Read input data
        data.input <- read.csv("{input_file}", row.names = 1)
        meta <- read.csv("{meta_file}", row.names = 1)
        # Convert data formats
        data.input <- as(as.matrix(data.input), "dgCMatrix")
        meta <- as.data.frame(meta)
        print("Data and metadata prepared.")
        # Create CellChat object
        cellchat <- createCellChat(object = data.input, meta = meta, group.by = "labels")
        print("CellChat object created.")
        # Load CellChat database: honour the selection made by database_identifier(),
        # fall back to the full human database when no selection exists
        if (exists("CellChatDB.use")) {{
            cellchat@DB <- CellChatDB.use
        }} else {{
            cellchat@DB <- CellChatDB.human
        }}
        # Preprocessing steps
        cellchat <- subsetData(cellchat)
        future::plan("multisession", workers = 4)  # Enable parallel processing
        cellchat <- identifyOverExpressedGenes(cellchat)
        cellchat <- identifyOverExpressedInteractions(cellchat)
        print("Preprocessing completed.")
        # Compute communication probabilities
        ptm <- Sys.time()
        cellchat <- computeCommunProb(cellchat, type = "triMean")
        # Optional: Filter communication based on minimum number of cells
        cellchat <- filterCommunication(cellchat, min.cells = 10)
        # Compute pathway-level communication probabilities
        cellchat <- computeCommunProbPathway(cellchat)
        # Extract communication data
        df.net <- subsetCommunication(cellchat)
        write.csv(df.net, "net.csv")
        df.netP <- subsetCommunication(cellchat, slot.name = "netP")
        write.csv(df.netP, "netP.csv")
        print("Cell-cell communication inference completed.")
        # Aggregate cell-cell communication network
        cellchat <- aggregateNet(cellchat)
        execution.time <- Sys.time() - ptm
        print(as.numeric(execution.time, units = "secs"))
        # Get group sizes
        groupSize <- as.numeric(table(cellchat@idents))
        # Generate and save network visualization plots
        pdf("Number_of_interactions.pdf")
        netVisual_circle(cellchat@net$count, vertex.weight = groupSize, weight.scale = TRUE, label.edge = FALSE,
                         title.name = "Number of interactions")
        dev.off()
        pdf("Interaction_weights_strength.pdf")
        netVisual_circle(cellchat@net$weight, vertex.weight = groupSize, weight.scale = TRUE, label.edge = FALSE,
                         title.name = "Interaction weights/strength")
        dev.off()
        # Generate interaction-specific network plots (one PDF per sending cell group)
        mat <- cellchat@net$weight
        # seq_len() yields an empty sequence for an empty matrix, unlike 1:nrow(mat)
        for (i in seq_len(nrow(mat))) {{
            pdf(paste0("Interaction_", rownames(mat)[i], ".pdf"))
            mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
            mat2[i, ] <- mat[i, ]
            netVisual_circle(mat2, vertex.weight = groupSize, weight.scale = TRUE, edge.weight.max = max(mat),
                             title.name = rownames(mat)[i])
            dev.off()
        }}
        saveRDS(cellchat, "cellchat_object.rds")
        print("CellChat analysis completed.")
    """)



In [43]:
#Visualize each signaling pathway using Hierarchy plot, Circle plot or Chord diagram
def visual_signal_pathway():
    """
    Interactively visualize signaling pathways from the saved CellChat object.

    Loads "cellchat_object.rds" from the current working directory, then asks
    (through R's readline) for a plot type, the indices of the receiver cell
    groups, and the pathway(s) to draw.

    * Answer "all": every pathway is rendered with netVisual() in hierarchy
      layout, plus one <pathway>_L-R_contribution.pdf per pathway.
    * Otherwise: one PDF per pathway in the chosen plot type, with an optional
      ligand-receptor contribution plot and plots for the first enriched
      ligand-receptor pair.
    """
    # Raw string: the "\n" escapes are meant for R, not for Python.
    ro.r(r'''
        cellchat <- readRDS("cellchat_object.rds")
        # Split a comma-separated answer into trimmed tokens
        split_csv <- function(x) trimws(unlist(strsplit(x, ",")))
        # Receiver indices refer to positions in the factor levels, so list the levels
        # (cat() on the factor itself would print integer codes, not names)
        group_names <- levels(cellchat@idents)
        cat("Visualization options:\n")
        cat("1) Hierarchy plot - Shows autocrine and paracrine signaling between cell groups.\n")
        cat("2) Circle plot - Displays network communication among all cell types.\n")
        cat("3) Chord diagram - Represents signaling relationships in a flexible format.\n")
        cat("4) Heatmap - Shows pathway interactions in heatmap format.\n")
        plot_type <- suppressWarnings(as.numeric(readline(prompt="Enter the number of the plot type you want to visualize: ")))
        # A non-numeric answer is routed to the "Invalid selection" branch instead of failing in if()
        if (is.na(plot_type)) plot_type <- 0
        cat("Available cell types:", group_names, "\n")
        # as.numeric("1,2,3") is NA, so split the answer before converting
        vertex_receiver <- suppressWarnings(as.numeric(split_csv(readline(prompt="Enter indices of receiver cell types (comma-separated): "))))
        if (length(vertex_receiver) == 0 || anyNA(vertex_receiver)) {
            # Fall back to the first (up to) four groups when no usable indices were given
            vertex_receiver <- seq_len(min(4, length(group_names)))
            cat("No valid receiver indices given; using:", vertex_receiver, "\n")
        }
        cat("Available pathways:", cellchat@netP$pathways, "\n")
        pathways_show <- trimws(readline(prompt="Enter signaling pathway(s) to visualize (comma-separated or 'all'): "))
        if (pathways_show == "all") {
            pathways_show.all <- cellchat@netP$pathways
            for (i in seq_along(pathways_show.all)) {
                netVisual(cellchat, signaling = pathways_show.all[i], vertex.receiver = vertex_receiver, layout = "hierarchy")
                gg <- netAnalysis_contribution(cellchat, signaling = pathways_show.all[i])
                ggsave(filename=paste0(pathways_show.all[i], "_L-R_contribution.pdf"), plot=gg, width = 3, height = 2, units = 'in', dpi = 300)
            }
        } else {
            pathways_show <- split_csv(pathways_show)
            for (pathway in pathways_show) {
                if (plot_type == 1) {
                    pdf(paste0(pathway, "_hierarchy.pdf"))
                    netVisual_aggregate(cellchat, signaling = pathway, vertex.receiver = vertex_receiver)
                    dev.off()
                } else if (plot_type == 2) {
                    pdf(paste0(pathway, "_circle.pdf"))
                    netVisual_aggregate(cellchat, signaling = pathway, layout = "circle")
                    dev.off()
                } else if (plot_type == 3) {
                    pdf(paste0(pathway, "_chord.pdf"))
                    netVisual_aggregate(cellchat, signaling = pathway, layout = "chord")
                    dev.off()
                } else if (plot_type == 4) {
                    pdf(paste0(pathway, "_heatmap.pdf"))
                    # The heatmap is returned as an object; inside a loop it is drawn only when printed
                    print(netVisual_heatmap(cellchat, signaling = pathway, color.heatmap = "Reds"))
                    dev.off()
                } else {
                    cat("Invalid selection. Please choose a valid plot type.\n")
                }
                # Ask user if they want to compute contribution of each ligand-receptor pair
                compute_contribution <- readline(prompt="Do you want to compute and visualize ligand-receptor pair contributions? (yes/no): ")
                if (tolower(compute_contribution) == "yes") {
                    gg <- netAnalysis_contribution(cellchat, signaling = pathway)
                    ggsave(filename=paste0(pathway, "_L-R_contribution.pdf"), plot=gg, width = 3, height = 2, units = 'in', dpi = 300)
                    pairLR <- extractEnrichedLR(cellchat, signaling = pathway, geneLR.return = FALSE)
                    LR.show <- pairLR[1,] # Show one ligand-receptor pair
                    pdf(paste0(pathway, "_LR_hierarchy.pdf"))
                    netVisual_individual(cellchat, signaling = pathway, pairLR.use = LR.show, vertex.receiver = vertex_receiver)
                    dev.off()
                    pdf(paste0(pathway, "_LR_circle.pdf"))
                    netVisual_individual(cellchat, signaling = pathway, pairLR.use = LR.show, layout = "circle")
                    dev.off()
                }
            }
        }
    ''')

In [44]:
#Visualize cell-cell communication mediated by multiple ligand-receptors or signaling pathways
def visual_LR():
    """
    Interactively visualize communication mediated by ligand-receptor pairs or pathways.

    Loads "cellchat_object.rds" from the current working directory, then asks
    (through R's readline) for sender and receiver cell-group indices, the
    pathway(s), optional x-axis sorting, and the plot type:

    * 1 (bubble): writes bubble_plot_false.pdf and bubble_plot_true.pdf.
    * 2 (chord): writes chord_diagram_all.pdf or chord_diagram.pdf.
    """
    # Raw string: the "\n" escapes are meant for R, not for Python.
    ro.r(r'''
        cellchat <- readRDS("cellchat_object.rds")
        # Split a comma-separated answer into trimmed tokens
        split_csv <- function(x) trimws(unlist(strsplit(x, ",")))
        # Print the level names: cat() on the factor itself would print integer codes
        cat("Available cell types:", levels(cellchat@idents), "\n")
        # as.numeric("1,2,3") is NA, so split each answer before converting
        sources_use <- as.numeric(split_csv(readline(prompt="Enter indices of sender cell types (comma-separated): ")))
        targets_use <- as.numeric(split_csv(readline(prompt="Enter indices of receiver cell types (comma-separated): ")))
        cat("Available pathways:", cellchat@netP$pathways, "\n")
        signaling <- trimws(readline(prompt="Enter signaling pathway(s) to visualize (comma-separated or 'all'): "))
        sort_choice <- readline(prompt="Do you want to set the order of interacting cell pairs on x-axis? (yes/no): ")
        sort_by_target <- FALSE
        sort_by_source <- FALSE
        source_priority <- TRUE
        if (tolower(sort_choice) == "yes") {
            sort_by_target <- tolower(readline(prompt="Sort by target cell types? (yes/no): ")) == "yes"
            sort_by_source <- tolower(readline(prompt="Sort by source cell types? (yes/no): ")) == "yes"
            # R's logical AND is `&&`; `and` is not an R operator and stops the snippet from parsing
            if (sort_by_target && sort_by_source) {
                source_priority <- tolower(readline(prompt="Prioritize source order? (yes/no): ")) == "yes"
            }
        }
        # Ask user which type of plot they want
        cat("Visualization options:\n")
        cat("1) Bubble plot - Shows significant L-R pairs and interactions.\n")
        cat("2) Chord diagram - Displays interactions between defined sender and receiver groups.\n")
        plot_type <- suppressWarnings(as.numeric(readline(prompt="Enter the number of the plot type you want to visualize: ")))
        # A non-numeric answer is routed to the "Invalid selection" branch instead of failing in if()
        if (is.na(plot_type)) plot_type <- 0
        if (signaling == "all") {
            signaling <- cellchat@netP$pathways
        } else {
            signaling <- split_csv(signaling)
        }
        if (plot_type == 1) {
            # The bubble plots are returned as plot objects; they reach the PDF device only when printed
            pdf("bubble_plot_false.pdf")
            print(netVisual_bubble(cellchat, sources.use = sources_use, targets.use = targets_use, signaling = signaling, remove.isolate = FALSE, sort.by.target = sort_by_target, sort.by.source = sort_by_source, sort.by.source.priority = source_priority))
            dev.off()
            pdf("bubble_plot_true.pdf")
            pairLR.use <- extractEnrichedLR(cellchat, signaling = signaling)
            print(netVisual_bubble(cellchat, sources.use = sources_use, targets.use = targets_use, pairLR.use = pairLR.use, remove.isolate = TRUE, sort.by.target = sort_by_target, sort.by.source = sort_by_source, sort.by.source.priority = source_priority))
            dev.off()
        } else if (plot_type == 2) {
            show_all <- readline(prompt="Do you want to show all significant signaling pathways from sources to targets? (yes/no): ")
            if (tolower(show_all) == "yes") {
                pdf("chord_diagram_all.pdf")
                netVisual_chord_gene(cellchat, sources.use = sources_use, targets.use = targets_use, slot.name = "netP", legend.pos.x = 10)
                dev.off()
            } else {
                pdf("chord_diagram.pdf")
                netVisual_chord_gene(cellchat, sources.use = sources_use, targets.use = targets_use, signaling = signaling)
                dev.off()
            }
        } else {
            cat("Invalid selection. Please choose a valid plot type.\n")
        }
    ''')

In [45]:
#Plot the signaling gene expression distribution using violin/dot plot
def visual_gene():
    """
    Interactively plot the expression distribution of signaling genes.

    Loads "cellchat_object.rds" from the current working directory, then asks
    (through R's readline) for the pathway(s), whether to restrict the plot to
    enriched signaling genes, and the plot type (1 = violin, 2 = dot). The
    figure is written to gene_expression_<type>.pdf.

    An answer other than 1 or 2 for the plot type stops the R snippet with an
    error before any file is opened.
    """
    # Raw string: the "\n" escapes are meant for R, not for Python.
    ro.r(r'''
        cellchat <- readRDS("cellchat_object.rds")
        cat("Available pathways:", cellchat@netP$pathways, "\n")
        signaling <- trimws(readline(prompt="Enter signaling pathway(s) to visualize gene expression (comma-separated or 'all'): "))
        if (signaling == "all") {
            signaling <- cellchat@netP$pathways
        } else {
            # Trim so that "A, B" yields "B" rather than " B"
            signaling <- trimws(unlist(strsplit(signaling, ",")))
        }
        enriched_only <- readline(prompt="Show only enriched signaling genes? (yes/no): ")
        enriched_only <- tolower(enriched_only) == "yes"
        cat("Visualization options:\n")
        cat("1) Violin plot - Shows distribution of gene expression.\n")
        cat("2) Dot plot - Highlights expression levels across cell groups.\n")
        plot_type <- suppressWarnings(as.numeric(readline(prompt="Enter the number of the plot type you want to use: ")))
        plot_type_map <- c("violin", "dot")
        # Validate before opening the device; an unchecked answer would index the map with NA
        if (is.na(plot_type) || !(plot_type %in% seq_along(plot_type_map))) {
            stop("Invalid selection. Please choose a valid plot type.")
        }
        plot_type_selected <- plot_type_map[plot_type]
        pdf(paste0("gene_expression_", plot_type_selected, ".pdf"))
        # The plot is returned as an object; it reaches the PDF device only when printed
        print(plotGeneExpression(cellchat, signaling = signaling, enriched.only = enriched_only, type = plot_type_selected))
        dev.off()
    ''')


In [46]:
#Identify signaling roles (e.g., dominant senders, receivers) of cell groups as well as the major contributing signaling
def identify_signaling_roles():
    """
    Interactively draw figures about the signaling roles of cell groups.

    Loads "cellchat_object.rds" from the current working directory, asks
    (through R's readline) for the pathway(s) to analyze, computes network
    centrality on the pathway-level network, and writes three PDFs:
    network_centrality_scores.pdf, signaling_role_scatter.pdf and
    signaling_role_heatmap.pdf.

    With the answer "all", the centrality plot covers every pathway in the
    object, and the scatter and heatmap functions are called without a
    `signaling` argument.
    """
    # Raw string: the "\n" escape is meant for R, not for Python.
    ro.r(r'''
        cellchat <- readRDS("cellchat_object.rds")
        cat("Available pathways:", cellchat@netP$pathways, "\n")
        signaling_input <- trimws(readline(prompt="Enter signaling pathway(s) to analyze (comma-separated or 'all'): "))
        # Keep the "all" decision in a scalar flag: after splitting, `signaling` may hold
        # several names and can no longer be used as an if() condition
        use_all <- signaling_input == "all"
        if (use_all) {
            # "all" is not a pathway name, so expand it to the actual pathway list
            signaling <- cellchat@netP$pathways
        } else {
            signaling <- trimws(unlist(strsplit(signaling_input, ",")))
        }
        # Compute and visualize the network centrality scores
        cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP")
        pdf("network_centrality_scores.pdf", width = 8, height = 2.5)
        netAnalysis_signalingRole_network(cellchat, signaling = signaling, width = 8, height = 2.5, font.size = 10)
        dev.off()
        # Visualize dominant senders and receivers in a 2D space
        pdf("signaling_role_scatter.pdf")
        if (use_all) {
            gg1 <- netAnalysis_signalingRole_scatter(cellchat)
        } else {
            gg1 <- netAnalysis_signalingRole_scatter(cellchat, signaling = signaling)
        }
        print(gg1)
        dev.off()
        # Identify signals contributing the most to outgoing or incoming signaling of certain cell groups
        pdf("signaling_role_heatmap.pdf")
        if (use_all) {
            ht1 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "outgoing")
            ht2 <- netAnalysis_signalingRole_heatmap(cellchat, pattern = "incoming")
        } else {
            ht1 <- netAnalysis_signalingRole_heatmap(cellchat, signaling = signaling, pattern = "outgoing")
            ht2 <- netAnalysis_signalingRole_heatmap(cellchat, signaling = signaling, pattern = "incoming")
        }
        print(ht1 + ht2)
        dev.off()
    ''')


In [13]:
# Identify global communication patterns to explore how multiple cell types and signaling pathways coordinate together.
# NOTE(review): everything below is R (`<-`, `%>%` pipes), but the cell was executed by the
# Python kernel, which reports a SyntaxError for it. It presumably needs an R context
# (an R cell or a string handed to rpy2) — confirm how it is meant to be run.
# NOTE(review): `df` is not defined in the visible part of the notebook; the code reads its
# columns Measure, score and k — verify against the cell that builds it.
#
# For every pair of consecutive rows that share the same Measure, store
# score[i] - score[i + 1] (both rounded to 10 decimals) in drop_value[i].
# NOTE(review): 1:(nrow(df) - 1) counts down (1, 0) when df has a single row — confirm that
# df always has at least two rows.
for (i in 1:(nrow(df) - 1)) {
  if (as.character(df$Measure[i]) == as.character(df$Measure[i + 1])) {  # Ensure same Measure group
    df$drop_value[i] <- round(df$score[i], 10) - round(df$score[i + 1], 10)  # Force exact comparison
  }
}
# Min-max scale drop_value within each Measure group.
# NOTE(review): min()/max() are called without na.rm; if drop_value contains NA (rows the
# loop above never assigns), the whole group's scaled_drop becomes NA — confirm whether
# drop_value is initialised elsewhere.
df_scaled <- df %>%
  group_by(Measure) %>%
  mutate(scaled_drop = (drop_value - min(drop_value)) / (max(drop_value) - min(drop_value))) %>%
  ungroup()

# Compute the average scaled drop for each k (NA values are ignored in the mean)
df_avg_scaled_drop <- df_scaled %>%
  group_by(k) %>%
  summarize(avg_scaled_drop = mean(scaled_drop, na.rm = TRUE)) %>%
  ungroup()


SyntaxError: invalid syntax (1341714617.py, line 2)

In [47]:
#save object
def save_cellchat(output_file):
    """Saves the CellChat object to an RDS file.

    The object is taken from the `cellchat` variable of the embedded R
    session. If that variable does not exist (for example because the analysis
    ran in an earlier session), the object is loaded from
    "cellchat_object.rds", the file written by `cellchat_analysis`.

    Args:
        output_file: Destination path of the RDS file.

    Raises:
        ValueError: If `output_file` is not a non-empty string.
    """
    # Reject an unusable path here, with a clear message, before handing it to R.
    if not isinstance(output_file, str) or not output_file.strip():
        raise ValueError("output_file must be a non-empty path string.")
    ro.r(f"""
        if (!exists("cellchat")) {{
            cellchat <- readRDS("cellchat_object.rds")
        }}
        saveRDS(cellchat, file = "{output_file}")
        print("CellChat object saved successfully.")
    """)


In [48]:
def _tool_spec(name, description, properties=None, required=()):
    """Build one function-tool entry in the format passed as `tools=` to the chat completion call.

    Args:
        name: Name of the Python function the model may call.
        description: Text shown to the model to explain the tool.
        properties: JSON-schema property definitions of the arguments (none by default).
        required: Names of the arguments the model must supply.
    """
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": dict(properties or {}),
                "required": list(required),
            },
        },
    }


# Argument schema shared by the two tools that take the input CSV paths.
_CSV_INPUT_PROPERTIES = {
    "input_file": {"type": "string", "description": "Path to the gene expression matrix CSV file."},
    "meta_file": {"type": "string", "description": "Path to the metadata CSV file."},
}
_DB_SUBSETS = ["Cell-Cell Contact", "ECM-Receptor", "Non-protein Signaling", "Secreted Signaling"]

# Tool catalogue offered to the model.
# The declared arguments must match the Python signatures, because the agent calls
# the function with the model-supplied arguments as keyword arguments. The four
# visualization/role functions take no parameters (they ask the user at run time),
# so their schemas declare none; advertising arguments for them would make the
# call fail with a TypeError.
tools = [
    _tool_spec(
        "format_checker",
        "Check the format of input data files for CellChat analysis.",
        _CSV_INPUT_PROPERTIES,
        ["input_file", "meta_file"],
    ),
    _tool_spec(
        "database_identifier",
        "Identify the appropriate species (human/mouse) and select the CellChat database subsets.",
        {
            "species": {
                "type": "string",
                "enum": ["human", "mouse"],
                "description": "Specify the species for the CellChat analysis."
            },
            "subsets": {
                "type": "array",
                "items": {
                    "type": "string",
                    "enum": list(_DB_SUBSETS)
                },
                "description": "List of database subsets to include in the analysis."
            },
            "exclude": {
                "type": "string",
                "enum": list(_DB_SUBSETS),
                "description": "Database subset to exclude from analysis (optional)."
            },
        },
        ["species", "subsets"],
    ),
    _tool_spec(
        "cellchat_analysis",
        "Perform the full CellChat analysis pipeline, including data preprocessing, communication inference, and network aggregation.",
        _CSV_INPUT_PROPERTIES,
        ["input_file", "meta_file"],
    ),
    _tool_spec(
        "visual_signal_pathway",
        "Visualize cell signaling pathways using hierarchy, circle, chord, or heatmap plots. "
        "Takes no arguments: the plot type, receiver cell groups and pathways are requested interactively at run time.",
    ),
    _tool_spec(
        "visual_LR",
        "Visualize ligand-receptor interactions across sender and receiver cell types. "
        "Takes no arguments: sender/receiver groups, pathways and the plot type (bubble or chord) are requested interactively at run time.",
    ),
    _tool_spec(
        "visual_gene",
        "Plot signaling gene expression distribution using violin or dot plots. "
        "Takes no arguments: pathways, plot type and the enriched-only option are requested interactively at run time.",
    ),
    _tool_spec(
        "identify_signaling_roles",
        "Analyze and visualize dominant signaling roles (senders/receivers) of cell groups. "
        "Takes no arguments: the pathways are requested interactively at run time.",
    ),
    _tool_spec(
        "save_cellchat",
        "Save the CellChat object for further analysis.",
        {
            "output_file": {
                "type": "string",
                "description": "Path to save the CellChat RDS object."
            },
        },
        ["output_file"],
    ),
]


In [49]:
import json

def _execute_tool_call(tool_call, available_functions):
    """
    Run one model-requested tool call and return its result as message content.

    Args:
        tool_call: A tool-call object from the model response; its
            `function.name` and `function.arguments` (a JSON string) are read.
        available_functions (dict): Maps tool names to Python callables.

    Returns:
        str: The callable's return value if it is already a string, otherwise
        its JSON encoding. For a tool name that is not in `available_functions`,
        a JSON object with an "error" key, so the model still receives a reply
        for that call.

    Raises:
        json.JSONDecodeError: If the arguments of a known tool are malformed JSON.
        Any exception raised by the tool function itself propagates unchanged.
    """
    function_name = tool_call.function.name
    function_to_call = available_functions.get(function_name)
    if function_to_call is None:
        # Report the problem back to the model instead of silently dropping the call.
        return json.dumps({"error": f"Tool '{function_name}' is not available in this session."})
    # A call with no inputs may arrive with an empty argument string; treat it as "{}".
    function_args = json.loads(tool_call.function.arguments or "{}")
    function_response = function_to_call(**function_args)
    if isinstance(function_response, str):
        return function_response
    return json.dumps(function_response)


def run_conversation(user_prompt):
    """
    Send a user prompt to the Groq chat model, run the tools it requests, and return the final reply.

    The first request offers the module-level `tools` schema with
    `tool_choice="auto"`. If the model asks for tool calls, each one is executed
    and answered with a `role: "tool"` message carrying the matching
    `tool_call_id`; a second request (sent without tools) then asks the model to
    turn those results into an answer. Only one round of tool calls is processed.

    Relies on the module-level names `client`, `MODEL` and `tools`.

    Args:
        user_prompt (str): The user's request, sent as the `user` message.

    Returns:
        The content of the model's final message: the second response when tools
        were called, otherwise the first response.

    Raises:
        Exceptions from the Groq client, from parsing malformed tool arguments,
        and from the tool functions themselves propagate unchanged.
    """
    messages = [
        {
            "role": "system",
            "content": "You are an AI-powered interactive agent performing automated CellChat analysis, handling data preprocessing, communication inference, visualization, and role identification. "
                       "Explain the workflow at the beginning, then automatically proceed through all required steps. You will guide the user through each step.",
        },
        {
            "role": "user",
            "content": user_prompt,
        }
    ]
    # Step 1: Send initial request to Groq AI
    response = client.chat.completions.create(
        model=MODEL,
        messages=messages,
        tools=tools,
        tool_choice="auto",
        max_tokens=4096
    )
    response_message = response.choices[0].message
    tool_calls = response_message.tool_calls
    if not tool_calls:
        # No tools requested: the first reply is already the answer.
        return response_message.content

    # Step 2: Map tool names from the schema to the Python functions that implement them
    available_functions = {
        "format_checker": format_checker,
        "database_identifier": database_identifier,
        "cellchat_analysis": cellchat_analysis,
        "visual_signal_pathway": visual_signal_pathway,
        "visual_LR": visual_LR,
        "visual_gene": visual_gene,
        "identify_signaling_roles": identify_signaling_roles,
    }
    # The tools schema also advertises `save_cellchat`; dispatch it when the
    # notebook defines such a function (looked up lazily so this cell still
    # works if it does not).
    save_cellchat_fn = globals().get("save_cellchat")
    if callable(save_cellchat_fn):
        available_functions["save_cellchat"] = save_cellchat_fn

    # The assistant message holding the tool calls must precede the tool results.
    messages.append(response_message)
    # Step 3: Execute function calls in the order requested; every call gets a reply
    for tool_call in tool_calls:
        messages.append({
            "role": "tool",
            "tool_call_id": tool_call.id,  # links this result to the request it answers
            "name": tool_call.function.name,
            "content": _execute_tool_call(tool_call, available_functions),
        })
    # Step 4: Make a second request to process results
    second_response = client.chat.completions.create(
        model=MODEL,
        messages=messages
    )
    return second_response.choices[0].message.content


In [50]:
# Ask the agent for a full CellChat analysis of the two CSV files and print its final reply.
user_prompt = "Perform a full CellChat analysis on my dataset. The input file is 'data_input.csv' and metadata file is 'meta.csv'."
final_reply = run_conversation(user_prompt)
print(final_reply)

R[write to console]: Loading required package: dplyr

R[write to console]: 
Attaching package: ‘dplyr’


R[write to console]: The following objects are masked from ‘package:stats’:

    filter, lag


R[write to console]: The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


R[write to console]: Loading required package: igraph

R[write to console]: 
Attaching package: ‘igraph’


R[write to console]: The following objects are masked from ‘package:dplyr’:

    as_data_frame, groups, union


R[write to console]: The following objects are masked from ‘package:stats’:

    decompose, spectrum


R[write to console]: The following object is masked from ‘package:base’:

    union


R[write to console]: Loading required package: ggplot2




    an issue that caused a segfault when used with rpy2:
    https://github.com/rstudio/reticulate/pull/1188
    Make sure that you use a version of that package that includes
    the fix.
[1] "Format check passed: Files are correctly formatted for CellChat analysis."


R[write to console]: The non-protein signaling is now included for CellChat analysis, which is usually used for neuron-neuron and metabolic communication!



Selected species: Human
Selected subsets: Cell-Cell Contact, ECM-Receptor, Non-protein Signaling, Secreted Signaling
 "Database_identifier completed."


R[write to console]: 
Attaching package: ‘future’


R[write to console]: The following objects are masked from ‘package:igraph’:

    %->%, %<-%




[1] "Data and metadata prepared."
[1] "Create a CellChat object from a data matrix"
Set cell identities for the new CellChat object 
The cell groups used for CellChat analysis are  APOE+ FIB, CD40LG+ TC, cDC1, cDC2, COL11A1+ FIB, FBN1+ FIB, Inflam. DC, Inflam. FIB, Inflam. TC, LC, NKT, TC 
[1] "CellChat object created."
The number of highly variable ligand-receptor pairs used for signaling inference is 1646 
[1] "Preprocessing completed."
triMean is used for calculating the average gene expression per cell group. 
[1] ">>> Run CellChat on sc/snRNA-seq data <<< [2025-02-18 00:09:41.354582]"


  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                     

[1] ">>> CellChat inference is done. Parameter values are stored in `object@options$parameter` <<< [2025-02-18 00:10:56.301239]"
[1] "Cell-cell communication inference completed."
[1] 77.13615
[1] "CellChat analysis completed."


BadRequestError: Error code: 400 - {'error': {'message': "'messages.3' : for 'role:tool' the following must be satisfied[('messages.3.tool_call_id' : property 'tool_call_id' is missing)]", 'type': 'invalid_request_error'}}

In [None]:
# Example usage: send each prompt to the agent in turn and print its reply.
example_prompts = [
    "Perform a full CellChat analysis on my dataset. The input file is 'data_input.csv' and metadata file is 'meta.csv'.",
    "Help me do cell-cell communication analysis using data_input.csv and meta.csv in the current folder ",
    "Visualize the communication network as a circle plot.",
]
for user_prompt in example_prompts:
    print(run_conversation(user_prompt))




In [None]:
import matplotlib.pyplot as plt

# One bar per method; heights are correct rates in percent.
# NOTE(review): the rates are hard-coded; their source is not shown in this cell.
methods = ['CellChat Agent', 'CellAgent']
correct_rates = [63.76, 34.82]

# Draw the bar chart on an explicit figure/axes pair.
fig, ax = plt.subplots(figsize=(6, 4))
ax.bar(methods, correct_rates)
ax.set(xlabel='Methods', ylabel='Correct Rate (%)', title='Correct Rate Comparison')

# Write the figure to disk as a PDF.
pdf_filename = "correct_rate_comparison.pdf"
fig.savefig(pdf_filename, format='pdf')