In [None]:
!pip install -q torch torch-geometric pandas duckdb pyarrow networkx gradio -q

In [None]:
import gradio as gr
import pandas as pd
import random
import time
import os
import tempfile  # Import tempfile for creating temporary files
import sys  # Import sys to modify Python path
import abc  # Import abc for abstract base classes
from urllib.parse import urlparse  # Added for URLProcessor's concrete implementation
import logging  # Added for URLProcessor's concrete implementation

# --- Google Colab Drive Mounting ---
# `google.colab` is imported here, next to its only use, rather than with the
# general imports at the top of the cell.
from google.colab import drive

# Mount Google Drive under /content/drive (force_remount=True is passed so an
# existing mount is refreshed rather than reused).
drive.mount("/content/drive", force_remount=True)

# Put the project root at the front of sys.path so that the `src.*` imports
# below resolve against the WebKnoGraph checkout stored in Drive.
project_root = "/content/drive/My Drive/WebKnoGraph"
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# --- Import Real Classes from WebKnoGraph Project ---
try:
    import torch
    import json

    # Import the real LinkPredictionConfig
    from src.backend.config.link_prediction_config import LinkPredictionConfig
    from src.backend.models.graph_models import GraphSAGEModel

    # Use the exact ILogger definition provided by the user
    from src.shared.interfaces import (
        ILogger as OriginalILogger,
    )  # Alias to avoid name conflict

    # Import the real URLProcessor
    from src.backend.utils.url_processing import URLProcessor as OriginalURLProcessor

    # Define a concrete ConsoleLogger that implements OriginalILogger
    class ConsoleLogger(OriginalILogger):
        """Logger implementation that prints each message to stdout.

        Every method prefixes the message with its level name followed by a
        colon and returns ``None``.
        """

        @staticmethod
        def _emit(line: str) -> None:
            # Single place where log lines reach the console.
            print(line)

        def info(self, message: str):
            """Print *message* at INFO level."""
            self._emit(f"INFO: {message}")

        def error(self, message: str):
            """Print *message* at ERROR level."""
            self._emit(f"ERROR: {message}")

        def debug(self, message: str):
            """Print *message* at DEBUG level."""
            self._emit(f"DEBUG: {message}")

        def warning(self, message: str):
            """Print *message* at WARNING level."""
            self._emit(f"WARNING: {message}")

        def exception(self, message: str):
            """Print *message* at EXCEPTION level (no traceback is added)."""
            self._emit(f"EXCEPTION: {message}")

    # Bind the application-wide name `ILogger` to the concrete ConsoleLogger so
    # the code further down can simply call `ILogger()`.
    ILogger = ConsoleLogger
    # Expose the imported URLProcessor under the plain name used further down;
    # the ImportError branch defines dummy classes under these same names.
    URLProcessor = OriginalURLProcessor

except ImportError as e:
    print(f"Error importing WebKnoGraph modules: {e}")
    print(
        "Please ensure your 'WebKnoGraph' project is correctly structured in Google Drive at:"
    )
    print(f"'{project_root}' and all dependencies are installed.")

    # Define dummy classes if the imports fail, so the Gradio app can at least launch.
    # This is a fallback for development/debugging only; for real use, the imports must succeed.
    class LinkPredictionConfig:
        """Dummy configuration used when the real LinkPredictionConfig is unavailable.

        Exposes the same five path attributes the rest of this file reads, all
        pointing at placeholder file names inside /tmp.
        """

        def __init__(self):
            print("Using dummy LinkPredictionConfig due to import error.")

            # Every placeholder artifact lives in the same scratch directory.
            scratch_dir = "/tmp"
            placeholder_files = {
                "model_state_path": "graphsage_link_predictor.pth",
                "node_embeddings_path": "final_node_embeddings.pt",
                "node_mapping_path": "model_metadata.json",
                "edge_index_path": "edge_index.pt",
                "edge_csv_path": "link_graph_edges.csv",
            }
            for attribute_name, file_name in placeholder_files.items():
                setattr(self, attribute_name, os.path.join(scratch_dir, file_name))

    # Dummy ILogger for fallback scenario
    class ILogger:
        """Fallback logger that prints to stdout, tagging lines with "(Dummy Logger)".

        Mirrors the five methods of the ConsoleLogger defined in the import
        branch; each one returns ``None``.
        """

        @staticmethod
        def _emit(line: str) -> None:
            # Single place where log lines reach the console.
            print(line)

        def info(self, message: str):
            """Print *message* at INFO level."""
            self._emit(f"INFO (Dummy Logger): {message}")

        def error(self, message: str):
            """Print *message* at ERROR level."""
            self._emit(f"ERROR (Dummy Logger): {message}")

        def debug(self, message: str):
            """Print *message* at DEBUG level."""
            self._emit(f"DEBUG (Dummy Logger): {message}")

        def warning(self, message: str):
            """Print *message* at WARNING level."""
            self._emit(f"WARNING (Dummy Logger): {message}")

        def exception(self, message: str):
            """Print *message* at EXCEPTION level (no traceback is added)."""
            self._emit(f"EXCEPTION (Dummy Logger): {message}")

    # Dummy URLProcessor for fallback scenario
    class URLProcessor:
        """Fallback URL helper used when the real URLProcessor cannot be imported."""

        def get_folder_depth(self, url: str) -> int:
            """Return the number of path segments in *url*.

            The scheme (everything up to ``://``) and the host (everything up
            to the first ``/`` after it) are ignored. A URL with no path, or
            with only ``/``, has depth 0.

            NOTE(review): depth 0 for the site root is assumed to match the real
            URLProcessor's convention -- confirm against its implementation.

            Args:
                url: Absolute or scheme-less URL.

            Returns:
                The segment count, or -1 when *url* is not a string.
            """
            # Non-string input (None, NaN from a CSV, ...) cannot be parsed.
            if not isinstance(url, str):
                return -1

            without_scheme = url.split("://", 1)[-1]
            # partition() yields an empty path when there is no "/" at all, so
            # the host name is never mistaken for a folder.
            _host, _slash, path = without_scheme.partition("/")
            path = path.strip("/")
            if not path:
                return 0
            return len(path.split("/"))

    class GraphSAGEModel:
        """Minimal mock used when the real GraphSAGEModel cannot be imported.

        It accepts the calls this file makes on the model and produces random
        scores instead of predictions.
        """

        def __init__(self, *args, **kwargs):
            """Accept and ignore any constructor arguments."""

        def predict_link(self, node_embeddings, candidate_edge_index):
            """Return one random float in [0.1, 0.9] per candidate edge.

            The number of candidates is read from ``candidate_edge_index.shape[1]``;
            inputs without a ``shape`` attribute are treated as having shape
            (2, 10). ``node_embeddings`` is ignored.
            """
            edge_shape = getattr(candidate_edge_index, "shape", (2, 10))
            candidate_count = edge_shape[1]

            fake_scores = []
            for _ in range(candidate_count):
                fake_scores.append(random.uniform(0.1, 0.9))
            return fake_scores

        def load_state_dict(self, state_dict):
            """Ignore *state_dict*; the mock has no weights."""

        def eval(self):
            """Do nothing; the mock has no training/eval mode."""

        def to(self, device):
            """Ignore *device* and return this mock."""
            return self


# --- Real RecommendationEngine Class (as provided by user) ---
class RecommendationEngine:
    """Loads trained artifacts and provides link recommendations using a Top-K strategy.

    The artifacts (metadata JSON, node embeddings, edge index and model
    weights) are read lazily from the paths on ``config`` the first time they
    are needed and are kept in memory afterwards.
    """

    def __init__(
        self, config: LinkPredictionConfig, logger: ILogger, url_processor: URLProcessor
    ):
        """Store the collaborators and choose the compute device; nothing is loaded yet.

        Args:
            config: Object exposing ``node_mapping_path``, ``node_embeddings_path``,
                ``edge_index_path`` and ``model_state_path``.
            logger: Logger exposing ``info`` and ``error``.
            url_processor: Helper exposing ``get_folder_depth(url)``.
        """
        self.config = config
        self.logger = logger
        self.url_processor = url_processor
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.model = None
        self.node_embeddings = None
        self.url_to_idx = None
        self.idx_to_url = None
        # Set of (source_idx, dest_idx) pairs; used to skip links that already exist.
        self.existing_edges = None

    def load_artifacts(self):
        """Load the trained model, embeddings and mappings into memory.

        The instance is only updated after *every* artifact has loaded, so a
        failure part-way through never leaves a half-initialised engine that a
        later call would mistake for a loaded one.

        Returns:
            True when the artifacts are available (already loaded or loaded
            now), False when any of them is missing or cannot be read. Errors
            are logged, not raised.
        """
        if self.model is not None:
            self.logger.info("Artifacts already loaded.")
            return True

        self.logger.info("Loading trained artifacts for recommendations...")
        try:
            # Fail early with a clear message when the artifact directory is
            # absent. An empty dirname means "current directory" and is fine.
            model_dir = os.path.dirname(self.config.node_mapping_path)
            if model_dir and not os.path.exists(model_dir):
                raise FileNotFoundError(f"Model directory not found: {model_dir}")

            with open(self.config.node_mapping_path, "r", encoding="utf-8") as f:
                model_metadata = json.load(f)

            url_to_idx = model_metadata["url_to_idx"]
            in_channels = model_metadata["in_channels"]
            hidden_channels = model_metadata["hidden_channels"]
            out_channels = model_metadata["out_channels"]
            idx_to_url = {idx: url for url, idx in url_to_idx.items()}

            # SECURITY NOTE: torch.load unpickles the file contents. Only point
            # the config at artifacts produced by a trusted training run.
            node_embeddings = torch.load(
                self.config.node_embeddings_path, map_location=self.device
            ).to(self.device)
            edge_index = torch.load(
                self.config.edge_index_path, map_location=self.device
            )
            existing_edges = set(zip(edge_index[0].tolist(), edge_index[1].tolist()))

            model = GraphSAGEModel(in_channels, hidden_channels, out_channels)
            model.load_state_dict(
                torch.load(self.config.model_state_path, map_location=self.device)
            )
            model.to(self.device)
            model.eval()
        except FileNotFoundError as fnf_e:
            self.logger.error(
                f"Could not find trained model artifacts. Please run the training pipeline first. Error: {fnf_e}"
            )
            print(f"DEBUG: FileNotFoundError during artifact loading: {fnf_e}")
            return False
        except Exception as e:
            # Reported rather than re-raised so the Gradio callback can show a
            # message instead of crashing.
            self.logger.error(f"An error occurred while loading artifacts: {e}")
            print(f"DEBUG: General Exception during artifact loading: {e}")
            return False

        # Everything loaded: publish the state in one go. `self.model` is the
        # "loaded" marker checked at the top of this method, so it goes last.
        self.url_to_idx = url_to_idx
        self.idx_to_url = idx_to_url
        self.node_embeddings = node_embeddings
        self.existing_edges = existing_edges
        self.model = model

        self.logger.info("Artifacts loaded successfully.")
        return True

    def get_recommendations(
        self,
        source_url: str,
        top_n: int = 20,
        min_folder_depth: int = 0,
        max_folder_depth: int = 10,
        folder_path_filter: str = None,
    ):
        """Score every node as a link target for *source_url* and return the best ones.

        Args:
            source_url: URL that must be present in the loaded ``url_to_idx`` mapping.
            top_n: Maximum number of rows to return.
            min_folder_depth: Lowest folder depth (inclusive) a target may have.
            max_folder_depth: Highest folder depth (inclusive) a target may have.
            folder_path_filter: When given, keep only targets whose URL starts
                with this prefix.

        Returns:
            A ``(dataframe, message)`` pair:
            * ``(None, error_text)`` when artifacts cannot be loaded or the
              source URL is unknown;
            * ``(empty DataFrame, explanation)`` when nothing survives the filters;
            * ``(DataFrame[RECOMMENDED_URL, SCORE, FOLDER_DEPTH], None)``
              sorted by descending score otherwise.
        """
        if not self.load_artifacts():
            return (
                None,
                "Error: Trained model artifacts not found. Please run the training pipeline first.",
            )
        if source_url not in self.url_to_idx:
            return (
                None,
                f"Error: Source URL '{source_url}' not found in the graph's training data.",
            )

        source_idx = self.url_to_idx[source_url]
        num_nodes = len(self.url_to_idx)

        # 1. Build the candidate edges (source -> every node index) and score them.
        candidate_dest_indices = torch.arange(num_nodes, device=self.device)
        candidate_source_indices = torch.full_like(
            candidate_dest_indices, fill_value=source_idx
        )
        candidate_edge_index = torch.stack(
            [candidate_source_indices, candidate_dest_indices]
        )

        with torch.no_grad():
            scores = self.model.predict_link(self.node_embeddings, candidate_edge_index)

        # 2. One row per candidate, with the score passed through a sigmoid.
        candidates = pd.DataFrame(
            {
                "DEST_IDX": candidate_dest_indices.cpu().numpy(),
                "SCORE": torch.sigmoid(scores).cpu().numpy(),
            }
        )

        # 3. Attach the URL and its folder depth; indices missing from the
        #    mapping become NaN and are dropped.
        candidates["RECOMMENDED_URL"] = candidates["DEST_IDX"].map(self.idx_to_url)
        candidates.dropna(subset=["RECOMMENDED_URL"], inplace=True)
        candidates["FOLDER_DEPTH"] = candidates["RECOMMENDED_URL"].map(
            self.url_processor.get_folder_depth
        )

        # 4. Build one boolean mask for all criteria instead of adding helper
        #    columns to a filtered slice (which triggers pandas'
        #    chained-assignment warning).
        dest_idx = candidates["DEST_IDX"]
        depth = candidates["FOLDER_DEPTH"]
        already_linked = pd.Series(
            [(source_idx, dest) in self.existing_edges for dest in dest_idx],
            index=candidates.index,
            dtype=bool,
        )
        keep = (
            (dest_idx != source_idx)  # no self-links
            & ~already_linked  # no links that already exist
            & (depth >= min_folder_depth)
            & (depth <= max_folder_depth)
        )
        if folder_path_filter:
            self.logger.info(f"Applying folder path filter: {folder_path_filter}")
            keep &= candidates["RECOMMENDED_URL"].str.startswith(folder_path_filter)

        # 5. Rank what is left and keep the top N. The copy detaches the result
        #    from `candidates` so callers may modify it freely.
        final_recommendations_df = (
            candidates[keep]
            .sort_values(by="SCORE", ascending=False)
            .head(top_n)[["RECOMMENDED_URL", "SCORE", "FOLDER_DEPTH"]]
            .copy()
        )

        if final_recommendations_df.empty:
            return (
                pd.DataFrame(),  # Return empty DataFrame for consistency
                "No recommendations found matching the criteria (filters, existing links, etc.). Try adjusting filters or source URL.",
            )

        return final_recommendations_df, None


# --- Gradio Application Logic ---

# Instantiate the application-wide collaborators. Each class name resolves to
# the real WebKnoGraph implementation when the imports above succeeded, and to
# the dummy class defined in the ImportError branch otherwise.
logger = ILogger()
url_processor = URLProcessor()
config = LinkPredictionConfig()
# A single engine is shared by all Gradio requests; it loads its artifacts
# lazily on first use.
recommendation_engine = RecommendationEngine(config, logger, url_processor)


# Column order of the results table shown in the UI and written to the output CSV.
_RESULT_COLUMNS = [
    "NEW_FROM",
    "NEW_FROM_DEPTH",
    "NEW_TO",
    "NEW_TO_DEPTH",
    "Candidate Score",
    "Status",
]


def _status_only_frame(message: str) -> pd.DataFrame:
    """Build the one-row results table that carries nothing but a status message."""
    return pd.DataFrame([["", "", "", "", "", message]], columns=_RESULT_COLUMNS)


def _save_frame_to_temp_csv(frame: pd.DataFrame, suffix: str) -> str:
    """Write *frame* to a new temporary CSV file (kept on disk) and return its path."""
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=suffix, encoding="utf-8"
    ) as handle:
        path = handle.name
    frame.to_csv(path, index=False)
    return path


def process_csv_for_recommendations(csv_file, min_depth: int, max_depth: int):
    """Gradio callback: recommend one new source URL for every ``NEW_TO`` in a CSV.

    For each input row the engine is asked for the top 50 candidates of the
    row's ``NEW_TO`` URL, and one of them is drawn at random with probability
    proportional to its score (scores are floored at 0.001).

    Args:
        csv_file: The upload from ``gr.File``. A path string (what
            ``type="filepath"`` delivers) or an object with a ``.name`` path
            attribute are both accepted; ``None`` means nothing was uploaded.
        min_depth: Minimum folder depth for candidates (``None`` -> 0).
        max_depth: Maximum folder depth for candidates (``None`` -> 10).

    Returns:
        A 12-tuple matching the ``outputs`` wired to the submit button:
        ``(results_df, recommendations_csv, original_graph_csv,
        upgraded_graph_csv, md_total_rows, md_successful, md_initial_nodes,
        md_final_nodes, md_new_nodes, md_original_edges, md_upgraded_edges,
        md_edge_difference)``. On any failure the three paths are ``None`` and
        the table holds a single row whose ``Status`` explains the problem;
        exceptions are reported that way rather than raised.
    """
    # Markdown summaries. The insertion order here is the order of the last
    # eight outputs; entries are overwritten as the real numbers become known,
    # so an error return reports whatever was established up to that point.
    stats = {
        "total_rows": "Total Rows Processed: **0**",
        "successful": "Successful Recommendations: **0**",
        "initial_nodes": "Initial Graph Nodes: **0**",
        "final_nodes": "Final Graph Nodes (after processing input): **0**",
        "new_nodes": "New Nodes Added: **0**",
        "original_edges": "Original Graph Edges: **0**",
        "upgraded_edges": "Upgraded Graph Edges: **0**",
        "edge_difference": "Difference in Edges (New - Original): **0**",
    }

    def _failure(message: str):
        # Status-only table, no downloadable files, current summaries.
        return (_status_only_frame(message), None, None, None, *stats.values())

    if csv_file is None:
        return _failure("Please upload a CSV file.")

    try:
        # `type="filepath"` uploads arrive as a plain string; older/other
        # configurations hand over a tempfile-like object exposing `.name`.
        if isinstance(csv_file, (str, os.PathLike)):
            csv_path = csv_file
        else:
            csv_path = csv_file.name

        # A cleared gr.Number field arrives as None; fall back to the UI defaults.
        min_depth = 0 if min_depth is None else int(min_depth)
        max_depth = 10 if max_depth is None else int(max_depth)

        # Load artifacts once up front so the initial graph size can be reported.
        if not recommendation_engine.load_artifacts():
            return _failure(
                "Error: Model artifacts could not be loaded. Check paths and files."
            )

        initial_graph_nodes = len(recommendation_engine.url_to_idx)
        stats["initial_nodes"] = f"Initial Graph Nodes: **{initial_graph_nodes}**"

        df_input = pd.read_csv(csv_path)
        stats["total_rows"] = f"Total Rows Processed: **{len(df_input)}**"

        required_cols = ["NEW_FROM", "NEW_FROM_DEPTH", "NEW_TO", "NEW_TO_DEPTH"]
        missing_cols = [col for col in required_cols if col not in df_input.columns]
        if missing_cols:
            return _failure(f"Error: Missing columns: {', '.join(missing_cols)}.")

        # --- Original graph edges, read from config.edge_csv_path ---
        try:
            df_original_edges = pd.read_csv(config.edge_csv_path)
        except FileNotFoundError:
            return _failure(
                f"Error: Original graph CSV not found at {config.edge_csv_path}. Please check the path in LinkPredictionConfig."
            )
        except Exception as e:
            return _failure(f"Error loading original graph CSV: {e}")

        if (
            "FROM" not in df_original_edges.columns
            or "TO" not in df_original_edges.columns
        ):
            return _failure(
                f"Error loading original graph CSV: Original graph CSV '{config.edge_csv_path}' must contain 'FROM' and 'TO' columns."
            )

        original_edges_count = len(df_original_edges)
        stats["original_edges"] = f"Original Graph Edges: **{original_edges_count}**"
        original_graph_csv_path = _save_frame_to_temp_csv(
            df_original_edges, "_original_graph.csv"
        )

        # --- Process the input rows ---
        results = []
        new_recommended_edges_data = []  # new links for the upgraded graph
        successful_recommendations_count = 0
        # The candidate list depends only on the URL and the depth bounds, so
        # it is computed once per distinct NEW_TO; the random draw below is
        # still made independently for every row.
        recommendation_cache = {}

        for _, row in df_input.iterrows():
            source_url_for_recommendation = row["NEW_TO"]

            candidate_url = pd.NA
            candidate_score = pd.NA
            new_from_candidate_depth = pd.NA

            if source_url_for_recommendation not in recommendation_cache:
                recommendation_cache[source_url_for_recommendation] = (
                    recommendation_engine.get_recommendations(
                        source_url=source_url_for_recommendation,
                        top_n=50,
                        min_folder_depth=min_depth,
                        max_folder_depth=max_depth,
                    )
                )
            recommendations_df, error_msg = recommendation_cache[
                source_url_for_recommendation
            ]

            if recommendations_df is None:
                # Engine messages already start with "Error:"; avoid doubling it.
                if str(error_msg).startswith("Error"):
                    status = error_msg
                else:
                    status = f"Error: {error_msg}"
            elif recommendations_df.empty:
                status = "No recommendations found by model"
            else:
                # Floor the scores so every candidate keeps a non-zero weight.
                # assign() builds a new frame, leaving the cached one untouched.
                weighted = recommendations_df.assign(
                    SCORE=recommendations_df["SCORE"].clip(lower=0.001)
                )
                if weighted["SCORE"].sum() > 0:
                    selected_row = weighted.sample(n=1, weights="SCORE")
                    candidate_url = selected_row["RECOMMENDED_URL"].iloc[0]
                    candidate_score = selected_row["SCORE"].iloc[0]
                    new_from_candidate_depth = url_processor.get_folder_depth(
                        candidate_url
                    )
                    status = "Success (weighted random)"
                    successful_recommendations_count += 1
                    new_recommended_edges_data.append(
                        {"FROM": source_url_for_recommendation, "TO": candidate_url}
                    )
                else:
                    # Only reachable when every score is NaN.
                    status = "No valid scores for weighted random selection"

            results.append(
                {
                    "NEW_FROM": candidate_url,
                    "NEW_FROM_DEPTH": new_from_candidate_depth,
                    "NEW_TO": row["NEW_TO"],
                    "NEW_TO_DEPTH": row["NEW_TO_DEPTH"],
                    "Candidate Score": candidate_score,
                    "Status": status,
                }
            )

        # Passing the columns keeps the headers even when the input had no rows.
        df_output_display = pd.DataFrame(results, columns=_RESULT_COLUMNS)
        stats["successful"] = (
            f"Successful Recommendations: **{successful_recommendations_count}**"
        )

        # --- Upgraded graph = original edges + newly recommended edges ---
        if new_recommended_edges_data:
            df_upgraded_edges = pd.concat(
                [df_original_edges, pd.DataFrame(new_recommended_edges_data)]
            )
        else:
            df_upgraded_edges = df_original_edges
        df_upgraded_edges = df_upgraded_edges.drop_duplicates().reset_index(drop=True)

        upgraded_edges_count = len(df_upgraded_edges)
        stats["upgraded_edges"] = f"Upgraded Graph Edges: **{upgraded_edges_count}**"
        # drop_duplicates() also removes duplicates that were already in the
        # original CSV, so this difference is not guaranteed to be >= 0.
        edge_difference = upgraded_edges_count - original_edges_count
        stats["edge_difference"] = (
            f"Difference in Edges (New - Original): **{edge_difference}**"
        )
        upgraded_graph_csv_path = _save_frame_to_temp_csv(
            df_upgraded_edges, "_upgraded_graph.csv"
        )

        # --- Node counts ---
        all_nodes_in_upgraded_graph = set(df_upgraded_edges["FROM"].dropna()) | set(
            df_upgraded_edges["TO"].dropna()
        )
        final_graph_nodes = len(all_nodes_in_upgraded_graph)
        stats["final_nodes"] = (
            f"Final Graph Nodes (after processing input): **{final_graph_nodes}**"
        )
        # NOTE(review): compares URLs seen in the edge CSV with the size of the
        # model's URL mapping; the two come from different files -- confirm
        # they are expected to describe the same node set.
        new_nodes_added = final_graph_nodes - initial_graph_nodes
        stats["new_nodes"] = f"New Nodes Added: **{new_nodes_added}**"

        display_output_csv_path = _save_frame_to_temp_csv(
            df_output_display, "_recommendations_output.csv"
        )

        return (
            df_output_display,
            display_output_csv_path,  # primary recommendations output
            original_graph_csv_path,  # original graph edges
            upgraded_graph_csv_path,  # upgraded graph edges
            *stats.values(),
        )

    except Exception as e:
        # Last-resort boundary: show the problem in the UI instead of crashing
        # the Gradio worker.
        return _failure(f"An unexpected error occurred: {e}")


# --- Gradio Interface Definition ---
def _reveal_downloads(rec_csv, orig_csv, upg_csv):
    """Show each download widget only when the processing step produced a file for it.

    Each argument is the current value of one download component (a path, or
    ``None`` when processing failed or nothing was uploaded). Returns one
    ``gr.File`` update per component, in the same order.
    """
    return tuple(
        gr.File(visible=bool(path), value=path)
        for path in (rec_csv, orig_csv, upg_csv)
    )


with gr.Blocks(title="WebKnoGraph Link Recommender") as demo:
    gr.Markdown(
        """
        # WebKnoGraph Link Recommender
        Upload a CSV file with columns: `NEW_FROM`, `NEW_FROM_DEPTH`, `NEW_TO`, `NEW_TO_DEPTH`.
        The system will suggest a new candidate URL for each `NEW_TO` URL,
        storing the result in the `NEW_FROM` column.
        The selection uses a weighted random approach from the model's top recommendations.
        """
    )

    # Inputs: the CSV upload, the trigger button and the folder-depth bounds.
    with gr.Row():
        csv_input = gr.File(
            label="Upload CSV File", type="filepath", file_types=[".csv"]
        )
        submit_button = gr.Button("Generate Recommendations")

    with gr.Row():
        min_depth_input = gr.Number(label="Minimum Folder Depth", value=0, precision=0)
        max_depth_input = gr.Number(label="Maximum Folder Depth", value=10, precision=0)

    # Summary counters, filled in by process_csv_for_recommendations.
    with gr.Row():
        total_rows_output = gr.Markdown("Total Rows Processed: **0**")
        successful_recs_output = gr.Markdown("Successful Recommendations: **0**")
    with gr.Row():  # graph node counts
        initial_graph_nodes_output = gr.Markdown("Initial Graph Nodes: **0**")
        final_graph_nodes_output = gr.Markdown(
            "Final Graph Nodes (after processing input): **0**"
        )
        new_nodes_added_output = gr.Markdown("New Nodes Added: **0**")
    with gr.Row():  # edge counts
        original_edges_output = gr.Markdown("Original Graph Edges: **0**")
        upgraded_edges_output = gr.Markdown("Upgraded Graph Edges: **0**")
        edge_difference_output = gr.Markdown(
            "Difference in Edges (New - Original): **0**"
        )

    output_dataframe = gr.DataFrame(
        headers=[
            "NEW_FROM",
            "NEW_FROM_DEPTH",
            "NEW_TO",
            "NEW_TO_DEPTH",
            "Candidate Score",
            "Status",
        ],
        row_count=0,
        col_count=6,
        wrap=True,
        interactive=False,
        label="Recommendation Results",
    )

    # Download widgets start hidden; they are revealed after a successful run.
    download_recommendations_csv = gr.File(
        label="Download Recommendations Output CSV",
        type="filepath",
        file_types=[".csv"],
        visible=False,
    )
    download_original_graph_csv = gr.File(
        label="Download Original Graph Edges CSV",
        type="filepath",
        file_types=[".csv"],
        visible=False,
    )
    download_upgraded_graph_csv = gr.File(
        label="Download Upgraded Graph Edges CSV",
        type="filepath",
        file_types=[".csv"],
        visible=False,
    )

    download_components = [
        download_recommendations_csv,
        download_original_graph_csv,
        download_upgraded_graph_csv,
    ]

    submit_button.click(
        fn=process_csv_for_recommendations,
        inputs=[csv_input, min_depth_input, max_depth_input],
        # Order must match the 12-tuple returned by process_csv_for_recommendations.
        outputs=[
            output_dataframe,
            *download_components,
            total_rows_output,
            successful_recs_output,
            initial_graph_nodes_output,
            final_graph_nodes_output,
            new_nodes_added_output,
            original_edges_output,
            upgraded_edges_output,
            edge_difference_output,
        ],
        api_name="process_csv",
    ).then(
        # Second step: toggle visibility from the paths the first step stored in
        # the download components. Only those three values are needed.
        fn=_reveal_downloads,
        inputs=download_components,
        outputs=download_components,
    )

# Start the Gradio server only when this file runs as the main module, not
# when it is imported.
if __name__ == "__main__":
    demo.launch()