In [1]:
import os
import pandas as pd
import os
import pandas as pd
import numpy as np
from scipy import stats


# Set1 model large dataset RNA binders vs Protein binders

In [2]:
# df_gated = pd.read_csv('set1_ml_models/gatedgraphconv/test_results_GatedGraphConv.csv')
# df_gatv2 = pd.read_csv('set1_ml_models/gatv2conv/test_results_GATv2Conv.csv')
# df_sage = pd.read_csv('set1_ml_models/sageconv/test_results_SageConv.csv')


In [3]:
# Define the base directory where CSV files are located
base_dir = 'set1_ml_models'

# Models and the associated directories
models = {
    'GatedGraphConv': 'gatedgraphconv',
    'GATv2Conv': 'gatv2conv',
    'SageConv': 'sageconv'
}

# Prepare to collect all DataFrame objects
dataframes = []

# Iterate over each model and their associated directory
for model, dir_name in models.items():
    # Construct the path to the CSV file
    csv_path = os.path.join(base_dir, dir_name, f'test_results_{model}.csv')
    
    # Check if the file exists
    if os.path.exists(csv_path):
        # Load the CSV file
        df = pd.read_csv(csv_path)
        # Add a column for the model
        df['Model'] = model
        # Append the DataFrame to the list
        dataframes.append(df)
    else:
        print(f"File not found: {csv_path}")

# Concatenate all DataFrames into a single DataFrame
all_data = pd.concat(dataframes, ignore_index=True)
# rename col F1-score to F1 score
all_data.rename(columns={'F1-Score': 'F1 Score'}, inplace=True)
all_data.rename(columns={'ROC-AUC': 'ROC AUC'}, inplace=True)
# Display the combined DataFrame
all_data

Unnamed: 0,Run ID,Test Loss,Accuracy,Precision,Recall,F1 Score,ROC AUC,Model
0,3,0.293396,0.873934,0.874418,0.873289,0.873853,0.873934,GatedGraphConv
1,7,0.328256,0.857401,0.865136,0.84681,0.855875,0.857401,GatedGraphConv
2,1,0.318745,0.860049,0.865191,0.85301,0.859057,0.860049,GatedGraphConv
3,5,0.291239,0.876195,0.854362,0.907001,0.879895,0.876195,GatedGraphConv
4,4,0.285744,0.878714,0.854449,0.912942,0.882728,0.878714,GatedGraphConv
5,9,0.283506,0.881361,0.887416,0.873547,0.880427,0.881361,GatedGraphConv
6,2,0.3084,0.872255,0.88581,0.854689,0.869971,0.872255,GatedGraphConv
7,10,0.30482,0.877874,0.889911,0.862439,0.875959,0.877874,GatedGraphConv
8,8,0.300888,0.866249,0.890834,0.834797,0.861906,0.866249,GatedGraphConv
9,6,0.271804,0.888724,0.884601,0.894084,0.889317,0.888724,GatedGraphConv


In [4]:
# Assuming 'accuracy' is the column name that contains accuracy scores
# Find the row with the maximum accuracy
best_model_row = all_data.loc[all_data['Accuracy'].idxmax()]

# Extract information about the best model
best_accuracy = best_model_row['Accuracy']
best_model_run_id = best_model_row['Run ID']
model = best_model_row['Model']

print(f"Set1: Best {model} with the highest accuracy is from run {best_model_run_id} with an accuracy of {best_accuracy}.")


Set1: Best GatedGraphConv with the highest accuracy is from run 6 with an accuracy of 0.8887238439679669.


In [5]:
# Models and metrics to analyze
models = ['GatedGraphConv', 'GATv2Conv', 'SageConv']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']

results = []

# Calculate metrics for each model
for model in models:
    model_data = all_data[all_data['Model'] == model]
    result_row = {'Model': model}
    for metric in metrics:
        mean = model_data[metric].mean() * 100  # Convert to percentage
        std = model_data[metric].std() * 100  # Convert to percentage
        # Format as mean ± std%
        result_row[metric] = f"{mean:.2f} ± {std:.2f}%"
    results.append(result_row)

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Identify the best scores and bold them
for metric in metrics:
    best_score = max(results_df[metric], key=lambda x: float(x.split(' ± ')[0].replace('%','')))
    results_df[metric] = results_df[metric].apply(
        lambda x: f"\\textbf{{{x}}}" if x == best_score else x
    )

# Prepare the DataFrame to LaTeX table contents without adding LaTeX table commands
# Escape LaTeX special characters
results_df.replace('%', r'\%', regex=True, inplace=True)
table_contents = results_df.to_latex(index=False, escape=False, header=False)

# Strip unwanted LaTeX commands from the table contents
table_contents = table_contents.replace("\\begin{tabular}{llllll}", "") \
                               .replace("\\toprule", "") \
                               .replace("\\midrule", "") \
                               .replace("\\bottomrule", "") \
                               .replace("\\end{tabular}", "")\
                               .replace("±", "$\pm$")

# Manually construct the LaTeX table using tabularx to fit the width of the text
latex_table = (
    "\\begin{table}[ht]\n"
    "\\centering\n"
    "\\caption{RNA Binders vs Protein Binders (Merged Dataset)}\n"
    "\\label{tab:model_performance}\n"
    "\\scriptsize\n"
    "\\begin{tabularx}{\\textwidth}{Xccccc}\n"  # Changed 'l' to 'X' for the first column
    "\\toprule\n"
    "Model & " + " & ".join(metrics) + " \\\\\n"
    "\\midrule\n"
    + table_contents.strip() +  # Remove leading/trailing whitespace
    "\\bottomrule\n"
    "\\end{tabularx}\n"
    "\\end{table}\n"
)

print(latex_table)


\begin{table}[ht]
\centering
\caption{RNA Binders vs Protein Binders (Merged Dataset)}
\label{tab:model_performance}
\scriptsize
\begin{tabularx}{\textwidth}{Xccccc}
\toprule
Model & Accuracy & Precision & Recall & F1 Score & ROC AUC \\
\midrule
GatedGraphConv & \textbf{87.33 $\pm$ 0.97\%} & \textbf{87.52 $\pm$ 1.44\%} & 87.13 $\pm$ 2.62\% & \textbf{87.29 $\pm$ 1.10\%} & \textbf{87.33 $\pm$ 0.97\%} \\
GATv2Conv & 82.98 $\pm$ 1.88\% & 82.18 $\pm$ 2.72\% & 84.34 $\pm$ 1.29\% & 83.23 $\pm$ 1.65\% & 82.98 $\pm$ 1.88\% \\
SageConv & 86.53 $\pm$ 0.64\% & 85.96 $\pm$ 1.40\% & \textbf{87.37 $\pm$ 1.56\%} & 86.64 $\pm$ 0.62\% & 86.53 $\pm$ 0.64\% \\\bottomrule
\end{tabularx}
\end{table}



# Set2 model small datasets model1 RNA binders RNA nonbinders (ROBIN)

In [6]:
# Define the base directory where CSV files are located
base_dir = 'set2_ml_models/model1_rna_b_nb'

# Models and the associated directories
models = {
    'GatedGraphConv': 'gatedgraphconv',
    'GATv2Conv': 'gatv2conv',
    'SageConv': 'sageconv'
}

# Prepare to collect all DataFrame objects
dataframes = []

# Iterate over each model and their associated directory
for model, dir_name in models.items():
    # Construct the path to the CSV file
    csv_path = os.path.join(base_dir, dir_name, f'test_results_{model}.csv')
    
    # Check if the file exists
    if os.path.exists(csv_path):
        # Load the CSV file
        df = pd.read_csv(csv_path)
        # Add a column for the model
        df['Model'] = model
        # Append the DataFrame to the list
        dataframes.append(df)
    else:
        print(f"File not found: {csv_path}")

# Concatenate all DataFrames into a single DataFrame
all_data = pd.concat(dataframes, ignore_index=True)
# rename col F1-score to F1 score
all_data.rename(columns={'F1-Score': 'F1 Score'}, inplace=True)
all_data.rename(columns={'ROC-AUC': 'ROC AUC'}, inplace=True)
# Display the combined DataFrame
all_data

Unnamed: 0,Run ID,Test Loss,Accuracy,Precision,Recall,F1 Score,ROC AUC,Model
0,10,0.676465,0.562025,0.556064,0.61519,0.584135,0.562025,GatedGraphConv
1,1,0.653418,0.608861,0.618132,0.56962,0.592885,0.608861,GatedGraphConv
2,2,0.677116,0.589873,0.585956,0.612658,0.59901,0.589873,GatedGraphConv
3,3,0.679679,0.611392,0.65942,0.460759,0.542474,0.611392,GatedGraphConv
4,4,0.67571,0.549367,0.549872,0.544304,0.547074,0.549367,GatedGraphConv
5,5,0.659762,0.613924,0.671756,0.44557,0.535769,0.613924,GatedGraphConv
6,6,0.655806,0.6,0.608219,0.562025,0.584211,0.6,GatedGraphConv
7,7,0.657789,0.616456,0.616751,0.61519,0.61597,0.616456,GatedGraphConv
8,8,0.664012,0.598734,0.598485,0.6,0.599241,0.598734,GatedGraphConv
9,9,0.660796,0.597468,0.615616,0.518987,0.563187,0.597468,GatedGraphConv


In [7]:
# Assuming 'accuracy' is the column name that contains accuracy scores
# Find the row with the maximum accuracy
best_model_row = all_data.loc[all_data['Accuracy'].idxmax()]

# Extract information about the best model
best_accuracy = best_model_row['Accuracy']
best_model_run_id = best_model_row['Run ID']
model = best_model_row['Model']

print(f"Set1: Best {model} with the highest accuracy is from run {best_model_run_id} with an accuracy of {best_accuracy}.")


Set1: Best SageConv with the highest accuracy is from run 1 with an accuracy of 0.6329113924050633.


In [8]:
# Models and metrics to analyze
models = ['GatedGraphConv', 'GATv2Conv', 'SageConv']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']

results = []

# Calculate metrics for each model
for model in models:
    model_data = all_data[all_data['Model'] == model]
    result_row = {'Model': model}
    for metric in metrics:
        mean = model_data[metric].mean() * 100  # Convert to percentage
        std = model_data[metric].std() * 100  # Convert to percentage
        # Format as mean ± std%
        result_row[metric] = f"{mean:.2f} ± {std:.2f}%"
    results.append(result_row)

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Identify the best scores and bold them
for metric in metrics:
    best_score = max(results_df[metric], key=lambda x: float(x.split(' ± ')[0].replace('%','')))
    results_df[metric] = results_df[metric].apply(
        lambda x: f"\\textbf{{{x}}}" if x == best_score else x
    )

# Prepare the DataFrame to LaTeX table contents without adding LaTeX table commands
# Escape LaTeX special characters
results_df.replace('%', r'\%', regex=True, inplace=True)
table_contents = results_df.to_latex(index=False, escape=False, header=False)

# Strip unwanted LaTeX commands from the table contents
table_contents = table_contents.replace("\\begin{tabular}{llllll}", "") \
                               .replace("\\toprule", "") \
                               .replace("\\midrule", "") \
                               .replace("\\bottomrule", "") \
                               .replace("\\end{tabular}", "")\
                               .replace("±", "$\pm$")

# Manually construct the LaTeX table using tabularx to fit the width of the text
latex_table = (
    "\\begin{table}[ht]\n"
    "\\centering\n"
    "\\caption{RNA Binders vs RNA Non-Binders (ROBIN)}\n"
    "\\label{tab:model_performance}\n"
    "\\scriptsize\n"
    "\\begin{tabularx}{\\textwidth}{Xccccc}\n"  # Changed 'l' to 'X' for the first column
    "\\toprule\n"
    "Model & " + " & ".join(metrics) + " \\\\\n"
    "\\midrule\n"
    + table_contents.strip() +  # Remove leading/trailing whitespace
    "\\bottomrule\n"
    "\\end{tabularx}\n"
    "\\end{table}\n"
)

print(latex_table)


\begin{table}[ht]
\centering
\caption{RNA Binders vs RNA Non-Binders (ROBIN)}
\label{tab:model_performance}
\scriptsize
\begin{tabularx}{\textwidth}{Xccccc}
\toprule
Model & Accuracy & Precision & Recall & F1 Score & ROC AUC \\
\midrule
GatedGraphConv & 59.48 $\pm$ 2.24\% & 60.80 $\pm$ 3.89\% & 55.44 $\pm$ 6.25\% & 57.64 $\pm$ 2.75\% & 59.48 $\pm$ 2.24\% \\
GATv2Conv & 59.49 $\pm$ 1.54\% & 60.05 $\pm$ 2.68\% & \textbf{58.03 $\pm$ 6.22\%} & \textbf{58.76 $\pm$ 2.58\%} & 59.49 $\pm$ 1.54\% \\
SageConv & \textbf{60.48 $\pm$ 1.59\%} & \textbf{63.15 $\pm$ 3.58\%} & 52.78 $\pm$ 10.92\% & 56.62 $\pm$ 5.75\% & \textbf{60.48 $\pm$ 1.59\%} \\\bottomrule
\end{tabularx}
\end{table}



# Set2 model small dataset model2 RNA binder (ROBIN) vs Protein Binder (Probes & Drugs)

In [9]:
# Define the base directory where CSV files are located
base_dir = 'set2_ml_models/model2_rna_b_prot_b'

# Models and the associated directories
models = {
    'GatedGraphConv': 'gatedgraphconv',
    'GATv2Conv': 'gatv2conv',
    'SageConv': 'sageconv'
}

# Prepare to collect all DataFrame objects
dataframes = []

# Iterate over each model and their associated directory
for model, dir_name in models.items():
    # Construct the path to the CSV file
    csv_path = os.path.join(base_dir, dir_name, f'test_results_{model}.csv')
    
    # Check if the file exists
    if os.path.exists(csv_path):
        # Load the CSV file
        df = pd.read_csv(csv_path)
        # Add a column for the model
        df['Model'] = model
        # Append the DataFrame to the list
        dataframes.append(df)
    else:
        print(f"File not found: {csv_path}")

# Concatenate all DataFrames into a single DataFrame
all_data = pd.concat(dataframes, ignore_index=True)
# rename col F1-score to F1 score
all_data.rename(columns={'F1-Score': 'F1 Score'}, inplace=True)
all_data.rename(columns={'ROC-AUC': 'ROC AUC'}, inplace=True)
# Display the combined DataFrame
all_data

Unnamed: 0,Run ID,Test Loss,Accuracy,Precision,Recall,F1 Score,ROC AUC,Model
0,10,0.283463,0.89507,0.924119,0.861111,0.891503,0.895113,GatedGraphConv
1,1,0.308676,0.883692,0.893782,0.871212,0.882353,0.883707,GatedGraphConv
2,2,0.319472,0.855879,0.872679,0.832911,0.852332,0.85585,GatedGraphConv
3,3,0.329741,0.87737,0.884021,0.868354,0.876117,0.877359,GatedGraphConv
4,4,0.363913,0.874842,0.925501,0.815657,0.867114,0.874917,GatedGraphConv
5,5,0.325881,0.89507,0.869976,0.929293,0.898657,0.895026,GatedGraphConv
6,6,0.30889,0.896334,0.924119,0.863291,0.89267,0.896292,GatedGraphConv
7,7,0.354651,0.863464,0.828375,0.916456,0.870192,0.863531,GatedGraphConv
8,8,0.274529,0.884956,0.876238,0.896203,0.886108,0.88497,GatedGraphConv
9,9,0.341584,0.854614,0.836538,0.881013,0.8582,0.854648,GatedGraphConv


In [10]:
# Assuming 'accuracy' is the column name that contains accuracy scores
# Find the row with the maximum accuracy
best_model_row = all_data.loc[all_data['Accuracy'].idxmax()]

# Extract information about the best model
best_accuracy = best_model_row['Accuracy']
best_model_run_id = best_model_row['Run ID']
model = best_model_row['Model']

print(f"Set1: Best {model} with the highest accuracy is from run {best_model_run_id} with an accuracy of {best_accuracy}.")


Set1: Best SageConv with the highest accuracy is from run 3 with an accuracy of 0.9039190897597976.


In [11]:
# Models and metrics to analyze
models = ['GatedGraphConv', 'GATv2Conv', 'SageConv']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']

results = []

# Calculate metrics for each model
for model in models:
    model_data = all_data[all_data['Model'] == model]
    result_row = {'Model': model}
    for metric in metrics:
        mean = model_data[metric].mean() * 100  # Convert to percentage
        std = model_data[metric].std() * 100  # Convert to percentage
        # Format as mean ± std%
        result_row[metric] = f"{mean:.2f} ± {std:.2f}%"
    results.append(result_row)

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Identify the best scores and bold them
for metric in metrics:
    best_score = max(results_df[metric], key=lambda x: float(x.split(' ± ')[0].replace('%','')))
    results_df[metric] = results_df[metric].apply(
        lambda x: f"\\textbf{{{x}}}" if x == best_score else x
    )

# Prepare the DataFrame to LaTeX table contents without adding LaTeX table commands
# Escape LaTeX special characters
results_df.replace('%', r'\%', regex=True, inplace=True)
table_contents = results_df.to_latex(index=False, escape=False, header=False)

# Strip unwanted LaTeX commands from the table contents
table_contents = table_contents.replace("\\begin{tabular}{llllll}", "") \
                               .replace("\\toprule", "") \
                               .replace("\\midrule", "") \
                               .replace("\\bottomrule", "") \
                               .replace("\\end{tabular}", "")\
                               .replace("±", "$\pm$")

# Manually construct the LaTeX table using tabularx to fit the width of the text
latex_table = (
    "\\begin{table}[ht]\n"
    "\\centering\n"
    "\\caption{RNA Binders vs Protein Binders (ROBIN vs P\&D)}\n"
    "\\label{tab:model_performance}\n"
    "\\scriptsize\n"
    "\\begin{tabularx}{\\textwidth}{Xccccc}\n"  # Changed 'l' to 'X' for the first column
    "\\toprule\n"
    "Model & " + " & ".join(metrics) + " \\\\\n"
    "\\midrule\n"
    + table_contents.strip() +  # Remove leading/trailing whitespace
    "\\bottomrule\n"
    "\\end{tabularx}\n"
    "\\end{table}\n"
)

print(latex_table)


\begin{table}[ht]
\centering
\caption{RNA Binders vs Protein Binders (ROBIN vs P\&D)}
\label{tab:model_performance}
\scriptsize
\begin{tabularx}{\textwidth}{Xccccc}
\toprule
Model & Accuracy & Precision & Recall & F1 Score & ROC AUC \\
\midrule
GatedGraphConv & 87.81 $\pm$ 1.58\% & 88.35 $\pm$ 3.46\% & \textbf{87.36 $\pm$ 3.47\%} & 87.75 $\pm$ 1.54\% & 87.81 $\pm$ 1.58\% \\
GATv2Conv & 84.01 $\pm$ 4.22\% & 87.09 $\pm$ 4.12\% & 79.92 $\pm$ 7.17\% & 83.20 $\pm$ 4.89\% & 84.01 $\pm$ 4.22\% \\
SageConv & \textbf{88.03 $\pm$ 1.92\%} & \textbf{89.65 $\pm$ 2.63\%} & 86.12 $\pm$ 4.29\% & \textbf{87.76 $\pm$ 2.17\%} & \textbf{88.03 $\pm$ 1.93\%} \\\bottomrule
\end{tabularx}
\end{table}



# Set2 model small dataset model3 RNA Binder (ROBIN) vs Non-Binders (Merged small)

In [12]:
# Define the base directory where CSV files are located
base_dir = 'set2_ml_models/model3_binder_nonbinder'

# Models and the associated directories
models = {
    'GatedGraphConv': 'gatedgraphconv',
    'GATv2Conv': 'gatv2conv',
    'SageConv': 'sageconv'
}

# Prepare to collect all DataFrame objects
dataframes = []

# Iterate over each model and their associated directory
for model, dir_name in models.items():
    # Construct the path to the CSV file
    csv_path = os.path.join(base_dir, dir_name, f'test_results_{model}.csv')
    
    # Check if the file exists
    if os.path.exists(csv_path):
        # Load the CSV file
        df = pd.read_csv(csv_path)
        # Add a column for the model
        df['Model'] = model
        # Append the DataFrame to the list
        dataframes.append(df)
    else:
        print(f"File not found: {csv_path}")

# Concatenate all DataFrames into a single DataFrame
all_data = pd.concat(dataframes, ignore_index=True)
# rename col F1-score to F1 score
all_data.rename(columns={'F1-Score': 'F1 Score'}, inplace=True)
all_data.rename(columns={'ROC-AUC': 'ROC AUC'}, inplace=True)
# Display the combined DataFrame
all_data

Unnamed: 0,Run ID,Test Loss,Accuracy,Precision,Recall,F1 Score,ROC AUC,Model
0,10,0.515881,0.759494,0.730337,0.822785,0.77381,0.759494,GatedGraphConv
1,1,0.552205,0.702532,0.717391,0.668354,0.692005,0.702532,GatedGraphConv
2,2,0.534379,0.721519,0.697517,0.782278,0.73747,0.721519,GatedGraphConv
3,3,0.650029,0.639241,0.649457,0.605063,0.626474,0.639241,GatedGraphConv
4,4,0.555789,0.721519,0.696629,0.78481,0.738095,0.721519,GatedGraphConv
5,5,0.514893,0.749367,0.727483,0.797468,0.76087,0.749367,GatedGraphConv
6,6,0.544919,0.756962,0.737705,0.797468,0.766423,0.756962,GatedGraphConv
7,7,0.5536,0.739241,0.732187,0.75443,0.743142,0.739241,GatedGraphConv
8,8,0.572935,0.722785,0.688034,0.81519,0.746234,0.722785,GatedGraphConv
9,9,0.519586,0.74557,0.709957,0.83038,0.765461,0.74557,GatedGraphConv


In [13]:
# Assuming 'accuracy' is the column name that contains accuracy scores
# Find the row with the maximum accuracy
best_model_row = all_data.loc[all_data['Accuracy'].idxmax()]

# Extract information about the best model
best_accuracy = best_model_row['Accuracy']
best_model_run_id = best_model_row['Run ID']
model = best_model_row['Model']

print(f"Set1: Best {model} with the highest accuracy is from run {best_model_run_id} with an accuracy of {best_accuracy}.")


Set1: Best GatedGraphConv with the highest accuracy is from run 10 with an accuracy of 0.759493670886076.


In [14]:
# Models and metrics to analyze
models = ['GatedGraphConv', 'GATv2Conv', 'SageConv']
metrics = ['Accuracy', 'Precision', 'Recall', 'F1 Score', 'ROC AUC']

results = []

# Calculate metrics for each model
for model in models:
    model_data = all_data[all_data['Model'] == model]
    result_row = {'Model': model}
    for metric in metrics:
        mean = model_data[metric].mean() * 100  # Convert to percentage
        std = model_data[metric].std() * 100  # Convert to percentage
        # Format as mean ± std%
        result_row[metric] = f"{mean:.2f} ± {std:.2f}%"
    results.append(result_row)

# Convert results to DataFrame
results_df = pd.DataFrame(results)

# Identify the best scores and bold them
for metric in metrics:
    best_score = max(results_df[metric], key=lambda x: float(x.split(' ± ')[0].replace('%','')))
    results_df[metric] = results_df[metric].apply(
        lambda x: f"\\textbf{{{x}}}" if x == best_score else x
    )

# Prepare the DataFrame to LaTeX table contents without adding LaTeX table commands
# Escape LaTeX special characters
results_df.replace('%', r'\%', regex=True, inplace=True)
table_contents = results_df.to_latex(index=False, escape=False, header=False)

# Strip unwanted LaTeX commands from the table contents
table_contents = table_contents.replace("\\begin{tabular}{llllll}", "") \
                               .replace("\\toprule", "") \
                               .replace("\\midrule", "") \
                               .replace("\\bottomrule", "") \
                               .replace("\\end{tabular}", "")\
                               .replace("±", "$\pm$")

# Manually construct the LaTeX table using tabularx to fit the width of the text
latex_table = (
    "\\begin{table}[ht]\n"
    "\\centering\n"
    "\\caption{RNA Binders (ROBIN) vs RNA Non-Binders (diverse)}\n"
    "\\label{tab:model_performance}\n"
    "\\scriptsize\n"
    "\\begin{tabularx}{\\textwidth}{Xccccc}\n"  # Changed 'l' to 'X' for the first column
    "\\toprule\n"
    "Model & " + " & ".join(metrics) + " \\\\\n"
    "\\midrule\n"
    + table_contents.strip() +  # Remove leading/trailing whitespace
    "\\bottomrule\n"
    "\\end{tabularx}\n"
    "\\end{table}\n"
)

print(latex_table)


\begin{table}[ht]
\centering
\caption{RNA Binders (ROBIN) vs RNA Non-Binders (diverse)}
\label{tab:model_performance}
\scriptsize
\begin{tabularx}{\textwidth}{Xccccc}
\toprule
Model & Accuracy & Precision & Recall & F1 Score & ROC AUC \\
\midrule
GatedGraphConv & 72.58 $\pm$ 3.55\% & 70.87 $\pm$ 2.69\% & \textbf{76.58 $\pm$ 7.30\%} & 73.50 $\pm$ 4.46\% & 72.58 $\pm$ 3.55\% \\
GATv2Conv & 69.75 $\pm$ 3.37\% & 71.27 $\pm$ 2.91\% & 66.86 $\pm$ 11.50\% & 68.34 $\pm$ 6.68\% & 69.75 $\pm$ 3.37\% \\
SageConv & \textbf{73.37 $\pm$ 1.16\%} & \textbf{72.41 $\pm$ 1.54\%} & 75.67 $\pm$ 3.97\% & \textbf{73.93 $\pm$ 1.67\%} & \textbf{73.37 $\pm$ 1.16\%} \\\bottomrule
\end{tabularx}
\end{table}

