In [2]:
import prepare_dataset
import pandas as pd
import os

# Noise levels to generate; one CSV file is written per level.
dataset_noise_ratios = [0.005, 0.01, 0.02, 0.05, 0.1, 0.2]

for noise in dataset_noise_ratios:
    # Build the augmented data for this noise level and wrap it in a DataFrame.
    data = prepare_dataset.aug_amazon_products(noise_ratio=noise)
    df = pd.DataFrame(data)

    noise_dataset_file = f"noisy_dataset_{noise}.csv"
    file_exists = os.path.exists(noise_dataset_file)

    # An existing file is extended (rows appended, no second header line);
    # otherwise a fresh file is created with a header row.
    # NOTE: because of the append branch, re-running this cell grows the
    # files that are already on disk instead of replacing them.
    df.to_csv(
        noise_dataset_file,
        mode='a' if file_exists else 'w',
        header=not file_exists,
        index=False,
    )
    print(f"Dataset saved to {noise_dataset_file}")

Creating training data
Path to dataset files: /root/.cache/kagglehub/datasets/karkavelrajaj/amazon-sales-dataset/versions/1
Original data shape: (1465, 16)
Expanded data shape: (14640, 4)
Dataset saved to noisy_dataset_0.005.csv
Creating training data
Path to dataset files: /root/.cache/kagglehub/datasets/karkavelrajaj/amazon-sales-dataset/versions/1
Original data shape: (1465, 16)
Expanded data shape: (14640, 4)
Dataset saved to noisy_dataset_0.01.csv
Creating training data
Path to dataset files: /root/.cache/kagglehub/datasets/karkavelrajaj/amazon-sales-dataset/versions/1
Original data shape: (1465, 16)
Expanded data shape: (14640, 4)
Dataset saved to noisy_dataset_0.02.csv
Creating training data
Path to dataset files: /root/.cache/kagglehub/datasets/karkavelrajaj/amazon-sales-dataset/versions/1
Original data shape: (1465, 16)
Expanded data shape: (14640, 4)
Dataset saved to noisy_dataset_0.05.csv
Creating training data
Path to dataset files: /root/.cache/kagglehub/datasets/karkavelr

In [4]:
import pandas as pd
# Load all logged runs and keep only the rows of one experiment.
data = pd.read_csv("experiment_results.csv")
exp_id = "20250409090514"
# The experiment id is compared as text, so cast the column before filtering.
data['Exp_id'] = data['Exp_id'].astype(str)
filtered_data = data[data['Exp_id'] == exp_id]

# CSV columns noted by the author:
# Algorithm,Noise_Ratio,T,s,Max_Included_Literals,Epochs,Platform,Total_Time,Accuracy
# Mean accuracy for every (Algorithm, Noise_Ratio) pair.
grouped_data = filtered_data.groupby(['Algorithm', 'Noise_Ratio']).agg({'Accuracy': 'mean'}).reset_index()

# One row per noise ratio, one column per algorithm.
pivot_data = grouped_data.pivot(index='Noise_Ratio', columns='Algorithm', values='Accuracy')

# Table preamble and header row.
latex_table = """
\\begin{table}[h!]
\\centering
\\begin{tabular}{|c|c|c|c|}
\\hline
\\textbf{Noise Ratio} & \\textbf{GCN (\\%)} & \\textbf{GTM (\\%)} & \\textbf{TMClassifier (\\%)} \\\\ \\hline
"""

# One table row per noise ratio; the 'Graph NN' results fill the GCN column.
for noise_ratio, row in pivot_data.iterrows():
    latex_table += (
        f"{noise_ratio} & "
        f"{row['Graph NN']:.2f} & {row['GraphTM']:.2f} & {row['TMClassifier']:.2f} \\\\ \\hline\n"
    )

# Close the tabular, then add caption and label before closing the float.
latex_table += "\\end{tabular}\n"
latex_table += "\\caption{Average accuracy comparison of GCN, GraphTM, and TMClassifier for varying noise ratios.}\n"
latex_table += "\\label{tab:recomm_sys_accuracy}\n"
latex_table += "\\end{table}"

print(latex_table)


\begin{table}[h!]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
\textbf{Noise Ratio} & \textbf{GCN (\%)} & \textbf{GTM (\%)} & \textbf{TMClassifier (\%)} \\ \hline
0.005 & 83.39 & 98.73 & 76.73 \\ \hline
0.01 & 85.55 & 98.35 & 74.87 \\ \hline
0.02 & 83.57 & 97.73 & 72.24 \\ \hline
0.05 & 82.13 & 94.61 & 63.86 \\ \hline
0.1 & 75.93 & 89.85 & 49.48 \\ \hline
0.2 & 64.12 & 78.73 & 20.13 \\ \hline
\end{tabular}
\caption{Average accuracy comparison of GCN, GraphTM, and TMClassifier for varying noise ratios.}
\label{tab:recomm_sys_accuracy}
\end{table}


In [1]:
import pandas as pd

# Load all logged runs.
data = pd.read_csv("experiment_results.csv")
exp_id = "20250409090514"
# The experiment id is compared as text, so cast the column before filtering.
data['Exp_id'] = data['Exp_id'].astype(str)

# Keep only the rows that belong to the selected experiment.
filtered_data = data[data['Exp_id'] == exp_id]

# Mean and standard deviation of accuracy for every (Algorithm, Noise_Ratio) pair.
grouped_data = filtered_data.groupby(['Algorithm', 'Noise_Ratio']).agg(
    Accuracy_mean=('Accuracy', 'mean'),
    Accuracy_std=('Accuracy', 'std')
).reset_index()

# Two aligned pivots: rows are noise ratios, columns are algorithms.
pivot_data_mean = grouped_data.pivot(index='Noise_Ratio', columns='Algorithm', values='Accuracy_mean')
pivot_data_std = grouped_data.pivot(index='Noise_Ratio', columns='Algorithm', values='Accuracy_std')

# Table preamble and header row.
latex_table = """
\\begin{table}[h!]
\\centering
\\begin{tabular}{|c|c|c|c|}
\\hline
\\textbf{Noise Ratio} & \\textbf{GCN (\\%)} & \\textbf{GraphTM (\\%)} & \\textbf{TMClassifier (\\%)} \\\\ \\hline
"""

# Algorithm names as stored in the CSV, in the order of the table columns
# (the 'Graph NN' results fill the GCN column).
algorithm_columns = ['Graph NN', 'GraphTM', 'TMClassifier']

# One table row per noise ratio, each cell formatted as "mean ± std".
# \pm is a math-mode symbol, so it is wrapped in $...$ to compile in a text cell.
for noise_ratio in pivot_data_mean.index:
    cells = [
        f"{pivot_data_mean.loc[noise_ratio, algorithm]:.2f} $\\pm$ {pivot_data_std.loc[noise_ratio, algorithm]:.2f}"
        for algorithm in algorithm_columns
    ]
    latex_table += f"{noise_ratio} & " + " & ".join(cells) + " \\\\ \\hline\n"

# Close the tabular, then add caption and label before closing the float.
latex_table += "\\end{tabular}\n"
latex_table += "\\caption{Average accuracy and standard deviation comparison of GCN, GraphTM, and TMClassifier for varying noise ratios.}\n"
latex_table += "\\label{tab:recomm_sys_accuracy}\n"
# Previously emitted "\{table}", which left the table environment unclosed.
latex_table += "\\end{table}"

print(latex_table)



\begin{table}[h!]
\centering
\begin{tabular}{|c|c|c|c|}
\hline
\textbf{Noise Ratio} & \textbf{GCN (\%)} & \textbf{GraphTM (\%)} & \textbf{TMClassifier (\%)} \\ \hline
0.005 & 83.39 \pm 4.83 & 98.73 \pm 0.12 & 76.73 \pm 0.14 \\ \hline
0.01 & 85.55 \pm 6.99 & 98.35 \pm 0.08 & 74.87 \pm 0.12 \\ \hline
0.02 & 83.57 \pm 5.76 & 97.73 \pm 0.13 & 72.24 \pm 0.26 \\ \hline
0.05 & 82.13 \pm 5.30 & 94.61 \pm 0.34 & 63.86 \pm 0.34 \\ \hline
0.1 & 75.93 \pm 3.89 & 89.85 \pm 0.29 & 49.48 \pm 0.38 \\ \hline
0.2 & 64.12 \pm 3.07 & 78.73 \pm 0.75 & 20.13 \pm 0.04 \\ \hline
\end{tabular}
\caption{Average accuracy and standard deviation comparison of GCN, GraphTM, and TMClassifier for varying noise ratios.}
\label{tab:recomm_sys_accuracy}
\{table}


In [2]:
import pandas as pd

# Load the logged experiment runs.
data = pd.read_csv("experiment_results.csv")

# Experiment whose runs are summarised below.
exp_id = "20250409090514"

# Cast the id column to text so it can be matched against the string above.
data['Exp_id'] = data['Exp_id'].astype(str)

# Rows belonging to the selected experiment only.
filtered_data = data.loc[data['Exp_id'] == exp_id]

# Per-algorithm mean of accuracy and of total time, pooled over all noise ratios.
grouped_data = (
    filtered_data
    .groupby('Algorithm')[['Accuracy', 'Total_Time']]
    .mean()
    .reset_index()
)

# Report one summary line per algorithm.
print("Averages across all noise ratios:")
for algorithm, average_accuracy, average_total_time in zip(
    grouped_data['Algorithm'], grouped_data['Accuracy'], grouped_data['Total_Time']
):
    print(f"Algorithm: {algorithm}, Average Accuracy: {average_accuracy:.2f}%, Average Total Time: {average_total_time:.2f}s")


Averages across all noise ratios:
Algorithm: Graph NN, Average Accuracy: 79.11%, Average Total Time: 44.80s
Algorithm: GraphTM, Average Accuracy: 93.00%, Average Total Time: 133.75s
Algorithm: TMClassifier, Average Accuracy: 59.55%, Average Total Time: 1068.99s
