In [1]:
import numpy as np
import pandas as pd
import pandas as pd
import glob
import os

In [7]:
import pandas as pd
import numpy as np
import os

# Directory containing the evaluation CSV files
csv_folder = "content/evaluation/model_pert"

# Directory for barplot images (not needed by the table below; kept in case)
output_dir = 'barplot_images_2'
os.makedirs(output_dir, exist_ok=True)

# glob returns paths in arbitrary, filesystem-dependent order; sorting makes
# the row order of combined_df reproducible between runs and machines.
csv_files = sorted(glob.glob(os.path.join(csv_folder, "*.csv")))
if not csv_files:
    # Without this guard pd.concat([]) fails further down with the much less
    # helpful "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {csv_folder!r}")

# One cleaned DataFrame per CSV file
df_list = []

for file in csv_files:
    df = pd.read_csv(file)

    # 'Weather Type' labels are matched against b'0020_<perturbation>_<digit>';
    # the two capture groups are the perturbation name and its intensity digit.
    labels = df['Weather Type'].str.extract(r"b'0020_(.+)_(\d)'")
    labels.columns = ['Perturbation', 'Intensity']

    # Rows that do not match come back as NaN and would otherwise surface as a
    # cryptic int-conversion error; report the offending file instead.
    if labels['Intensity'].isna().any():
        raise ValueError(
            f"{file}: 'Weather Type' contains labels that do not match "
            "b'0020_<perturbation>_<digit>'"
        )

    # Replace the raw label column with the two parsed columns
    df = df.drop('Weather Type', axis=1)
    df['Perturbation'] = labels['Perturbation']
    df['Intensity'] = labels['Intensity'].astype(int)

    df_list.append(df)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(df_list, ignore_index=True)

# Remove metrics that are not reported in the table (missing ones are ignored)
columns_to_remove = ['Average MAE', 'Overall Specificity', 'Average Pixel Accuracy', 'Overall Accuracy',
                     'Overall Precision', 'Overall Recall', 'Overall Dice Coefficient']
combined_df = combined_df.drop(columns=columns_to_remove, errors='ignore')

# Sort values by Perturbation and Intensity
combined_df = combined_df.sort_values(by=['Perturbation', 'Intensity'])

# Mean of every remaining column per perturbation (no variance). The suffix
# yields flat column names such as 'Overall IoU_mean' (and 'Intensity_mean'),
# with 'Perturbation' restored as the first column by reset_index.
grouped_df = (
    combined_df
    .groupby('Perturbation')
    .mean()
    .add_suffix('_mean')
    .reset_index()
)

# Function to normalize values between 0 and 1 for LaTeX barplots
def normalize_values_for_chart(values):
    """Min-max scale ``values`` into the range [0, 1].

    Returns a list with one entry per element of ``values``. When every
    element is equal there is no range to scale by, so each entry is 0.5.
    """
    lowest, highest = np.min(values), np.max(values)

    # Constant input: place every bar at mid-height instead of dividing by zero
    if lowest == highest:
        return [0.5] * len(values)

    span = highest - lowest
    return [(item - lowest) / span for item in values]

# Function to generate LaTeX \Chart{} command for each perturbation and metric
def generate_chart_latex(perturbation, metric, df):
    r"""Build the LaTeX \Chart{...} command for one perturbation and metric.

    Parameters
    ----------
    perturbation : value compared against ``df['Perturbation']`` to select rows.
    metric : name of the column in ``df`` holding the metric values.
    df : DataFrame with 'Perturbation', 'Intensity' and ``metric`` columns.

    Returns
    -------
    str
        ``\Chart`` followed by one ``{x.xx}`` group per intensity level found
        for ``perturbation``, in increasing intensity order. Each value is the
        metric's mean at that intensity, min-max normalized across the levels.
        With one row for each of five intensities this is the same output as
        reading the first five rows directly.
        NOTE(review): the previous version always emitted five groups, so the
        \Chart macro presumably takes five arguments - confirm the data has
        five intensity levels per perturbation.

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``perturbation``.
    """
    rows = df.loc[df['Perturbation'] == perturbation]
    if rows.empty:
        raise ValueError(f"No rows found for perturbation {perturbation!r}")

    # Average per intensity so that each bar stands for exactly one intensity
    # level, even when several CSV files contribute rows for the same level.
    # groupby sorts its keys, so bars are ordered by increasing intensity
    # regardless of the row order in df.
    per_intensity = rows.groupby('Intensity', sort=True)[metric].mean()

    # Normalize the per-intensity values for the LaTeX barplot
    normalized_values = normalize_values_for_chart(per_intensity.to_numpy())

    return "\\Chart" + "".join(f"{{{value:.2f}}}" for value in normalized_values)

# Add LaTeX \Chart{} commands for each perturbation and metric
def add_chart_commands(df, source_df=None):
    """Add one '<metric>_Chart' column of LaTeX chart commands per metric.

    Parameters
    ----------
    df : DataFrame with a 'Perturbation' column. It is modified in place: a
        '<metric>_Chart' column is added for every metric column.
    source_df : optional DataFrame with the per-intensity rows the charts are
        built from ('Perturbation', 'Intensity' and the metric columns). When
        omitted, the notebook-level ``combined_df`` is used, which is what this
        function always read before the parameter existed.

    Returns
    -------
    The same ``df`` object, with the chart columns added.
    """
    if source_df is None:
        # Backward-compatible fallback to the notebook global
        source_df = combined_df

    # Every column except the two grouping keys is treated as a metric
    metric_columns = [
        column for column in source_df.columns
        if column not in ('Perturbation', 'Intensity')
    ]

    for metric in metric_columns:
        # metric is bound as a default argument so the lambda does not depend
        # on the loop variable's later values.
        df[f'{metric}_Chart'] = df['Perturbation'].apply(
            lambda p, metric=metric: generate_chart_latex(p, metric, source_df)
        )
    return df

# Generate the inline-barplot LaTeX commands for every metric. add_chart_commands
# adds the '<metric>_Chart' columns to grouped_df itself and returns it, so
# grouped_df_with_charts and grouped_df refer to the same DataFrame.
grouped_df_with_charts = add_chart_commands(grouped_df)

# Custom LaTeX table generation with mean and inline barplots (no variance)
def dataframe_to_latex_with_charts(df, metric_names=None):
    r"""Render a LaTeX tabular with a mean and an inline chart per metric.

    Parameters
    ----------
    df : DataFrame with a 'Perturbation' column and, for every metric in
        ``metric_names``, a '<metric>_mean' column (numeric) and a
        '<metric>_Chart' column (LaTeX code, inserted verbatim).
    metric_names : optional iterable of metric names to report. Defaults to
        'Average MSE', 'Overall F1 Score' and 'Overall IoU'.

    Returns
    -------
    str
        The tabular source, lines joined with newlines: a two-row header
        (metric name spanning two columns, then 'Mean' / 'Trend') followed by
        one row per row of ``df``.

    Raises
    ------
    KeyError
        If ``df`` lacks one of the columns required for a metric.
    """
    if metric_names is None:
        metric_names = ['Average MSE', 'Overall F1 Score', 'Overall IoU']
    else:
        metric_names = list(metric_names)

    # Characters with special meaning in LaTeX text. Perturbation names are
    # parsed with a pattern that allows underscores, and a bare '_' in text
    # mode stops LaTeX with "Missing $ inserted". translate() is single-pass,
    # so the backslashes it inserts are never re-escaped.
    latex_escapes = str.maketrans({
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    })

    def escape(text):
        return str(text).translate(latex_escapes)

    # The column spec is derived from the metrics actually written, not from
    # the DataFrame width: df also carries columns that are never rendered
    # (e.g. 'Intensity_mean'), which would otherwise skew the count.
    latex_lines = [
        "\\begin{tabular}{l%s}" % ("rr" * len(metric_names)),
        "\\hline",
    ]

    # Top header: each metric name spans its Mean and Trend columns
    header_row_1 = ['Perturbation'] + [
        "\\multicolumn{2}{c}{%s}" % escape(metric) for metric in metric_names
    ]
    latex_lines.append(" & ".join(header_row_1) + " \\\\")

    # Sub-header: mean value and inline barplot
    header_row_2 = [''] + ['Mean', 'Trend'] * len(metric_names)
    latex_lines.append(" & ".join(header_row_2) + " \\\\")
    latex_lines.append("\\hline")

    # Data rows
    for _, row in df.iterrows():
        cells = [escape(row['Perturbation'])]
        for metric in metric_names:
            cells.append(format(row[metric + '_mean'], '.2f'))
            # Chart cells already contain LaTeX commands; do not escape them
            cells.append(str(row[metric + '_Chart']))
        latex_lines.append(" & ".join(cells) + " \\\\")

    latex_lines.append("\\hline")
    latex_lines.append("\\end{tabular}")

    return "\n".join(latex_lines)

# Generate LaTeX table with inline barplots
latex_table = dataframe_to_latex_with_charts(grouped_df_with_charts)

# Keep the output path in one place so the file written and the message
# printed cannot drift apart.
charts_table_path = "summary_table_with_charts.tex"

# Explicit encoding: open() otherwise uses the locale's default, which differs
# between platforms.
with open(charts_table_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"LaTeX table with inline barplots saved as {charts_table_path}")

LaTeX table with inline barplots saved as summary_table_with_charts.tex


In [1]:
import pandas as pd
import numpy as np
import os
import glob

# Directory containing the evaluation CSV files
csv_folder = "content/evaluation/model_pert"

# Directory for barplot images (not needed by the table below; kept in case)
output_dir = 'barplot_images_2'
os.makedirs(output_dir, exist_ok=True)

# glob returns paths in arbitrary, filesystem-dependent order; sorting makes
# the row order of combined_df reproducible between runs and machines.
csv_files = sorted(glob.glob(os.path.join(csv_folder, "*.csv")))
if not csv_files:
    # Without this guard pd.concat([]) fails further down with the much less
    # helpful "No objects to concatenate".
    raise FileNotFoundError(f"No CSV files found in {csv_folder!r}")

# One cleaned DataFrame per CSV file
df_list = []

for file in csv_files:
    df = pd.read_csv(file)

    # 'Weather Type' labels are matched against b'0020_<perturbation>_<digit>';
    # the two capture groups are the perturbation name and its intensity digit.
    labels = df['Weather Type'].str.extract(r"b'0020_(.+)_(\d)'")
    labels.columns = ['Perturbation', 'Intensity']

    # Rows that do not match come back as NaN and would otherwise surface as a
    # cryptic int-conversion error; report the offending file instead.
    if labels['Intensity'].isna().any():
        raise ValueError(
            f"{file}: 'Weather Type' contains labels that do not match "
            "b'0020_<perturbation>_<digit>'"
        )

    # Replace the raw label column with the two parsed columns
    df = df.drop('Weather Type', axis=1)
    df['Perturbation'] = labels['Perturbation']
    df['Intensity'] = labels['Intensity'].astype(int)

    df_list.append(df)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(df_list, ignore_index=True)

# Remove metrics that are not reported in the table (missing ones are ignored)
columns_to_remove = ['Average MAE', 'Overall Specificity', 'Average Pixel Accuracy', 'Overall Accuracy',
                     'Overall Precision', 'Overall Recall', 'Overall Dice Coefficient']
combined_df = combined_df.drop(columns=columns_to_remove, errors='ignore')

# Sort values by Perturbation and Intensity
combined_df = combined_df.sort_values(by=['Perturbation', 'Intensity'])

# Mean, max, min and variance of 'Overall IoU' per perturbation. Named
# aggregation produces the flat output column names directly, so the result
# does not depend on a positional rename staying in sync with the statistics.
grouped_df = (
    combined_df
    .groupby('Perturbation')
    .agg(
        IoU_Average=('Overall IoU', 'mean'),
        IoU_Max=('Overall IoU', 'max'),
        IoU_Min=('Overall IoU', 'min'),
        IoU_Variance=('Overall IoU', 'var'),
    )
    .reset_index()
)

# Custom LaTeX table generation for Overall IoU metrics
def dataframe_to_latex_with_iou(df):
    r"""Render a LaTeX tabular summarizing Overall IoU per perturbation.

    Parameters
    ----------
    df : DataFrame with the columns 'Perturbation', 'IoU_Average', 'IoU_Max',
        'IoU_Min' and 'IoU_Variance' (the last four numeric).

    Returns
    -------
    str
        The tabular source, lines joined with newlines: a two-row header
        ('Overall IoU' spanning four columns, then Average / Max / Min /
        Variance) followed by one row per row of ``df``. Average, max and min
        are written with two decimals, variance with four.
    """
    # Characters with special meaning in LaTeX text. Perturbation names are
    # parsed with a pattern that allows underscores, and a bare '_' in text
    # mode stops LaTeX with "Missing $ inserted". translate() is single-pass,
    # so the backslashes it inserts are never re-escaped.
    latex_escapes = str.maketrans({
        '\\': r'\textbackslash{}',
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
    })

    # Table opening and the two header rows
    latex_lines = [
        "\\begin{tabular}{lrrrr}",
        "\\hline",
        " & ".join(['Perturbation', '\\multicolumn{4}{c}{Overall IoU}']) + " \\\\",
        " & ".join(['', 'Average', 'Max', 'Min', 'Variance']) + " \\\\",
        "\\hline",
    ]

    # Data rows
    for _, row in df.iterrows():
        cells = [
            str(row['Perturbation']).translate(latex_escapes),
            format(row['IoU_Average'], '.2f'),
            format(row['IoU_Max'], '.2f'),
            format(row['IoU_Min'], '.2f'),
            # Variances are small, so they get two extra decimals
            format(row['IoU_Variance'], '.4f'),
        ]
        latex_lines.append(" & ".join(cells) + " \\\\")

    latex_lines.append("\\hline")
    latex_lines.append("\\end{tabular}")

    return "\n".join(latex_lines)

# Generate LaTeX table for IoU metrics
latex_table = dataframe_to_latex_with_iou(grouped_df)

# Keep the output path in one place so the file written and the message
# printed cannot drift apart.
iou_table_path = "iou_summary_table.tex"

# Explicit encoding: open() otherwise uses the locale's default, which differs
# between platforms.
with open(iou_table_path, "w", encoding="utf-8") as f:
    f.write(latex_table)

print(f"LaTeX table with IoU metrics saved as {iou_table_path}")

LaTeX table with IoU metrics saved as iou_summary_table.tex
