In [1]:
import pandas as pd
import os
from matplotlib import pyplot as plt

def combine_files_by_base_name(directory):
    """
    Combines the "Simple" result files in a directory, grouped by base name.

    Only regular files whose name contains "Simple" are considered. The base
    name of a file is everything before the first '_part' marker with the last
    '_'-separated token removed, so 'run_news_SimpleDistinguisher_part2.csv'
    and 'run_news_SimpleDistinguisher.csv' both map to 'run_news'.

    Parameters:
        directory (str): Path to the directory containing files to combine.

    Returns:
        dict: A dictionary where keys are base names and values are combined DataFrames
        (rows concatenated in part order, re-indexed from 0).
    """
    import re  # local import: only needed for the natural-sort key below

    def natural_key(name):
        # re.split with a capturing group alternates text / digit-run tokens, so
        # odd positions are always numbers. Comparing them as ints makes
        # 'part2' sort before 'part10' (plain string sorting gets this wrong).
        tokens = re.split(r'([0-9]+)', name)
        return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]

    file_groups = {}
    for filename in os.listdir(directory):
        if "Simple" not in filename:
            continue
        # Skip sub-directories (or other non-files) that happen to match the name filter.
        if not os.path.isfile(os.path.join(directory, filename)):
            continue
        base_name = '_'.join(filename.split('_part')[0].split('_')[:-1])
        file_groups.setdefault(base_name, []).append(filename)

    combined_dfs = {}
    for base_name, files in file_groups.items():
        # Read every part first and concatenate once; growing a DataFrame with
        # concat inside the loop re-copies all previous rows on every iteration.
        frames = [
            pd.read_csv(os.path.join(directory, filename))
            for filename in sorted(files, key=natural_key)
        ]
        combined_dfs[base_name] = pd.concat(frames, ignore_index=True)

    return combined_dfs

# Helper function to separate attacks based on step_num reset
def separate_attacks(df, length=10000):
    """
    Splits a DataFrame into consecutive "attacks" wherever the 'Num' column resets.

    A new attack starts at every row whose 'Num' value is strictly smaller than
    the 'Num' of the row immediately before it (by position, so the frame does
    not need a default 0..n-1 index).

    Parameters:
        df (pd.DataFrame): Frame with a 'Num' column, rows in their original order.
        length (int): Unused; kept so existing callers that pass it keep working.

    Returns:
        list of pd.DataFrame: One independent copy per attack, in order of
        appearance, keeping the original index labels and column dtypes.
        An empty list when df has no rows.
    """
    if len(df) == 0:
        return []

    step = df['Num']
    # shift() leaves NaN in the first slot and NaN comparisons are False,
    # so the first row can never be flagged as a reset.
    is_reset = step.lt(step.shift())
    # Running count of resets = id of the attack each row belongs to.
    # Passed as a plain array so grouping is positional, not index-aligned.
    attack_ids = is_reset.cumsum().to_numpy()

    # Copy each group so callers can add columns without touching `df`
    # (and without SettingWithCopyWarning).
    return [group.copy() for _, group in df.groupby(attack_ids, sort=False)]

def add_entropy_column(dfs):
    """
    Tags every DataFrame in a list with an 'entropy' column.

    The value written into frame number i (0-based) is i modulo 10, repeated
    on every row. The frames are modified in place and the same list object
    is handed back.

    Parameters:
        dfs (list of pd.DataFrame): List of DataFrames.

    Returns:
        list of pd.DataFrame: The same list, each frame now carrying 'entropy'.

    Raises:
        ValueError: If the length of the list is not a multiple of 10.
    """
    _, leftover = divmod(len(dfs), 10)
    if leftover:
        raise ValueError("The length of the list must be a multiple of 10.")

    for position, frame in enumerate(dfs):
        entropy_level = position % 10
        frame['entropy'] = entropy_level

    return dfs

def process_and_add_entropy(file_path, separate_attacks, add_entropy_column):
    """
    Runs the single-file pipeline: load a CSV, label it with its domain,
    split it into attacks, then tag the attacks with entropy.

    The domain is the second-to-last '_'-separated token of the file's base name.

    Parameters:
        file_path (str): Path to the CSV file.
        separate_attacks (function): Callable that splits the loaded DataFrame into attacks.
        add_entropy_column (function): Callable applied to the result of `separate_attacks`.

    Returns:
        Whatever `add_entropy_column` returns for the separated attacks
        (a list of DataFrames with the module's own helpers).
    """
    name_tokens = os.path.basename(file_path).split('_')
    domain_label = name_tokens[-2]

    frame = pd.read_csv(file_path)
    frame['domain'] = domain_label

    return add_entropy_column(separate_attacks(frame))

def check_unique_values(df, columns):
    """
    Reports, for each requested column, the value it holds when that value is
    the only distinct one in the column.

    Parameters:
        df (pd.DataFrame): The DataFrame to inspect.
        columns (list of str): Names of the columns to inspect.

    Returns:
        dict: Maps each column name to its single distinct value, or to None
        when the column has zero or several distinct values.
    """
    def sole_value(column):
        distinct = df[column].unique()
        return distinct[0] if len(distinct) == 1 else None

    return {column: sole_value(column) for column in columns}

def return_correct_flipped_correct(df, prefix=""):
    """
    Scores the choice columns of a frame against its 'Origin' column.

    Three float columns are written into `df` in place:
    '<prefix>correct' (1.0 where 'Origin' equals '<prefix>choice'),
    '<prefix>flipped_correct' (1.0 where 'Origin' equals '<prefix>flipped_choice')
    and '<prefix>avg_correct' (row-wise mean of the two).

    Parameters:
        df (pd.DataFrame): Frame with 'Origin', '<prefix>choice' and '<prefix>flipped_choice'.
        prefix (str): Prefix applied to the choice columns read and the score columns written.

    Returns:
        tuple: Column means of (correct, flipped_correct, avg_correct).
    """
    correct_col = f'{prefix}correct'
    flipped_col = f'{prefix}flipped_correct'
    avg_col = f'{prefix}avg_correct'

    matches_choice = df['Origin'] == df[f'{prefix}choice']
    df.loc[:, correct_col] = matches_choice.astype(float)

    matches_flipped = df['Origin'] == df[f'{prefix}flipped_choice']
    df.loc[:, flipped_col] = matches_flipped.astype(float)

    # Read the two score columns back from the frame, as they were just stored.
    df.loc[:, avg_col] = (df[correct_col] + df[flipped_col]) / 2

    return tuple(df[col].mean() for col in (correct_col, flipped_col, avg_col))


In [2]:
# Directory with the distinguisher result CSVs. Named RESULTS_DIR rather than
# `dir` so the `dir()` builtin is not shadowed for the rest of the kernel session.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
RESULTS_DIR = '/data2/borito1907/sandcastles/distinguisher/results'

# Rows whose 'Num' is below this value are left out of `all_num_attacks`.
MIN_NUM = 100

combined_dataframes = combine_files_by_base_name(RESULTS_DIR)

# Process each combined DataFrame
all_attacks = []
all_num_attacks = []
for base_name, combined_df in combined_dataframes.items():
    combined_df['domain'] = base_name.split('_')[-1]  # Infer domain from base name
    attacks = separate_attacks(combined_df)
    attacks = add_entropy_column(attacks)

    # .copy() makes each filtered frame independent of its parent, so adding
    # score columns to it later does not trigger SettingWithCopyWarning.
    num_attacks = [attack[attack['Num'] >= MIN_NUM].copy() for attack in attacks]

    all_attacks.extend(attacks)
    all_num_attacks.extend(num_attacks)


# Combine original DataFrames for concatenated view (optional)
long = pd.concat(all_attacks, ignore_index=True)

In [3]:
# Per-attack accuracy summary: one record per attack that still has rows.
data = []

for attack in all_num_attacks:
    # A frame with no rows would contribute a record with None keys and NaN
    # means; groupby drops such rows silently and the None turns the 'entropy'
    # column into floats. Skip it explicitly instead.
    if attack.empty:
        continue

    correct, flipped_correct, avg_correct = return_correct_flipped_correct(attack)
    d = check_unique_values(attack, ['domain', 'entropy'])

    data.append({
        'domain' : d['domain'],
        'entropy' : d['entropy'],
        'correct': correct,
        'flipped_correct': flipped_correct,
        'avg_correct': avg_correct
    })

# Fix the column set so the frame has the expected schema even with no records.
data_df = pd.DataFrame(
    data,
    columns=['domain', 'entropy', 'correct', 'flipped_correct', 'avg_correct'],
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = value


In [10]:
# Mean of the three accuracy metrics for each domain.
grouped_df = (
    data_df
    .groupby('domain')[['correct', 'flipped_correct', 'avg_correct']]
    .mean()
    .reset_index()
)

print(grouped_df)

  domain   correct  flipped_correct  avg_correct
0   news  0.880540         0.894260     0.887400
1  paris  0.889791         0.883439     0.886615
2  space  0.838374         0.838388     0.838381


In [8]:
# Mean of the three accuracy metrics for each entropy level.
grouped_df = (
    data_df
    .groupby('entropy')[['correct', 'flipped_correct', 'avg_correct']]
    .mean()
    .reset_index()
)

print(grouped_df)

   entropy   correct  flipped_correct  avg_correct
0      0.0  0.846510         0.855210     0.850860
1      1.0  0.893931         0.866763     0.880347
2      2.0  0.891955         0.874077     0.883016
3      3.0  0.876027         0.867196     0.871612
4      4.0  0.852963         0.899648     0.876306
5      5.0  0.890632         0.876979     0.883805
6      6.0  0.844089         0.842455     0.843272
7      7.0  0.892383         0.903987     0.898185
8      8.0  0.865311         0.864449     0.864880
9      9.0  0.832504         0.863010     0.847757


In [6]:
# Pair every attack with its mean avg_correct (third value returned by the scorer).
attack_avg_list = [
    (attack, return_correct_flipped_correct(attack)[2])
    for attack in all_attacks
]

# Order the pairs from lowest to highest average.
sorted_attacks = sorted(attack_avg_list, key=lambda pair: pair[1])

# Split the ordered pairs back into two parallel lists.
sorted_attacks_list = [attack for attack, _ in sorted_attacks]
sorted_avgs = [avg for _, avg in sorted_attacks]


In [7]:
import matplotlib.pyplot as plt
import pandas as pd
from ipywidgets import interact, IntSlider

# Function to plot cumulative avg progression for a given attack index
def plot_cumulative_avg(attack_index):
    """
    Plots the running mean of 'avg_correct' over the rows of one attack.

    Point i on the curve is the mean 'avg_correct' of the first i rows.

    Parameters:
        attack_index (int): Position in `sorted_attacks_list` of the attack to plot.
    """
    # Score a private copy once, so the stored attack is not modified and no
    # assignment is made into a slice of it.
    scored = sorted_attacks_list[attack_index].copy()
    return_correct_flipped_correct(scored)

    # expanding().mean() yields the mean of the first i rows for every i in a
    # single pass, instead of re-scoring each prefix of the frame.
    avg_values = scored['avg_correct'].expanding().mean().tolist()

    # Plot the graph
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(range(1, len(avg_values) + 1), avg_values, marker='o')
    ax.set_title(f'Cumulative Avg Progression for Attack Index {attack_index}')
    ax.set_xlabel('Number of Rows')
    ax.set_ylabel('Average')
    ax.grid(True)
    plt.show()

# Slider over the positions of sorted_attacks_list, wired to the plotting function.
attack_slider = IntSlider(
    value=100,
    min=0,
    max=len(sorted_attacks_list) - 1,
    step=1,
)
interact(plot_cumulative_avg, attack_index=attack_slider)


interactive(children=(IntSlider(value=100, description='attack_index', max=1259), Output()), _dom_classes=('wi…

<function __main__.plot_cumulative_avg(attack_index)>