In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, roc_auc_score
import pandas as pd
import os


def calculate_roc_auc(negative_scores, positive_scores):
    # 创建标签，0代表人类写的，1代表机器生成的
    labels = np.array([0] * len(negative_scores) + [1] * len(positive_scores))
    # 合并所有得分
    scores = np.array(negative_scores + positive_scores)
    valid_indices = ~np.isnan(labels) & ~np.isnan(scores)
    labels = labels[valid_indices]
    scores = scores[valid_indices]
    # 计算AUC
    auc = roc_auc_score(labels, scores)
    fpr, tpr, _ = roc_curve(labels, scores)
    return auc, fpr, tpr

def draw_roc(human_scores, wm_score):
    auc_w, fpr_w, tpr_w = calculate_roc_auc(human_scores, wm_score)

    plt.figure()
    plt.plot(fpr_w, tpr_w, color='red', label=f'Adaptive watermarked (AUC = {auc_w:.4f})')

    # Diagonal line for random chance
    plt.plot([0, 1], [0, 1], color='gray', linestyle='--')

    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.1])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f"{os.path.basename(result_path).split('_')[0]} ROC")
    plt.legend(loc="lower right")
    plt.grid()
    # plt.savefig(f"outputs/roc.png", dpi=300, bbox_inches='tight')
    # plt.show()
    print('ROC-AUC:', round(auc_w*100, 2))



In [2]:
def get_auc(df):
    result_dict = {}
    human_scores = df['human_score'].to_list()

    for type_ in ['adaptive', 'paraphrased', 'spoofing', 'latter_spoofing']:
        wm_scores = df[f'{type_}_watermarked_text_score'].to_list()
        assert all(not np.isnan(score) for score in wm_scores), f'{type_} watermarked text scores contain NaN values'
        auc_w, _, _ = calculate_roc_auc(human_scores, wm_scores)
        result_dict[f'{type_}_auc'] = auc_w
    return result_dict


In [None]:
import os
import re
import glob

root_path = "/blue/buyuheng/li_an.ucsb/projects/watermark-simcse/watermarking/outputs/"
baseline_root = rf'/blue/buyuheng/li_an.ucsb/projects/baselines/adaptive-text-watermark/outputs'

train_dataset_names = ['c4']
wm_dataset_names = ['c4', 'lfqa']

results = []
baseline_results = []
for train_dataset_name in train_dataset_names:
    print(f'==================== train on {train_dataset_name} ====================')
    for wm_dataset_name in wm_dataset_names:
        print(f'------- wm on {wm_dataset_name} -------')
        pattern = re.compile(rf"""
            .*/{train_dataset_name}/(?P<model_name>[^/]+)/(?P<batch_size>\d+)batch_(?P<num_epoch>\d+)epochs/
            llama(?P<num_paraphrased_llama>\d+)-(?P<num_negative_llama>\d+)gpt(?P<num_paraphrased_gpt>\d+)-(?P<num_negative_gpt>\d+)-(?P<num_summary>\d+)/
            loss_cl(?P<cl_weight>[\d\.]+)-tl(?P<tl_weight>[\d\.]+)-wneg(?P<neg_weight>[\d\.]+)-margin(?P<margin>[\d\.]+)/
            wm-{wm_dataset_name}-alpha(?P<alpha>[\d\.]+)-delta(?P<delta_0>[\d\.]+)\|(?P<delta>[\d\.]+)\.csv$
        """, re.VERBOSE)
        baseline_pattern = re.compile(rf".*/wm-{wm_dataset_name}-alpha(?P<alpha>[\d\.]+)-delta(?P<delta_0>[\d\.]+)\|(?P<delta>[\d\.]+)\.csv$", re.VERBOSE)

        # find mutual non-empty rows
        mutual_non_empty_indices = None
        for filepath in glob.iglob(root_path + "/**/*.csv", recursive=True):
            match = pattern.match(filepath)
            if match:
                df = pd.read_csv(filepath)
                if 'hate_watermarked_text_score' in df.columns:
                    df['spoofing_watermarked_text_score'] = df['hate_watermarked_text_score']
                if 'latter_spoofing_watermarked_text_score' not in df.columns:
                    continue
                df = df[['human_score', 'adaptive_watermarked_text_score', 'paraphrased_watermarked_text_score', 'spoofing_watermarked_text_score', 'latter_spoofing_watermarked_text_score']]
                non_empty_indices = set(df.dropna().index)

                if mutual_non_empty_indices is None:
                    mutual_non_empty_indices = non_empty_indices
                else:
                    mutual_non_empty_indices = mutual_non_empty_indices.intersection(non_empty_indices)
        for filepath in glob.iglob(baseline_root + "/**/*.csv", recursive=True):
            match = baseline_pattern.match(filepath)
            if match:
                df = pd.read_csv(filepath)
                df = df[['human_score', 'adaptive_watermarked_text_score', 'paraphrased_watermarked_text_score', 'spoofing_watermarked_text_score']]
                non_empty_indices = set(df.dropna().index)
                mutual_non_empty_indices = mutual_non_empty_indices.intersection(non_empty_indices)

        # get roc-auc results of mutual non-empty rows
        if mutual_non_empty_indices is not None:
            mutual_non_empty_indices = list(mutual_non_empty_indices)
            print(f'Number of mutual non-empty rows: {len(mutual_non_empty_indices)}')
            for filepath in glob.iglob(root_path + "/**/*.csv", recursive=True):
                match = pattern.match(filepath)
                if match:
                    df = pd.read_csv(filepath)
                    if 'hate_watermarked_text_score' in df.columns:
                        df['spoofing_watermarked_text_score'] = df['hate_watermarked_text_score']   
                    if 'latter_spoofing_watermarked_text_score' not in df.columns:
                        continue
                    df = df[['human_score', 'adaptive_watermarked_text_score', 'paraphrased_watermarked_text_score', 'spoofing_watermarked_text_score', 'latter_spoofing_watermarked_text_score']]
                    df = df.loc[mutual_non_empty_indices]
                    auc_result_dict = get_auc(df)

                    result_dict = {'train_dataset_name': train_dataset_name, 'wm_dataset_name': wm_dataset_name}
                    result_dict.update(match.groupdict())
                    result_dict.update(auc_result_dict)
                    results.append(result_dict)

            for filepath in glob.iglob(baseline_root + "/**/*.csv", recursive=True):
                match = baseline_pattern.match(filepath)
                if match:
                    df = pd.read_csv(filepath)
                    df = df[['human_score', 'adaptive_watermarked_text_score', 'paraphrased_watermarked_text_score', 'spoofing_watermarked_text_score']]
                    df = df.loc[mutual_non_empty_indices]
                    auc_result_dict = get_auc(df)
                    baseline_result_dict = {'wm_dataset_name': wm_dataset_name}
                    baseline_result_dict.update(match.groupdict())
                    baseline_result_dict.update(auc_result_dict)
                    baseline_results.append(baseline_result_dict)

------- wm on c4 -------
Number of mutual non-empty rows: 169


KeyError: "['latter_spoofing_watermarked_text_score'] not in index"

In [6]:
results = pd.DataFrame(results)
results.to_csv(os.path.join(root_path, 'roc_auc_results.csv'), index=False)

In [None]:
baseline_results = pd.DataFrame(baseline_results)
baseline_results.to_csv(os.path.join(root_path, 'baseline_roc_auc_results.csv'), index=False)

In [34]:
!pwd

/blue/buyuheng/li_an.ucsb/projects/watermark-simcse/watermarking/util
