### Closed-source LLMs

In [1]:
import pandas as pd

# Read the Excel file holding the closed-source model responses into `df`.
df = pd.read_excel('results/closedsource_sent_responses.xlsx')
# Number of rows loaded (displayed as the cell result).
df.shape[0]

ImportError: Unable to import required dependencies:
numpy: Error importing numpy: you should not try to import numpy from
        its source directory; please exit the numpy source tree, and relaunch
        your python interpreter from there.

In [None]:
# Load the filtered ID list from google_filtered_ids.txt (one ID per line).
with open('google_filtered_ids.txt', 'r') as file:
    id_list = file.read().splitlines()

# Keep only the rows of df whose 'ID' value appears in id_list.
# NOTE(review): id_list holds strings; if df['ID'] is a numeric column,
# isin() will match nothing -- confirm the column dtype.
df_filtered = df[df['ID'].isin(id_list)]
# Number of rows remaining after the filter (displayed as the cell result).
df_filtered.shape[0]

In [None]:
# Categories to report. Each value is compared for exact equality with
# df_filtered['category'].
# NOTE(review): an earlier variant of this cell used 'News and Politics'
# (title case) -- confirm which casing the data actually uses.
categories = ['business & finance', 'education', 'food & drink', 'movies',
              'music and audio', 'news and politics', 'style & fashion',
              'television', 'video gaming']

# Models to analyze; each needs a '<model>_rouge_l' column in df_filtered.
models = ['gpt-4o-mini', 'gemini-1.0-pro',
          'claude-3-haiku-20240307', 'command-r']

# Score thresholds; every table cell counts the rows scoring >= threshold.
thresholds = [0.83, 0.85, 0.9, 0.91, 0.95, 1.0]

# Table layout. The width of a model's header is derived from the number of
# threshold cells beneath it so the model name stays centred over its own
# columns, and the horizontal rules span the full table.
label_width = 25
cell_width = 6
model_width = cell_width * len(thresholds)
table_width = label_width + model_width * len(models)

# Score at or above which a sentence is collected in the unique-sentence set.
unique_sentence_threshold = 0.85

# Print header
print("\nROUGE-L Score Analysis by Category:")
print("=" * table_width)

# Model names, each centred over its block of threshold columns.
header = f"{'Category':<{label_width}}"
header += "".join(f"{model:^{model_width}}" for model in models)
print(header)

# Threshold values, repeated once per model.
threshold_header = " " * label_width
threshold_header += "".join(
    f"{threshold:^{cell_width}}" for _ in models for threshold in thresholds
)
print(threshold_header)

print("-" * table_width)

# Running totals per model and threshold, summed over all categories.
overall_results = {
    model: {'counts': {threshold: 0 for threshold in thresholds}}
    for model in models
}

# Unique sentences scoring >= unique_sentence_threshold for any model.
sentences_above_threshold = set()

for category in categories:
    category_df = df_filtered[df_filtered['category'] == category]
    cells = []

    for model in models:
        rouge_scores = category_df[f'{model}_rouge_l']

        for threshold in thresholds:
            # Vectorised count; NaN scores compare False and are not counted.
            count = int((rouge_scores >= threshold).sum())
            overall_results[model]['counts'][threshold] += count
            cells.append(f"{count:^{cell_width}}")

        sentences_above_threshold.update(
            category_df.loc[rouge_scores >= unique_sentence_threshold,
                            'context_sentence']
        )

    print(f"{category:<{label_width}}" + "".join(cells))

# Print overall results
print("-" * table_width)
line = "Overall".ljust(label_width)
for model in models:
    total_counts = overall_results[model]['counts']
    line += "".join(
        f"{total_counts[threshold]:^{cell_width}}" for threshold in thresholds
    )
print(line)
print("=" * table_width)

# Print total unique sentences above the collection threshold
print(f"\nTotal unique sentences with score >= {unique_sentence_threshold} across all models: {len(sentences_above_threshold)}")

In [None]:
# Display the 'category' column of the unfiltered frame for inspection.
df['category']

### Open-source LLMs

In [None]:
import os
import json
from sklearn.metrics import accuracy_score, recall_score, precision_score

def readfile(filePath):
    """Return every line of a UTF-8 text file, line endings included.

    Args:
        filePath: Path of the file to read.

    Returns:
        A list with one string per line; empty for an empty file.
    """
    with open(filePath, 'r', encoding='utf-8') as handle:
        return handle.readlines()


def readFile(filePath):
    """Read a UTF-8 text file and return its lines as a list.

    Each returned string keeps its trailing newline, if it had one.

    Args:
        filePath: Path of the file to read.

    Returns:
        The file's lines in order; an empty list when the file is empty.
    """
    with open(filePath, 'r', encoding='utf-8') as source:
        contents = list(source)
    return contents

# Exclusive lower bounds of the Table 10 score bins, highest bin first.
# A score lands in the first bin whose bound it exceeds; scores that exceed
# none of them (<= 0.60) land in one extra, final bin.
_TABLE10_BIN_FLOORS = (0.95, 0.90, 0.85, 0.80, 0.75, 0.70, 0.60)


def _countTable10Bins(scores, echoLowBin=False):
    """Histogram a {key: score} mapping into the eight Table 10 bins.

    Args:
        scores: Mapping from item key to similarity score.
        echoLowBin: When True, print the key and score of every item that
            falls in the (0.60, 0.70] bin.

    Returns:
        A list of eight counts ordered from the highest bin to the lowest:
        >0.95, (0.90, 0.95], (0.85, 0.90], (0.80, 0.85], (0.75, 0.80],
        (0.70, 0.75], (0.60, 0.70], <=0.60.
    """
    lastBin = len(_TABLE10_BIN_FLOORS)
    lowBin = lastBin - 1
    counts = [0] * (lastBin + 1)
    for keyText, score in scores.items():
        binIndex = next(
            (i for i, floor in enumerate(_TABLE10_BIN_FLOORS) if score > floor),
            lastBin,
        )
        counts[binIndex] += 1
        if echoLowBin and binIndex == lowBin:
            print(keyText)
            print(score)
    return counts


def analysisTable10Result(Texts, allDatas):
    """Split scored items by ground-truth label and histogram each group.

    Items of ``Texts`` whose key is missing from ``allDatas`` are ignored.
    An item is "negative" when its label equals 0 and "positive" otherwise.

    Every labelled item is counted in exactly one bin. Scores in
    [0.91, 0.95] used to be tallied in a separate counter that was never
    added to the result, so they were missing from the (0.90, 0.95] column;
    they are now included there.

    Negative items in the (0.60, 0.70] bin are printed (key, then score)
    for manual inspection.

    Args:
        Texts: Mapping from item key to similarity score.
        allDatas: Mapping from item key to label.

    Returns:
        ``{'positive': counts, 'negative': counts}`` where each ``counts``
        is a list of eight ints ordered from the highest bin to the lowest:
        >0.95, (0.90, 0.95], (0.85, 0.90], (0.80, 0.85], (0.75, 0.80],
        (0.70, 0.75], (0.60, 0.70], <=0.60.
    """
    positiveScores = {}
    negativeScores = {}
    for keyText, score in Texts.items():
        if keyText not in allDatas:
            continue
        if allDatas[keyText] == 0:
            negativeScores[keyText] = score
        else:
            positiveScores[keyText] = score

    results = {}
    results['positive'] = _countTable10Bins(positiveScores)
    results['negative'] = _countTable10Bins(negativeScores, echoLowBin=True)
    return results

def _loadTable10Scores(resultDirs):
    """Collect {key: score} from every result file in the given directories.

    Each line is split on a run of 14 asterisks. A line is kept when field 1
    has more than two space-separated tokens and field 3 (with any stray
    '*' removed) parses to an integer greater than 62; its score is field 4.
    The key is field 0 and field 1 joined by 22 asterisks. Directories are
    processed in the order given, so a later entry overwrites an earlier one
    with the same key.

    Args:
        resultDirs: Directory paths, each ending with a path separator.

    Returns:
        Mapping from key to float score.
    """
    scores = {}
    for resultDir in resultDirs:
        for filename in os.listdir(resultDir):
            for line in readFile(resultDir + filename):
                items = line.split('**************')
                if len(items[1].split(' ')) <= 2:
                    continue
                if int(items[3].replace('*', '')) > 62:
                    key = items[0] + '**********************' + items[1]
                    scores[key] = float(items[4])
    return scores


def forTable10():
    """Print Table 10: score histograms per similarity algorithm and label.

    Labels come from '../trainDatas.txt' (a JSON list of objects with
    'input' and 'label'). Scores are read from 'result/msm/',
    'result/sbert/' and 'result/2023/' + 'result/2019/', then histogrammed
    by analysisTable10Result and printed lowest bin first.
    """
    with open('../trainDatas.txt', 'r', encoding='utf-8') as f:
        entries = json.load(f)
    allDatas = {entry['input']: entry['label'] for entry in entries}

    # Memorization
    msmResults = analysisTable10Result(
        _loadTable10Scores(['result/msm/']), allDatas)

    # SentenceBERT
    sbertResults = analysisTable10Result(
        _loadTable10Scores(['result/sbert/']), allDatas)

    # ROUGE-L (2023 first, then 2019, as before)
    rougeLResults = analysisTable10Result(
        _loadTable10Scores(['result/2023/', 'result/2019/']), allDatas)

    rule = '------------------------------------------------------------------------------------------------------------'

    # analysisTable10Result orders bins highest-first; the table prints them
    # lowest-first, hence the reversal of each row.
    print('TABLE 10: Comparison of similarity algorithms across LLMs in the sampled groundtruth dataset.')
    print(rule)
    print(' Similarity Algorithm   Dataset    <=0.6  0.6∼0.7  0.7∼0.75  0.75∼0.8  0.8∼0.85  0.85∼0.9  0.9∼0.95  0.95∼1')
    print(rule)
    print("                        negative   %d   %d        %d         %d         %d         %d         %d         %d" % tuple(reversed(msmResults['negative'])))
    print(' Memorization')
    print("                        positive   %d   %d       %d         %d         %d         %d         %d         %d" % tuple(reversed(msmResults['positive'])))
    print(rule)
    print("                        negative   %d    %d      %d       %d       %d       %d       %d         %d" % tuple(reversed(sbertResults['negative'])))
    print(' SentenceBERT')
    print("                        positive   %d    %d      %d       %d       %d       %d       %d         %d" % tuple(reversed(sbertResults['positive'])))
    print(rule)
    print("                        negative   %d    %d       %d         %d         %d         %d        %d          %d" % tuple(reversed(rougeLResults['negative'])))
    print(' ROUGE-L')
    print("                        positive   %d    %d       %d        %d        %d        %d       %d          %d" % tuple(reversed(rougeLResults['positive'])))
    print(rule)


def _loadTable11Scores(resultDirs):
    """Collect {key: score} from every result file in the given directories.

    Each line is split on a run of 14 asterisks. A line is kept when field 1
    has more than two space-separated tokens and field 3 (with any stray
    '*' removed) parses to an integer greater than 62; its score is field 4.
    The key is field 0 and field 1 joined by 22 asterisks. Directories are
    processed in the order given, so a later entry overwrites an earlier one
    with the same key.

    Args:
        resultDirs: Directory paths, each ending with a path separator.

    Returns:
        Mapping from key to float score.
    """
    scores = {}
    for resultDir in resultDirs:
        for filename in os.listdir(resultDir):
            for line in readFile(resultDir + filename):
                items = line.split('**************')
                if len(items[1].split(' ')) <= 2:
                    continue
                if int(items[3].replace('*', '')) > 62:
                    key = items[0] + '**********************' + items[1]
                    scores[key] = float(items[4])
    return scores


def forTable11(threshold=0.83):
    """Print Table 11: precision, recall and accuracy of the score threshold.

    Labels come from '../testDatas.txt' (a JSON list of objects with 'input'
    and 'label'); scores come from 'result/2023/' then 'result/2019/'. An
    item is predicted positive when its score is >= ``threshold``. Items
    without a label are ignored.

    Label-0 items scoring at or above the threshold are false positives.
    They used to be printed and then left out of the evaluation, which
    forced precision to 1.0 and inflated accuracy. They are still printed
    for inspection but are now counted as (label 0, prediction 1).

    Args:
        threshold: Minimum score for a positive prediction.
    """
    with open('../testDatas.txt', 'r', encoding='utf-8') as f:
        entries = json.load(f)
    allDatas = {entry['input']: entry['label'] for entry in entries}

    texts = _loadTable11Scores(['result/2023/', 'result/2019/'])

    y = []
    pred = []
    for keyText, score in texts.items():
        if keyText not in allDatas:
            continue
        label = 0 if allDatas[keyText] == 0 else 1
        predicted = 1 if score >= threshold else 0
        if label == 0 and predicted == 1:
            # False positive: echo it for manual review, but keep it in the
            # metrics below.
            print(keyText)
        y.append(label)
        pred.append(predicted)

    acc = accuracy_score(y, pred)
    recall = recall_score(y, pred)
    precision = precision_score(y, pred)
    print('TABLE 11: Performance Comparison of Memorization Analysis Methods')
    print('----------------------------------------------')
    print('Method        Precision    Recall    Accuracy')
    print('----------------------------------------------')
    print('Our Method    %.4f       %.4f    %.4f'%(precision, recall, acc))
    print('----------------------------------------------')
def getWebsiteCategoryResult(resultDir, threshold):
    """Count qualifying result lines per website category.

    Every file in ``resultDir`` is read line by line. A line is split on a
    run of 14 asterisks and qualifies when field 3 (with any '*' removed)
    parses to an integer greater than 62 and field 4 parses to a float that
    is >= ``threshold``. A qualifying line is credited to the category of
    the first known site name found in the file name; lines from files
    matching no known site are not counted.

    When ``threshold`` is below 0.9, each qualifying line is also printed,
    prefixed with its file name.

    Args:
        resultDir: Directory path ending with a path separator.
        threshold: Minimum score for a line to qualify.

    Returns:
        Dict with one count per category ('business', 'education', 'food',
        'movies', 'music', 'news', 'fashion', 'television', 'video') plus
        'total', the sum of those counts.
    """
    # Checked in this order; the first site found in the file name wins.
    siteCategories = (
        ('sportsengine.com', 'business'),
        ('pennlive.com', 'education'),
        ('silive.com', 'education'),
        ('epicurious.com', 'food'),
        ('bonappetit.com', 'food'),
        ('gqindia.com', 'movies'),
        ('pitchfork.com', 'music'),
        ('nj.com', 'news'),
        ('al.com', 'news'),
        ('vogue.co.uk', 'fashion'),
        ('gq-magazine.co.uk', 'fashion'),
        ('cleveland.com', 'television'),
        ('gulflive.com', 'video'),
        ('syracuse.com', 'video'),
    )
    categoryNames = ('business', 'education', 'food', 'movies', 'music',
                     'news', 'fashion', 'television', 'video')

    filenames = os.listdir(resultDir)
    allDatas = dict.fromkeys(categoryNames, 0)

    for filename in filenames:
        for rawLine in readfile(resultDir + filename):
            line = rawLine.replace('\n', '')
            items = line.split('**************')
            qualifies = (int(items[3].replace('*', '')) > 62
                         and float(items[4]) >= threshold)
            if not qualifies:
                continue
            if threshold < 0.9:
                print(filename + '********' + line)
            for site, category in siteCategories:
                if site in filename:
                    allDatas[category] += 1
                    break

    allDatas['total'] = sum(allDatas[name] for name in categoryNames)
    return allDatas



def forTable8():
    """Print Table 8: per-category counts for three models at two thresholds.

    For each of the GPT, Llama and Gemma result directories,
    getWebsiteCategoryResult is run at threshold 0.9 and then at 0.83. The
    table shows, per category, the >=0.83 and >=0.90 counts for each model,
    followed by the overall totals.
    """
    # Each directory is scanned at 0.9 first and 0.83 second, because the
    # 0.83 scan prints its matching lines and that output order is kept.
    columns = []
    for resultDir in ('result/disallow_gpt_2019/',
                      'result/disallow_llama_2023/',
                      'result/disallow_gemma_2023/'):
        counts09 = getWebsiteCategoryResult(resultDir, 0.9)
        counts83 = getWebsiteCategoryResult(resultDir, 0.83)
        # Column order within a model is >=0.83, then >=0.90.
        columns.append(counts83)
        columns.append(counts09)

    rule = '------------------------------------------------------------------------'
    categoryRows = (
        ('Business & Finance        %d       %d        %d       %d       %d       %d', 'business'),
        ('Education                 %d       %d        %d       %d       %d       %d', 'education'),
        ('Food & Drink              %d       %d        %d       %d       %d       %d', 'food'),
        ('Movies                    %d       %d        %d       %d       %d       %d', 'movies'),
        ('Music and Audio           %d       %d        %d       %d       %d       %d', 'music'),
        ('News and Politics         %d       %d        %d       %d       %d       %d', 'news'),
        ('Style & Fashion           %d       %d        %d       %d       %d       %d', 'fashion'),
        ('Television                %d       %d        %d       %d       %d       %d', 'television'),
        ('Video Gaming              %d       %d        %d       %d       %d       %d', 'video'),
    )

    print('TABLE 8: Comparison of ROUGE-L similarity scores across LLMs in disallowed categories')
    print(rule)
    print('                        GPT-2-XL(1.5B)   Llama-3.1-8B    Gemma-2-9B')
    print('Category                --------------   --------------  ---------------')
    print('                        >=0.83  >=0.90   >=0.83  >=0.90  >=0.83  >=0.90')
    print(rule)
    for rowFormat, key in categoryRows:
        print(rowFormat % tuple(column[key] for column in columns))
    print(rule)
    print('Overall                   %d      %d        %d      %d       %d      %d' % tuple(column['total'] for column in columns))
    print(rule)



def getSmallForGPT2019():
    """Copy the source files behind short GPT-2019 result lines.

    For every line in 'result/disallow_gpt_2019/' whose field 3 (fields are
    separated by 14 asterisks, stray '*' removed) is an integer <= 62, the
    line is printed and the website name is taken from the fifth
    '_'-separated part of the result file name. Every file under
    '../finalDataForGPT/<website>/' whose second line contains field 0 and
    whose third line contains field 1 is then written to
    '../finalDataForSmall/small62/gpt/<website>_<file name>'.

    The previous version shadowed the builtin ``dir``, rebound ``lines`` and
    ``items`` inside the loop that was iterating ``lines``, and reused ``f``
    for both a file name and a file handle. Each value now has its own name;
    the files written are the same.
    """
    resultDir = 'result/disallow_gpt_2019/'
    for resultName in os.listdir(resultDir):
        for rawLine in readfile(resultDir + resultName):
            line = rawLine.replace('\n', '')
            items = line.split('**************')
            if int(items[3].replace('*', '')) > 62:
                continue

            print(resultName + '********' + line)
            webname = resultName.split('_')[4]
            sourceDir = '../finalDataForGPT/' + webname + '/'
            for sourceName in os.listdir(sourceDir):
                sourceLines = readfile(sourceDir + sourceName)
                if items[0] in sourceLines[1] and items[1] in sourceLines[2]:
                    targetPath = '../finalDataForSmall/small62/gpt/' + webname + '_' + sourceName
                    with open(targetPath, 'w', encoding='utf-8') as target:
                        target.writelines(sourceLines)

def rewriteResult():
    """Patch GPT-2019 result files with recomputed records.

    Replacement records are read from
    '../finalDataForSmall/ccnews_gpt_xl_result_small.txt'. Fields are
    separated by 14 asterisks and a record is used when field 3 is an
    integer >= 62. Every file in 'result/disallow_gpt_2019/' is then
    rewritten in place: a line whose first two fields match a replacement is
    swapped for it (each replacement is used at most once), and all other
    lines are written back unchanged.

    Two defects are fixed relative to the previous version:
      * The last field still carried the newline from the input line and a
        second one was appended, so every replaced record was followed by a
        blank line that the result parsers cannot split. The record now
        ends with exactly one newline.
      * Field 3 is parsed with stray '*' removed, as every other reader of
        these files does, instead of raising ValueError on such a field.
    """
    separator = '**************'

    datas = {}
    for line in readFile('../finalDataForSmall/ccnews_gpt_xl_result_small.txt'):
        items = line.split(separator)
        # NOTE(review): the readers of these files keep records with
        # field 3 > 62, while this keeps >= 62 -- confirm which is intended.
        if int(items[3].replace('*', '')) >= 62:
            keyText = items[0] + separator + items[1]
            record = separator.join(
                [keyText, items[2], items[3], items[4].rstrip('\n')])
            datas[keyText] = record + '\n'

    dirPath = 'result/disallow_gpt_2019/'
    for fileName in os.listdir(dirPath):
        filePath = dirPath + fileName
        newlines = []
        for line in readFile(filePath):
            items = line.split(separator)
            keyText = items[0] + separator + items[1]
            # pop() consumes the replacement so it is applied only once.
            newlines.append(datas.pop(keyText, line))
        with open(filePath, 'w', encoding='utf-8') as f:
            f.writelines(newlines)


# One-off maintenance helpers that write files; left disabled.
#getSmallForGPT2019()
#rewriteResult()
# Print Table 8, Table 10 and Table 11, in that order.
forTable8()
forTable10()
forTable11()


