In [1]:
import requests
from bs4 import BeautifulSoup as Bs
import random
import os
# from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
from urllib.parse import urlparse, parse_qs
import re
from faker import Faker
import pandas as pd
import ast

from seleniumwire import webdriver  # Import Selenium Wire

import openreview

from PyPDF2 import PdfReader, PdfWriter
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.colors import Color, red, blue, green  # Import colors
import io
from io import BytesIO

from huggingface_hub import InferenceApi
from openai import OpenAI

from google import genai

# Extract Review

In [2]:
def extract_output_text(file_path):
    """Return the text that follows the first "OUTPUT:" marker line of a file.

    Args:
        file_path: Path of the text file to read (opened in text mode with the
            platform default encoding).

    Returns:
        Every line after the first line containing "OUTPUT:", each stripped of
        surrounding whitespace and joined with newlines. The marker line
        itself is dropped, including any text that shares the line with the
        marker. An empty string is returned when the marker never appears or
        nothing follows it.
    """
    with open(file_path, 'r') as handle:
        # Consume lines up to and including the first one carrying the marker.
        for header_line in handle:
            if "OUTPUT:" in header_line:
                break
        # The file iterator now sits just past the marker (or is exhausted if
        # the marker was never found), so the remainder is the wanted body.
        body = [body_line.strip() for body_line in handle]
    return "\n".join(body)

# Paraphrase Review

In [3]:
def paraphrase_review(review, CHATGPT, GEMINI, prompt, client, client_gemini):
    """Paraphrase a review with whichever backend the flags select.

    Args:
        review: Review text to paraphrase.
        CHATGPT: When truthy, the review is sent through
            ``paraphrase_review_chatgpt`` using ``client``.
        GEMINI: Accepted for call-site symmetry but not consulted here; any
            falsy ``CHATGPT`` routes to Gemini regardless of this value.
        prompt: Instruction text that the backends place before the review.
        client: Client object handed to the ChatGPT backend.
        client_gemini: Client object handed to the Gemini backend.

    Returns:
        Whatever the selected backend function returns.
    """
    if not CHATGPT:
        return paraphrase_review_gemini(client_gemini, prompt, review)
    return paraphrase_review_chatgpt(client, prompt, review)

### ChatGPT

In [4]:
def paraphrase_review_chatgpt(client, prompt, review):
    """Ask the "gpt-4o" chat model to paraphrase a review.

    Args:
        client: Object exposing ``chat.completions.create(model=..., messages=...)``.
        prompt: Instruction text; the review is appended directly to it.
        review: Review text to paraphrase.

    Returns:
        The message content of the first returned choice, with leading and
        trailing whitespace removed.
    """
    create_completion = client.chat.completions.create
    conversation = [
        # Fixed system role describing the task.
        {"role": "system", "content": "You are an assistant that paraphrases reviews of papers."},
        # The user turn is the instruction immediately followed by the review.
        {"role": "user", "content": prompt + review},
    ]
    completion = create_completion(model="gpt-4o", messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content.strip()

### Gemini

In [5]:
def paraphrase_review_gemini(client_gemini, prompt, review):
    """Ask the 'gemini-exp-1206' model to paraphrase a review.

    Args:
        client_gemini: Object exposing ``models.generate_content(model=..., contents=...)``.
        prompt: Instruction text; the review is appended directly to it.
        review: Review text to paraphrase.

    Returns:
        The ``text`` attribute of the generation result, unmodified.
    """
    generate = client_gemini.models.generate_content
    request_text = prompt + review
    result = generate(model='gemini-exp-1206', contents=request_text)
    return result.text

# Evaluate Presence of Watermark

In [6]:
def watermark_present(review, watermark, tolerance=500):
    """Tell whether the watermark occurs near the beginning of the review.

    Args:
        review: Text to inspect.
        watermark: Exact substring to look for.
        tolerance: Largest start index at which a match still counts; the
            inspected window is the first ``tolerance + len(watermark)``
            characters of ``review``.

    Returns:
        True if ``watermark`` lies entirely inside that window, else False.
    """
    # The end bound is slice-style, so a match must fit wholly within the
    # window; a watermark starting later than `tolerance` is not reported.
    window_end = tolerance + len(watermark)
    return review.find(watermark, 0, window_end) != -1

In [7]:
def watermark_present_tech_term(review, watermark):
    """Tell whether the watermark occurs anywhere in the review.

    Args:
        review: Text to inspect.
        watermark: Exact substring to look for.

    Returns:
        True if ``watermark`` is contained in ``review``, else False. Unlike
        ``watermark_present`` there is no positional limit on the match.
    """
    if watermark in review:
        return True
    return False

In [8]:
def watermark_present_fake_ref(review, watermark):
    """Tell whether the watermark occurs anywhere in the review.

    Args:
        review: Text to inspect.
        watermark: Exact substring to look for.

    Returns:
        True if ``watermark`` is contained in ``review``, else False. The
        whole review is searched; position does not matter.
    """
    is_found = watermark in review
    return is_found

# Run

In [9]:
# Running tallies for the experiment; the run loop updates them in place.
# num_cases   - paraphrased reviews that were evaluated
# num_correct - evaluated reviews in which the watermark was still found
num_cases = num_correct = 0
# Per-paper results, keyed by paper id.
stats = dict()

In [None]:
# --- Credentials -------------------------------------------------------------
# API keys are read from the environment so they never have to be typed into
# (and saved with) the notebook. An unset variable falls back to the empty
# placeholder string.
API_TOKEN = ""  # NOTE(review): not referenced by any cell visible here
MODEL = "mistralai/Mistral-Nemo-Instruct-2407" # works in free tier, supports large context, included banana but unsubtly. with second banana text it didn't include.

openai_api_key = os.environ.get('OPENAI_API_KEY', '')
client = OpenAI(
    api_key=openai_api_key,
)

gemini_api_key = os.environ.get('GEMINI_API_KEY') or os.environ.get('GOOGLE_API_KEY', '')
client_gemini = genai.Client(api_key=gemini_api_key)


# --- Input / output locations ------------------------------------------------
# fetch_dir: directory holding the source run (its stats.txt and one
#            <paper id>.txt per review). Fill in before running.
# file_write_dir: directory that receives this run's output. Fill in before running.
fetch_dir = ""
file_write_dir = "" #pfref4o means paraphrase fake ref 4o
# Template only: the run loop splices the paper id in front of the '.txt' suffix.
file_to_write = file_write_dir + '/.txt'
file_to_write_acc = file_write_dir + '/acc.txt'
file_to_write_stats = file_write_dir + '/stats.txt'

# --- Backend selection and pacing ---------------------------------------------
CHATGPT = True # True -> paraphrase with the OpenAI client (gpt-4o); False -> paraphrase with Gemini
GEMINI = False # passed to paraphrase_review, which currently does not consult it
WAIT = 5 # the run loop pauses whenever its processed-review counter is a multiple of WAIT

prompt = 'Below is a review of a paper from a scientific conference. Paraphrase the review.\n\n'


In [11]:
# Make sure the output directory exists before any result file is written.
path2 = file_write_dir

# exist_ok=True replaces the separate exists() check: there is no window in
# which another process can create the directory between check and creation,
# and an already-present directory is simply left alone.
os.makedirs(path2, exist_ok=True)

In [12]:
# Load the per-paper results of the source run. stats.txt is expected to hold
# the text 'Stats: ' followed by a dict literal, the same layout this cell
# writes to file_to_write_stats below.
with open(fetch_dir + '/stats.txt', "r") as stats_file:
    stats_raw = stats_file.read()
stats_get = ast.literal_eval(stats_raw[len('Stats: '):])

id_values = stats_get.keys()

j = 0  # number of reviews processed so far; drives the periodic pause
for id_value in id_values:
    try:
        # Only reviews flagged correct == 1 in the source run are paraphrased
        # (presumably: the watermark was detected in the original review).
        if stats_get[id_value]['correct'] != 1:
            continue

        review = extract_output_text(fetch_dir + '/' + id_value + '.txt')
        start = stats_get[id_value]['wm']
        res = paraphrase_review(review, CHATGPT, GEMINI, prompt, client, client_gemini)

        # Build the result locally and publish it to `stats` only once it is
        # complete, so a failure part-way through this iteration cannot leave
        # a half-filled entry that a later iteration would write to disk.
        entry = {'pdf_length': stats_get[id_value]['pdf_length']}

        # file_to_write ends in '.txt'; splice the paper id in front of it.
        file_to_write_id = file_to_write[:-4] + id_value + file_to_write[-4:]

        with open(file_to_write_id, "a") as out_file:
            out_file.write("PROMPT: "+prompt)
            out_file.write("\nWATERMARK: "+start)
            out_file.write("\nPaper ID: "+id_value)
            out_file.write("\nPARAPHRASED OUTPUT:\n")
            out_file.write(res+"\n\n\n")

        if watermark_present(res, start):
            entry['correct'] = 1
            num_correct += 1
        else:
            entry['correct'] = 0
        num_cases += 1

        entry['wm'] = start
        stats[id_value] = entry

        # Rewrite the summaries after every review so an interrupted run
        # still leaves up-to-date totals on disk.
        with open(file_to_write_acc, "w") as acc_file:
            acc_file.write('NumCorrect: '+str(num_correct))
            acc_file.write('\nNumCases: '+str(num_cases))

        with open(file_to_write_stats, "w") as stats_out:
            stats_out.write('Stats: '+str(stats))

        # Pause for 20 seconds every WAIT processed reviews (including the
        # first) to stay under the API rate limit.
        if j%WAIT == 0:
            time.sleep(20)

        j+=1
    except Exception as exc:
        # Best-effort batch: one failing paper must not abort the run, but the
        # failure is reported instead of silently dropped. Catching Exception
        # (not a bare except) keeps Ctrl-C / kernel interrupt working.
        print(f"Skipping {id_value}: {type(exc).__name__}: {exc}")