In [None]:
from google import genai
from google.genai import types
import random
import subprocess
import pathlib
from dotenv import load_dotenv
import os
import xmlrpc.client

In [None]:
# Load variables from the local .env file into the process environment.
load_dotenv()

# The Gemini API key is read from the environment (API_KEY) rather than
# being written into the notebook.
api_key = os.environ.get("API_KEY")

# Client object used by the generation cells for every model call.
client = genai.Client(api_key=api_key)

In [None]:




# Broad subject areas; one is drawn at random for every generated paper.
topics = ["technology", "science", "history", "art", "literature", "politics", "economics", "philosophy"]

# System prompt sent with every generation request.
sys_instruction = "All prompts should be answered with an in depth paper with an introduction, middle and end structured into chapters that is about 4 pages, written in markdown and include sources. Dont answer anything with less than 4 pages. Dont write anything other than the paper. Dont write ```markdown etc."

plagiarism_score = 0
version = 1  # every freshly generated paper starts as version 1
file_path = ""
topic = ""

# The papers are written to outputs/; create it so the cell works on a fresh checkout.
pathlib.Path("outputs").mkdir(parents=True, exist_ok=True)

# NOTE: range(1, 30) yields paper numbers 1..29, i.e. 29 papers.
for paper_number in range(1, 30):
    topic = random.choice(topics)
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(system_instruction=sys_instruction),
        contents=[f"Write a paper about {topic}. The specific topic is up to you."],
    )

    # Path stem shared by the Markdown source and the rendered PDF.
    file_path = f"outputs/{paper_number}_{topic}_version{version}"

    # Nothing to write when the model returned no text; skip instead of
    # converting an empty or stale file.
    if not response.text:
        print(f"No text returned for paper {paper_number} ({topic}); skipping")
        continue

    try:
        # "w" (not "a") so re-running the cell replaces a paper instead of
        # appending a second one to the same file; UTF-8 because pandoc
        # expects UTF-8 input regardless of the platform default encoding.
        with open(f"{file_path}.md", "w", encoding="utf-8") as f:
            f.write(response.text)
        print('Written to file')
    except OSError as e:
        print(f"Error writing to file: {e}")
        # Do not run pandoc for a paper whose Markdown was not written.
        continue

    # Argument list instead of a shell string: no quoting pitfalls (the old
    # command string was missing the closing quote after ".pdf") and no shell
    # interpretation of the path. check=True still raises on pandoc failure.
    subprocess.run(
        ["pandoc", f"{file_path}.md", "--pdf-engine=xelatex", "-o", f"{file_path}.pdf"],
        check=True,
    )


In [20]:
# Load variables from the local .env file into the process environment.
load_dotenv()

# iThenticate account credentials are read from the environment.
username = os.getenv("ITHENTICATE_USERNAME")
password = os.getenv("ITHENTICATE_PASSWORD")
credentials = {'username': username, 'password': password}

# XML-RPC proxy for the iThenticate API endpoint.
url = "https://api.ithenticate.com/rpc"
server = xmlrpc.client.ServerProxy(url)

# Log in; the session id from the response is passed along with the later
# API calls in this notebook.
response = server.login(credentials)
sid = response['sid']
sid_dict = {'sid': sid}
print(response['api_status'])

200


In [None]:
# Look up the id of the iThenticate folder named 'Trym Master Thesis'.
response = server.folder.list(sid_dict)

# The first folder with a matching name wins; folder_id stays None when the
# listing contains no folder with that name.
folder_id = next(
    (
        entry.get('id')
        for entry in response.get('folders', [])
        if entry.get('name') == 'Trym Master Thesis'
    ),
    None,
)

folder = {'folder': folder_id}
print(f"Folder ID for 'Trym Master Thesis': {folder}")

In [None]:
import os

# Directory that holds the generated PDFs.
folder_path = 'outputs'

# One upload record per PDF found in the folder.
documents = []

for filename in os.listdir(folder_path):
    # Only PDFs are submitted; anything else in the folder is skipped.
    if not filename.endswith('.pdf'):
        continue

    # The document title is the file name with its extension stripped.
    title, _extension = os.path.splitext(filename)

    # Wrap the raw bytes in xmlrpc.client.Binary so they can be sent as part
    # of the XML-RPC request.
    pdf_location = os.path.join(folder_path, filename)
    with open(pdf_location, 'rb') as pdf_file:
        pdf_payload = xmlrpc.client.Binary(pdf_file.read())

    documents.append({
        'title': title,
        'author_first': 'Google',
        'author_last': 'Gemini',
        'filename': filename,
        'upload': pdf_payload,
    })

# Submit every collected document to the target folder in a single request.
arguments = {'sid': sid, 'folder': folder_id, 'submit_to': 1, 'uploads': documents}
response = server.document.add(arguments)
print(response['api_status'])

In [21]:
# Fetch the folder contents and collect the id of every document in it.
arguments = {'sid': sid, 'id': folder_id}
response = server.folder.get(arguments)

document_ids = []
for doc in response['documents']:
    document_ids.append(doc['id'])

print(document_ids)


[115368804, 115368795, 115368786, 115368780, 115368773, 115368765, 115367905, 115367824, 115367830, 115367834, 115367837, 115367825, 115367833, 115367835, 115367817, 115367826, 115367829, 115367836, 115367828, 115367832, 115367813, 115367819, 115367814, 115367827, 115367821]


In [None]:
import time
import re

# --- Revision settings -------------------------------------------------------
PLAGIARISM_THRESHOLD = 10   # percent; a score at or above this triggers a rewrite
MAX_REVISIONS = 9           # upper bound on rewrites attempted per document
SCORE_POLL_SECONDS = 20     # wait between checks for a finished similarity report
SCORE_POLL_ATTEMPTS = 15    # give up after this many checks (about 5 minutes)

# Titles produced by this notebook look like "<number>_<topic>_version<k>".
TITLE_PATTERN = re.compile(r'^(\d+)_(.+?)_version(\d+)$')

# System prompt for rewrites; constant, so it is defined once outside the loop.
revision_sys_instruction = "All prompts should be answered with an in depth paper with an introduction, middle and end structured into chapters that is about 4 pages, written in markdown and include sources. Dont answer anything with less than 4 pages. Dont write anything other than the paper. The paper should be about the same topic as the previous one. With as few changes as possible."


def parse_title(document_title):
    """Split a document title into (paper_number, topic, version).

    Returns None when the title does not follow the
    "<number>_<topic>_version<k>" naming scheme. paper_number and topic are
    returned as strings, version as an int (multi-digit versions included).
    """
    match = TITLE_PATTERN.match(document_title)
    if match is None:
        return None
    return match.group(1), match.group(2), int(match.group(3))


def fetch_report(document_id):
    """Return (score, title) for a document, waiting until a score is present.

    Polls server.document.get every SCORE_POLL_SECONDS seconds, at most
    SCORE_POLL_ATTEMPTS times. Raises TimeoutError if no score shows up.
    """
    for _ in range(SCORE_POLL_ATTEMPTS):
        response = server.document.get(dict(sid=sid, id=document_id))
        document = response['documents'][0]
        # A freshly uploaded document may not carry a score yet.
        parts = document.get('parts') or []
        if parts and parts[0].get('score') is not None:
            return parts[0]['score'], document['title']
        time.sleep(SCORE_POLL_SECONDS)
    raise TimeoutError(f"No plagiarism score available for document {document_id}")


def rewrite_paper(source_title, target_title, plagiarism_score):
    """Ask Gemini to rewrite outputs/<source_title>.pdf and render the result.

    Writes outputs/<target_title>.md and converts it to
    outputs/<target_title>.pdf with pandoc. Raises on an empty model
    response, on file errors, and on pandoc failure.
    """
    pdf_bytes = pathlib.Path(f"outputs/{source_title}.pdf").read_bytes()

    prompt = f"You got got cought plagiarizing. The plagirism score was {plagiarism_score}%. Write the paper again to fix the plagiarism."
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(system_instruction=revision_sys_instruction),
        contents=[
            types.Part.from_bytes(data=pdf_bytes, mime_type='application/pdf'),
            prompt,
        ],
    )
    if not response.text:
        raise ValueError("the model returned no text")

    file_path = f"outputs/{target_title}"
    # "w" so a re-run replaces the revision instead of appending to it.
    with open(f"{file_path}.md", "w", encoding="utf-8") as f:
        f.write(response.text)

    # Run pandoc only after the file is closed, so it sees the full text.
    # An argument list avoids the shell and its quoting pitfalls.
    subprocess.run(
        ["pandoc", f"{file_path}.md", "--pdf-engine=xelatex", "-o", f"{file_path}.pdf"],
        check=True,
    )
    print(f"written to {file_path} to file")


def upload_pdf(title):
    """Upload outputs/<title>.pdf to the iThenticate folder and return its document id."""
    pdf_path = pathlib.Path("outputs") / f"{title}.pdf"
    document_data = {
        'title': title,
        'author_first': 'Google',
        'author_last': 'Gemini',
        # File name of the document actually being uploaded.
        'filename': pdf_path.name,
        'upload': xmlrpc.client.Binary(pdf_path.read_bytes()),
    }
    arguments = dict(sid=sid, folder=folder_id, submit_to=1, uploads=[document_data])
    response = server.document.add(arguments)
    return response['uploaded'][0]['id']


# NOTE(review): document_ids holds every document in the folder, so an older
# version that already has a successor is revised again if its score is high.
for document_id in document_ids:
    plagiarism_score, document_title = fetch_report(document_id)

    parsed = parse_title(document_title)
    if parsed is None:
        print(f"Skipping {document_title!r}: title does not match <number>_<topic>_version<k>")
        continue
    paper_number, topic, paper_version = parsed

    print(f"Document: {document_title}")
    print(f"{paper_number}_{topic}_version{paper_version}")

    # Rewrite until the score drops below the threshold or the cap is reached.
    # The version counter continues from the version parsed out of the title.
    for _ in range(MAX_REVISIONS):
        if plagiarism_score < PLAGIARISM_THRESHOLD:
            break
        print(f"{document_title} has plagiarism score: {plagiarism_score}%")

        next_title = f"{paper_number}_{topic}_version{paper_version + 1}"
        try:
            rewrite_paper(document_title, next_title, plagiarism_score)
            new_document_id = upload_pdf(next_title)
            print('Document uploaded to iThenticate')
            # Give the service a head start before the first score check.
            time.sleep(SCORE_POLL_SECONDS)
            plagiarism_score, document_title = fetch_report(new_document_id)
        except Exception as e:
            # Stop revising this paper: retrying with a stale score and title
            # would only regenerate the same source document again.
            print(f"Revision {next_title} failed: {e}")
            break
        paper_version += 1