In [None]:
from google import genai
from google.genai import types
import random
import subprocess
import pathlib
from dotenv import load_dotenv
import os

# Load API_KEY (and any other settings) from a local .env file into the environment.
load_dotenv()

api_key = os.getenv("API_KEY")

client = genai.Client(api_key=api_key)

# Broad subject areas; one is picked at random for every generated paper.
topics = ["technology", "science", "history", "art", "literature", "politics", "economics", "philosophy"]

sys_instruction="All prompts should be answered with an in depth paper with an introduction, middle and end structured into chapters that is about 4 pages, written in markdown and include sources. Dont answer anything with less than 4 pages. Dont write anything other than the paper."

# Notebook-level state. `file_path` (path without extension of the newest
# paper) and `topic` are read again by the plagiarism rewrite loop further down.
plagiarism_score = 0
version = 1
file_path = ""
topic = ""

# The Markdown/PDF files are written below this directory; create it up front
# so a fresh checkout does not fail on the first write.
pathlib.Path("outputs").mkdir(parents=True, exist_ok=True)

for paper_number in range(1, 2):
    topic = random.choice(topics)
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(system_instruction=sys_instruction),
        contents=[f"Write a paper about {topic}. The specific topic is up to you."],
    )

    # Guard against a response that carries no text; there is nothing to
    # write or convert in that case.
    if not response.text:
        print("Error writing to file")
        continue

    paper_path = f"outputs/{paper_number}_{topic}_version{version}"
    try:
        # "w" + UTF-8: re-running the cell replaces the paper instead of
        # appending a second one, and non-ASCII characters are written the
        # same way on every platform.
        with open(f"{paper_path}.md", "w", encoding="utf-8") as f:
            f.write(response.text)
    except OSError as exc:
        # Skip the PDF conversion: without the Markdown file pandoc has no input.
        print(f"Error writing to file: {exc}")
        continue

    # Only publish the path once the Markdown file really exists.
    file_path = paper_path
    print('Written to file')

    # Argument list instead of a shell string: no quoting pitfalls (the old
    # command string had an unbalanced double quote) and no shell involved.
    subprocess.run(
        ["pandoc", f"{file_path}.md", "--pdf-engine=xelatex", "-o", f"{file_path}.pdf"],
        check=True,
    )


In [None]:
import xmlrpc.client

# Load the .env file again so this cell also works when it is run on its own.
load_dotenv()

username = os.getenv("ITHENTICATE_USERNAME")
password = os.getenv("ITHENTICATE_PASSWORD")

# Fail early with a readable message: a missing variable would otherwise be
# sent as None, which xmlrpc.client refuses to marshal by default.
if not username or not password:
    raise RuntimeError(
        "ITHENTICATE_USERNAME and ITHENTICATE_PASSWORD must be set in the environment or the .env file"
    )

url = "https://api.ithenticate.com/rpc"
server = xmlrpc.client.ServerProxy(url)

credentials = {
    'username': username,
    'password': password
}

# Log in and keep the session id, which the later cells pass with every call.
response = server.login(credentials)
sid = response.get('sid')
if not sid:
    # NOTE(review): the shape of a failed login reply is not visible here;
    # this only assumes that a usable reply always contains 'sid'.
    raise RuntimeError("iThenticate login did not return a session id")

# Same session id wrapped in a dict, the argument form used by server.folder.list.
sid_dict = dict(sid=sid)




In [None]:
# Ask the server for the folder listing of the current session.
response = server.folder.list(sid_dict)

# Take the id of the first folder named 'Trym Master Thesis';
# folder_id stays None when no folder carries that name.
folder_id = next(
    (
        entry.get('id')
        for entry in response.get('folders', [])
        if entry.get('name') == 'Trym Master Thesis'
    ),
    None,
)

# Wrap the id in a dict and show it for a quick visual check.
folder = {'folder': folder_id}
print(f"Folder ID for 'Trym Master Thesis': {folder}")

In [None]:
import os
import base64

# Define the folder containing the PDFs
folder_path = 'outputs'

# One dict per PDF, in the layout passed to server.document.add below.
documents = []

# Sorted so the upload order is the same on every run (os.listdir gives no
# ordering guarantee).
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.pdf'):
        continue

    # The title is the filename without its extension.
    title = os.path.splitext(filename)[0]

    # Fixed placeholder author for every generated paper.
    author_first = 'Google'
    author_last = 'Gemini'

    # Wrap the raw bytes in xmlrpc.client.Binary; the XML-RPC layer takes
    # care of the base64 encoding when the request is sent.
    with open(os.path.join(folder_path, filename), 'rb') as pdf_file:
        encoded_pdf = xmlrpc.client.Binary(pdf_file.read())

    documents.append({
        'title': title,
        'author_first': author_first,
        'author_last': author_last,
        'filename': filename,
        'upload': encoded_pdf
    })

# A missing folder id would otherwise surface as an opaque marshalling error
# inside xmlrpc.client (None cannot be sent by default).
if folder_id is None:
    raise ValueError("folder_id is not set; run the folder lookup cell and make sure the folder exists")

# Full argument struct for the upload call.
arguments = dict(sid=sid, folder=folder_id, submit_to=1, uploads=documents)

# Show only the filenames: printing `documents` or `arguments` would dump the
# raw PDF bytes and the session id into the notebook output.
print([document['filename'] for document in documents])

# Submit the documents to iThenticate (nothing to send when no PDF was found)
if documents:
    response = server.document.add(arguments)
    print(response)
else:
    print(f"No PDF files found in '{folder_path}'; nothing was submitted")

In [None]:
# Request one document by id; 115322054 is a hard-coded document id.
arguments = {'sid': sid, 'id': 115322054}

response = server.document.get(arguments)
print(response)

# Drill down to the score stored on the first part of the first document.
first_document = response['documents'][0]
score = first_document['parts'][0]['score']
print(score)

In [None]:
from random import choices
# Simulated plagiarism check: every round draws one of these scores with
# equal probability.
population = [0, 20, 25, 40]
weights = [0.25, 0.25, 0.25, 0.25]

# System instruction for rewrite requests. It lives under its own name so the
# `sys_instruction` used by the paper generation cell is not overwritten.
rewrite_instruction = "All prompts should be answered with an in depth paper with an introduction, middle and end structured into chapters that is about 4 pages, written in markdown and include sources. Dont answer anything with less than 4 pages. Dont write anything other than the paper. The paper should be about the same topic as the previous one. With as few changes as possible."


for paper_version in range(1, 10):
    #Check plagiarism score
    plagiarism_score = choices(population, weights)[0]
    print(f"Plagiarism score: {plagiarism_score}%")

    # A score of 15 or lower is accepted: stop rewriting.
    if plagiarism_score <= 15:
        break

    # Read the PDF of the most recent successfully produced version.
    pdf_bytes = pathlib.Path(f"{file_path}.pdf").read_bytes()

    prompt = f"You got got cought plagiarizing. The plagirism score was {plagiarism_score}%. Write the paper again to fix the plagiarism."

    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(system_instruction=rewrite_instruction),
        contents=[
            types.Part.from_bytes(
                data=pdf_bytes,
                mime_type='application/pdf',
            ),
            prompt,
        ],
    )

    # Guard against a response that carries no text.
    if not response.text:
        print(f"No text returned for version {paper_version + 1}; keeping {file_path}")
        continue

    next_path = f"outputs/{paper_number}_{topic}_version{paper_version+1}"
    try:
        # "w" + UTF-8: a re-run replaces the file instead of appending to it.
        with open(f"{next_path}.md", "w", encoding="utf-8") as f:
            f.write(response.text)

        # Convert only after the `with` block has closed (and flushed) the
        # Markdown file; pandoc would otherwise read a partially written
        # file. The argument list avoids the shell and its quoting rules.
        subprocess.run(
            ["pandoc", f"{next_path}.md", "--pdf-engine=xelatex", "-o", f"{next_path}.pdf"],
            check=True,
        )
    except (OSError, subprocess.CalledProcessError) as exc:
        # Report the failure instead of hiding it, and keep `file_path` on the
        # last good version so the next round still finds a PDF to read.
        print(f"Could not produce version {paper_version + 1}: {exc}")
        continue

    # Advance only when both the Markdown file and the PDF were produced.
    file_path = next_path