## Goal
We want to explore how much better the system performs when it generates a draft with a multi-step process rather than a single-step process.

### Initial Variables

In [4]:
import sys 
import os
import json_tricks as json
import time
from dotenv import load_dotenv
load_dotenv('../../backend/.env')
sys.path.append("../../")
from backend.LLM.AnythingLLM_client import AnythingLLMClient
from backend.Generater import Generater
from backend.LLM.OllamaLLM import OllamaAI
from backend.database.schemas import Email


# --- Clients ---------------------------------------------------------------
# SECURITY: the AnythingLLM API key was committed in plain text in this
# notebook. It can now be supplied through the ANYTHINGLLM_API_KEY environment
# variable (e.g. via the .env file loaded above). The literal fallback is kept
# only so existing setups keep working.
# TODO(review): rotate this key and delete the fallback.
anyllm_api_key = os.environ.get("ANYTHINGLLM_API_KEY", "3WMNAPZ-GYH4RBE-M67SR00-7Y7KYEF")
anyllm_client = AnythingLLMClient("http://localhost:3001/api", anyllm_api_key)
ollama_client = OllamaAI('http://localhost:11434', 'llama3:instruct')
generator = Generater(ollama_client, anyllm_client)

# --- Sample enquiry --------------------------------------------------------
# Load one enquiry up front as a quick sanity check of the dataset format.
path_to_enquiry = "../../dataset/fake_email_1.json"
with open(path_to_enquiry, "r", encoding="utf-8") as f:
    enquiry = json.load(f)

enquiry_text = enquiry["email"]
# The first blank-line-separated paragraph is treated as the subject and the
# remaining paragraphs are re-joined (with single newlines) as the body.
enquiry_paragraphs = enquiry_text.split("\n\n")
subject = enquiry_paragraphs[0]
body = '\n'.join(enquiry_paragraphs[1:])

Performing health check on http://localhost:11434
Ollama is running
Health check passed


### Single Step Process

In [2]:

def single_shot_prompt(email: Email):
    """Draft a reply to ``email`` with a single prompt sent to AnythingLLM.

    A temporary thread named "tmp-thread" is created in the "General"
    workspace, the prompt is sent to it, and the thread is deleted afterwards,
    even when the chat call fails, so failed runs do not leave threads behind.

    Args:
        email: The enquiry; its ``subject`` and ``body`` are embedded in the prompt.

    Returns:
        A ``(response, total_time)`` tuple: the value returned by
        ``anyllm_client.chat_with_thread`` and the number of seconds that call
        took. Thread creation and deletion are not included in the timing.

    Raises:
        Whatever the AnythingLLM client raises; the temporary thread is still
        deleted before the exception propagates.
    """
    prompt = f""" You are David Frischer, the program administrator for the UCL Software Engineering program.
            You have received the following enquiry from a student:
            Subject: {email.subject}
            Body: {email.body}
            
           
            Draft a response to the student's email.
            """
    workspace_slug = anyllm_client.get_workspace_slug("General")
    thread = anyllm_client.new_thread(workspace_slug, "tmp-thread")
    thread_slug = thread["slug"]
    try:
        # perf_counter is monotonic, so the measured duration cannot be skewed
        # by system clock adjustments during a long generation.
        start_time = time.perf_counter()
        response = anyllm_client.chat_with_thread(workspace_slug, thread_slug, prompt)
        total_time = time.perf_counter() - start_time
    finally:
        anyllm_client.delete_thread(workspace_slug, thread_slug)
    return response, total_time

### Response format

## Multiple Step Process

In [3]:
def multi_step_process(email: Email):
    """Draft a reply to ``email`` with the multi-step generator pipeline.

    Args:
        email: The enquiry passed to ``generator.single_run_reply_to_email``.

    Returns:
        A ``(response, response_time)`` tuple: the generator's result and the
        number of seconds the call took.
    """
    # perf_counter is monotonic, which keeps this duration comparable with
    # other elapsed-time measurements even if the system clock is adjusted.
    start_time = time.perf_counter()
    response = generator.single_run_reply_to_email(email)
    response_time = time.perf_counter() - start_time

    return response, response_time
    

## Generate for multiple drafts

In [4]:

path_to_enquiries = "../../dataset"

counter = 0
results_folder = "./results"
os.makedirs(results_folder, exist_ok=True)


def _split_subject_and_body(raw_email):
    """Split a raw enquiry into ``(subject, body)``.

    When the first line starts with "Subject:" (any letter case), the text
    after that prefix is the subject and the remaining lines are the body.
    Otherwise the subject is the placeholder "Subject not found" and the whole
    text is kept as the body, so no content is lost.
    """
    first_line, _, remainder = raw_email.partition("\n")
    header = first_line.strip()
    if header.lower().startswith("subject:"):
        return header[len("subject:"):].strip(), remainder
    return "Subject not found", raw_email


# Result files are named "results_<enquiry file>", so stripping the prefix
# gives the enquiry files that already have a saved comparison. This makes the
# cell resumable: re-running it only processes what is still missing.
already_processed = set()
for results_file in os.listdir(results_folder):
    if results_file.endswith(".json"):
        already_processed.add(results_file.replace("results_", ""))

for enquiry_path in os.listdir(path_to_enquiries):
    if enquiry_path in already_processed:
        print(f"Already processed {enquiry_path}")
        continue
    # An enquiry file must contain "fake" AND end in ".json"; failing either
    # condition means the entry is something else in the dataset folder.
    if "fake" not in enquiry_path or not enquiry_path.endswith(".json"):
        print(f"Skipping {enquiry_path}")
        continue

    filepath = os.path.join(results_folder, f"results_{enquiry_path}")
    try:
        print(f"Processing {enquiry_path}")
        with open(os.path.join(path_to_enquiries, enquiry_path), "r", encoding="utf-8") as f:
            enquiry = json.load(f)

        subject, body = _split_subject_and_body(enquiry["email"])
        email = Email(subject=subject, body=body, id=counter)
        counter += 1

        response_single, response_time_single = single_shot_prompt(email)
        response_multi, response_time_multi = multi_step_process(email)

        results = {"single_shot": {"response": response_single, "response_time": response_time_single},
                    "multi_step": {"response": response_multi, "response_time": response_time_multi}}

        with open(filepath, "w") as f:
            json.dump(results, f)

    except Exception as e:
        # Best effort: one failing enquiry must not abort the whole batch.
        # It is reported and will be retried on the next run because no
        # result file was recorded for it.
        print(f"Error processing {enquiry_path}")
        print(e)
        continue

print("Done")


Already processed fake_email_10.json
Already processed fake_email_undergrad_13.json
Already processed fake_email_undergrad_6.json
Already processed fake_email_3.json
Already processed fake_email_2.json
Already processed fake_email_undergrad_7.json
Already processed fake_email_undergrad_12.json
Already processed fake_email_11.json
Already processed fake_email_undergrad_0.json
Already processed fake_email_undergrad_19.json
Already processed fake_email_5.json
Already processed fake_email_16.json
Already processed fake_email_undergrad_15.json
Already processed fake_email_9.json
Already processed fake_email_8.json
Already processed fake_email_undergrad_14.json
Already processed fake_email_17.json
Skipping __pycache__
Already processed fake_email_4.json
Already processed fake_email_undergrad_18.json
Already processed fake_email_undergrad_1.json
Already processed fake_email_7.json
Skipping bank_questions.py
Already processed fake_email_undergrad_2.json
Already processed fake_email_18.json
Alr

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.3058445304632187
Total Sim Distance: 0.3207991123199463
['Question: As a prospective graduate student, can I apply for multiple programs simultaneously? If yes, are there any specific procedures or requirements I need to follow?\nAnswer: Based on the provided context from UCL\'s website, the answer to your question is:\n\nYes, as a prospective graduate student, you may submit applications for a maximum of two taught graduate programmes for entry to the 2024/25 academic year. The applications must be for different programs. Please note that if an application fee is charged, it will be charged per application.\n\nThis information can be found in the "Applications" section of UCL\'s Frequently Asked Questions (FAQs) for prospective students applying to graduate taught and research programmes. \n-------\n', "Question: I am interested in gaining accreditation for prior learning as part of my MSc applicatio

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_undergrad_11.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.3441285043954849
Total Sim Distance: 0.34534816443920135
Total Sim Distance: 0.3344442844390869
["Question: Could you please let me know if this would be considered during the evaluation of my application?\nAnswer: Based on the provided contexts, I'd be happy to help answer your question.\n\nAs a prospective student applying for an undergraduate program in Computer Science at University College London (UCL), you're wondering if being a mature student looking to make a career change would be considered during the evaluation of your application.\n\nAccording to Context 0, Undergraduate Admissions mentions that they have a FAQ section on their website. If you cannot find the information you need or have questions regarding the entry requirements for a programme, please contact Undergraduate Admissions.\n\nIn this case, I'd recommend contacting the Undergraduate Admissions team directly to inquire about h

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_undergrad_8.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.3353770971298218
Total Sim Distance: 0.30507592856884
Total Sim Distance: 0.34347236156463623
["Question: Could you please provide me with an explanation of what this means? Is it still being processed or are there any additional documents required from me?\nAnswer: Dear Lewis,\n\nThank you for your inquiry. I'm happy to help clarify the status of your application.\n\nA 'gathered field' is a way we assess applications at UCL, especially when we receive a large number of strong candidates. It means that all applications are being evaluated in the same fair and consistent manner, regardless of the submission date (October or January UCAS deadline). This approach ensures that we can make an informed decision once we have enough comparable applications.\n\nIn your case, it simply means that we need to wait until we've received and processed all other applications before making a final decision on yours. Y

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_12.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.318282887339592
Total Sim Distance: 0.31011562049388885
Total Sim Distance: 0.32173942029476166
["Question: As per your offer letter, I have accepted my place on the programme, but unfortunately, due to unforeseen circumstances, I may no longer be able to attend.\nAnswer: Based on the provided contexts from UCL's website, here is a potential response to the student's email:\n\nDear [Student],\n\nThank you for your email and for accepting your place on our programme. We understand that unforeseen circumstances can arise, and we want to support you through this process.\n\nIf you are no longer able to attend the programme, please reach out to us as soon as possible so that we can discuss the next steps. You will need to contact UCL Admissions to formally withdraw your application.\n\nPlease note that if you have already paid a tuition fee deposit, it is unlikely that you would be eligible for a full or 

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_1.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.3133559674024582
Total Sim Distance: 0.34982986748218536
Total Sim Distance: 0.33583691716194153
['Question: As an international student, do I need to take a recognized English language proficiency test (e.g., IELTS or TOEFL) prior to applying for the MSc program in Software Engineering? If so, are there any specific scores required?\nAnswer: Based on the provided context from UCL\'s prospective students graduate frequently asked questions page, you do not need to take a recognized English language proficiency test (e.g., IELTS or TOEFL) prior to applying for the MSc program in Software Engineering.\n\nAccording to the information, "The English language requirements for a programme are stated on the programme’s prospectus page. You may be required to meet one of UCL’s five levels of English language proficiency, or obtain specific scores in a UCL-recognised test."\n\nIt is recommended that you check t

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_undergrad_4.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.3757656663656235
Total Sim Distance: 0.36498455703258514
["Question: Could you please guide me on the application process for transfer students? What are the specific requirements and deadlines that I need to be aware of? Are there any additional documents or information that I should provide to support my application?\nAnswer: Thank you for your interest in transferring to University College London (UCL) for the undergraduate program in Computer Science. As a current student studying computer science, you have already met the entry requirements for the program and possess a strong academic background.\n\nTo apply as a transfer student, please follow these steps:\n\n1. Check the eligibility: Ensure that you meet UCL's transfer student criteria, which includes having completed at least one year of undergraduate study at your current institution.\n2. Research programs: Review UCL's Computer Science prog

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Skipping urls.py
Processing fake_email_undergrad_5.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.36568671464920044
Total Sim Distance: 0.3324035257101059
Total Sim Distance: 0.3824188560247421
["Question: Could you please clarify what this means? Is it simply a matter of awaiting further review or are there additional steps that need to be completed before my application can proceed?\nAnswer: Based on the provided contexts, I'd say that UCL will determine your fee status in line with regulations published by the UK government. You can find more information about fee status determination on their website.\n\nIn response to your question, it seems that you're asking for clarification on what happens next after submitting your application. According to Context 2, if you receive an offer of admission to UCL and believe that your fee status has been incorrectly assessed, you can request a review of the assessment.\n\nFor now, it appears that your application is being processed, and you'll need to awai

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_0.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.337510347366333
Total Sim Distance: 0.3457774370908737
Total Sim Distance: 0.34567077457904816
Total Sim Distance: 0.34411947429180145
["Question: Are there any alternative arrangements that can be made in this situation? Are there any specific instructions or forms that I need to complete to facilitate the official transcript being sent directly from my university to UCL?\nAnswer: According to Context 1, you can upload a word document to your application confirming that you will submit your application through your University's transcript service. Once you have submitted your application, please have your University's transcript service send your transcript to transcripts.pgadmissions@ucl.ac.uk.\n\nSo, in this situation, it seems that there are alternative arrangements that can be made. You just need to confirm the arrangement by uploading a word document to your application and then instruct your un

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Skipping generated_email_1.txt
Processing fake_email_13.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.33256568014621735
["Question: Could you please let me know why my application fee payment is not going through? Are there any specific requirements or restrictions that I should be aware of when making the payment?\nAnswer: Dear John,\n\nThank you for reaching out to us about the issue with your application fee payment. We apologize for the inconvenience this has caused and are here to help.\n\nThere can be a number of reasons why your payment is not going through, including:\n\n* The payment system does not accept special characters, please ensure that you only use anglicised characters when making the payment.\n* The payment may have been blocked by your card issuer/bank. You should contact them to inform them that you are making an overseas payment.\n\nTo resolve this issue, we recommend trying again using a different browser or device. Additionally, you can access Flywire's FAQs, useful videos and

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_undergrad_9.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.36567655205726624
Total Sim Distance: 0.3674103617668152
['Question: Could you please provide me with information on how to apply for transfer? I understand that there may be specific requirements and procedures to follow. I would appreciate any guidance on what I need to do next.\nAnswer: Dear Lewis,\n\nThank you for your interest in transferring to University College London (UCL) to pursue an undergraduate degree in Computer Science. We appreciate your enthusiasm for studying at UCL, and we\'re happy to guide you through the transfer application process.\n\nTo apply for transfer, please follow these steps:\n\n1. Check the entry requirements: Review the entry requirements for the Computer Science program at UCL to ensure that you meet the minimum academic qualifications. You can find this information on our website or by contacting the Undergraduate Admissions team.\n2. Gather required documents: Pre

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Processing fake_email_undergrad_10.json
Creating new thread with name: tmp-thread in workspace: general


[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Creating new thread with name: new_thread in workspace: general
Total Sim Distance: 0.31222349405288696
Total Sim Distance: 0.3378884643316269
Total Sim Distance: 0.3311948627233505
['Question: Can I still apply for an undergraduate programme at UCL? If so, would it be possible to receive a conditional offer based on my pending exam results?\nAnswer: Based on the provided contexts from University College London (UCL), here\'s an answer to your question:\n\nAccording to Context 3, which is related to undergraduate programmes, it states that "Resits are generally considered for most of our programmes. Any programmes that do not consider resits will outline this on their department webpages." Since you\'re interested in applying for the Computer Science programme at UCL, and you\'re an adult learner returning to education, it\'s likely that your case would be considered.\n\nRegarding your question about receiving a conditional offer based on your pending exam results, Context 2 provides s

[ERROR] Expecting value: line 1 column 1 (char 0)
[ERROR] Expecting value: line 1 column 1 (char 0)


Done


In [13]:
import json as simplejson
from random import randint

def results_into_markodown(results_folder="./results", markdown_folder="./markdown"):
    """Render every saved comparison as a blinded A/B markdown file.

    For each ``*.json`` file in ``results_folder`` the single-shot and
    multi-step drafts are written to ``<name>.md`` in ``markdown_folder`` under
    the headings "A" and "B". Which draft gets which heading is chosen at
    random per file, so a reader cannot tell the processes apart. The mapping
    from heading to process is written to ``answers.json`` in
    ``markdown_folder``.

    Args:
        results_folder: Folder holding the ``results_*.json`` files.
        markdown_folder: Output folder; created if it does not exist.

    Raises:
        KeyError: If a result file lacks the expected
            ``single_shot.response.textResponse`` or
            ``multi_step.response.generated_draft_email`` entries.
    """
    os.makedirs(markdown_folder, exist_ok=True)
    answer = {}
    for results_file in os.listdir(results_folder):
        if not results_file.endswith(".json"):
            continue
        with open(os.path.join(results_folder, results_file), "r") as f:
            results = simplejson.load(f)

        single_shot_response = results['single_shot']['response']['textResponse']
        multi_step_response = results['multi_step']['response']['generated_draft_email']

        # Swap only the extension: a plain str.replace("json", "md") would also
        # rewrite any "json" that happens to appear inside the file name.
        stem, _ = os.path.splitext(results_file.replace("results_", ""))
        filename = f"{stem}.md"

        if randint(1, 2) == 1:
            draft_a, draft_b = single_shot_response, multi_step_response
            answer[filename] = {"A": 'single_shot_response', "B": 'multi_step_response'}
        else:
            draft_a, draft_b = multi_step_response, single_shot_response
            answer[filename] = {"A": 'multi_step_response', "B": 'single_shot_response'}

        markdown = f"# {filename}\n\n## A\n\n{draft_a}\n\n## B\n\n{draft_b}\n\n\n\n"

        # The drafts are LLM output and may contain non-ASCII characters, so
        # the encoding is fixed instead of relying on the platform default.
        with open(os.path.join(markdown_folder, filename), "w", encoding="utf-8") as f:
            f.write(markdown)
    with open(os.path.join(markdown_folder, "answers.json"), "w") as f:
        simplejson.dump(answer, f)
        


In [5]:
import json as simplejson

def average_time(results_folder="./results"):
    """Print the mean response time of both processes over all saved results.

    Every ``*.json`` file in ``results_folder`` contributes its
    ``single_shot.response_time`` and ``multi_step.response_time`` values.
    When the folder is missing or holds no result files, a short notice is
    printed instead of failing with an error.

    Args:
        results_folder: Folder holding the ``results_*.json`` files.
    """
    if not os.path.isdir(results_folder):
        print(f"No results folder found at {results_folder}")
        return

    total_single_shot = 0
    total_multi_step = 0
    total = 0
    for results_file in os.listdir(results_folder):
        if not results_file.endswith(".json"):
            continue
        with open(os.path.join(results_folder, results_file), "r") as f:
            results = simplejson.load(f)
        total_single_shot += results['single_shot']['response_time']
        total_multi_step += results['multi_step']['response_time']
        total += 1

    # Without this guard an empty folder would divide by zero below.
    if total == 0:
        print(f"No result files found in {results_folder}")
        return

    print(f"Average single shot time: {total_single_shot/total}")
    print(f"Average multi step time: {total_multi_step/total}")


average_time()

Average single shot time: 51.24523681998253
Average multi step time: 174.0607920050621
