# Honours Project Notebook.

This notebook contains the code for my honours project. 

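To run this notebook, `ollama` must be installed and its daemon must be running in the background (start it with `ollama serve`). Every model listed in `MODELSUSED` must also be available locally; the notebook raises an error if one is missing.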

In [1]:
# Dependencies:
import ollama
import pandas as pd
import nltk
import re
from pprint import pprint

# Fetch nltk's "punkt_tab" resource. On re-runs nltk reports the package as
# already up to date (see the cell output).
nltk.download("punkt_tab")

[nltk_data] Downloading package punkt_tab to
[nltk_data]     /Users/harryk/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [None]:

# Tags of the models that are queried in this notebook.
MODELSUSED = ["mistral:latest", "falcon3:latest", "qwen2.5:latest"]

# Model names reported by `ollama.list()`.
modelNames = [entry.get("model") for entry in ollama.list().models]

# Stop at the first required model that is not in that list.
for i in MODELSUSED:
    if i in modelNames:
        continue
    raise Exception(f"Model {i} not on system.")


# Instruction text given to every model: it names the four error categories
# (omission, insertion, translational, internal inconsistency) and asks for
# only the error type(s) to be reported.
# The string is not raw, so the trailing `\n\n` escapes become real newlines.
SYSTEM = """
You correct and classify errors for radiology reports. The types of errors are omissions(where words are excluded from the transcription), insertion statements(any statements that have been added randomly by the
software inside the dicta-phone used), translational errors (the incorrect transcription of a phrase) and internal inconsistencies (such as side confusions). Only report the type(s) of error found. \n\n
"""

# Single worked example included in the prompt: a sample report, the expected
# answer line ("Correction: ..."), and a closing sentence announcing that the
# report to be classified follows. The report text is appended directly after
# this string when the prompt is built.
EXAMPLE = """
Report: 
Clinical Information Provided:
50-year-old male. fall from 7feet high when ladder gave way. Landed on back. c/o lpower rib and back pain. o/e tender BL lower limbs and flank, guarding. pneumothorax. intra-abdominal injury
.
Technique:
CT chest/abdomen/pelvis with dual arterial portal venous phase enhancement.
.
Findings:
Chest:
(No) pneumothorax, haemothorax, or pulmonary contusion.
Old left fourth rib fracture.  No acute bony injury to sternum or thoracic spine.  Wide patent bony spinal canal.
.
Abdomen/pelvis:
No intra-abdominal/pelvic free gas or fluid.
No significant abnormality seen in solid upper abdominal organs.  Normal appearance of partially filled urinary bladder.
Colonic diverticulosis with no active diverticulitis.  Remainder of unprepared large and small bowel loops are within normal limits.
No acute bony injury.  Degenerative changes in thoracolumbar spine and sacroiliac joints. Wide patent bony spinal canal.
.
Vascular:
Normal appearance of thoracic aorta with conventional configuration of aortic arch.
Normal appearance of abdominal aorta and its major branches.  Normal opacification of iliac vessels and visualised proximal femoral vessels.
.
Conclusion:
No pneumothorax.  No intra-abdominal traumatic injury.

Correction: Omission as no pneumothorax.
The radiology report will be below.
"""
# Load the test set and pull out the three columns used by the notebook.
dataframe = pd.read_csv("datasets/testing_data.csv")

removedCorrection = dataframe["Removed Correction"]
correctedData = dataframe["Re-dictated"]
errors = dataframe["Type of Error"]

# Sanity check: a row whose "Removed Correction" text equals its "Re-dictated"
# text is rejected. read_csv gives a 0..n-1 index, so the enumerate position
# is the same as the row label.
for i, (uncorrected, redictated) in enumerate(zip(removedCorrection, correctedData)):
    if uncorrected == redictated:
        raise ValueError(
            f"Correction not removed properly in {i} : \n{removedCorrection[i]}"
        )

# Split each "Type of Error" entry on ';' and count the parts. After the loop
# `errorList` / `errorNumber` hold the values for the last row.
for i, errorLabel in enumerate(errors):
    errorList = errorLabel.split(";")
    errorNumber = len(errorList)

In [17]:
# Send one test report (row 1 of the "Removed Correction" column) to every
# model in MODELSUSED and collect the responses alongside the original text.
report = removedCorrection[1]
reportDict = {"Original report": report}
for modelName in MODELSUSED:
    # The instructions go in ollama's dedicated `system` field rather than
    # being glued onto the user prompt; the prompt itself carries the worked
    # example followed by the report to classify.
    generated = ollama.generate(modelName, prompt=EXAMPLE + report, system=SYSTEM)
    reportDict[modelName] = generated["response"]

# One row per entry: the original report first, then one row per model.
tempData = pd.DataFrame(reportDict.values(), index=reportDict.keys())

# Keep a copy of this run's responses on disk.
tempData.to_csv("datasets/temp_data.csv")

display(tempData)

Unnamed: 0,0
Original report,Clinical Information Provided:\n50-year-old fe...
mistral:latest,1. Omission: Missing the mention of a patient ...
falcon3:latest,The radiology report contains several types of...
qwen2.5:latest,Omission \nInsertion Statements \nTranslatio...
