In [None]:
# Install the third-party packages this notebook pip-installs (-q keeps pip's output short).
# NOTE(review): `transformers`, `pandas` and `tqdm` are imported below but not installed
# here — presumably they are already present on the runtime; confirm.
!pip install -q colab-xterm ollama pydantic huggingface_hub datasets
# Load the colab-xterm IPython extension.
%load_ext colabxterm

import os
import ollama
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
import pandas as pd
from pydantic import BaseModel
from typing import List
from enum import Enum
from tqdm import trange
import IPython

In [None]:
# Install ollama and set the model download location to Colab's directory.
!curl -fsSL https://ollama.com/install.sh | sh

!export OLLAMA_FLASH_ATTENTION=1
!export OLLAMA_KV_CACHE_TYPE="q8_0"
!export OLLAMA_CONTEXT_LENGTH="4096"
!export OLLAMA_MODELS="/content"
!export OLLAMA_DEBUG=1

In [None]:
# Launch `ollama serve` under `nohup` as a background job (trailing `&`).
# A process takes its environment at launch, so any OLLAMA_* settings the server
# should honour need to be in place before this cell runs.
!nohup ollama serve &

In [None]:
# Closed set of error categories used by RadiologyError.errorType.
# The string values match the labels that appear in the SYSTEM3 few-shot examples
# (e.g. "Transcription Error", "Internal Inconsistency").
# NOTE(review): pydantic may copy class docstrings into the generated JSON schema as
# `description`, so the docstring below is left untouched — confirm before rewording it.
class ErrorType(Enum):
    """Strict definition of the multiple errors that can occur."""

    TranscriptionError = "Transcription Error"
    InternalInconsistency = "Internal Inconsistency"
    Omission = "Omission"
    ExtraneousStatement = "Extraneous Statement"


# One reported error: a category plus parallel lists of phrases and explanations.
# NOTE(review): pydantic may copy class docstrings into the generated JSON schema as
# `description`, so the docstring below is left untouched — confirm before rewording it.
class RadiologyError(BaseModel):
    """This class serves as a schema to act as structured output for the models."""

    # Category of the error (one of the ErrorType members).
    errorType: ErrorType
    # Text taken from the report that contains the error (see the SYSTEM3 examples).
    errorPhrases: list[str]
    # Why the phrase(s) above are considered an error.
    errorExplanation: list[str]


# Top-level output schema: inference() passes RadiologyErrors.model_json_schema()
# to ollama as the `format` argument.
# NOTE(review): pydantic may copy class docstrings into the generated JSON schema as
# `description`, so the docstring below is left untouched — confirm before rewording it.
class RadiologyErrors(BaseModel):
    """Adding multiple errors for structured output."""

    # Every error found in the report, or None.
    # No default is declared, so presumably the key itself is required — confirm.
    errorsForWholeText: List[RadiologyError] | None



In [None]:
import subprocess
import time
import os

# Ollama settings, written into this kernel's environment in one go.
# NOTE(review): `ollama serve` is launched in an earlier cell, so presumably the
# already-running server does not pick these values up — confirm.
os.environ.update(
    {
        "OLLAMA_FLASH_ATTENTION": "1",
        "OLLAMA_KV_CACHE_TYPE": "q8_0",
        "OLLAMA_CONTEXT_LENGTH": "4096",
        "OLLAMA_MODELS": "/content",
    }
)

# Pause for ten seconds (presumably to give the background server time to start).
time.sleep(10)

# Ollama model tags to evaluate; the commented-out line is an alternative model list.
model_names = [
    "mistral:latest",
    "qwen2.5:latest",
    "falcon3:latest",
]
# model_names = ["hf.co/harrykeeran12/radiology_error_mistral_gguf:Q4_K_M"]

# Test set. The "Removed Correction" column is the text later sent to the models
# as the prompt.
dataframe = pd.read_csv("testing_data.csv")
removedCorrection = dataframe["Removed Correction"]

# Three alternative system prompts, from shortest to most detailed. In the visible
# cells only SYSTEM3 is passed to inference(); SYSTEM and SYSTEM2 are not referenced.
# The prompt text is sent to the model verbatim, so any edit to these strings
# changes the experiment.

# Prompt 1: one-paragraph instruction, no output format described.
SYSTEM = """You help correct radiology report errors. These include omissions, extraneous statements, transcription errors and internal  inconsistencies. For each mistake, show the incorrect words and explain what the problem is."""

# Prompt 2: defines each error category and asks for JSON with three elements per error.
SYSTEM2 = """You help correct radiology report errors in an isolated local system. When provided with a free-text radiology report, analyze it for:
- Omissions: Missing critical information that should be present
- Extraneous statements: Information that doesn't belong or is redundant including any template errors.
- Transcription errors: Spelling, punctuation, or terminology mistakes - Internal inconsistencies: Contradictory statements within the report. For each identified error, return:
1. The error type
2. The exact text containing the error
3. An explanation of why it's an error
The arrays must be consistent (each error needs all three elements). Ignore any errors deemed redundant. Output JSON."""

# Prompt 3: few-shot prompt with three worked examples.
# NOTE(review): the examples show "errorsForWholeText" as a single object (examples
# 1 and 3) or as the string "No errors found" (example 2), whereas the
# RadiologyErrors schema declares it as a list of RadiologyError or None. The
# examples therefore do not match the schema passed as `format` — confirm whether
# this is intended.
SYSTEM3 = """Your task is to identify errors in unstructured radiology reports including omissions, extraneous statements, transcription errors, and internal inconsistencies. Analyze each report and output errors in JSON format.

Example 1:
Input: "Clinical Information:\nNot given.\nTechnique:\nNon-contrast images were taken in the axial plane with a section thickness of 1.5 m.\nFindings:\nOther findings are stable.\nImpressions: \nNot given."

Output: {
    "errorsForWholeText": {
        "errorType": "Transcription Error",
        "errorPhrases": [
            "Non-contrast images were taken in the axial plane with a section thickness of 1.5 m."
        ],
        "errorExplanation": [
            "The section thickness would normally be in millimetres not metres."
        ]
    }
}

Example 2:
Input: "Clinical Information:\nPatient with chronic headaches.\nTechnique:\nMRI of the brain without contrast.\nFindings:\nNo acute intracranial abnormality.\nNo evidence of mass effect or midline shift.\nVentricles are normal in size and configuration.\nImpressions:\nNormal brain MRI."

Output: {
    "errorsForWholeText": "No errors found"
}

Example 3:
Input: "Clinical Information:\nFall from standing height.\nTechnique:\nCT scan of the right wrist.\nFindings:\nThere is a comminuted fracture of the distal radius.\nNo evidence of dislocation.\nImpressions:\nThe patient has a sprained wrist."

Output: {
    "errorsForWholeText": {
        "errorType": "Internal Inconsistency",
        "errorPhrases": [
            "There is a comminuted fracture of the distal radius.",
            "The patient has a sprained wrist."
        ],
        "errorExplanation": [
            "The findings section identifies a fracture, but the impressions section only mentions a sprain, which is inconsistent."
        ]
    }
}
Analyse the report below:"""

# Run `ollama pull` once per entry in model_names; `{m}` is IPython's syntax for
# substituting the Python variable into the shell command.
for m in model_names:
  !ollama pull {m}


In [None]:
def inference(x:str, MODEL_NAME:str, SYSTEM:str):
  """Send one report to an ollama model and return the text it generates.

  Args:
    x: Report text, passed to ollama as the prompt.
    MODEL_NAME: Name of the ollama model to query.
    SYSTEM: System prompt to use for this request.

  Returns:
    The "response" entry of the result returned by ollama.generate.
  """
  # JSON schema of RadiologyErrors, handed to ollama as the requested output format.
  output_schema = RadiologyErrors.model_json_schema()
  # Sampling temperature is fixed at 0 for every request.
  sampling = {"temperature": 0}

  reply = ollama.generate(
      model=MODEL_NAME,
      system=SYSTEM,
      prompt=x,
      options=sampling,
      format=output_schema,
  )
  return reply["response"]



In [None]:
# One slot per row of the "Removed Correction" column; "" marks a report that has
# not been answered yet.
inferences = [""] * len(dataframe["Removed Correction"])

In [None]:
# Run every model over every report, store its answers as a new dataframe column
# named after the model, and write the dataframe to one CSV per model.
#
# `inferences` is a resume buffer: if the cell is interrupted, re-running it only
# queries the reports whose slot is still "". The buffer must be emptied between
# models; otherwise every slot is already filled after the first model, the later
# models are never queried, and their columns/CSVs just repeat the first model's
# output.
for modelName in model_names:
  if modelName in dataframe.columns:
    # This model was completed by an earlier run of the cell and its buffer has
    # been cleared since, so skip it rather than mixing results between models.
    continue
  for index in trange(len(inferences)):
    if inferences[index] != "":
      # Already answered before an interruption; keep the stored response.
      continue
    report = removedCorrection[index]
    print(report)
    resp = inference(report, modelName, SYSTEM3)
    print(resp)
    inferences[index] = resp
  dataframe[modelName] = inferences
  dataframe.to_csv(f"{modelName}_inference_prompt3.csv")
  # Start the next model with an empty buffer so it produces its own answers.
  inferences = ["" for _ in inferences]


In [None]:
# Stop the ollama processes (including the background server launched earlier).
!pkill ollama