# Kenya Clinical Reasoning Challenge

In [2]:
# pip install transformers datasets torch pandas numpy

In [4]:
# pip install sentencepiece

In [6]:
# pip install accelerate --upgrade

In [8]:
# suppress warnings
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # 0 = all messages, 3 = errors only

### Prepare the Data (using train_raw.csv)

In [11]:
import pandas as pd
from datasets import Dataset

In [12]:
# Load data
train_raw = pd.read_csv('train_raw.csv')

# Preprocessing function (fixed from Phase 1)
def preprocess_clinician(text):
    text = text.lower()
    text = ''.join(c for c in text if c.isalnum() or c.isspace())
    text = ' '.join(text.split())
    return text

# Handle missing Years of Experience in Prompt
def update_prompt(row):
    if pd.isna(row['Years of Experience']):
        return row['Prompt'].replace("i am a nurse", "i am a nurse with unknown years of experience")
    return row['Prompt']

# Apply preprocessing
train_raw['Prompt'] = train_raw.apply(update_prompt, axis=1)
train_raw['Clinician'] = train_raw['Clinician'].apply(preprocess_clinician)

# Create a Hugging Face Dataset
data = {'input_text': train_raw['Prompt'], 'target_text': train_raw['Clinician']}
dataset = Dataset.from_dict(data)

# Split into train (80%) and validation (20%)
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
val_dataset = train_test_split['test']

In [13]:
# Verify
print(f"Train sample:\n{train_dataset[0]}\n")
print(f"Validation sample:\n{val_dataset[0]}")

Train sample:
{'input_text': 'I am a nurse with 10 years of experience in General nursing working in a National Referral Hospitals in Uasin Gishu county in Kenya. A patient is brought to the Theatre with intestinal obstruction. This is his first admission. Patient complains of inability to pass stool for one and half weeks and abdominal distension. There is no history for chronic diseases in the family. Vital sign at the time of receiving the patient in Theatre BP 130/70 mmHg, Pulse 100 bpm, RR 21/minute, SPO2 96%. Upon opening the abdomen all the intestines had become necrosed.\nQuestion\nHow should the patient be managed intraoperatively and postoperatively?', 'target_text': 'patient brought to the theater with intestinal obstruction complaints of inability to pass stool and abdominal distension vitals critical upon opening abdomen all intestines were necrosed q how should the patient be managed intraoperatively and postoperatively intraoperation resection and anastomosis constructio

#### Baseline Model (Flan-T5 Small)

In [18]:
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
import torch

E0000 00:00:1744026471.717321  512574 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744026471.766854  512574 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [19]:
# Load tokenizer and model
model_name = "google/flan-t5-small"
tokenizer = T5Tokenizer.from_pretrained(model_name)
model = T5ForConditionalGeneration.from_pretrained(model_name)

# Tokenize the dataset
def tokenize_function(examples):
    inputs = tokenizer(examples['input_text'], max_length=512, truncation=True, padding="max_length")
    targets = tokenizer(examples['target_text'], max_length=256, truncation=True, padding="max_length")
    inputs["labels"] = targets["input_ids"]
    return inputs

train_dataset = train_dataset.map(tokenize_function, batched=True)
val_dataset = val_dataset.map(tokenize_function, batched=True)

# Set format for PyTorch
train_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
val_dataset.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])

# Training arguments
training_args = TrainingArguments(
    output_dir="./flan_t5_small_output",
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    warmup_steps=50,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

# Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/320 [00:00<?, ? examples/s]

Map:   0%|          | 0/80 [00:00<?, ? examples/s]

In [22]:
# Fine-tune
trainer.train()

Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,5.6715,4.670094
2,4.0974,3.601858
3,3.8247,3.365974


There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].


TrainOutput(global_step=240, training_loss=6.707022062937418, metrics={'train_runtime': 3617.3331, 'train_samples_per_second': 0.265, 'train_steps_per_second': 0.066, 'total_flos': 178454884515840.0, 'train_loss': 6.707022062937418, 'epoch': 3.0})

In [24]:
# Save model
model.save_pretrained("./flan_t5_small_finetuned")
tokenizer.save_pretrained("./flan_t5_small_finetuned")

('./flan_t5_small_finetuned/tokenizer_config.json',
 './flan_t5_small_finetuned/special_tokens_map.json',
 './flan_t5_small_finetuned/spiece.model',
 './flan_t5_small_finetuned/added_tokens.json')

In [26]:
# Load fine-tuned model
model = T5ForConditionalGeneration.from_pretrained("./flan_t5_small_finetuned")
tokenizer = T5Tokenizer.from_pretrained("./flan_t5_small_finetuned")

In [28]:
# Test a prompt
test_prompt = train_raw['Prompt'][0]  # e.g., "i am a nurse with 18 years of experience..."
inputs = tokenizer(test_prompt, return_tensors="pt", max_length=512, truncation=True)
outputs = model.generate(**inputs, max_length=256)
prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)

In [32]:
print(f"Prompt:\n{test_prompt}\n")
print(f"Prediction:\n{prediction}\n")
print(f"Ground Truth:\n{train_raw['Clinician'][0]}")

Prompt:
I am a nurse with 18 years of experience in General nursing working in a Sub-county Hospitals and Nursing Homes in Uasin Gishu county in Kenya. A 4-year-old child presents to the emergency department with second-degree burns on the forearm after accidentally touching a hot stove. The child was playing in the kitchen when they reached out to touch the stove. The burns cover about 5% of the total body surface area. The child is alert and crying, with redness, blisters, and swelling on the affected area. The burns appear to be superficial to moderate in severity. The child is in mild pain, and there is no indication of airway or breathing distress. No other injuries are noted.
Questions:
1. What is the immediate treatment protocol for second-degree burns in paediatric patients?
2. Should any tetanus prophylaxis be considered in this case?
3. What follow-up care should be recommended for burn healing?

Prediction:
4. year old child presents to the emergency department with second-d