In [1]:
%load_ext autoreload
%autoreload 2

### External and Internal Imports

In [2]:
import torch
import nltk
import torch.backends
import torch.backends.cuda
import typing as t
import scipy as sp
import numpy as np
import spacy as spa
import pandas as pd
import evaluate
import pathlib as pb
import wandb
import random as rng
from functools import partial
from tokenizers import (
    AddedToken
)
from datasets import (
    Dataset,
    NamedSplit,
    Value,
    ClassLabel,
    DatasetDict,
    Features,
)
import tensorflow as tf
from bleurt import score
from huggingface_hub import notebook_login
import transformers as ts
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    T5ForConditionalGeneration,
    Seq2SeqTrainingArguments,
    Seq2SeqTrainer,
    DataCollatorForSeq2Seq,
    DataCollatorForLanguageModeling,
)

2023-06-28 22:30:31.634466: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-06-28 22:30:32.414606: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:975] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-06-28 22:30:32.414758: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcufft.so.10'; dlerror: libcufft.so.10: cannot open shared object file: No such file or directory
2023-06-28 22:30:32.423754: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcusparse.so.11'; dlerror: libcusparse.so.11: cannot open shared object file: No such file or directory
2023-06-28 22:30:32.423788: W te

In [3]:
from dataset import read_mediqa_dataset

### Prepare the Environment

In [4]:
# Environment
SEED = 42
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Relative Paths
ROOT_PATH = pb.Path('..', '..')
DATA_PATH = ROOT_PATH / 'data'
WEIGHTS_PATH = ROOT_PATH / 'weights'
BLEURT_PATH: pb.Path= WEIGHTS_PATH / 'bleurt' / 'bleurt-20'

# Reproducibility
rng.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.use_deterministic_algorithms(False)

# Optimizations
torch.backends.cuda.matmul.allow_tf32 = True

# Download the dependencies
wandb.login()
nltk.download()
notebook_login()

# Track stats from project
run = wandb.init(
    # Set the project where this run will be logged
    project="ub-g12-bionlp",
    resume=True,
    id='bionlp'
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


[34m[1mwandb[0m: Currently logged in as: [33minvokariman[0m ([33mcastelvaar[0m). Use [1m`wandb login --relogin`[0m to force relogin


NLTK Downloader
---------------------------------------------------------------------------
    d) Download   l) List    u) Update   c) Config   h) Help   q) Quit
---------------------------------------------------------------------------


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…



### Read the Dataset

In [5]:
mediqa_dataset: DatasetDict = read_mediqa_dataset(DATA_PATH).shuffle(seed=SEED)
mediqa_dataset

DatasetDict({
    augmented: Dataset({
        features: ['id', 'header', 'summary', 'dialogue', 'label'],
        num_rows: 3603
    })
    valid: Dataset({
        features: ['id', 'header', 'summary', 'dialogue', 'label'],
        num_rows: 100
    })
    train: Dataset({
        features: ['id', 'header', 'summary', 'dialogue', 'label'],
        num_rows: 1201
    })
    test: Dataset({
        features: ['id', 'header', 'summary', 'dialogue', 'label'],
        num_rows: 200
    })
})

In [6]:
mediqa_dataset.data['augmented'][:]['header'].to_pandas().value_counts()

header
FAM/SOCHX        1053
GENHX             846
PASTMEDICALHX     354
CC                231
PASTSURGICAL      189
ALLERGY           180
ROS               180
MEDICATIONS       162
ASSESSMENT        102
EXAM               69
DIAGNOSIS          57
DISPOSITION        45
PLAN               33
EDCOURSE           24
IMMUNIZATIONS      24
IMAGING            18
GYNHX              15
PROCEDURES          9
OTHER_HISTORY       6
LABS                6
Name: count, dtype: int64

### Preprocess the Data

Compute the 75th percentile for the length of the dialogue & summary

In [7]:
dialogue_length: pd.Series = pd.Series(mediqa_dataset['augmented'][:]['dialogue']).apply(nltk.word_tokenize).apply(lambda x: len(x))
dialogue_length.describe()

count    3603.000000
mean      111.036359
std       107.079140
min         3.000000
25%        44.000000
50%        82.000000
75%       152.500000
max      1859.000000
dtype: float64

In [8]:
summary_length: pd.Series = pd.Series(mediqa_dataset['augmented'][:]['summary']).apply(nltk.word_tokenize).apply(lambda x: len(x))
summary_length.describe()

count    3603.000000
mean       46.328893
std        74.076092
min         1.000000
25%         6.000000
50%        17.000000
75%        52.000000
max      1133.000000
dtype: float64

In [9]:
# Consider the length to be above the 75th percentile
max_diagolue_length = 160
max_summary_length = 60

In [10]:
# Download the metrics
if not (WEIGHTS_PATH / 'bleurt' / 'bleurt-20').exists():
    !wget https://storage.googleapis.com/bleurt-oss-21/BLEURT-20.zip {BLEURT_PATH}
    !mkdir -p {WEIGHTS_PATH / 'bleurt' / 'bleurt-20'}
    !unzip BLEURT-20.zip -d {WEIGHTS_PATH / 'bleurt' / 'bleurt-20'}
    !rm BLEURT-20.zip

In [11]:
# Download locally the pretrained model weights
model = T5ForConditionalGeneration.from_pretrained('google/flan-t5-base', cache_dir=WEIGHTS_PATH / 'flan-t5-base', device_map='auto')
tokenizer = AutoTokenizer.from_pretrained('google/flan-t5-base', cache_dir=WEIGHTS_PATH / 'flan-t5-base')

# model = AutoModelForSeq2SeqLM.from_pretrained("cogint/in-boxbart",
#                                                    cache_dir=WEIGHTS_PATH / 'in-boxbart',
#                                                    device_map='auto')
# tokenizer = AutoTokenizer.from_pretrained("cogint/in-boxbart",
#                                                cache_dir=WEIGHTS_PATH / 'in-boxbart')

In [12]:
# Add new <HEADER> special token to be used for section header prediction
tokenizer.add_special_tokens({ 'additional_special_tokens': ['<HEADER>'] })
model.resize_token_embeddings(len(tokenizer))

Embedding(32101, 768)

In [13]:
def augment(entries, p=0.50, instruction: t.Optional[str] = ''):
    # Flip a coin for each entry and determine if we add or not the section header
    has_header: np.ndarray = np.random.binomial(1, p, len(entries['id'])).astype(bool)

    # Give a hint to the model that <HEADER> 
    input_dialogue: t.List[str] = [f'{instruction} {"<HEADER>" if h else ""} ' + d
                                   for h, d in zip(has_header, entries['dialogue'])]
    output_summary: t.List[str] = [(t + ' ' if h else '') + s
                                   for h, t, s in zip(has_header, entries['header'], entries['summary'])]

    return {
        **entries,
        'dialogue': input_dialogue,
        'summary': output_summary,
    }


def tokenize(entries, max_in_len: int, max_out_len: int):
    # Apply model tokenizer
    input_tokens = tokenizer(entries['dialogue'], max_length=max_in_len,
                                  padding=True,
                                  truncation=True)
    output_tokens = tokenizer(entries['summary'], max_length=max_out_len,
                                  padding=True,
                                  truncation=True)

    # Aggregate tokens into single dict
    tokens = input_tokens
    tokens['labels'] = output_tokens['input_ids']
    return tokens


# Use different name to illustrate tokenization state
mediqa_tokenized_dataset = mediqa_dataset

# Apply augmentation & tokenization
mediqa_tokenized_dataset['augmented'] = mediqa_dataset['augmented'] \
    .map(partial(augment,
                 p=0.5,
                 instruction='summarize:'),
                 batched=True) \
    .map(partial(tokenize,
                 max_in_len=max_diagolue_length,
                 max_out_len=max_summary_length),
                 batched=True,
                 remove_columns=['id', 'header', 'summary', 'dialogue', 'label'])

# Apply augmentation & tokenization
mediqa_tokenized_dataset['valid'] = mediqa_dataset['valid'] \
    .map(partial(augment,
                 p=1.0,
                 instruction='summarize:'),
                 batched=True) \
    .map(partial(tokenize,
                 max_in_len=max_diagolue_length,
                 max_out_len=max_summary_length),
                 batched=True,
                 remove_columns=['id', 'header', 'summary', 'dialogue', 'label'])

# Tokenize test set tokenization
mediqa_tokenized_dataset['test'] = mediqa_dataset['test'] \
    .map(partial(augment,
                 p=1.0,
                 instruction='summarize:'),
                 batched=True) \
    .map(partial(tokenize,
                 max_in_len=max_diagolue_length,
                 max_out_len=max_summary_length),
                 batched=True,
                 remove_columns=['id', 'header', 'summary', 'dialogue', 'label'])

Map:   0%|          | 0/3603 [00:00<?, ? examples/s]

Map:   0%|          | 0/3603 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

In [14]:
# Inspect generated tokens for dialogue
tokenizer.batch_decode(mediqa_tokenized_dataset['augmented']['input_ids'])

["summarize: Guest_clinician: How is the patient? Doctor: He's stable. Heart rate 65. Normal rhythm. Ninety-seven percent oxygen in room air.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>",
 'summarize: <HEADER> Breathe in breath, let me tap and see. Well, your lungs seem clear. Patient: Okay.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad

In [15]:
# Inspect generated tokens for summary
tokenizer.batch_decode(mediqa_tokenized_dataset['augmented']['labels'])

['Stable.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>',
 'EXAM CHEST: Lungs bilaterally clear to auscultation and percussion.</s><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad><pad>',
 'GENHX This is a 49-year-old white male who sustained a traumatic injury to his left posterior thighthis past year while in ABCD. He sustained an injury from the patellar from a boat while in the water. He was air lifted</s>',
 'GENHX Spontaneous Vaginal delivery at 36weeks gestation to a G2P1 mother. Birth weight 7#10oz. No instrumentation required. Labor = 11hours. "Light gas anesthesia" given. Apgars</s>',
 'Please evaluate s

### FineTuning & Evaluation

In [16]:
# Prepare the metrics for evaluation
bertscore = evaluate.load('bertscore', module_type='metric', device=DEVICE)
bleurt = score.BleurtScorer(BLEURT_PATH / 'BLEURT-20')
rouge = evaluate.load('rouge', module_type='metric')


# Evaluate the performance of the models
def evaluation(output):
    # Retrieve predictions & ground-truth data
    pred_token_ids, gt_token_ids = output
    gt_token_ids = np.where(gt_token_ids != -100, gt_token_ids, tokenizer.pad_token_id)

    # Obtain decoded tokens
    pred_tokens = tokenizer.batch_decode(pred_token_ids, skip_special_tokens=True)
    gt_tokens = tokenizer.batch_decode(gt_token_ids, skip_special_tokens=True)

    # Compute metrics
    rouge_score: t.Dict[str, t.List[float]] = rouge.compute(predictions=pred_tokens, references=gt_tokens)
    bert_score: t.Dict[str, t.List[float]] = bertscore.compute(predictions=pred_tokens, references=gt_tokens, lang='en')
    bleurt_score: t.List[float] = bleurt.score(candidates=pred_tokens, references=gt_tokens)

    # Aggregate metrics
    return {
        'bertscore': np.array(bert_score['f1']).mean(),
        'bleurt': np.array(bleurt_score).mean(),
        **rouge_score,
    }

INFO:tensorflow:Reading checkpoint ../../weights/bleurt/bleurt-20/BLEURT-20.
INFO:tensorflow:Config file found, reading.
INFO:tensorflow:Will load checkpoint BLEURT-20
INFO:tensorflow:Loads full paths and checks that files exists.
INFO:tensorflow:... name:BLEURT-20
INFO:tensorflow:... bert_config_file:bert_config.json
INFO:tensorflow:... max_seq_length:512
INFO:tensorflow:... vocab_file:None
INFO:tensorflow:... do_lower_case:None
INFO:tensorflow:... sp_model:sent_piece
INFO:tensorflow:... dynamic_seq_length:True
INFO:tensorflow:Creating BLEURT scorer.
INFO:tensorflow:Creating SentencePiece tokenizer.
INFO:tensorflow:Creating SentencePiece tokenizer.
INFO:tensorflow:Will load model: ../../weights/bleurt/bleurt-20/BLEURT-20/sent_piece.model.
INFO:tensorflow:SentencePiece tokenizer created.
INFO:tensorflow:Creating Eager Mode predictor.
INFO:tensorflow:Loading model.


2023-06-28 22:30:53.025077: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


INFO:tensorflow:BLEURT initialized.


In [17]:
data_collator = DataCollatorForSeq2Seq(tokenizer, model, return_tensors='pt')

training_args = Seq2SeqTrainingArguments(
    output_dir="t5",
    evaluation_strategy="epoch",
    learning_rate=5e-5,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=10,
    predict_with_generate=True
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=mediqa_tokenized_dataset["augmented"],
    eval_dataset=mediqa_tokenized_dataset["valid"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=evaluation,
)

trainer.train()



  0%|          | 0/1130 [00:00<?, ?it/s]

You're using a T5TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


  0%|          | 0/4 [00:00<?, ?it/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 1.304470181465149, 'eval_bertscore': 0.8639779669046402, 'eval_bleurt': 0.3349599052965641, 'eval_rouge1': 0.22858423578187864, 'eval_rouge2': 0.0928020965758781, 'eval_rougeL': 0.19208519558935733, 'eval_rougeLsum': 0.19093262222075513, 'eval_runtime': 82.6518, 'eval_samples_per_second': 1.21, 'eval_steps_per_second': 0.048, 'epoch': 1.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.1129454374313354, 'eval_bertscore': 0.863747992515564, 'eval_bleurt': 0.300099638774991, 'eval_rouge1': 0.2429719169865418, 'eval_rouge2': 0.11739615322468575, 'eval_rougeL': 0.22513702713415829, 'eval_rougeLsum': 0.22414665959748073, 'eval_runtime': 79.6826, 'eval_samples_per_second': 1.255, 'eval_steps_per_second': 0.05, 'epoch': 2.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.071690559387207, 'eval_bertscore': 0.8658018916845321, 'eval_bleurt': 0.2952945575118065, 'eval_rouge1': 0.2526808885316513, 'eval_rouge2': 0.11897800633317522, 'eval_rougeL': 0.2337220858226875, 'eval_rougeLsum': 0.2327649325637648, 'eval_runtime': 79.2948, 'eval_samples_per_second': 1.261, 'eval_steps_per_second': 0.05, 'epoch': 3.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0532689094543457, 'eval_bertscore': 0.875687780380249, 'eval_bleurt': 0.33108051151037216, 'eval_rouge1': 0.28947272773716337, 'eval_rouge2': 0.13967214214882617, 'eval_rougeL': 0.26660378326965917, 'eval_rougeLsum': 0.26658704359997376, 'eval_runtime': 79.6282, 'eval_samples_per_second': 1.256, 'eval_steps_per_second': 0.05, 'epoch': 4.0}
{'loss': 2.055, 'learning_rate': 2.7876106194690264e-05, 'epoch': 4.42}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0448130369186401, 'eval_bertscore': 0.8734343075752258, 'eval_bleurt': 0.323806945681572, 'eval_rouge1': 0.2684678959454978, 'eval_rouge2': 0.1348580893943147, 'eval_rougeL': 0.24686538952662807, 'eval_rougeLsum': 0.2467601391704617, 'eval_runtime': 79.5991, 'eval_samples_per_second': 1.256, 'eval_steps_per_second': 0.05, 'epoch': 5.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0399585962295532, 'eval_bertscore': 0.8821739846467972, 'eval_bleurt': 0.36408275708556176, 'eval_rouge1': 0.31100258667088493, 'eval_rouge2': 0.1459293216261664, 'eval_rougeL': 0.28784077059344326, 'eval_rougeLsum': 0.28697542400137577, 'eval_runtime': 79.4822, 'eval_samples_per_second': 1.258, 'eval_steps_per_second': 0.05, 'epoch': 6.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0384917259216309, 'eval_bertscore': 0.8834166234731674, 'eval_bleurt': 0.36100871123373507, 'eval_rouge1': 0.3040249390007925, 'eval_rouge2': 0.14120635680166693, 'eval_rougeL': 0.2833602910373405, 'eval_rougeLsum': 0.28256900480178676, 'eval_runtime': 79.41, 'eval_samples_per_second': 1.259, 'eval_steps_per_second': 0.05, 'epoch': 7.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0370354652404785, 'eval_bertscore': 0.8840857809782028, 'eval_bleurt': 0.3613336993008852, 'eval_rouge1': 0.3107399540230861, 'eval_rouge2': 0.14633800544815156, 'eval_rougeL': 0.28710566479295085, 'eval_rougeLsum': 0.2855262529797403, 'eval_runtime': 79.5011, 'eval_samples_per_second': 1.258, 'eval_steps_per_second': 0.05, 'epoch': 8.0}
{'loss': 0.8642, 'learning_rate': 5.752212389380531e-06, 'epoch': 8.85}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0354198217391968, 'eval_bertscore': 0.8844044768810272, 'eval_bleurt': 0.370335805863142, 'eval_rouge1': 0.31500142312229185, 'eval_rouge2': 0.15051913591924831, 'eval_rougeL': 0.2902258278185639, 'eval_rougeLsum': 0.2890046381110993, 'eval_runtime': 79.7529, 'eval_samples_per_second': 1.254, 'eval_steps_per_second': 0.05, 'epoch': 9.0}


  0%|          | 0/4 [00:00<?, ?it/s]

{'eval_loss': 1.0366042852401733, 'eval_bertscore': 0.8845970940589905, 'eval_bleurt': 0.36692029029130935, 'eval_rouge1': 0.31201929341735013, 'eval_rouge2': 0.1503492417253139, 'eval_rougeL': 0.28810866122093226, 'eval_rougeLsum': 0.2864173725162826, 'eval_runtime': 78.8868, 'eval_samples_per_second': 1.268, 'eval_steps_per_second': 0.051, 'epoch': 10.0}
{'train_runtime': 1002.9222, 'train_samples_per_second': 35.925, 'train_steps_per_second': 1.127, 'train_loss': 1.3852475529223416, 'epoch': 10.0}


TrainOutput(global_step=1130, training_loss=1.3852475529223416, metrics={'train_runtime': 1002.9222, 'train_samples_per_second': 35.925, 'train_steps_per_second': 1.127, 'train_loss': 1.3852475529223416, 'epoch': 10.0})

### Evaluation

In [18]:
results = trainer.predict(mediqa_tokenized_dataset['test'])
wandb.log({ 'test': results.metrics })

  0%|          | 0/7 [00:00<?, ?it/s]

### Perform Some Inference

In [None]:
# Extract one sample from the dataset
index = 65
test_tokenized_dataset: Dataset = mediqa_tokenized_dataset['test']
test_example = test_tokenized_dataset[index]
test_input_token_ids = test_example['input_ids']
test_output_token_ids = test_example['labels']

# Show the extracted sample
print('Tokenized Dialogue')
print(tokenizer.decode(test_input_token_ids, skip_special_tokens=True))
print()
print('Tokenized Summary')
print(tokenizer.decode(test_output_token_ids, skip_special_tokens=True))

# Create batch with the token_ids
batch_input = torch.tensor(test_input_token_ids, device=DEVICE).unsqueeze(0)
batch_output = torch.tensor(test_output_token_ids, device=DEVICE).unsqueeze(0)

# Predict summary
outputs = model.generate(batch_input, max_new_tokens=max_summary_length).cpu()

# Output
print()
print('Summary Output')
print(tokenizer.decode(outputs.squeeze(0)))

Tokenized Dialogue
  Doctor: How's everything going on? 
Patient: I'm having this weird feeling where I feel that my face is going to twitch and I start doing these grimacing actions on my face, mouth, and then it kind of suppresses it, but I'm not sure what's happening. I'm still having a fear of feeling of pressure in the backside of my head and it comes approximately like once each week. 
Doctor: What exactly does this pressure feels like? 
Patient: It feels like pins and needles in my head and a lot of pressure and often it feels like water is running down my hair. 
Doctor: How frequent are these feelings? Is anything helping you? Are you taking any medications or anything else for it?

Tokenized Summary
GENHX The patient had several episodes where she felt like her face was going to twitch, which she could suppress it with grimacing movements of her mouth and face.  She reports she is still having right posterior head pressure like sensations approximately one time per week.  Thes

In [None]:
wandb.finish()