# Import libraries

In [None]:
# drive access
from google.colab import drive
drive.mount('/content/drive')

# standard library
import numpy as np
import pandas as pd
import tensorflow as tf

# for model
!pip install transformers
from transformers import RobertaTokenizer, TFRobertaModel, TFRobertaForMaskedLM

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Get a few examples from our dataset

In [None]:
df = pd.read_csv('/content/drive/MyDrive/266/Data/Clean_Data/EmoV_Arctic/punctuated_cased_train.csv')

df.head(10)

Unnamed: 0.1,Unnamed: 0,filename,clean_filename,actor,gender,emotion,auto_transcription,label,cleaned_auto_transcription,cleaned_label
0,0,amused_29-45_0042.wav,42,bea,female,amused,HOW COULD HE EXPLAIN HIS POSSESSION OF THE SKETCH,How could he explain his possession of the ske...,How could he explain his possession of the ske...,How could he explain his possession of the ske...
1,1,amused_46-56_0046.wav,46,bea,female,amused,THE GIRL FACED HIM HER EYES SHINING WITH SUDDE...,"The girl faced him, her eyes shining with sudd...","The girl faced him, her eyes shining with sudd...","The girl faced him, her eyes shining with sudd..."
2,2,amused_1-15_0005.wav,5,bea,female,amused,WILL WE EVER FORGET IT,Will we ever forget it.,Will we ever forget it.,Will we ever forget it.
3,3,amused_281-308_0281.wav,281,bea,female,amused,I DO NOT BLAME YOU FOR ANYTHING REMEMBER THAT,I do not blame you for anything; remember that.,I do not blame you for anything. Remember that.,I do not blame you for anything. Remember that.
4,4,amused_225-252_0226.wav,226,bea,female,amused,THAT CAME BEFORE MY A V CS,That came before my A B C's.,That came before my a v cs.,That came before my A B C's.
5,5,amused_141_168_0146.wav,146,bea,female,amused,OF COURSE THAT IS UNINTERESTING SHE CONTINUED,"Of course, that is uninteresting, she continued.","Of course, that is uninteresting. She continued.","Of course, that is uninteresting, she continued."
6,6,amused_85-112_0108.wav,108,bea,female,amused,HE WATED IN THE EDGE OF THE WATER AND BEGAN SC...,He waded into the edge of the water and began ...,He wated in the edge of the water and began sc...,He waded into the edge of the water and began ...
7,7,amused_281-308_0301.wav,301,bea,female,amused,COULD ALMAY DREAMS VIOLATED THIS LAW,But all my dreams violated this law.,Could almay dreams violated this law.,But all my dreams violated this law.
8,8,amused_169-196_0175.wav,175,bea,female,amused,DOWN THERE THE EARTH WAS ALREADY SWELLING WITH...,Down there the earth was already swelling with...,"Down there, the earth was already swelling wit...",Down there the earth was already swelling with...
9,9,amused_16-28_0027.wav,27,bea,female,amused,NUNTE MY SURPRISE HE BEGAN TO SHOW ACTUAL ENTH...,To my surprise he began to show actual enthusi...,"Nunte, my surprise, he began to show actual en...",To my surprise he began to show actual enthusi...


# Create a few example transcriptions

In [None]:
# transcription and label are pretty much the same, only difference in punctuation
RAW_TRANS1 = 'of course that is uninteresting she continued'
CLEAN_TRANS1 = 'Of course, that is uninteresting. She continued.'
LABEL1 = 'Of course, that is uninteresting, she continued.'

# transcription and label have one verb word difference
RAW_TRANS2 = 'he wated in the edge of the water and began scrubbing himself'
CLEAN_TRANS2 = 'He wated in the edge of the water and began scrubbing himself.'
LABEL2 = 'He waded into the edge of the water and began scrubbing himself.'

# transcription and label have multiple differences for both words and punctuations
RAW_TRANS3 = 'nunte my surprise he began to show actual enthusiasm in my favor'
CLEAN_TRANS3 = 'Nunte, my surprise, he began to show actual enthusiasm in my favor.'
LABEL3 = 'To my surprise he began to show actual enthusiasm in my favor.'

# Define the SCORE_MODEL

In [None]:
MAX_LEN = 512
CHECKPOINT = 'roberta-large'
TOKENIZER = RobertaTokenizer.from_pretrained(CHECKPOINT)
ROBERTA = TFRobertaModel.from_pretrained(CHECKPOINT)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaModel: ['lm_head.layer_norm.bias', 'lm_head.layer_norm.weight', 'roberta.embeddings.position_ids', 'lm_head.dense.weight', 'lm_head.bias', 'lm_head.dense.bias']
- This IS expected if you are initializing TFRobertaModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
Some weights or buffers of the TF 2.0 model TFRobertaModel were not initialized from the PyTorch model and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and infe

In [None]:
def create_score_model(llm,
                       num_unfreeze=0,
                       hidden_size=[1024]):

  if num_unfreeze == 0:
    llm.trainable = False
  elif num_unfreeze == 24:
    llm.trainable = True
  else:
    retrain_layers = []

    for retrain_layer_number in range(num_unfreeze):
      layer_code = '_' + str(23 - retrain_layer_number)
      retrain_layers.append(layer_code)

    print('retrain layers: ', retrain_layers)

    for w in llm.weights:
      if not any([x in w.name for x in retrain_layers]):
        w._trainable = False

  input_ids = tf.keras.layers.Input(shape=(MAX_LEN,), dtype=tf.int64, name='input_ids_layer')
  token_type_ids = tf.keras.layers.Input(shape=(MAX_LEN,), dtype=tf.int64, name='token_type_ids_layer')
  attention_mask = tf.keras.layers.Input(shape=(MAX_LEN,), dtype=tf.int64, name='attention_mask_layer')

  llm_inputs = {'input_ids': input_ids,
                'token_type_ids': token_type_ids,
                'attention_mask': attention_mask}

  llm_out = llm(llm_inputs)
  output_token = llm_out.last_hidden_state
  hidden = tf.math.reduce_mean(output_token, axis=1)

  for i, each in enumerate(hidden_size):
    hidden = tf.keras.layers.Dense(each, activation='relu', name=f'hidden_layer_{i}')(hidden)
    hidden = tf.keras.layers.Dropout(0.1)(hidden)

  classification = tf.keras.layers.Dense(1, activation='sigmoid',name='classification_layer')(hidden)

  classification_model = tf.keras.Model(inputs=[input_ids, token_type_ids, attention_mask], outputs=[classification])

  # Optimizer
  optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5,
                                      beta_1=0.9,
                                      beta_2=0.98,
                                      epsilon=1e-06,
                                      clipnorm=0.0)

  # Learning rate scheduler
  lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(initial_learning_rate=1e-5,
                                                              decay_steps=5336,
                                                              end_learning_rate=1e-10,
                                                              power=1.0)

  optimizer.lr = lr_schedule

  classification_model.compile(optimizer=optimizer,
                                loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
                                metrics='accuracy')

  return classification_model

# Create the SCORE_MODEL

In [None]:
SCORE_MODEL = create_score_model(ROBERTA, num_unfreeze=24)
DIR = '/content/drive/MyDrive/266/Grammatical_Acceptability_Classifier/Final/'
FILEPATH = DIR + 'model6e.weights.03-0.87.hdf5'
SCORE_MODEL.load_weights(FILEPATH)

# Create the MLM_MODEL

In [None]:
MLM_MODEL = TFRobertaForMaskedLM.from_pretrained(CHECKPOINT)

Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFRobertaForMaskedLM: ['roberta.embeddings.position_ids']
- This IS expected if you are initializing TFRobertaForMaskedLM from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFRobertaForMaskedLM from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFRobertaForMaskedLM were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFRobertaForMaskedLM for predictions without further training.


# Create a simple GEC model:

Method:
1. Iteratively mask each word in the sentence
2. Use TFRobertaForMaskedLM with RoBERTa-large to predict the masked word, with k-beam = 3
3. Use trained GAC model to score the predicted sentences (where the masked word is replaced with the predicted word)
4. Keep the predicted sentence only if the score is at least as good as the original score

In [None]:
class SimpleGEC:
  def __init__(self,
               sentence,
               score_model=SCORE_MODEL,
               mlm_model=MLM_MODEL,
               k=3):
    # instantiate class variables
    self.sentence = sentence
    self.sentence_list = self.sentence.split(' ')
    self.k = k
    self.score_model=score_model
    self.mlm_model=mlm_model
    self.scores = {}
    self.scores[self.sentence] = self.score_sentence(self.sentence)

    # iteratively mask each word in the sentence_list & score the sentence
    for i, _ in enumerate(self.sentence_list):
      new_sentences = []
      masked_word = self.sentence_list[i]
      print('masked word: ', masked_word)
      masked_sentence = ' '.join(self.sentence_list[:i] + ['<mask>'] + self.sentence_list[i+1:])
      masked_tokenized = self.tokenize_sentence(masked_sentence)
      masked_candidates = self.get_candidates(masked_tokenized, i)
      print('masked candidates: ', masked_candidates)

      for masked_candidate in masked_candidates:
        new_sentence = masked_sentence.replace('<mask>', masked_candidate.strip())
        new_sentences.append(new_sentence)

      new_scores = self.score_sentence(new_sentences)
      for i, new_score in enumerate(new_scores):
        if new_score >= self.scores[self.sentence]:
          self.scores[new_sentences[i]] = new_score

    print('-'*60)
    print('original_sentence: ', self.sentence)
    print('original_score: ', self.scores[self.sentence])
    print('-'*60)

  def get_candidates(self, tokenized_masked_sentence, i):
      masked_output = self.mlm_model(tokenized_masked_sentence)
      masked_logits = masked_output.logits[0, i+1]
      masked_top_k = tf.math.top_k(masked_logits, self.k).indices.numpy()
      masked_candidates = [TOKENIZER.decode(i) for i in masked_top_k]
      return masked_candidates

  def score_sentence(self, sentence_to_score):
    sentence_tokenized = self.tokenize_sentence(sentence_to_score)
    ypred = self.score_model.predict([sentence_tokenized.input_ids, sentence_tokenized.token_type_ids, sentence_tokenized.attention_mask])
    return ypred

  def tokenize_sentence(self, sentence_to_tokenize):
    return TOKENIZER(sentence_to_tokenize,
                     add_special_tokens=True,
                     max_length=MAX_LEN,
                     padding='max_length',
                     return_token_type_ids=True,
                     truncation=True,
                     return_tensors="tf"
                     )

# Run SimpleGEC on example 1

In [None]:
gec_label1_score = SimpleGEC(LABEL1).scores

dict(sorted(gec_label1_score.items(), key=lambda item: item[1], reverse=True))

masked word:  Of
masked candidates:  ['Of', 'of', 'Off']
masked word:  course,
masked candidates:  [' course', 'course', ' which']
masked word:  that
masked candidates:  [',', ' that', ' this']
masked word:  is
masked candidates:  [' that', ' it', ' this']
masked word:  uninteresting,
masked candidates:  [' is', ' was', "'s"]
masked word:  she
masked candidates:  [' un', ' non', ' Un']
masked word:  continued.
masked candidates:  ['interesting', 'important', ' interesting']
------------------------------------------------------------
original_sentence:  Of course, that is uninteresting, she continued.
original_score:  [0.998242]
------------------------------------------------------------


{'Of course, this is uninteresting, she continued.': array([0.9987665], dtype=float32),
 'Of course, that is uninteresting, she continued.': array([0.998242], dtype=float32)}

In [None]:
gec_raw_trans1_score = SimpleGEC(RAW_TRANS1).scores

dict(sorted(gec_raw_trans1_score.items(), key=lambda item: item[1], reverse=True))

masked word:  of
masked candidates:  ['Of', 'of', 'But']
masked word:  course
masked candidates:  [' course', 'course', ' which']
masked word:  that
masked candidates:  [' it', ' this', ' that']
masked word:  is
masked candidates:  ["'s", ' is', ' was']
masked word:  uninteresting
masked candidates:  [' why', ' what', ' when']
masked word:  she
masked candidates:  ['interesting', 'important', 'interested']
masked word:  continued
masked candidates:  [' she', ' to', ' in']
------------------------------------------------------------
original_sentence:  of course that is uninteresting she continued
original_score:  [0.9922142]
------------------------------------------------------------


{'Of course that is uninteresting she continued': array([0.99853015], dtype=float32),
 'of course that is why she continued': array([0.9966671], dtype=float32),
 'of course it is uninteresting she continued': array([0.9954644], dtype=float32),
 'of course this is uninteresting she continued': array([0.9941053], dtype=float32),
 'of course that is uninteresting she continued': array([0.9922142], dtype=float32)}

In [None]:
gec_clean_trans1_score = SimpleGEC(CLEAN_TRANS1).scores

dict(sorted(gec_clean_trans1_score.items(), key=lambda item: item[1], reverse=True))

masked word:  Of
masked candidates:  ['Of', 'of', 'Off']
masked word:  course,
masked candidates:  [' course', ' which', 'course']
masked word:  that
masked candidates:  [',', ' that', ' this']
masked word:  is
masked candidates:  [' that', ' it', ' That']
masked word:  uninteresting.
masked candidates:  [' is', ' was', "'s"]
masked word:  She
masked candidates:  [' un', ' non', ' Un']
masked word:  continued.
masked candidates:  ['interesting', 'important', 'original']
------------------------------------------------------------
original_sentence:  Of course, that is uninteresting. She continued.
original_score:  [0.9966452]
------------------------------------------------------------


{'Of course, this is uninteresting. She continued.': array([0.9975824], dtype=float32),
 'Of course, that is uninteresting. She continued.': array([0.9966452], dtype=float32)}

The GEC model doesn't correct punctuation and keeps the same punctuation as the input. Perhaps we should consider feeding the sentence without punctuation and after the sentence has been corrected, add in the punctuation (so the punctuation is more sensible)?

# Run SimpleGEC on example 2

In [None]:
gec_label2_score = SimpleGEC(LABEL2).scores

dict(sorted(gec_label2_score.items(), key=lambda item: item[1], reverse=True))

masked word:  He
masked candidates:  ['He', 'David', 'Smith']
masked word:  waded
masked candidates:  [' stepped', ' walked', ' leaned']
masked word:  into
masked candidates:  ['aded', 'ade', 'ading']
masked word:  the
masked candidates:  [' into', ' in', ' toward']
masked word:  edge
masked candidates:  [' the', ' a', ' to']
masked word:  of
masked candidates:  [' edge', ' edges', ' end']
masked word:  the
masked candidates:  [' of', ' the', ' in']
masked word:  water
masked candidates:  [' the', ' a', ' it']
masked word:  and
masked candidates:  [' water', ' river', ' pool']
masked word:  began
masked candidates:  [' and', ' he', ',']
masked word:  scrubbing
masked candidates:  [' began', ' begun', ' started']
masked word:  himself.
masked candidates:  [' scrub', ' dab', ' sob']
------------------------------------------------------------
original_sentence:  He waded into the edge of the water and began scrubbing himself.
original_score:  [0.99968493]
--------------------------------

{'Smith waded into the edge of the water and began scrubbing himself.': array([0.9998092], dtype=float32),
 'David waded into the edge of the water and began scrubbing himself.': array([0.99974537], dtype=float32),
 'He waded into the edge edges the water and began scrubbing himself.': array([0.9997117], dtype=float32),
 'He waded into the edge of the water and began scrubbing himself.': array([0.99968493], dtype=float32)}

In [None]:
gec_raw_trans2_score = SimpleGEC(RAW_TRANS2).scores

dict(sorted(gec_raw_trans2_score.items(), key=lambda item: item[1], reverse=True))

masked word:  he
masked candidates:  ['He', 'Man', 'he']
masked word:  wated
masked candidates:  [' stood', ' sat', ' stopped']
masked word:  in
masked candidates:  ['ated', 'aded', 'ate']
masked word:  the
masked candidates:  [' in', ' to', ' into']
masked word:  edge
masked candidates:  [' the', ' The', 'the']
masked word:  of
masked candidates:  [' edge', ' edges', ' center']
masked word:  the
masked candidates:  [' of', ' the', ' to']
masked word:  water
masked candidates:  [' the', ' a', ' The']
masked word:  and
masked candidates:  [' water', ' river', ' pool']
masked word:  began
masked candidates:  [' and', ' he', ' then']
masked word:  scrubbing
masked candidates:  [' began', ' begin', ' begins']
masked word:  himself
masked candidates:  [' scrub', ' dab', ' scra']
------------------------------------------------------------
original_sentence:  he wated in the edge of the water and began scrubbing himself
original_score:  [0.9719724]
-------------------------------------------

{'He wated in the edge of the water and began scrubbing himself': array([0.9984768], dtype=float32),
 'he sat in the edge of the water and began scrubbing himself': array([0.99806136], dtype=float32),
 'he stood in the edge of the water and began scrubbing himself': array([0.9966603], dtype=float32),
 'he stopped in the edge of the water and began scrubbing himself': array([0.9963963], dtype=float32),
 'Man wated in the edge of the water and began scrubbing himself': array([0.9947978], dtype=float32),
 'he wated in the edge of the water and began scrubbing himself': array([0.9719724], dtype=float32)}

In [None]:
gec_clean_trans2_score = SimpleGEC(CLEAN_TRANS2).scores

dict(sorted(gec_clean_trans2_score.items(), key=lambda item: item[1], reverse=True))

masked word:  He
masked candidates:  ['He', 'he', 'I']
masked word:  wated
masked candidates:  [' stood', ' sat', ' stopped']
masked word:  in
masked candidates:  ['aded', 'ated', 'ade']
masked word:  the
masked candidates:  [' in', ' to', ' into']
masked word:  edge
masked candidates:  [' the', ' in', ' to']
masked word:  of
masked candidates:  [' edge', ' edges', ' end']
masked word:  the
masked candidates:  [' of', ' the', ' to']
masked word:  water
masked candidates:  [' the', ' a', ' his']
masked word:  and
masked candidates:  [' water', ' river', ' pool']
masked word:  began
masked candidates:  [' and', ',', ' he']
masked word:  scrubbing
masked candidates:  [' began', ' begun', ' beginning']
masked word:  himself.
masked candidates:  [' scrub', ' dab', ' scra']
------------------------------------------------------------
original_sentence:  He wated in the edge of the water and began scrubbing himself.
original_score:  [0.9941025]
------------------------------------------------

{'He stopped in the edge of the water and began scrubbing himself.': array([0.99915284], dtype=float32),
 'He sat in the edge of the water and began scrubbing himself.': array([0.9991304], dtype=float32),
 'He stood in the edge of the water and began scrubbing himself.': array([0.9989081], dtype=float32),
 'He wated in the edge of the water and began scrubbing scra': array([0.9979078], dtype=float32),
 'He wated in the edge of the water and began scrubbing scrub': array([0.996097], dtype=float32),
 'He wated in the edge of the water and began scrubbing himself.': array([0.9941025], dtype=float32)}

In this example, 'waded' was misspelled to 'wated' but it was not corrected in any of the candidates. The original transcription with 'wated' actually scored high from the GAC model so this might be one of those instances where we just 'let it be'.

# Run SimpleGEC on example 3

In [None]:
gec_label3_score = SimpleGEC(LABEL3).scores

dict(sorted(gec_label3_score.items(), key=lambda item: item[1], reverse=True))

masked word:  To
masked candidates:  ['To', 'In', 'At']
masked word:  my
masked candidates:  [' my', ' his', ' our']
masked word:  surprise
masked candidates:  [' surprise', ' relief', ' delight']
masked word:  he
masked candidates:  [' she', ' he', ' they']
masked word:  began
masked candidates:  [' began', ' started', ' seemed']
masked word:  to
masked candidates:  [' to', ' the', ' and']
masked word:  show
masked candidates:  [' show', ' express', ' display']
masked word:  actual
masked candidates:  [' some', ' more', ' great']
masked word:  enthusiasm
masked candidates:  [' interest', ' affection', ' feelings']
masked word:  in
masked candidates:  [' in', ' for', ' towards']
masked word:  my
masked candidates:  [' my', ' our', ' his']
masked word:  favor.
masked candidates:  [' work', ' presence', ' company']
------------------------------------------------------------
original_sentence:  To my surprise he began to show actual enthusiasm in my favor.
original_score:  [0.9977789]
--

{'To my surprise he began to express actual enthusiasm in my favor.': array([0.9990672], dtype=float32),
 'To my surprise he began to show great enthusiasm in my favor.': array([0.99896246], dtype=float32),
 'To my surprise he began to show some enthusiasm in my favor.': array([0.99888587], dtype=float32),
 'To my surprise he began to display actual enthusiasm in my favor.': array([0.9984319], dtype=float32),
 'To my surprise he began to show actual enthusiasm in our favor.': array([0.99842596], dtype=float32),
 'To my surprise he began to show more enthusiasm in my favor.': array([0.9983378], dtype=float32),
 'To my surprise he began to show actual enthusiasm in my favor.': array([0.9977789], dtype=float32)}

In [None]:
gec_raw_trans3_score = SimpleGEC(RAW_TRANS3).scores

dict(sorted(gec_raw_trans3_score.items(), key=lambda item: item[1], reverse=True))

masked word:  nunte
masked candidates:  ['To', 'In', 'TO']
masked word:  my
masked candidates:  ['st', ' to', ' my']
masked word:  surprise
masked candidates:  [' my', ' second', ' the']
masked word:  he
masked candidates:  [' surprise', ' surprised', ' shock']
masked word:  began
masked candidates:  [' he', ',', ' I']
masked word:  to
masked candidates:  [' began', ' begun', ' beginning']
masked word:  show
masked candidates:  [' to', ' t', ' conve']
masked word:  actual
masked candidates:  [' show', ' Show', ' SHOW']
masked word:  enthusiasm
masked candidates:  [' actual', ' actually', ' real']
masked word:  in
masked candidates:  [' enthusiasm', ' enthusiastic', ' excitement']
masked word:  my
masked candidates:  [' in', ' In', 'in']
masked word:  favor
masked candidates:  [' my', ' the', ' his']
------------------------------------------------------------
original_sentence:  nunte my surprise he began to show actual enthusiasm in my favor
original_score:  [[0.4449153]]
------------

{'To my surprise he began to show actual enthusiasm in my favor': array([0.99719167], dtype=float32),
 'TO my surprise he began to show actual enthusiasm in my favor': array([0.98455596], dtype=float32),
 'In my surprise he began to show actual enthusiasm in my favor': array([0.97843367], dtype=float32),
 'nunte my surprise he began to show actual enthusiasm enthusiastic my favor': array([0.47926992], dtype=float32),
 'nunte my surprise he began to show actual enthusiasm in my favor': array([[0.4449153]], dtype=float32)}

In [None]:
gec_clean_trans3_score = SimpleGEC(CLEAN_TRANS3).scores

dict(sorted(gec_clean_trans3_score.items(), key=lambda item: item[1], reverse=True))

masked word:  Nunte,
masked candidates:  ['To', 'In', 'Imagine']
masked word:  my
masked candidates:  ['ay', 'ationally', 'aturally']
masked word:  surprise,
masked candidates:  [',', ' of', '!,']
masked word:  he
masked candidates:  [' my', ' to', ' our']
masked word:  began
masked candidates:  [' surprise', ' shock', ' disappointment']
masked word:  to
masked candidates:  [',', '!,', ';']
masked word:  show
masked candidates:  [' he', ' I', ' she']
masked word:  actual
masked candidates:  [' began', ' begun', ' started']
masked word:  enthusiasm
masked candidates:  [' to', ' a', ' t']
masked word:  in
masked candidates:  [' show', ' Show', ' SHOW']
masked word:  my
masked candidates:  [' actual', ' actually', ' some']
masked word:  favor.
masked candidates:  [' enthusiasm', ' interest', ' excitement']
------------------------------------------------------------
original_sentence:  Nunte, my surprise, he began to show actual enthusiasm in my favor.
original_score:  [[0.19494519]]
----

{'To my surprise, he began to show actual enthusiasm in my favor.': array([0.9932491], dtype=float32),
 'Imagine my surprise, he began to show actual enthusiasm in my favor.': array([0.980801], dtype=float32),
 'In my surprise, he began to show actual enthusiasm in my favor.': array([0.9395085], dtype=float32),
 'Nunte, my !, he began to show actual enthusiasm in my favor.': array([0.39657044], dtype=float32),
 'Nunte, ay surprise, he began to show actual enthusiasm in my favor.': array([0.3478072], dtype=float32),
 'Nunte, my surprise, he began to show actual enthusiasm in my interest': array([0.25532335], dtype=float32),
 'Nunte, aturally surprise, he began to show actual enthusiasm in my favor.': array([0.21383084], dtype=float32),
 'Nunte, my surprise, he began to show actual enthusiasm in my favor.': array([[0.19494519]], dtype=float32)}

In this example, the GEC model actually corrected the incorrect sentence (both the misspelled word and the punctuation)