# Error Suppression

## Part 0: Encoding and processing a file, helper methods, etc 

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
def encode_file(filename): 
  sentences = []
  annotations = {} 

  idx = -1 
  with open(filename, encoding="latin-1") as f: 
    for line in f.readlines(): 
      if line[0] == 'S':
        """
        changes for removing '\n' and indexing the sentence from the second 
        element
        """
        sentences.append(line.rstrip()[2:]) 
        idx += 1
      elif line[0] == 'A':
        if idx in annotations: 
          annotations[idx].append(line) 
        else: 
          annotations[idx] = [line] 
      else: 
        continue 

  return sentences, annotations     

In [None]:
def encode_errors_only(annotations): 
  errors = {} 
  error_count = 0
  noop_count = 0 
  for idx in annotations.keys(): 
    errors[idx] = [] 
    for annot in annotations[idx]: 
      error = annot.split("|||")[1]
      if error != "noop": 
        error_count += 1
        errors[idx].append(error)
      else:
        noop_count += 1

  return errors, error_count, noop_count

In [None]:
def get_error_dist(errors, noop_count): 
  error_dist = {} 
  total_count = noop_count
  for idx in errors.keys(): 
    for error in errors[idx]: 
      total_count += 1
      if error not in error_dist: 
        error_dist[error] = 1
      else: 
        error_dist[error] += 1 
  
  for error in error_dist.keys(): 
    error_dist[error] = error_dist[error] * 1.0 / total_count 

  return error_dist 

In [None]:
def get_error_dist_count(errors, noop_count): 
  error_dist = {} 
  total_count = noop_count
  for idx in errors.keys(): 
    for error in errors[idx]: 
      total_count += 1
      if error not in error_dist: 
        error_dist[error] = 1
      else: 
        error_dist[error] += 1 

  return error_dist 

In [None]:
import json

In [None]:
# Opening JSON file
with open('/content/drive/MyDrive/CS 546 Final Project /CoNLL14 test data evaluation/lang_tags.json') as json_file:
    all_lang_data = json.load(json_file)

In [None]:
all_lang_data.keys()

dict_keys(['ca', 'ko', 'ru', 'pl', 'pt', 'th', 'es', 'zh', 'it', 'tr', 'fr', 'ja', 'de', 'el', 'sv', 'nl'])

In [None]:
all_lang_data['ru']

{'M:ADJ': 2,
 'R:PART': 3,
 'U:NOUN:POSS': 3,
 'R:NOUN:INFL': 3,
 'U:ADJ': 3,
 'M:NOUN:POSS': 4,
 'M:PART': 4,
 'U:PART': 5,
 'U:VERB:FORM': 5,
 'U:CONJ': 5,
 'R:VERB:INFL': 6,
 'R:CONTR': 6,
 'M:ADV': 7,
 'R:CONJ': 8,
 'U:CONTR': 9,
 'R:NOUN:POSS': 9,
 'U:PRON': 10,
 'M:VERB:FORM': 11,
 'U:VERB': 12,
 'M:NOUN': 13,
 'U:OTHER': 13,
 'U:NOUN': 15,
 'M:CONJ': 15,
 'U:ADV': 16,
 'M:PRON': 28,
 'U:VERB:TENSE': 29,
 'R:VERB:SVA': 30,
 'M:VERB': 39,
 'R:ADV': 45,
 'M:VERB:TENSE': 45,
 'M:OTHER': 45,
 'R:ADJ': 53,
 'M:PREP': 53,
 'R:PRON': 58,
 'R:MORPH': 60,
 'U:PREP': 66,
 'R:PUNCT': 71,
 'R:WO': 73,
 'U:DET': 74,
 'R:ORTH': 77,
 'R:NOUN:NUM': 79,
 'R:VERB:FORM': 82,
 'U:PUNCT': 87,
 'UNK': 91,
 'R:DET': 98,
 'R:NOUN': 101,
 'R:VERB:TENSE': 107,
 'M:PUNCT': 126,
 'R:PREP': 174,
 'R:VERB': 183,
 'R:OTHER': 278,
 'M:DET': 297,
 'R:SPELL': 477,
 'noop': 715}

In [None]:
lang_probs = {}
for x in all_lang_data.keys():
  lang_probs[x] = {}
  count = 0
  for y in all_lang_data[x].keys():
    count = count + all_lang_data[x][y]
  for y in all_lang_data[x].keys():
    lang_probs[x][y] = all_lang_data[x][y] / count

In [None]:
goal_error_dist = lang_probs['ru']

In [None]:
goal_error_dist

{'M:ADJ': 0.0005091649694501018,
 'R:PART': 0.0007637474541751527,
 'U:NOUN:POSS': 0.0007637474541751527,
 'R:NOUN:INFL': 0.0007637474541751527,
 'U:ADJ': 0.0007637474541751527,
 'M:NOUN:POSS': 0.0010183299389002036,
 'M:PART': 0.0010183299389002036,
 'U:PART': 0.0012729124236252546,
 'U:VERB:FORM': 0.0012729124236252546,
 'U:CONJ': 0.0012729124236252546,
 'R:VERB:INFL': 0.0015274949083503055,
 'R:CONTR': 0.0015274949083503055,
 'M:ADV': 0.0017820773930753565,
 'R:CONJ': 0.002036659877800407,
 'U:CONTR': 0.002291242362525458,
 'R:NOUN:POSS': 0.002291242362525458,
 'U:PRON': 0.0025458248472505093,
 'M:VERB:FORM': 0.00280040733197556,
 'U:VERB': 0.003054989816700611,
 'M:NOUN': 0.003309572301425662,
 'U:OTHER': 0.003309572301425662,
 'U:NOUN': 0.0038187372708757637,
 'M:CONJ': 0.0038187372708757637,
 'U:ADV': 0.004073319755600814,
 'M:PRON': 0.007128309572301426,
 'U:VERB:TENSE': 0.007382892057026477,
 'R:VERB:SVA': 0.007637474541751527,
 'M:VERB': 0.009928716904276986,
 'R:ADV': 0.01145

In [None]:
filepath = "/content/drive/MyDrive/CS 546 Final Project /CoNLL14 test data evaluation/train_gec.m2"

## Part 1: Identify what errors need to be fixed

In [None]:
# Using the same m2 file as if that's the one I need to supress errors in. proof of concept basically 
sentences, annotations = encode_file(filepath)
errors_by_sentence, error_count, noop_count= encode_errors_only(annotations)
error_dist_count = get_error_dist_count(errors_by_sentence, noop_count)

In [None]:
import math

In [None]:
del goal_error_dist['noop']

In [None]:
print(error_count)

62958


In [None]:
goal_error_dist_count = {} 
for error in goal_error_dist.keys(): 
  goal_error_dist_count[error] = math.ceil(goal_error_dist[error] * (error_count + noop_count))

In [None]:
goal_error_dist_count

{'M:ADJ': 38,
 'R:PART': 57,
 'U:NOUN:POSS': 57,
 'R:NOUN:INFL': 57,
 'U:ADJ': 57,
 'M:NOUN:POSS': 76,
 'M:PART': 76,
 'U:PART': 95,
 'U:VERB:FORM': 95,
 'U:CONJ': 95,
 'R:VERB:INFL': 114,
 'R:CONTR': 114,
 'M:ADV': 133,
 'R:CONJ': 152,
 'U:CONTR': 171,
 'R:NOUN:POSS': 171,
 'U:PRON': 190,
 'M:VERB:FORM': 209,
 'U:VERB': 228,
 'M:NOUN': 247,
 'U:OTHER': 247,
 'U:NOUN': 285,
 'M:CONJ': 285,
 'U:ADV': 304,
 'M:PRON': 532,
 'U:VERB:TENSE': 551,
 'R:VERB:SVA': 570,
 'M:VERB': 740,
 'R:ADV': 854,
 'M:VERB:TENSE': 854,
 'M:OTHER': 854,
 'R:ADJ': 1006,
 'M:PREP': 1006,
 'R:PRON': 1101,
 'R:MORPH': 1139,
 'U:PREP': 1253,
 'R:PUNCT': 1347,
 'R:WO': 1385,
 'U:DET': 1404,
 'R:ORTH': 1461,
 'R:NOUN:NUM': 1499,
 'R:VERB:FORM': 1556,
 'U:PUNCT': 1651,
 'UNK': 1727,
 'R:DET': 1860,
 'R:NOUN': 1917,
 'R:VERB:TENSE': 2030,
 'M:PUNCT': 2391,
 'R:PREP': 3302,
 'R:VERB': 3472,
 'R:OTHER': 5275,
 'M:DET': 5635,
 'R:SPELL': 9050}

In [None]:
new_dict = {}
for k,v in errors_by_sentence.items():
    for x in v:
        new_dict.setdefault(x,[]).append(k)

In [None]:
from random import sample

In [None]:
def errors_to_supress(goal_error_dist_count, error_dist_count, new_dict):
  new_dict_1 = {}
  for x in error_dist_count.keys():
    new_dict_1[x[1:-1]] = error_dist_count[x]

  new_dict_2 = {}
  for x in new_dict.keys():
    new_dict_2[x[1:-1]] = new_dict[x]


  error_dict = {}
  for x in goal_error_dist_count.keys():
    #print(x)
    #print(new_dict.keys())
    if x in new_dict_1.keys():
      count_goal = goal_error_dist_count[x]
      count_current = new_dict_1[x]
      #print(count_goal, count_current)
      if count_current > count_goal:
        error_dict[x] = count_current - count_goal
  print(error_dict)

  print(new_dict)
  error_dict_idx = {}
  for x in error_dict:
    error_dict_idx[x] = sample(new_dict_2[x],error_dict[x])
  print(error_dict_idx)


  idx_error = {}
  for k,v in error_dict_idx.items():
    for x in v:
      idx_error.setdefault(x,[]).append(k)
  print(idx_error)

  print(errors_by_sentence)
  for z in errors_by_sentence.keys():
    list_errors = errors_by_sentence[z]
    #print(list_errors)
    new_list_err = []
    for g in list_errors:
      new_list_err.append(g[1:-1])
    errors_by_sentence[z] = new_list_err
    #print(errors_by_sentence[z])

  idx_error_ann_idx = {}
  for x in idx_error.keys():
    idx_error_ann_idx[x] = []
    err_cat_count = {}
    for y in idx_error[x]:
      if y in err_cat_count.keys():
        err_cat_count[y] += 1
      else:
        err_cat_count[y] = 1
    #print(err_cat_count)
    for z in err_cat_count.keys():
      indices = [i for i, j in enumerate(errors_by_sentence[x]) if j == z]
      idx_error_ann_idx[x].extend(indices[:err_cat_count[z]])
  print(idx_error_ann_idx)

  for x in idx_error_ann_idx.keys():
    idx_error_ann_idx[x].sort()
  print(idx_error_ann_idx)
  return idx_error_ann_idx

In [None]:
idx_error_ann_idx = errors_to_supress(goal_error_dist_count, error_dist_count, new_dict)

{'M:ADJ': 120, 'R:PART': 336, 'U:NOUN:POSS': 12, 'U:ADJ': 50, 'M:NOUN:POSS': 115, 'M:PART': 21, 'R:CONTR': 48, 'M:ADV': 227, 'U:PRON': 59, 'U:VERB': 85, 'M:NOUN': 210, 'U:OTHER': 546, 'U:NOUN': 108, 'U:ADV': 74, 'M:PRON': 215, 'R:VERB:SVA': 797, 'M:OTHER': 434, 'M:PREP': 267, 'R:MORPH': 420, 'R:PUNCT': 917, 'U:DET': 1061, 'R:ORTH': 1327, 'R:NOUN:NUM': 1004, 'R:VERB:FORM': 357, 'R:NOUN': 455, 'R:VERB:TENSE': 994, 'M:PUNCT': 5495, 'R:PREP': 867}
{' M:PUNCT ': [0, 1, 7, 10, 16, 19, 19, 20, 20, 20, 21, 51, 65, 65, 71, 73, 85, 88, 88, 89, 96, 97, 97, 98, 104, 113, 113, 114, 114, 115, 116, 117, 118, 119, 120, 120, 127, 130, 130, 130, 130, 130, 130, 136, 136, 139, 139, 143, 148, 149, 150, 157, 162, 173, 173, 176, 178, 195, 198, 203, 213, 219, 219, 219, 219, 219, 219, 220, 220, 220, 221, 226, 237, 259, 260, 262, 262, 262, 270, 271, 274, 275, 280, 290, 291, 292, 297, 299, 300, 311, 319, 320, 320, 320, 320, 321, 325, 325, 326, 331, 331, 333, 340, 345, 358, 358, 360, 360, 362, 363, 363, 378, 379,

In [None]:
idx_error_ann_idx

{8926: [0, 2, 3],
 27287: [1],
 10132: [1, 8],
 8690: [1],
 984: [0, 1, 2],
 5713: [2],
 2858: [0, 6, 10, 12, 14, 19, 20, 22, 28],
 3227: [0, 1, 2, 3, 5],
 654: [0, 1],
 16425: [0, 3, 5],
 11145: [1],
 10580: [0, 1, 2],
 4647: [1, 4, 5, 9, 10, 11, 16],
 8666: [0, 1, 2, 4],
 3233: [1, 2],
 6619: [1],
 4648: [0, 1, 3, 4],
 8958: [1, 2],
 2012: [5],
 2210: [4, 5, 9],
 33714: [0],
 15811: [1, 4, 11],
 2508: [0, 2],
 4732: [0, 1, 2],
 31049: [0, 1, 2, 3, 5],
 13443: [0, 1, 3],
 4974: [1, 2, 4],
 6846: [1, 4],
 19955: [0],
 6621: [0],
 33931: [3, 4, 5, 6],
 4054: [0, 1, 3, 6],
 3109: [1],
 29661: [0],
 6338: [1],
 9325: [5],
 2522: [1],
 21099: [0],
 16183: [2],
 15902: [0],
 30519: [2, 3, 4],
 7411: [1],
 25229: [0],
 5538: [1, 3, 5, 7, 8, 10, 20],
 31224: [1],
 3808: [0],
 11020: [0, 1],
 614: [0, 2, 4, 5, 8],
 22522: [1],
 33282: [0, 1],
 9904: [2],
 3966: [0, 1, 3],
 12717: [3, 4, 8],
 6822: [2],
 30808: [0],
 20022: [1],
 13403: [0, 2, 3],
 23657: [0, 1],
 26202: [3],
 9552: [0],
 22135

In [None]:
errors_by_sentence[18649]

['M:DET']

In [None]:
annotations[3]

['A 8 9 ||| R:SPELL ||| center ||| REQUIRED ||| -NONE- ||| 0\n']

In [None]:
def get_errors_to_suppress(goal_error_dist_count, errors_by_sentence): 
  errors_to_suppress = {} 
  for sent_idx in errors_by_sentence.keys(): 
    annot_idx = 0
    errors_to_suppress[sent_idx] = []
    missing_suppressed = False # has at least one MISSING error been suppressed? 
    for error in errors_by_sentence[sent_idx]:
      goal_error_dist_count[error] -= 1 
      # TODO: check if this logic is okay  
      if goal_error_dist_count[error] <= 0: 
        if error[0] == 'M': 
          if not missing_suppressed: 
            errors_to_suppress[sent_idx].append(annot_idx)
            missing_suppressed = True 
        else: 
          errors_to_suppress[sent_idx].append(annot_idx)


      annot_idx += 1
    
  return errors_to_suppress 

In [None]:
errors_to_suppress = get_errors_to_suppress(goal_error_dist_count, errors_by_sentence)

print(errors_to_suppress)

KeyError: ignored

## Part 2: Fix the errors in the file 

Types of errors:
- R: Replace
- M: Missing
- U: Unnecessary
- noop 

In [None]:
def supresser_method(sentence_dict):

    # idx_change is used to adjust the indices of the edits
    idx_change = 0
    # sentence_list is the source/S sentence in a list format
    sentence_list = sentence_dict['S'].split(' ')

    # iterating over the annotations
    for x in sentence_dict['A']:
        # getting the type edit
        type_edit = x[1].split(':')[0]

        # getting the start index from the annotation line and adjusting for subsequent changes
        str_idx = int(x[0].split(' ')[1])
        str_idx = str_idx + idx_change

        # getting the end index from the annotation line and adjusting for subsequent changes
        end_idx = int(x[0].split(' ')[2])
        end_idx = end_idx + idx_change

        # calculating the count of elements to be inserted
        if len(x[2]) > 0:
            insert_count = int(len(x[2].split(' ')))
            insert_str_list = x[2].split(' ')
        else:
            insert_count = 0

        # code for deleting elements and updating the idx change
        if str_idx == end_idx:
            pass
        else:
            del sentence_list[str_idx:end_idx]
            idx_change = idx_change - (end_idx - str_idx)

        # code for inserting elements and updating the idx change
        if insert_count == 0:
            pass
        else:
            sentence_list[str_idx:str_idx] = insert_str_list
            idx_change = idx_change + len(insert_str_list)

    return sentence_list

In [None]:
sentences[3]

'I recommend visiting the artificial lake in the certer of the city which is surrounded by a park .'

In [None]:
annotations[3]

['A 8 9 ||| R:SPELL ||| center ||| REQUIRED ||| -NONE- ||| 0\n']

In [None]:
sentence_dict = {}
sentence_dict['S'] = sentences[3]#'Firstly , I would like to tell you , that the group has been booked into Palace Hotel , which locates beside the congress house in the centre .'
sentence_dict['A'] = []
selected_annotations = []
##
for x in annotations[3]:  
  sentence_dict['A'].append(x.split('|||'))

output_list = supresser_method(sentence_dict)
print(*output_list)

I recommend visiting the artificial lake in the  center  of the city which is surrounded by a park .


In [None]:
idx = 3427
annoations_to_suppress = []
for x in idx_error_ann_idx[idx]:
  print(annotations[idx][x])
  annoations_to_suppress.append(annotations[idx][x].split('|||'))
print(annoations_to_suppress)

KeyError: ignored

In [None]:
annoations_to_suppress

[]

In [None]:
annotations[idx]

['A 4 5 ||| U:NOUN:POSS |||  ||| REQUIRED ||| -NONE- ||| 0\n']

In [None]:
sentences[idx]

"And the flying saucers ' culture has dominated . They will be green in color ."

In [None]:
sentence_dict_1 = {}
sentence_dict_1['S'] = sentences[idx]
sentence_dict_1['A'] = annoations_to_suppress
output_list = supresser_method(sentence_dict_1)
print(*output_list)

And the flying saucers ' culture has dominated . They will be green in color .


In [None]:
# write a src file
# iterate over sentences file, if index in idx_error_ann_idx, run the supression
# logic if not then just print the sentence

In [None]:
ordered_idx_error_ann_idx = {k : [k] for k in sorted(idx_error_ann_idx)}

In [None]:
with open("Output_ru.txt", "w") as text_file:
  for r in range(0, len(sentences)):
    if r in ordered_idx_error_ann_idx.keys():
      #supress logic
      idx = r
      annoations_to_suppress = []
      for x in idx_error_ann_idx[idx]:
        annoations_to_suppress.append(annotations[idx][x].split('|||'))
      sentence_dict_1 = {}
      sentence_dict_1['S'] = sentences[idx]
      sentence_dict_1['A'] = annoations_to_suppress
      output_list = supresser_method(sentence_dict_1)
      str_a = ' '.join(output_list)
      text_file.writelines(str_a + '\n')
      pass
    else:
      #print(sentences[r])
      text_file.writelines(sentences[r] + '\n')

In [None]:
with open("Source.txt", "w") as text_file:
  for s in sentences:
    text_file.writelines(s + '\n')

In [None]:
a = ['This', 'job', 'does', "n't", 'demand', 'much', 'ability', ',', 'but', 'a', 'lot', 'of', 'patience', 'though', '.']
str_a = ' '.join(a)
print(str_a)

This job does n't demand much ability , but a lot of patience though .


In [None]:
'''
# return start_index, end_index, error_type, word(s)
def parse_annotation(annot): 
  a_split = annot.split("|||")

  # get the error type: 
  err_type = a_split[1]

  # get indices
  index_split = a_split[0].split(" ")
  start_index = int(index_split[1])
  end_index = int(index_split[2])

  # get words to replace/remove 
  words = a_split[2].split(" ")

  return start_index, end_index, err_type, words
'''   

'\n# return start_index, end_index, error_type, word(s)\ndef parse_annotation(annot): \n  a_split = annot.split("|||")\n\n  # get the error type: \n  err_type = a_split[1]\n\n  # get indices\n  index_split = a_split[0].split(" ")\n  start_index = int(index_split[1])\n  end_index = int(index_split[2])\n\n  # get words to replace/remove \n  words = a_split[2].split(" ")\n\n  return start_index, end_index, err_type, words\n'

TODO: I don't think this works if the replacement phrase is > than the part of the sentence. also this method doesn't fully work. 

In [None]:
'''
def replace(sent_split, start_index, end_index, words):
  try: 
    for i in range(end_index - start_index):
      if i >= len(words): 
        sent_split[start_index + i + 1] = "" 
      elif start_index + i + 1 >= len(sent_split): 
        sent_split.append(words[i])
      else: 
        sent_split[start_index + i + 1] = words[i]
  except: 
    print("replace failed") 
'''

'\ndef replace(sent_split, start_index, end_index, words):\n  try: \n    for i in range(end_index - start_index):\n      if i >= len(words): \n        sent_split[start_index + i + 1] = "" \n      elif start_index + i + 1 >= len(sent_split): \n        sent_split.append(words[i])\n      else: \n        sent_split[start_index + i + 1] = words[i]\n  except: \n    print("replace failed") \n'

In [None]:
'''
def missing(sent_split, start_index, end_index, words): 
  for i in range(end_index - start_index): 
    sent_split.insert(start_index + i + 1, words[i]) 
'''

'\ndef missing(sent_split, start_index, end_index, words): \n  for i in range(end_index - start_index): \n    sent_split.insert(start_index + i + 1, words[i]) \n'

In [None]:
'''
def unnecessary(sent_split, start_index, end_index, words): 
  for i in range(end_index - start_index): 
    # substitute with empty string to prevent indices from changing 
    if start_index + i + 1 < len(sent_split): 
      sent_split[start_index + i + 1] = "" 
'''

'\ndef unnecessary(sent_split, start_index, end_index, words): \n  for i in range(end_index - start_index): \n    # substitute with empty string to prevent indices from changing \n    if start_index + i + 1 < len(sent_split): \n      sent_split[start_index + i + 1] = "" \n'

In [None]:
'''
def correct_error(sent, annot): 
  start_index, end_index, error_type, words = parse_annotation(annot) 
  sent_split = sent.split(" ")

  # shouldn't be needed but just in case, early termination condition 
  if error_type == "noop": 
    return sent 

  # get R, M, U error type
  err_subtype = error_type.split(":")[0]

  if err_subtype == "R": 
    replace(sent_split, start_index, end_index, words)

  # TODO: fix missing so that it is compatible with further edits 
  elif err_subtype == "M": 
    missing(sent_split, start_index, end_index, words)

  elif err_subtype == "U": 
    unnecessary(sent_split, start_index, end_index, words)

  correct_sent = ' '.join(sent_split) 
  correct_sent = correct_sent.replace("  ", " ")
  return correct_sent 
'''

'\ndef correct_error(sent, annot): \n  start_index, end_index, error_type, words = parse_annotation(annot) \n  sent_split = sent.split(" ")\n\n  # shouldn\'t be needed but just in case, early termination condition \n  if error_type == "noop": \n    return sent \n\n  # get R, M, U error type\n  err_subtype = error_type.split(":")[0]\n\n  if err_subtype == "R": \n    replace(sent_split, start_index, end_index, words)\n\n  # TODO: fix missing so that it is compatible with further edits \n  elif err_subtype == "M": \n    missing(sent_split, start_index, end_index, words)\n\n  elif err_subtype == "U": \n    unnecessary(sent_split, start_index, end_index, words)\n\n  correct_sent = \' \'.join(sent_split) \n  correct_sent = correct_sent.replace("  ", " ")\n  return correct_sent \n'

In [None]:
'''
sent = 'S I am writing in order to express my disappointment about your musical show " Over the Rainbow " .'
annot = 'A 9 10|||R:PREP|||with|||REQUIRED|||-NONE-|||0|||ca'
print(correct_error(sent, annot))
'''

'\nsent = \'S I am writing in order to express my disappointment about your musical show " Over the Rainbow " .\'\nannot = \'A 9 10|||R:PREP|||with|||REQUIRED|||-NONE-|||0|||ca\'\nprint(correct_error(sent, annot))\n'

In [None]:
'''
sent = 'S I am writing in order to express my disappointment about your musical show " Over the Rainbow " .'
annot = 'A 9 10|||R:PREP|||with|||REQUIRED|||-NONE-|||0|||ca'
print(correct_error(sent, annot))
'''

'\nsent = \'S I am writing in order to express my disappointment about your musical show " Over the Rainbow " .\'\nannot = \'A 9 10|||R:PREP|||with|||REQUIRED|||-NONE-|||0|||ca\'\nprint(correct_error(sent, annot))\n'

In [None]:
'''
sent = 'S Maybe you should make have next year ?'
annot = 'A 8 9|||R:PUNCT|||.|||REQUIRED|||-NONE-|||0|||ru'
print(correct_error(sent, annot))
'''

"\nsent = 'S Maybe you should make have next year ?'\nannot = 'A 8 9|||R:PUNCT|||.|||REQUIRED|||-NONE-|||0|||ru'\nprint(correct_error(sent, annot))\n"

In [None]:
for sent_idx in range(len(sentences)): 

  try: 
    sentence_dict = {}
    sentence_dict['S'] = sentences[sent_idx]
    sentence_dict['A'] = [] 
    for annot_idx in errors_to_suppress[sent_idx]: 
      sentence_dict['A'].append(annotations[sent_idx][annot_idx])
      
    corrected_sentence = supresser_method(sentence_dict)
    #print(corrected_sentence)
    sentences[sent_idx] = " ".join(corrected_sentence)
  except: 
    print("failed")
    continue 

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
failed
['S', 'Classrooms', ',', 'Lessons', 'and', 'Breaks', '.\n']
failed
['S', 'Obviously', 'it', 'will', 'be', 'interesting', 'to', 'find', 'out', 'what', 'methods', 'are', 'used', 'by', 'our', 'teachers', 'to', 'help', 'pupils', 'understand', 'their', 'subject', 'better', '.\n']
failed
failed
failed
['S', 'Outdoor', 'Activities\n']
failed
failed
['S', 'Conclusion\n']
failed
['S', 'I', 'hope', 'you', 'will', 'consider', 'this', 'information', '.\n']
['S', 'Dear', 'Mr', 'Robertson', ',\n']
failed
failed
['S', 'However', ',', 'there', 'is', 'one', 'point', 'that', 'we', 'would', 'like', 'to', 'change', 'in', 'the', 'programme', '.\n']
failed
failed
['S', 'Furthermore', 'is', 'totally', 'free', 'for', 'students', '.\n']
failed
failed
failed
['S', 'It', 'is', 'also', 'possible', 'to', 'go', 'to', 'the', 'show', 'in', 'the', 'morning', 'but', 'we', 'are', 'totally', 'agree', 'that', 'we', 'do', "n't", 'want', 'to', 'lose', '

## Part 3: Creating a new file with errors suppressed

In [None]:
with open("/content/new_file.m2", "w+") as f: 
  for sent_idx in range(len(sentences)): 
    f.write(sentences[sent_idx])
    annot_idx = 0
    for annotation in annotations[sent_idx]: 
      if annot_idx not in errors_to_suppress[sent_idx]: 
        f.write(annotation)
      annot_idx += 1

    f.write("\n")