In [23]:
import pandas as pd
import re

# metrics 
def edit_distance_similarity(str1, str2):
    m, n = len(str1), len(str2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0:
                dp[i][j] = j  # If str1 is empty, insert all characters of str2
            elif j == 0:
                dp[i][j] = i  # If str2 is empty, remove all characters of str1
            elif str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]  # If last characters are the same, ignore them
            else:
                dp[i][j] = 1 + min(dp[i][j - 1],  # Insert
                                   dp[i - 1][j],  # Remove
                                   dp[i - 1][j - 1])  # Replace

    edit_dist = dp[m][n]
    max_len = max(m, n)
    similarity = 1 - (edit_dist / max_len)

    return similarity

def edit_distance(str1, str2):
    m, n = len(str1), len(str2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0:
                dp[i][j] = j  # If str1 is empty, insert all characters of str2
            elif j == 0:
                dp[i][j] = i  # If str2 is empty, remove all characters of str1
            elif str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]  # If last characters are the same, ignore them
            else:
                dp[i][j] = 1 + min(dp[i][j - 1],  # Insert
                                   dp[i - 1][j],  # Remove
                                   dp[i - 1][j - 1])  # Replace
    return dp[m][n]

def word_error_rate(reference, hypothesis):
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()
    edit_dist = edit_distance(reference_words, hypothesis_words)
    return edit_dist / len(reference_words)


def character_error_rate(reference, hypothesis):
    reference_chars = list(reference)
    hypothesis_chars = list(hypothesis)
    edit_dist = edit_distance(reference_chars, hypothesis_chars)
    return edit_dist / len(reference_chars)

def filter_string(input_string):
    # Convert to lowercase and remove newlines
    input_string = input_string.lower().replace('\n', ' ')
    # Keep only lowercase letters, dots, commas, and spaces
    filtered_string = re.sub(r'[^a-z., ]', '', input_string)
    return filtered_string

def process_prediction(csv_file):
    clf_result = pd.read_csv(csv_file)
    chars = clf_result[clf_result['Prob'] > 0.8]['Key prediction'].to_list()

    processed = []

    i = 0
    while i < len(chars):
        char = chars[i]
        if char == 'dot': processed.append('.')
        elif char == 'comma': processed.append(',')
        elif char == 'space': processed.append(' ')
        elif char == 'delete':
            if len(processed):
                processed.pop()
        else:
            processed.append(char)
        i += 1
    
    prediction = filter_string(''.join(processed))
    return prediction

def evaluate(prediction, ground_truth_path):
    prediction = filter_string(prediction)
    
    # Open the file in read mode ('r')
    with open(ground_truth_path, 'r') as file:
        # Read the entire content of the file
        gt = file.read()

    print(edit_distance_similarity(prediction, gt))
    print(word_error_rate(gt, prediction))

# Topview

Chat can be found here: https://chatgpt.com/share/2b75070b-6326-4a22-9a20-814f23f83c6d

In [17]:
prediction = process_prediction('stream_results/topview/video_6_clf.csv')
prediction

'kkkknondddeeerrr  aaqqqueiiannnccyy  kk,,  ii   ahhhhioopppee   yyooou     aaaarree   ddoopng  www  wweeelll   a..    iiww    wwwanntteeeeedd  tto   uupppddaatttee   yyno    oo  alttesstttt   apprroojjeecctttlak,,,,  whhhcchhh   hhhass       bbteeennn   qquuuitteee    attthhee    jjjjoonuunnnneee  k,,,...   tthhee  worrkkk   kinnnvvollllllavvveeeeasa  aaaaa  sseeerrrirreen  off  ccoommppeeedxx   ttaaask   adss  k,,  eeaa dch  wiittthh     itss   uupnnniqquuuieess   aaccchhhhaalllleellllleenngesa...   oour   kteaamm  kkk,,,   iinnncccllluudduuuieddinngg  pkkkeennnn    aandd    zzzeeeelldddaaa,,,,  hhhasws  bgebgeeeeen   aadddaeiiddlkiaggeeentttloyy   ajwwwokkinnng ttoottt   jaakkkelleeee  kttteswee   isssssuueeel   rrreeedccceeeetlllyyy     kkwweee   faacceeddf      ffaasseeerfvvvvveeeraalla   aqqquuuiiirrkkyy    sssiiiattttuuattiiooonnss   attthht   eeeguuiiireeaaasddd  iimmmmeeddeeedeaaatteee   aaaattttttiinn   tthhakaaffuulollyy  kk,,  wwee   mmmannggeeeeeaddd  ttto  adddeeesws   th

In [22]:
correction = """
Nondeer Aqueian,
I hope you are doing well. I wanted to update you on the latest project, which has been quite challenging. The work involves a series of complex tasks, each with its own unique challenges. Our team, including Ken and Zelda, has been diligently working to tackle the issues we've recently faced. We've encountered several quirky situations that required immediate action. Thankfully, we managed to address them with the help of our innovative techniques and quick adjustments. The process has been quite rigorous, but we are making significant strides.

Our project is driven by advanced algorithms and cutting-edge technology. The key aspect is accurate data analysis, sifting through extensive datasets, and precise measurements. We’ve had to navigate tricky scenarios, but we are confident in the direction we are heading.

If you have any questions or need more details, please feel free to reach out. We are available for a quick call if that would be more convenient. Your feedback is highly valued and will help us ensure the project meets all expectations.

Thank you for your continued support and understanding. We look forward to sharing more updates with you soon.

Best regards,
Jack Zimmerman
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_6.txt')

 nondeer aqueian, i hope you are doing well. i wanted to update you on the latest project, which has been quite challenging. the work involves a series of complex tasks, each with its own unique challenges. our team, including ken and zelda, has been diligently working to tackle the issues weve recently faced. weve encountered several quirky situations that required immediate action. thankfully, we managed to address them with the help of our innovative techniques and quick adjustments. the process has been quite rigorous, but we are making significant strides.  our project is driven by advanced algorithms and cuttingedge technology. the key aspect is accurate data analysis, sifting through extensive datasets, and precise measurements. weve had to navigate tricky scenarios, but we are confident in the direction we are heading.  if you have any questions or need more details, please feel free to reach out. we are available for a quick call if that would be more convenient. your feedback

In [20]:
prediction = process_prediction('stream_results/topview/video_7_clf.csv')
prediction


'qqqdddeerr     tteemmammm,,     ii   hooopppaayyoo   lll     haaddd  aaa   faaannstttaaaaaaasssstttiicc  mee,  ojouu   reedccnettt    atteeeempllllyyyeeeeee   eee pappppprreeedcciiaaaatttioon  d  eeeee vvnnntt..   kii  ttt   aawassssd   ssstttuuuuulyy   aa  aaplleeewure    ttteo   seeeee   deeevveeeryoonnneee    ccoome  ttgettthheer aaaa  aaddd  eenujoyy  sdtthhee     ffeeeeaatttssatti  vvitttteieeo..  yyyooour    aadeehhuussiissaaasssmmm    aaandd  ptt vvveee   aseeeevvneeerryyyyyy    n   aammme dddeee e   ttthee ee veeeett   aaa    reeeaouunnnddiinngg   acccsess  ...   wweee    sswwannntt  edd  tto  ttaaakk    maw  mmomeeenntt   a   ttteoo  ttteeeezttteeenndd   ooouurr   hheaarrteeeelll t   gggrraaddttifuuuttuedeee    ftto   eee hh       oof  yyyoouure   rrdda  wwoorrkkee   dd   dneeedddeediccaattttond..  eee veennttt     ikkeee    adtthheeweee  arre a   sss mmmalll   ttlloeeenn      oeff   aaappppprreeedccdeiieatttioonn      ffoe  tthhee  iiiinncheeeddddiidccbbbllee   eee fffeleett

In [24]:
correction = """
Dear Team,
I hope you all had a fantastic time. I truly appreciated the recent event. It was an absolute pleasure to see everyone come together and enjoy the festivities. Your enthusiasm and participation made the event a resounding success.

We wanted to take a moment to extend our heartfelt gratitude for your hard work and dedication. The event is a small token of appreciation for the incredible effort you put into your roles every day. As we move forward, let's carry this spirit of camaraderie and teamwork into our work. Your contributions are invaluable to our company’s success, and we look forward to achieving even greater milestones together.

Once again, thank you for being an integral part of our team. Here's to continued success and many more memorable moments ahead.

Best regards,
Kathy
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_7.txt')


0.8703703703703703
0.2


# Rightside view
Chat can be found here: https://chatgpt.com/share/c9c7419a-9c45-4b5a-90d9-2bb2ff7c09f5

In [25]:
prediction = process_prediction('stream_results/video_6_clf.csv')
prediction

'deeaarrr qquucicccanccyy,,,,  i  hhioopeee uuuuyouua     are eddooinngnaweell,,...  ffii wwwannntteeedd   etto uhpddaaeee ttteeefyoouuu  oonn ooouuurrr aaatttesssat   fpporroooneeccttttt,,, wwhhiccchh   hhass beeeef      quutteee tthee efjooarrnnrieeeya...  tthheee awwworrkkk  ffinnnvvaccolllvveessa aa  assseerre iiseeess ajon pcccoomppleexx tasaksa eeeaaccchh awwieth  ittas aauuniiqaueee  cccchaallleeennggeess...   oourrr tteeemaa,, innccclluuddinnggefkkeeedvveenn   aanndd zzeelllddda,, haaa    avveeee nn  ddeilliigeenttllyy lcaawwoorkinngg etottaaccleee  aatthheesseee eisssueess..  rreeeccaenntauyy  awwee dfacceed aaaa assevvceerall  aaqurriirkeyy ssittuuaaraaattioonnss  tahh frequuiirreedd  iifmmmmediiaaatee  aaatteeeedddtttioonl... tthhaaannnkaaffullllyyf,,, wwee  maaamaaannaaaaggeeddd eto   aaddddddreheem  wwiithhh  tttheehhelp   ooff ourr  ffiiffnnnoovaaive tteeccchanaaiiauuiieess aaaandd qqquuiccka  aaaaaddaauussatammeenttsa...  tthee epprroocceeesssa  haassaa  eeeen  fqqfuuiie

In [26]:
correction = """
Dear Quiccanccy,

I hope you are doing well. I wanted to update you on our latest project, which has been quite the journey. The work involves a series of complex tasks, each with its unique challenges. Our team, including Kevin and Zelda, has been diligently working to tackle these issues.

Recently, we faced several quirky situations that required immediate attention. Thankfully, we managed to address them with the help of our innovative techniques and quick adjustments. The process has been tough, but we are making significant strides.

Our project is driven by advanced algorithms and cutting-edge technology. Key aspects include X-ray data analysis, thorough testing, and precise measurements. We’ve had to navigate through tricky scenarios, but we are confident in the direction we are heading.

If you have any questions or need more details, please feel free to reach out. We are available for a quick call if that would be more convenient. Your feedback and insights are highly valued and will help ensure that the project meets all expectations.

Thank you for your continued support and understanding. We look forward to sharing more updates with you soon.

Best regards,
Jack Q. Czimamer
"""
evaluate(correction, 'datasets/video/ground_truths/video_6.txt')

0.9403107113654947
0.11


In [27]:
prediction = process_prediction('stream_results/video_7_clf.csv')
prediction


'deear tteeaama,,,  ffaiie hoopee uyuyouuu  aalll afhhaaadd aa faaanattsstticf  tiaammee oiiuur  rrrreccceenttt emmpplleeoyyeee aaapppreeccciaatiioonn eevveennett...  ii tt cwaaasssttrruallyy aa  plleeaaseurre  ttt  sseeeee  eeevveerryyooneee  accoommeee  togggaeetthherr srtaanndd fjeeennaooyyaa   ttheee ffeestt haavvvifaattiss.  yyoouuurrrr  eeentthhhuusiaassmmma  aaand s poosssiittiivvvee  eenneergyyu  maadee rtthtthheeeeeevveentt aaa  arressoonnadiinnggea  suucccccessa.. wweee  waaaanntteddd  attoeetaakkee  aaa  mmeennntt  ttt oo seexxteendd oouuurr  qqhhheeaarrtaaffaeellt gattiitttttuude  attoeeeeeecchh   oooff  yyouuurr  aahaardd   wwworrrkk  aaanndd  deedddiiccaatiioonna...  eeedvveentts llikee  thpeeseee  aaree aaa   smaalla   tttooekkeen   ooouuuf aapppprreeeciiaattionnn  ffooro tt heeffiincccrrddibblle aeefforrq  ett uyyouu ppuutt isnntto   yyoouuurr erroollee  eeevvvaeeryayyla.... aaass mwwweemoooevveeeee foorawwaa,,  leetssaccaaaryyy aaforrawwwaaarddfatthhiissasspaarrrritt  

In [28]:
correction = """
Dear Team,

I hope you all had a fantastic time at our recent employee appreciation event. It was truly a pleasure to see everyone come together and enjoy the festivities. Your enthusiasm and positive energy made the event a resounding success.

We wanted to take a moment to extend our heartfelt gratitude to each of you for your hard work and dedication. Events like these are a small token of appreciation for the incredible effort you put into your roles every day.

As we move forward, let's carry this spirit of camaraderie and teamwork with us. Your contributions are invaluable to our company's success, and we look forward to achieving even greater milestones together.

Once again, thank you for being an integral part of our team. Here's to continued success and many more memorable moments ahead.

Best regards,
Kathy
"""
evaluate(correction, 'datasets/video/ground_truths/video_7.txt')

0.9516908212560387
0.09701492537313433


# Hyperformer
Chat can be found here: https://chatgpt.com/share/66b6aa50-a030-4e0b-9e50-ea77b5307179

In [29]:
prediction = process_prediction('hyperformer_stream_results/video_6_clf.csv')
prediction

'daaiierrr  qquaaiiiinnnccci,  iiiii  hhhhoooope   hyaoouuuaa  a   rrree   ddooiinn ww wsio  ..   iiiwwh  wwaannntteeeddd  ttt   upppddatei  uuhoouooouu  aaaattissst   ipppoohjeeccaatth,,,   whhicchha  hhhaassss   bbeen  aaiiiteee   ttthee jjoouunnineeey,   hhee wwoorrkka   kkiiinnvvoolalllvvvaaeesss   a   ssserrresss   ff  cooommeess   taaasss   sss h,   eaaa ccchhhwwiiitthhh   iiiittsss   annnniiiquuiesss   aaaaccchhhaaanllllaelllnennnggs  ..   oourri  dkteeemamm pa,,   iiannnnncccoluppuhiinngg    kkkkkeiinn    aaaandd   xbeeellda,,,   hhhassss   abbeeeen    kddaiiisllliiisgggnnntttlhyy iawwwookiiiinnng  tttoaakkkklleea  dsttteesssee iisssssuueesessss..   datrrrrreccaaeentllyyya    iwwweee   ffaaaccaaeddd   sveerralla  ikqquuiiirrkkky  sssiiiggstttutiioonss   sssahhaaa  ruuiiaadddd  iiimmmmeiiiaatteee  faaattttiiinnks   tthhkkfllllyha,,  wee   aaaggeeefddd   tttoadddddsssshtheem   wwwiiitthhatthh hhheelltpp  ooof  ooourr iiiinnnnnoattiiivvae    tticchainnfiiiqquuueesssi  fkaaanndd  q

In [30]:
correction = """
Dear Quinn,

I hope you are doing well. I wanted to update you on the artist project, which has been quite the journey. The work involves a series of complex tasks, each with its unique challenges. Our team, including Ken and Zelda, has been diligently working to tackle these issues. Recently, we faced several quirky situations that required immediate attention. Thankfully, we managed to address them with the help of our innovative techniques and quick adjustments.

The process has been rigorous, but we are making significant strides. Our project is driven by advanced algorithms and cutting-edge technology. The key aspects include accurate data analysis, extensive datasets, and precise measurements. We have had to navigate through a tricky scenario, but we are confident in the direction we are heading.

If you have any questions or need more details, please feel free to reach out. We are available for a quick call if that would be more convenient. Your feedback and insights are greatly valued and will help us ensure the project meets expectations.

Thank you for your continued support and understanding. We look forward to sharing more details with you soon.

Best regards,
Jack A. Ismer
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_6.txt')

0.9401639344262295
0.11616161616161616


In [31]:
prediction = process_prediction('hyperformer_stream_results/video_7_clf.csv')
prediction

'ssslfdddaaerra    ttttaammmmm    iii   hhaoppyyou  llllll  haiddd aaa   faaannaaattaaaaasssssttiiicc,     opu  dprreiinnttt   ss eefpporwwyyyeeeee   ee aappppprreeeciiaaatiinaa  eaa vvnnntt ..    ii   tttt  ssssss  s   ttuuully   i   s ppleeaasss  ueeee    ttt  ssseeee  ddevvveeronnnae    ccoonne  ttbatheeerraa    aadnd  nnhy     k tthhee   aeesaassttiia   ... yyopuurr        aeehhhaiiiissssssssssssmmm   kkaaan popiiiittivvaee   faenneeyy    ssa mmaaaaddee k  ttheeeevveent ssaa   rrreeedssssunnnddiing     uucccaasssss  ....   wweee   iwwwnnnteddddttt   aakk   jma   mommennt  ff   ttoo  ttssstteennnn    ooouurr hheeerrtt  attuudddeee  fttto  wweehhh       of yoddddd  wwoorkkk    aaddd   ddeeddddidccainn  ..   e ennttt      iiikeee    f tttheessee    rra   s   mm   tttoieen  aoaf  appppprrreeediiiiaanntiionn   kfo  tthheee     iiinnccaaaedddiicbbleed   sffffaahyooukpputtt   skiiinnnttttto  urr    nrollldsss   eveeeyyiidattyyh     ...   sss   wwweee   movva  oorwwwwdddd    ,,  eeetss    

In [32]:
correction = """
Dear Tam,

I hope you had a fantastic time at the event. Your support and appreciation mean everything to us. It truly pleases us to see everyone come together and enjoy the occasion. Your enthusiasm and positive energy made the event a resounding success.

We wanted to take a moment to express our heartfelt gratitude for your hard work and dedication. These are small tokens of appreciation for the incredible effort you've put into your role every day. As we move forward, let's carry this spirit as a team and continue our work with the same camaraderie and commitment. Your contributions are invaluable to our company’s success, and we look forward to achieving even greater milestones together.

Once again, thank you for being an integral part of our team. Here's to continued success and many more memorable moments ahead.

Best regards,
Katy
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_7.txt')

0.8080094228504122
0.3333333333333333
