In [53]:
import pandas as pd
import re

from configs.api_keys import OPEN_AI

# metrics 
def edit_distance_similarity(str1, str2):
    m, n = len(str1), len(str2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0:
                dp[i][j] = j  # If str1 is empty, insert all characters of str2
            elif j == 0:
                dp[i][j] = i  # If str2 is empty, remove all characters of str1
            elif str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]  # If last characters are the same, ignore them
            else:
                dp[i][j] = 1 + min(dp[i][j - 1],  # Insert
                                   dp[i - 1][j],  # Remove
                                   dp[i - 1][j - 1])  # Replace

    edit_dist = dp[m][n]
    max_len = max(m, n)
    similarity = 1 - (edit_dist / max_len)

    return similarity

def edit_distance(str1, str2):
    m, n = len(str1), len(str2)
    dp = [[0] * (n + 1) for _ in range(m + 1)]

    for i in range(m + 1):
        for j in range(n + 1):
            if i == 0:
                dp[i][j] = j  # If str1 is empty, insert all characters of str2
            elif j == 0:
                dp[i][j] = i  # If str2 is empty, remove all characters of str1
            elif str1[i - 1] == str2[j - 1]:
                dp[i][j] = dp[i - 1][j - 1]  # If last characters are the same, ignore them
            else:
                dp[i][j] = 1 + min(dp[i][j - 1],  # Insert
                                   dp[i - 1][j],  # Remove
                                   dp[i - 1][j - 1])  # Replace
    return dp[m][n]

def word_error_rate(reference, hypothesis):
    reference_words = reference.split()
    hypothesis_words = hypothesis.split()
    edit_dist = edit_distance(reference_words, hypothesis_words)
    return edit_dist / len(reference_words)


def character_error_rate(reference, hypothesis):
    reference_chars = list(reference)
    hypothesis_chars = list(hypothesis)
    edit_dist = edit_distance(reference_chars, hypothesis_chars)
    return edit_dist / len(reference_chars)

def filter_string(input_string):
    # Convert to lowercase and remove newlines
    input_string = input_string.lower().replace('\n', ' ')
    # Keep only lowercase letters, dots, commas, and spaces
    filtered_string = re.sub(r'[^a-z., ]', '', input_string)
    return filtered_string

def remove_consecutive_letters(s):
    if not s:
        return s
    
    result = [s[0]]  # Start with the first character
    for char in s[1:]:
        if char != result[-1]:  # Only add if it's different from the last character
            result.append(char)
    
    return ''.join(result)

def process_prediction(csv_file):
    clf_result = pd.read_csv(csv_file)
    chars = clf_result[clf_result['Prob'] > 0.8][]['Key prediction'].to_list()
    # chars = clf_result['Key prediction'].to_list()
    processed = []

    i = 0
    while i < len(chars):
        char = chars[i]
        if char == 'dot':
            processed.append('.')
        elif char == 'comma': 
            processed.append(',')
        elif char == 'space':
            processed.append(' ')
        elif char == 'delete':
            if len(processed):
                processed.pop()
        else:
            processed.append(char)
   
        i += 1
    
    # prediction = filter_string(''.join(processed))
    # prediction = remove_consecutive_letters(prediction)
    return ''.join(processed)

def evaluate(prediction, gt):
    prediction = filter_string(prediction)
    print(prediction)
    print(edit_distance_similarity(prediction, gt))
    print(word_error_rate(gt, prediction))

    return edit_distance_similarity(prediction, gt), word_error_rate(gt, prediction)
    
from openai import OpenAI
def fix_text(text):
    client = OpenAI(
        api_key=OPEN_AI
    )
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        seed=0,
        messages=[
            {"role": "user", "content": f"Please correct this text, reply in one line:\n {text}. "}
        ]
    )

    m = completion.choices[0].message.content
    print(m)
    return filter_string(m)


In [54]:
prediction = process_prediction(f'stream_results/7_clf.csv')
prediction

'ps dddderrrra eaam,,,   i   aolp ayyyu  llll aada  faaadtsssttc  iiis,  r rreeettt teeeepllllqueeeee  ee apppppoeeecciiaaatiioo vvnttt ll    ttt sssssss  tttuullyy  alluuee tttoo   eeeeee eevvvayoooneee ccooomee tttthheer   adddh  tthee aetsstt  ttttteeess   .ypuurrr  eeetttsssssssmmm aadd   ppooiiiiiivvve eeaaneeyyyeomaaadddee theeeeevveettaa redsssounddddgg  uuccccs..   e saanteeeddttt aakk a   moofeett tttttol  ttttttenddd  oouuu eeetrreelleatddeeto eecchh   offyyud wworrke dd ddeeddiiccatttoon ...  eeeeeenntt   ss   iikkeee ttthheeesssee  are   aall  tttooeen ff  apreeeeccciieaaattiioon  ffrr theee ancceedddllee effffyyyuu uuuuttt  liinotttoo  yuurrreeeleeess eeevverrydday. ss wwwee   oovee   oorwaarr,,,  es ccaaarrrqyy  ffforrwrrrdd   tthhhss pppiirriaaiittt   ll ccam aadereeee aaaddddtteemaaamm   wrr   yuuarr  oontriibbuuutoon s  r   inaallllluataleee atto  ouurr   cccooommapppaanny ucccccae,,   ndddd wwee   lool  ffawaaredd   ttt  ccchhieeesig  eeeeeenn  rrretre ommifleestonnee

In [56]:
pred = 'dear team, i would like to express my appreciation for the outstanding teamwork displayed in all the tasks weve accomplished. your efforts have made a significant impact and have certainly contributed to our success. i look forward to continuing this journey together. best regards, katy.'
gt = 'dear team, i hope you all had a fantastic time, our recent employee appreciation event. it was truly a pleasure to see everyone come together and enjoy the festivities. your enthusiasm and positive enery made the event a resounding success. we wanted to take a moment to extend our heartfelt gratitude to each of your hard work and dedication. events like these are a small token of appreciation for the incredible effort you put into your roles every day. as we move forward, lets carry forward this spirit of camaraderies and teamwork. your contributions are invaluable to our company success, and we look forward to achieveing even greater milestones together. once again, thank you for being an integral part of our team. heres to continue success and many more memorable moments ahead. best regards, katty'
evaluate(pred, gt)


dear team, i would like to express my appreciation for the outstanding teamwork displayed in all the tasks weve accomplished. your efforts have made a significant impact and have certainly contributed to our success. i look forward to continuing this journey together. best regards, katy.
0.2666666666666667
0.8740740740740741


(0.2666666666666667, 0.8740740740740741)

In [29]:
for model in ['ctrgcn']:
    for f in ['7_clf.csv']:
        prediction = process_prediction(f'stream_result/{model}/{f}')
        print(prediction)
        # fixed = fix_text(prediction)
        # video_id = f.split('_')[0]
        # edit, wer = evaluate(fixed, f'./datasets/topview/ground_truths/video_{video_id}.txt')
        # with open(f'stream_result/{model}/{video_id}_predictions.txt', 'w') as file:
        #     file.write(prediction + '\n')
        #     file.write('Corrected: ' + fixed + '\n')
        #     file.write('Edit distance ' + str(edit) + '\n')
        #     file.write('WER ' + str(wer) + '\n')

ai ofdaera teame, i hope tyo aolo k aed a ikanatarastica it, o o ue poreicint stenfploratye e iaporeiciationcka e vent . is tit sas s trauly a ods ple aswe tuhe to ae i envaeryone colme together a aod jena py lsthe r aktestsasti vs ats. o y yo our keaentuhahpuaiswsm kasnd porseitivfe flsaeivniety a ma de ke theivent saka resopauansainga ucaesf ps. we swant skseda to take ksa jma amovamet s s sto teuxstene opre here fkataerote fgtrertistude sto aehch of youre ed work s asd dae dicaiton s. e fente s ike k these aetre a s maoa toene oaf aproecieartione f aro the k ai nchae diai cbvloe earfort ayou pt lkienotato y or so araoldes ae vfey dtay . s we amovae oraew ard a, erts ca ry aoaforward this s pirfiaitsa o cama af der e sa ad tema k k am aork . yoyou re ont aiobuation s ae ina olouartiebatole sato eour comaonspapany aucaels. nd awe lok forwared to a chaieves s isn aevaen gtearte r omiola estonsa gtethe is. once gain s, tania you for akngeitnge an santegaraloa parengpt o itour team . ear

In [148]:
prediction = process_prediction(f'hyperformer/0_clf.csv')
print(prediction)

sgod atenon, te boarad ajoafa diarectotres amertitngis hive beaen saschiedules s folowasaed jury fapeiousailolya ldconfirmed ay jublys octobet. the time flof eachfs fmentng dwaile bae amjm to njnon hoawebvesdeuetherljlodsgiasatkial datais swil be anounchaed at a olated date fjharodior atoi ealchi ojetinl. pleaese vetgl alr fre to contetcact ateitherda akethe larneya at , or me jsi you shoylold sahva ny auesto s.,. thank, yo


In [167]:
raw = 'sssss   dddeeer  ttaamaa   iiii   hhoooayooo    lll    h aa   faaatttaaaassssttiicca  ee,   jouu  eeeittt   ssteeplqyyeeeeer   ee  aapeecciiaatton   ettt ...      tttttttt   asssss  sttuuulllyyy   aa   dllleeaassreee    ttto  seee    eeevvyoooee    cccoome  ttttherraa   aadd   eeoooyy    sttthee   eeesstttssstttiiv..  yyyoourr      eeethhhiiissssssammm    aan ootivvee    aaeeyyyyy    saaaammmaaaddee  d  tttheeevveett   saaaa   eeessssoouuddig   aucccsssa...   ee   sswwnntttedddtto ttakk   a    a   mommett   atttoo tttextteend    ooour  heerttteett  gggdddttiiutttuudeee   ttto  eeeh    o  yourr   aaddd  wwwooorrkke     aadd   ddeedddiicciitttoon.   e eetttt    s iiikee    stttessee   a a  samma   tttooeeen    f  aapprreeecciiiiettiioon    foor  ttheee    iiincceedddicbbllee  sssefffart   yyoou tttt   sttttttttoo   yoorrr    eolllddeesss  eeeeeyy  dattyyy    ..   ssss wwe   ovve   forwwaaaddd  ,,   ttss  cccaarryyy   fooraaarrddd ttthhii   sppiiirriiiittt   aa     ccaammmaaa   aaaadeerrrreee  a   aaadd teemm   faamm   r   ..   yyoooouu  rr  oootttiibbuuoossss   ar   iinallluuaaaatttbbee    ttt  oor   cccooommaaaannaaaaniiy    uuccccsss,  annd  ee    llooo aaarddd  ttto  accchhhiieeevvveesiiisng   eeeveenn     geeeeatte rrr   miilleeattee ssssssssss teetttheer..   ooccce   ggiii ,,,  ttanq  yoouu   ooor   beeg  aan   ttteeeggraall    paaeeddttt  oo toour  teeammma .   hhaaarees   tooo  ootttiiitnteueee   succccceessss   aadd  mmm   mooor    aaammmeemmeeooorraabbbee   omet  hhhaeed .   bbbesstt   eeegggaadd  ,,    kaatttttya'

fixed  = """
the time of each meeting will be from
ten to noon . however, further logistical
details will be announced at a later date
prior to each meeting. please feel free
to contact either sk or lh if you have any
questions. thank you.
"""
evaluate(fixed, 'the time of each meeting will be am to noon however, further logistical details will be announced at a later date prior to each meeting. please feel free to contact either keith larney, or me if you should have any questions. thank you.')

 the time of each meeting will be from ten to noon . however, further logistical details will be announced at a later date prior to each meeting. please feel free to contact either sk or lh if you have any questions. thank you. 
0.8601694915254238
0.16279069767441862


(0.8601694915254238, 0.16279069767441862)

In [32]:
prediction = process_prediction('stream_result/hyperformer/6_clf.csv')
prediction

'sssaaddiiiieeeaeiqquueiiiinnncccuk,,   iii iihhhiioopae  yyoouuue      aaarreee   ddtooiggg iweeloo   ,..   iiww    waanntteeeedd  tttoo  uuudpdddattte  uooou  oour    salaaatssstt    ijooojeeectt,,,,,  hhiicchh   hhhassss  bbbbeeeennn   uittee   atttheei ijjoooianueenneeykk,,,,,,   tttheei  wwookk   iiiinnnvvooollllllllvvveesss   sia  sssseerrriiireesss fi ccoomoeejxx  ttasssks   sssk   eaaacchhhiiiitttti iiittsss  ujniiiiauueeesss  ccchhaalllleelllleeenngggess  ..   oouurr   dkaaateeemmmmmmkk,,  iinncccccllluuuiiingg    kkkkkeiin   aannd  kzzzeeelldddaaak,,,   hhhisss  bbbebbbeeen    ddddiiiiiliiidggentotu  wwsooikiggg itttottikkkee   ikatteeissee iiissssuueesesssll.    diattreeeeeccceentllyyy    swweee   aaccceddd   diiisssserrvvverralll  iqquiiirreeeey ssiiitttettiiioooonnssi   saattttttt  eeeeegguiieeadddiiiiiimmmeedeeeiiitti aaaatttttiea tttheikdfflloykk,,,,   ee    mmagggeeeeeedd  tto  ddddddeessss tttheeemm  diiittthttheei ttthellp  ooooojjooouuriiiiiinnnnoattiivvvee   aaattee

In [33]:
prediction = process_prediction('stream_result/hyperformer/7_clf.csv')
prediction

'ss   aadddeet   a tteeeammmaa,,   iii   hhhoooe   yoo   olll   hhaaedd a  faaantttaaaasssstiicaaa  teeee,    l   joouur   eeeetttt   steeolqoeyeeeee   eeaaappppooeeeeciiiaaittoon    ee nttt aa..   ii  ttttt ssssss  stttuuullly   a  ialleeeassee    tto    eeee   eebvveetyooonnee   ccoomee  ttggtttheee     addd  eeuoooy     attteee  aaeesssttsstttiik   vvvittttteessssdll.  youur        eeettyhhiiiisssssmmmm  nd  pppiitiivvee    aeeeneerrt    ammaaaadddeed    ttteeeevveettt   saaa  eeessouudndddiggg   asuuccccsssssaa...   wwee    swannnt  eeeddtto  tttiik  a   mmoieeettt    atttoo  ttttexxittteendd    oouuu   heeeataaaffteeel  tt   ggggaeredddtiuuuttttueddeee   sfttoo  eeechh    ouuyoedddd wwooorkkke   aaaddd  dddeeddddiiiciiiittooonaa.   eeeenttt    ss  liiieeee   ttteessee    aer   a    sssimmaoo   tttoooeeeen    kk of  aarreeeedddiiiiieeatttioon    o  ttteei   sskkiiincceeedddiiccbbllee  saaeeffferttt  syyoou  auuttt   kssssiiinttttttoo  uoourr     srolldddes  eeevveeryeddatttyys   s 

In [34]:
prediction = process_prediction('stream_result/hyperformer/19_clf.csv')
prediction

'sshhellllooo   jjoooohhsja,,   ttiank   yooou    ffoorassshooopppppppiinngppattt ourr  swwwwwebbbbbbbbbbbsstittteeel.   sssttthee  wwwwee  suaaarraanntteeeep   ssyooooouur    sattiiss    ffaaacttttoonn f    wiitth ooouuurrr   hggghie  qqqqllit    rrrojjddduttttttsssss   addd     ccuusattooommeerrss    ssuupppoorttts..  iinncc    yooourrr  jsxppppperttttieesssaeee  wwwiittth tthee  oorddddeerrr ddiidd   ntt   sliivvppp  ttttttoo  uooouooorrr   hhhggh   sstaandddeaaddssss    ,   e    haave   isssuueessadddd  yoou   aa    aacc eerrreedddiiittt    si   ttthee   mmoouugngg   tt   iof  aaa    hhunddareedddda...  sppllleeesssee   allllloow    tttheeeee    to  jffiivvee  dddayy   foor   oouurrbbbbannkk    sttoo   pppppully satthiiss    cceeedddittt   dt  yooourrrrr   jjpacccccoooounttt  ss.    iif   yoouu     hhhsvve  nn  uuueesssttiioonnsssss,    ppeeee assee   vvisssiiittt     ccaaattooooojeerr    suuppppppprttaaa..  thhannk  yoouaaggaaaiiinnn  foorr  ssshoooppppppppiiinnggg   wwwiitttth   

In [35]:
prediction = process_prediction('stream_result/hyperformer/20_clf.csv')
prediction

'sssssllls,,    ii   tttiiinn   s    iii   ssshhhoooudddd  ivvee   suyoouu        llttttttlle   sabbbbccckggggroodddd   loon   sssmmaaalll     vveeentttrrrees..   bbiilllo   pppeerrrkkkinnsss    aaandd   p    hhhhaavvv  ra   ssst ooongg  pppperrjsssonaalll    aaaaddd pproooofffeeessssiiionaaal   aaertttioonnssipppplal   hee    is   aan     eexxxttttteeeemmlsuy   rraetttteattiiivvvee   iiniddiivvviiiuuaall..  hllllyyy  tttuuaallllly    ccooommmmsaattteeddd   fonn hiiimm  dtyy    asss  sssooome oooneee   wwwhhooo  thiinksswouuttsssidddee    ttthee  bbooxxxxx...    gill   aaattuuuaaarlllllyyy sssaaattee    iinn         ffbbbaarr   urr   yyyeeearrrrrsss    aaadd  sssiaiiddd   jttthee  ettttt   saatabbbddessabbbbbee   mmmaaaaarrrkeeett  wwwpoooullddd  bbbee  bbbbnndddwwiitddddtttttthhh  ss..   hhee   hhaasss   bbbeeee e    swwwccuucccccaeessssfulll   iiinn  tthhe    gaass  abbbjjsssiinnessssssa  wann   he   hhaass  hhhaadd   ssoommeoonnee   ttp   faiiilllttteerrrr   hiss   dddpasssa.   ssss

In [36]:
prediction = process_prediction('stream_result/ctrgcn/7_clf.csv')
prediction

'i    aaddeeeer    teeemmmmkk ihooppp   ttaoo    ooo haad   a   annapsttsstiiia  iieeee     oo    oouoe  apoeeeiccta  kkremooooorraeyye    appoooeeciiaaittionn    ee vvvknnnt .   iii    t tsss   tttuuoll      pooeeaeee  ttoo  eee  oeeevveonnne  cooommee  ogtthheeek  ddd   jeennppp     theee  eeesstttikkkvv .  oo uu      eupahhappaiiiissmmm  kn   pooiitiivaa  aeevvvaneey  mmaaadaeee   teeeevveet     kreeeopunddeggg  uccccos   ...   w   aawnnnt ed  ttoo ikk ae  aa   mmivmeentt   kttto   tteusttteennee   oou heearfiaeeea  ggaddttitttueeeee  sso aeech    ooeeedd   wwoooirrkkk    ad    seedddiiicittoonn o   e eenntt  a  iikkeee ttthesee   aer    aammaoo  toooieeen   oo   aaapppproeeecciiieentioon   ooo ttheeee   i nnccaaaeeeiiccoooe   aaeffooooo pptrt  okii nottoo   uuoouu      roooddddes   evveer attyy     o..   ss   ww    movvaa  ooooraaaawarrrdd  k,   ets cck  rrray ooaafooorraarrrdd   tiii     piiiriitt      o  iccca mmmaaa   adeer eeeee  kaaadd aeemmm k aamm  aaoooikk     yoa  onnbguut

In [37]:
prediction = process_prediction('stream_result/ctrgcn/6_clf.csv')
prediction

'kseeeear  kaueiinnncccykkk   ihpaiiiooopue  ooup arreee  dooogn eeoooa op.   iiaaa   saannnteeee   ttooo ppddaaoo  oou  ooou   ooaaattessssta daaooooojjeeecccatk,,   siccchh   aaasss    baeeenn   uiitteee  ktthheee  jooorauueennnneeekk.    tthee  oookkk  kiiinnnvoooallllllvvaaess   kkkkkk   skeriiireeess  oof  cooomaooeexxx   ttaakkk  ssk  kkk  eeccch   aiiit   iiit  unnniiauuues   ccchhaaaoooaeeooaaeenngggeess  .   oouuurr   kkkkkkeeemmmmkkk,   iinnnccccooouuupppinnngo  kkkkkkkeniina   aannd  eeeodddakkkk haaass   vvaageeeeenn   kkdd  iidooiiidddfggeenttoo aaoookkinng totaa kkkeee   kkktteeisseee  issssuessl  kkrrarrecckeenntoaaayy     aaaawee  aacccaaeddd   iooiissseaaavaeeerrrla kkaouuiiirkky   siittueaitiiooonnsssi  taaaaeeeguuuiidd  iiikmmmmmeeitte   attiin    tthkkoooukkk,,  aee   maegeeeedd   too  ddddooeoos  ttheemm  isaitth thee  aaeeooop  ooo aoooouuu iiinnnnnnovvtiivve  aecccinnniieaueiak  aannd  aauuickkkkk   aaddd aaajssseaammennntss  tthee  prooorcceess   paaaa   vaeeeen

In [38]:
prediction = process_prediction('stream_result/ctrgcn/19_clf.csv')
prediction

'xxxdeolloo   jjooouhhnkk,,,,  tttonk yoou    ffoorrrassoooopppppppiiinnngppaatiurr rwwwweeegbbbbgssiiittteeelllll...   ottthe  ffd ggggsgaaaaaee   ggfluuaaarrrrraaanntteeee    suoooourr   sstttiiss   ffffccttoonniuf    wiitth ooouuurrr   hgggee  ff quuqqouutt  proorurrjjeddduuattttisfssssa  an    ccuuusotooommeera   ssuppppooorrrtt fol.... siinncce  yyoourr    aexsppppppeerrppttieeseee   fwittth tthhe    oorrdddeerrr krfdiidc  iinntt   apiivveeuurpp  zttttttoo uooourrro gha  ssstttaandddaeaarrdddss ,,,  ee  hhaabvvee  iisuueedd   pou  aaa   cccceeeerrrrrerciiitttt   dann  ttheee rmmoouunggg tt  iroofoof aaa    pnarreeeddaaa.....  ppooeaae   aooluow    ttteeee    rt   fffiivvee  dddpyyys fffoor  uoorrr  ebbaek   sttoo  pppooofatthhiss   cccarrriiureereeeiittt   rtt rooourrurrr   pccccccoooouunnttttii o.  iiff yoouuu    havvve   nnn    uuuueessstiioonnosssssss,,  peeeeaassee   vvviessiiittt   cuuaptttoooooeeerr puupppppprrrrtto... tthaaannk  yyyoouurrdkagggiiign zfoorr  ssshoooooppppppp

In [39]:
prediction = process_prediction('stream_result/ctrgcn/20_clf.csv')
prediction

'soodooarrrrrududsssseooo,,,,  ii  thiiink  r  hoooodd rrpgiivvvee rou    a  oiitttttttooee ioobbbcckkkkggggoooruuedddd   fdoonn    mmmaoo  fvvvnnttrrreerksss...  bbiio   kspppeerrrriiifinnnsa   aaannn pii  hhhhaaavvee  rassttrroongg ikfpppeerriffofpppoonnall   ian  pprooofffffesssioooll  aerrrrttionnapshppprrl...  hheee  iii   aaannn eeexxsttttteeemerifo    dcrrrrrrrttttieeatttiiivvvee  fffinniddiivvviiiduaollrffff l... whooafyya   acttuuoooouuy   fccooommmmeentttteerrd  fonnahhiiimmmedtty sss  dsooomee proonnee   rahhooothiinkkss puuttiicdee   rrttthe  bbooxxxxx...   ikbgioool   fkoactttuuuuuollllyyy uuuuaattee    fin   fffaaa  rffffbbaarr   ffouurrrrr   yyyeeearrrrrrs   aaann  ssiiaiddd    nnttthee   annneettt   rftaabbekeeerkfsaabbbee   mmmmaaaerfrrrkkett pooooouooddd  bgee bbbnddppiittddtttthhe  fff..   hee   haasss  bbeeeeee enn   rfr kkkcccuucccccooeessssffuooor   dfuiinn tthee  rggaass   rdbbbunnssiinnaeessssiisaffa   ee  ppnnd he   uhasss   hhhadd   ppoommeeoonneeeeerrttto  if

In [40]:
correction = """
Nondeer Aqueian,
I hope you are doing well. I wanted to update you on the latest project, which has been quite challenging. The work involves a series of complex tasks, each with its own unique challenges. Our team, including Ken and Zelda, has been diligently working to tackle the issues we've recently faced. We've encountered several quirky situations that required immediate action. Thankfully, we managed to address them with the help of our innovative techniques and quick adjustments. The process has been quite rigorous, but we are making significant strides.

Our project is driven by advanced algorithms and cutting-edge technology. The key aspect is accurate data analysis, sifting through extensive datasets, and precise measurements. We’ve had to navigate tricky scenarios, but we are confident in the direction we are heading.

If you have any questions or need more details, please feel free to reach out. We are available for a quick call if that would be more convenient. Your feedback is highly valued and will help us ensure the project meets all expectations.

Thank you for your continued support and understanding. We look forward to sharing more updates with you soon.

Best regards,
Jack Zimmerman
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_6.txt')

FileNotFoundError: [Errno 2] No such file or directory: 'datasets/video-2/ground_truths/video_6.txt'

In [20]:
prediction = process_prediction('stream_result/ctrgcn/7_clf.csv')
prediction


'qqqdddeerr     tteemmammm,,     ii   hooopppaayyoo   lll     haaddd  aaa   faaannstttaaaaaaasssstttiicc  mee,  ojouu   reedccnettt    atteeeempllllyyyeeeeee   eee pappppprreeedcciiaaaatttioon  d  eeeee vvnnntt..   kii  ttt   aawassssd   ssstttuuuuulyy   aa  aaplleeewure    ttteo   seeeee   deeevveeeryoonnneee    ccoome  ttgettthheer aaaa  aaddd  eenujoyy  sdtthhee     ffeeeeaatttssatti  vvitttteieeo..  yyyooour    aadeehhuussiissaaasssmmm    aaandd  ptt vvveee   aseeeevvneeerryyyyyy    n   aammme dddeee e   ttthee ee veeeett   aaa    reeeaouunnnddiinngg   acccsess  ...   wweee    sswwannntt  edd  tto  ttaaakk    maw  mmomeeenntt   a   ttteoo  ttteeeezttteeenndd   ooouurr   hheaarrteeeelll t   gggrraaddttifuuuttuedeee    ftto   eee hh       oof  yyyoouure   rrdda  wwoorrkkee   dd   dneeedddeediccaattttond..  eee veennttt     ikkeee    adtthheeweee  arre a   sss mmmalll   ttlloeeenn      oeff   aaappppprreeedccdeiieatttioonn      ffoe  tthhee  iiiinncheeeddddiidccbbbllee   eee fffeleett

In [24]:
correction = """
Dear Team,
I hope you all had a fantastic time. I truly appreciated the recent event. It was an absolute pleasure to see everyone come together and enjoy the festivities. Your enthusiasm and participation made the event a resounding success.

We wanted to take a moment to extend our heartfelt gratitude for your hard work and dedication. The event is a small token of appreciation for the incredible effort you put into your roles every day. As we move forward, let's carry this spirit of camaraderie and teamwork into our work. Your contributions are invaluable to our company’s success, and we look forward to achieving even greater milestones together.

Once again, thank you for being an integral part of our team. Here's to continued success and many more memorable moments ahead.

Best regards,
Kathy
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_7.txt')


0.8703703703703703
0.2


# Rightside view
Chat can be found here: https://chatgpt.com/share/c9c7419a-9c45-4b5a-90d9-2bb2ff7c09f5

In [25]:
prediction = process_prediction('stream_results/rightside/video_6_clf.csv')
prediction

'deeaarrr qquucicccanccyy,,,,  i  hhioopeee uuuuyouua     are eddooinngnaweell,,...  ffii wwwannntteeedd   etto uhpddaaeee ttteeefyoouuu  oonn ooouuurrr aaatttesssat   fpporroooneeccttttt,,, wwhhiccchh   hhass beeeef      quutteee tthee efjooarrnnrieeeya...  tthheee awwworrkkk  ffinnnvvaccolllvveessa aa  assseerre iiseeess ajon pcccoomppleexx tasaksa eeeaaccchh awwieth  ittas aauuniiqaueee  cccchaallleeennggeess...   oourrr tteeemaa,, innccclluuddinnggefkkeeedvveenn   aanndd zzeelllddda,, haaa    avveeee nn  ddeilliigeenttllyy lcaawwoorkinngg etottaaccleee  aatthheesseee eisssueess..  rreeeccaenntauyy  awwee dfacceed aaaa assevvceerall  aaqurriirkeyy ssittuuaaraaattioonnss  tahh frequuiirreedd  iifmmmmediiaaatee  aaatteeeedddtttioonl... tthhaaannnkaaffullllyyf,,, wwee  maaamaaannaaaaggeeddd eto   aaddddddreheem  wwiithhh  tttheehhelp   ooff ourr  ffiiffnnnoovaaive tteeccchanaaiiauuiieess aaaandd qqquuiccka  aaaaaddaauussatammeenttsa...  tthee epprroocceeesssa  haassaa  eeeen  fqqfuuiie

In [158]:
correction = """
Dear Quiccanccy,

I hope you are doing well. I wanted to update you on our latest project, which has been quite the journey. The work involves a series of complex tasks, each with its unique challenges. Our team, including Kevin and Zelda, has been diligently working to tackle these issues.

Recently, we faced several quirky situations that required immediate attention. Thankfully, we managed to address them with the help of our innovative techniques and quick adjustments. The process has been tough, but we are making significant strides.

Our project is driven by advanced algorithms and cutting-edge technology. Key aspects include X-ray data analysis, thorough testing, and precise measurements. We've had to navigate through tricky scenarios, but we are confident in the direction we are heading.

If you have any questions or need more details, please feel free to reach out. We are available for a quick call if that would be more convenient. Your feedback and insights are highly valued and will help ensure that the project meets all expectations.

Thank you for your continued support and understanding. We look forward to sharing more updates with you soon.

Best regards,
Jack Q. Czimamer
"""
evaluate(correction, 'datasets/video/ground_truths/video_6.txt')

0.028595458368376736
186.0


(0.028595458368376736, 186.0)

In [27]:
prediction = process_prediction('stream_results/rightside/video_7_clf.csv')
prediction


'deear tteeaama,,,  ffaiie hoopee uyuyouuu  aalll afhhaaadd aa faaanattsstticf  tiaammee oiiuur  rrrreccceenttt emmpplleeoyyeee aaapppreeccciaatiioonn eevveennett...  ii tt cwaaasssttrruallyy aa  plleeaaseurre  ttt  sseeeee  eeevveerryyooneee  accoommeee  togggaeetthherr srtaanndd fjeeennaooyyaa   ttheee ffeestt haavvvifaattiss.  yyoouuurrrr  eeentthhhuusiaassmmma  aaand s poosssiittiivvvee  eenneergyyu  maadee rtthtthheeeeeevveentt aaa  arressoonnadiinnggea  suucccccessa.. wweee  waaaanntteddd  attoeetaakkee  aaa  mmeennntt  ttt oo seexxteendd oouuurr  qqhhheeaarrtaaffaeellt gattiitttttuude  attoeeeeeecchh   oooff  yyouuurr  aahaardd   wwworrrkk  aaanndd  deedddiiccaatiioonna...  eeedvveentts llikee  thpeeseee  aaree aaa   smaalla   tttooekkeen   ooouuuf aapppprreeeciiaattionnn  ffooro tt heeffiincccrrddibblle aeefforrq  ett uyyouu ppuutt isnntto   yyoouuurr erroollee  eeevvvaeeryayyla.... aaass mwwweemoooevveeeee foorawwaa,,  leetssaccaaaryyy aaforrawwwaaarddfatthhiissasspaarrrritt  

In [28]:
correction = """
Dear Team,

I hope you all had a fantastic time at our recent employee appreciation event. It was truly a pleasure to see everyone come together and enjoy the festivities. Your enthusiasm and positive energy made the event a resounding success.

We wanted to take a moment to extend our heartfelt gratitude to each of you for your hard work and dedication. Events like these are a small token of appreciation for the incredible effort you put into your roles every day.

As we move forward, let's carry this spirit of camaraderie and teamwork with us. Your contributions are invaluable to our company's success, and we look forward to achieving even greater milestones together.

Once again, thank you for being an integral part of our team. Here's to continued success and many more memorable moments ahead.

Best regards,
Kathy
"""
evaluate(correction, 'datasets/video/ground_truths/video_7.txt')

0.9516908212560387
0.09701492537313433


# Hyperformer
Chat can be found here: https://chatgpt.com/share/66b6aa50-a030-4e0b-9e50-ea77b5307179

In [29]:
prediction = process_prediction('stream_results/hyperformer/video_6_clf.csv')
prediction

'daaiierrr  qquaaiiiinnnccci,  iiiii  hhhhoooope   hyaoouuuaa  a   rrree   ddooiinn ww wsio  ..   iiiwwh  wwaannntteeeddd  ttt   upppddatei  uuhoouooouu  aaaattissst   ipppoohjeeccaatth,,,   whhicchha  hhhaassss   bbeen  aaiiiteee   ttthee jjoouunnineeey,   hhee wwoorrkka   kkiiinnvvoolalllvvvaaeesss   a   ssserrresss   ff  cooommeess   taaasss   sss h,   eaaa ccchhhwwiiitthhh   iiiittsss   annnniiiquuiesss   aaaaccchhhaaanllllaelllnennnggs  ..   oourri  dkteeemamm pa,,   iiannnnncccoluppuhiinngg    kkkkkeiinn    aaaandd   xbeeellda,,,   hhhassss   abbeeeen    kddaiiisllliiisgggnnntttlhyy iawwwookiiiinnng  tttoaakkkklleea  dsttteesssee iisssssuueesessss..   datrrrrreccaaeentllyyya    iwwweee   ffaaaccaaeddd   sveerralla  ikqquuiiirrkkky  sssiiiggstttutiioonss   sssahhaaa  ruuiiaadddd  iiimmmmeiiiaatteee  faaattttiiinnks   tthhkkfllllyha,,  wee   aaaggeeefddd   tttoadddddsssshtheem   wwwiiitthhatthh hhheelltpp  ooof  ooourr iiiinnnnnoattiiivvae    tticchainnfiiiqquuueesssi  fkaaanndd  q

In [30]:
correction = """
Dear Quinn,

I hope you are doing well. I wanted to update you on the artist project, which has been quite the journey. The work involves a series of complex tasks, each with its unique challenges. Our team, including Ken and Zelda, has been diligently working to tackle these issues. Recently, we faced several quirky situations that required immediate attention. Thankfully, we managed to address them with the help of our innovative techniques and quick adjustments.

The process has been rigorous, but we are making significant strides. Our project is driven by advanced algorithms and cutting-edge technology. The key aspects include accurate data analysis, extensive datasets, and precise measurements. We have had to navigate through a tricky scenario, but we are confident in the direction we are heading.

If you have any questions or need more details, please feel free to reach out. We are available for a quick call if that would be more convenient. Your feedback and insights are greatly valued and will help us ensure the project meets expectations.

Thank you for your continued support and understanding. We look forward to sharing more details with you soon.

Best regards,
Jack A. Ismer
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_6.txt')

0.9401639344262295
0.11616161616161616


In [22]:
prediction = process_prediction('stream_results/hyperformer/video_7_clf.csv')
prediction

'ssldddarra    ttttammmm   iii   hhoyo  llll  haidddaa   aaannaattaaaaassssstt    ou rreitt   sseeppowyyyeeee   eeapprreeeciiaatinaa  eanntt ..    i  tt  sssss  s   tuull      ppleeaueee    ttt  sseee  evveernnnae    ccne ttheee    aad nh     thee   aeeastti v..uur      hhiiiisssssssssssmmm   aan piitivve   anney    sammaaaaddee  ttheeeevveet aa   reedssnddiin   cccaassss  ....   wweee   wwwnnntteddddttt     a mennt  f  tto  tsstenn    uuhh ttuddeee  ftto  weehhh     of ddddd  wworkkk   ddd   ddeedddiccinn .   e nnttt    iiikee   fttheessee  r     m   ttten  af  pppppreeiiiaaniion   fo  tthheee     iinncceddibbled  ffffaroopptt  iiinnttto  rr   rollldddss   veeeytty    ...   sss  wwwee   mo  oorwwwwdddd  ,,  ts  ccarrrfforwwwaddd  tthhii   ss iifiiiiitt      ccaama  aaaaeeerrreee      aaaddd  teemmm   am   kks.  hyoar   nnttiiibuiossss   e   iiinnaallluulee   tt     ccoommmmaannpaanniy    auucccss,   nndwee   llooi  fwdddd   ttt aaachhhiieessiiinn  eeveennn   eettt rrrr   mmmiet ssss  

In [32]:
correction = """
Dear Tam,

I hope you had a fantastic time at the event. Your support and appreciation mean everything to us. It truly pleases us to see everyone come together and enjoy the occasion. Your enthusiasm and positive energy made the event a resounding success.

We wanted to take a moment to express our heartfelt gratitude for your hard work and dedication. These are small tokens of appreciation for the incredible effort you've put into your role every day. As we move forward, let's carry this spirit as a team and continue our work with the same camaraderie and commitment. Your contributions are invaluable to our company’s success, and we look forward to achieving even greater milestones together.

Once again, thank you for being an integral part of our team. Here's to continued success and many more memorable moments ahead.

Best regards,
Katy
"""
evaluate(correction, 'datasets/video-2/ground_truths/video_7.txt')

0.8080094228504122
0.3333333333333333


## HF 

In [164]:
prediction = process_prediction('hf_topview2_stream_results/video_10_clf.csv')
prediction

FileNotFoundError: [Errno 2] No such file or directory: 'hf_topview2_stream_results/video_10_clf.csv'

In [166]:
correction = """Dear team,
I hope you all had a fantastic time at our recent event. It was truly a pleasure to have everyone come together and enjoy the festivities. Your enthusiasm and positive energy made the event a resounding success.
We wanted to take a moment to extend our heartfelt gratitude to each of you for your dedication. Events like this are a true testament to the appreciation we have for the incredible effort you put into your work every day.
As we move forward, let's carry the spirit of camaraderie and teamwork with us. Your contributions are invaluable to our company's success, and we look forward to achieving even greater milestones together.
Once again, thank you for being such an integral part of our team. Here's to continued success and many more memorable moments ahead.
Best regards, Kathy
"""
gt = "dear team, i hope you all had a fanstastic time at our recent employee event. it was truly a pleasure to see everyone come together and enjoy the festivities. your enthusiasm and positive energy made the event a resounding success. we wanted to take a moment to extend our heartfelt gratitute to each of your hard work and dedication. events like these are a small token of appreciation for the incrediable effort you put into your roles everyday. as we move forward, lets carry forward this spirit of camaraderie and teamwork. your contributions are invaluable to our company success, and we look forward to achieving even greater milestones together. once again, thank you for begin an integral part of our team. heres to continue success and many more memorable moments ahead. best regards, katty"
evaluate(correction, gt)

dear team, i hope you all had a fantastic time at our recent event. it was truly a pleasure to have everyone come together and enjoy the festivities. your enthusiasm and positive energy made the event a resounding success. we wanted to take a moment to extend our heartfelt gratitude to each of you for your dedication. events like this are a true testament to the appreciation we have for the incredible effort you put into your work every day. as we move forward, lets carry the spirit of camaraderie and teamwork with us. your contributions are invaluable to our companys success, and we look forward to achieving even greater milestones together. once again, thank you for being such an integral part of our team. heres to continued success and many more memorable moments ahead. best regards, kathy 
0.8930348258706468
0.21641791044776118


(0.8930348258706468, 0.21641791044776118)

In [34]:
prediction = process_prediction('/Users/haily/Documents/GitHub/Research Learning/clf.csv')
prediction

'xxraatmgaihoe yoyo al zzad a fnazastaaccsc ass, rrreeccett a ou  rcaen emplye  eeeveta  ws ul a apeesue osee eveyoeaoetee anddonhotergiestivvitress  .yyy entuurrhuiiaam andpsitvive  eeyzdde eevet  aaa eesounding  esucess. e wswated  otare iz ammeethat teend ou etfeelatt gru  e oich aof yu ad of add  dedictnon veents lt heeee ar afaal ttaoe o aappoiieciatonnr frte inddceeeddaaaale fefret yyo u tt  so  froles eedd. ass e overr fraard  d, ts caryfrrwwad this spirit  of caamaaaeaaadrr  fand steamaerdk yoursyu rr cotribuions saee   pinvaqduinble aou ompy succs adwe lood foorwwearrdd tl eechleiin  deet  greateer amileeatoes agthroce  aainn  ,ank u o  agn an etiteerrrl att f  u ta ee ast aotuiiqe  asucesssf aad ayoe amoraal oents aded.esreads   at'

In [31]:
correction = """
Jennifer, thanks for hosting the meeting yesterday. Sorry I had to leave early, but I had to meet with Beth for a personal matter. I believe there was a lot of good information exchanged, which is why it was unfortunate that I had to leave so soon. I am certainly aware that there were different opinions shared, but we did not have a chance to discuss them in detail. Regarding the relationship between our company and Enron, it is important to keep this very clear in our forward strategy. At the end of the day, maintaining clear communication and aligning our goals is crucial. I accept responsibility for any issues in the relationship. If my corporate talks involve deals with Enron, we still have to manage the account carefully. Peter expressed concerns about that. He has always preserved the integrity of the account team by ensuring responsibility to the customer. That is why we need to keep Dave informed and make sure no actions are taken without his knowledge. He is not the decision-maker or dealmaker, but as you are well aware, the contact with Enron has impacted other business opportunities within Enron. Dave is fully responsible for Enron and should be involved in resolving any situations that impact our relationship with Enron. I need your help in guiding the organization to avoid situations where Enron will continue to influence our business transactions. Dave needs to have a complete understanding of how his business transactions shape our dealings with Enron. He will understand the situation and how it can impact the business at Enron.
"""
evaluate(correction, 'datasets/topview-2/ground_truths/video_11.txt')

0.6420382165605096
0.5783132530120482


In [5]:
# HF 2
prediction = process_prediction('stream_results/hf_3/video_11_clf.csv')
prediction

'innniiiffer,,  hhnnssrsstiiinnggg     thee  mmeeeenngg   yyeessseerrrddday,,..  ssoorrrry     ii  heeaadd    oo  lleeaffvee   buut  fi haaddd   oo  mmeeeetiteee    wwith    bbbeeettth  pppeerrrlllmmnn....  ii    lleiievvee  te rrre  wwssss aaaa llo  ggooddd  iinfformmmtiiionn  ccchaanngeddd  nnd  ttt isswwyseeaann sso   ll... i  cccerrrttiinlllyyy  ammawwwayy  iihh   affffddfiiiffffeennntiiinnssitt  toeenoonn   at  iid  nnntt    avvee eefffforsee  m....rreeegggaaarrddiigg ssttee eellttiioonnsspppp  aaeeeeeeeeennnggcoomma   aan  nnnronn....,  iinn   dddottoo    mmmakkke   sssooe  tthgsss  a   vvrrrryyy   ccllerr   iinn   rrggooo  forrrrrrrdd rrsstrrrrraatteeeggggya.. a  thee end ffoohee   dyy,,,  te   ccccccssunnttttam,,, ddaave  sssuauarrliinnnpppp   uugggt   neeff,,   ammmyyeff,   aaaaandd eerrrrryy,,,  wnn   thheespppooonnsssiabiiiiiiiittyyy sor te  eennron rrrlltiionnsssiipp...  iiff mmmycccccoorpppoorraeefllllkkkssar  aaammakkingg  dddllss wiiiitth ennoonn ,,  ee sssttiilll   avve