In [9]:
import os
import pandas as pd
from evaluate import load
from nltk.translate.bleu_score import SmoothingFunction, corpus_bleu
import warnings

# The cells below read "data/..." and "response/..." with relative paths, so the
# working directory must be the folder that contains both (presumably the
# project root, one level above this notebook -- confirm for your layout).
# Only step up when we are not already there: an unconditional os.chdir("..")
# climbs one more directory every time this cell is re-run.
if not (os.path.isdir("data") and os.path.isdir("response")):
    os.chdir("..")
# Suppress every warning category for the rest of the kernel session.
warnings.filterwarnings("ignore")

In [2]:
def bleu(ref, gen, weights=(0, 1, 0, 0)):
    '''
    calculate a corpus-level bleu score. uses nltk implementation (corpus_bleu)

    Sentences are tokenized on whitespace and every candidate is scored
    against exactly one reference: the i-th entry of ``ref`` is the reference
    for the i-th entry of ``gen``. Smoothing is SmoothingFunction().method4.

    Args:
        ref : an iterable of reference sentences (strings)
        gen : an iterable of candidate (generated) sentences (strings),
              aligned one-to-one with ``ref``
        weights : n-gram weights forwarded to corpus_bleu. The default
              (0, 1, 0, 0) puts all of the weight on bigrams, which is what
              this function always used before the parameter existed.
    Returns:
        bleu score(float)
    Raises:
        ValueError : if ``ref`` and ``gen`` do not contain the same number
              of sentences
    '''
    gen_bleu = [sentence.split() for sentence in gen]
    # corpus_bleu expects a *list of references* per candidate, hence the
    # extra pair of brackets around each tokenized reference.
    ref_bleu = [[sentence.split()] for sentence in ref]

    # Fail loudly on misaligned inputs instead of depending on an assert
    # inside the library (asserts are stripped when Python runs with -O).
    if len(ref_bleu) != len(gen_bleu):
        raise ValueError(
            "ref and gen must have the same number of sentences, got "
            f"{len(ref_bleu)} references and {len(gen_bleu)} candidates"
        )

    cc = SmoothingFunction()
    return corpus_bleu(ref_bleu, gen_bleu, weights=weights, smoothing_function=cc.method4)

In [12]:
# Load the "bertscore" metric once at module level; bert() reuses this object
# on every call instead of loading it again.
bertscore = load("bertscore")
def bert(ref, gen):
    '''
    score candidates against references with the module-level ``bertscore``
    metric (lang="en") and average every list of scores it returns
    Args:
        ref : reference sentences
        gen : candidate(generated) sentences
    Returns:
        dict mapping each key returned by bertscore.compute, except
        "hashcode", to the arithmetic mean of its scores
    '''
    raw_scores = bertscore.compute(predictions=gen, references=ref, lang="en")
    averaged = {}
    for name, values in raw_scores.items():
        # The "hashcode" entry is deliberately left out of the averages.
        if name == "hashcode":
            continue
        averaged[name] = sum(values) / len(values)
    return averaged

In [4]:
# Reference answers: the `answerText` column of the test-split CSV.
ref = pd.read_csv(
    "data/processed/split/counsel-chat-best-answer-test.csv"
).answerText
ref

0      It is very common for people to have multiple ...
1      Do you live with your mom and have constant in...
2      When I'm working with men with this type of si...
3      Hello, and thank you for your question and see...
4      Give yourself a little more credit for self-ob...
                             ...                        
107    You are not alone.  SocIal media marketing is ...
108    Staying present is an attitude most of us aspi...
109    It sounds like your confused as to why your fr...
110    Finding the right therapist for you is very im...
111    It's more than just normal, it's expected! Qui...
Name: answerText, Length: 112, dtype: object

### Base model

In [20]:
# Base-model answers: the `generatedAnswerText` column of the response CSV.
gen_base = pd.read_csv(
    "response/test-response-model_base_no_sys.csv"
).generatedAnswerText
gen_base

0      It's completely understandable that you have a...
1      It's completely understandable that you don't ...
2      I'm so sorry to hear that you're experiencing ...
3      Sorry to hear that you're going through a diff...
4      Sorry to hear that you're feeling alone and st...
                             ...                        
107    It's understandable to feel frustrated when yo...
108    Great! Here are some suggestions for building ...
109    I'm so sorry to hear that you're experiencing ...
110    Great question! Finding the right therapist is...
111    It is completely normal for people to cry duri...
Name: generatedAnswerText, Length: 112, dtype: object

In [21]:
# Corpus BLEU first, then the averaged BERTScore dict, one result per line.
for score_fn in (bleu, bert):
    print(score_fn(ref, gen_base))

0.029918555044600812
{'precision': 0.8262056687048503, 'recall': 0.835644604904311, 'f1': 0.8308073596230575}


### Finetuned Llama-2-7B-Chat

In [16]:
# Fine-tuned-model answers: the `generatedAnswerText` column of the response CSV.
gen_qlora = pd.read_csv(
    "response/test-response-model_240411_0952.csv"
).generatedAnswerText
gen_qlora

0      Thank you for sharing your personal struggles ...
1      Thank you for reaching out for support. It's c...
2      Thank you for reaching out for support. It tak...
3      Thank you for sharing your struggles with me. ...
4      Thank you for sharing your feelings with me. I...
                             ...                        
107    Thank you for reaching out with your concern. ...
108    Of course! Building positive relationships in ...
109    Thank you for reaching out with your concern. ...
110    Thank you for reaching out with your question!...
111    Hello there! It's completely normal for people...
Name: generatedAnswerText, Length: 112, dtype: object

In [17]:
# Corpus BLEU first, then the averaged BERTScore dict, one result per line.
for score_fn in (bleu, bert):
    print(score_fn(ref, gen_qlora))

0.0306515699220267
{'precision': 0.8290465399622917, 'recall': 0.8361664499555316, 'f1': 0.8324928746691772}


### RAG with Llama-2-7B

In [18]:
# RAG answers: the `response` column of the RAG response CSV.
gen_rag = pd.read_csv(
    "response/test-response-llama2-7B-RAG.csv"
).response
gen_rag

0       It is not uncommon for individuals to have mu...
1       It sounds like you are in a difficult situati...
2       It's important to understand that erectile dy...
3       It sounds like you are struggling with some p...
4       It sounds like you are struggling with some d...
                             ...                        
107     It's possible that the people you are reachin...
108     Of course! Having positive relationships in t...
109     It is understandable to feel frustrated or up...
110     Finding a good therapist is crucial for succe...
111     It is completely normal for people to cry dur...
Name: response, Length: 112, dtype: object

In [19]:
# Corpus BLEU first, then the averaged BERTScore dict, one result per line.
for score_fn in (bleu, bert):
    print(score_fn(ref, gen_rag))

0.03323976953759787
{'precision': 0.8392691617565495, 'recall': 0.8385810325188296, 'f1': 0.8388241917959282}
