In [2]:
import pandas as pd
import numpy as np
import regex as re
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

In [88]:
df_s = pd.read_csv('processed_llama2.csv')
df = df_s.replace('', None).dropna()
df.head()

Unnamed: 0,prompt,original_response,llama_response
0,"Yes, here I am :)",Hi there - good to speak to you!,"Yes, here I am :). I'm so glad you're here! *h..."
1,"I'm from Spain, from Tenerife. I came back hom...",OK yes I can imagine! It's pretty cold here in...,"I'm from Spain, from Tenerife. I came back hom..."
2,At the moment I'm not working so I have a lot ...,What's your normal work?,At the moment I'm not working so I have a lot ...
3,"About traveling to the UK, I don't go often. I...",Ah right - yes I thought you seemed very fluen...,"About traveling to the UK, I don't go often. I..."
4,In my last job I worked as a careers advisor (...,It's careers!,In my last job I worked as a careers advisor (...


In [89]:
def count_syllables(word):
    word = word.lower()
    vowels = "aeiouy"
    syllable_count = 0
    if word[0] in vowels:
        syllable_count += 1
    for index in range(1, len(word)):
        if word[index] in vowels and word[index - 1] not in vowels:
            syllable_count += 1
    if word.endswith("e"):
        syllable_count -= 1
    if syllable_count == 0:
        syllable_count = 1
    return syllable_count

In [90]:
def calculate_flesch_reading_ease(text):
    # Split the text into sentences and words
    sentences = re.split(r'[.!?]', text)
    sentences = [s for s in sentences if s]  # Remove empty strings
    num_sentences = len(sentences)
    
    words = re.findall(r'\w+', text)
    num_words = len(words)
    
    # Calculate syllables
    syllable_count = sum(count_syllables(word) for word in words)
    
    # Calculate ASL (Average Sentence Length) and ASW (Average Syllables per Word)
    ASL = num_words / num_sentences if num_sentences > 0 else 0
    ASW = syllable_count / num_words if num_words > 0 else 0
    
    # Calculate Flesch Reading Ease score
    flesch_score = 206.835 - (1.015 * ASL) - (84.6 * ASW)
    return flesch_score


In [91]:
# Apply the function to calculate the Flesch Reading Ease score for each row
df['FleschReadingEase_Original'] = df['original_response'].apply(calculate_flesch_reading_ease)
df['FleschReadingEase_Llama'] = df['llama_response'].apply(calculate_flesch_reading_ease)

# Calculate the average Flesch Reading Ease score across the column
average_score_og = df['FleschReadingEase_Original'].mean()
average_score_ll = df['FleschReadingEase_Llama'].mean()

# Display the DataFrame with the Flesch Reading Ease scores and print the average score
#print(df)
print(f"Average Flesch Reading Ease Score (Original): {average_score_og}")
print(f"Average Flesch Reading Ease Score (Llama): {average_score_ll}")



Average Flesch Reading Ease Score (Original): 85.4575709460931
Average Flesch Reading Ease Score (Llama): 77.0865226955559


In [38]:
#train
# Average Flesch Reading Ease Score (Original): 85.51828929776485
# Average Flesch Reading Ease Score (Llama): 77.88151880974976

#dev
# Average Flesch Reading Ease Score (Original): 83.03879332077877
# Average Flesch Reading Ease Score (Llama): 87.60023413654709

#test
# Average Flesch Reading Ease Score (Original): 90.55622109822762
# Average Flesch Reading Ease Score (Llama): 83.12165369634366

In [92]:
def map_fkre_to_grade(fkre_score):
    if 90 <= fkre_score <= 100:
        return "5th grade", "very easy", "11"
    elif 80 <= fkre_score < 90:
        return "6th grade", "fairly easy", "11-12"
    elif 70 <= fkre_score < 80:
        return "7th grade", "easy", "12-13"
    elif 60 <= fkre_score < 70:
        return "8th–9th grade", "medium", "13-15"
    elif 50 <= fkre_score < 60:
        return "10th–12th grade", "difficult", "15-18"
    elif 30 <= fkre_score < 50:
        return "College", "fairly difficult", "18-19"
    elif 0 <= fkre_score < 30:
        return "College graduate", "very difficult", "22-23"
    else:
        return "Unknown", "Unknown", "Unknown"

og_grade = map_fkre_to_grade(average_score_og)
ll_grade = map_fkre_to_grade(average_score_ll)
print(f"Original grade mapping {og_grade}")
print(f"Llama grade mapping {ll_grade}")

Original grade mapping ('6th grade', 'fairly easy', '11-12')
Llama grade mapping ('7th grade', 'easy', '12-13')


In [76]:
smooth_fn = SmoothingFunction().method1

# Function to calculate BLEU score with smoothing
def calculate_bleu(reference, generated):
    reference_tokens = [reference.split()]  # BLEU expects a list of references
    generated_tokens = generated.split()
    return sentence_bleu(reference_tokens, generated_tokens, smoothing_function=smooth_fn)

# Calculate BLEU score for each row
df['BLEU_Score'] = df.apply(lambda row: calculate_bleu(row['original_response'], row['llama_response']), axis=1)

# Calculate the average BLEU score
average_bleu_score = df['BLEU_Score'].mean()

# Display the DataFrame with BLEU scores and the average score
#print(df)
print(f"Average BLEU Score: {average_bleu_score}")

Average BLEU Score: 0.04323106598821415


In [61]:
len(df)

7091

In [None]:
#need to calculate blue score
#avg number of rows is 159.5076923076923

In [77]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [78]:
# Initialize the TF-IDF Vectorizer
vectorizer = TfidfVectorizer()

# Function to calculate cosine similarity
def calculate_cosine_similarity(reference, generated):
    # Fit and transform both texts into TF-IDF vectors
    tfidf_matrix = vectorizer.fit_transform([reference, generated])
    # Calculate the cosine similarity between the vectors
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])
    return similarity[0][0]

# Calculate cosine similarity for each row
df['Cosine_Similarity'] = df.apply(lambda row: calculate_cosine_similarity(row['original_response'], row['llama_response']), axis=1)

# Calculate the average cosine similarity
average_cosine_similarity = df['Cosine_Similarity'].mean()

# Display the DataFrame with cosine similarities and the average score
#print(df)
print(f"Average Cosine Similarity: {average_cosine_similarity}")
#Average Cosine Similarity: 0.18851960845236906

Average Cosine Similarity: 0.16277312423853718


In [79]:
def calculate_ttr(text):
    words = text.split()  # Split text into words (tokens)
    unique_words = set(words)  # Get unique words (types)
    ttr = len(unique_words) / len(words) if words else 0  # Calculate TTR, avoid division by zero
    return ttr

# Apply the TTR calculation to each row in the DataFrame
df['Type_Token_Ratio_og'] = df['original_response'].apply(calculate_ttr)
df['Type_Token_Ratio_ll'] = df['llama_response'].apply(calculate_ttr)

# Calculate the average TTR across all rows (optional)
average_ttr_og = df['Type_Token_Ratio_og'].mean()
average_ttr_ll = df['Type_Token_Ratio_ll'].mean()

# Display the DataFrame with TTR and the average TTR
#print(df)
print(f"Average Type-Token Ratio: {average_ttr_og}")
print(f"Average Type-Token Ratio: {average_ttr_ll}")
#Average Type-Token Ratio: 0.9605937843118938
#Average Type-Token Ratio: 0.7989883610452374

Average Type-Token Ratio: 0.9721520872870951
Average Type-Token Ratio: 0.7950575514405909


In [13]:
average_words = df['original_response'].apply(lambda x: len(str(x).split())).mean()

print(f"Average number of words: {average_words}")

Average number of words: 14.568627450980392


In [23]:
print(df['original_response'][0].apply(len).mean())

AttributeError: 'str' object has no attribute 'apply'

In [6]:
d = df['original_response'][0]
print(len(d))

32


In [97]:
df = pd.read_csv('processed.csv')

In [98]:
len(df)

11204

In [100]:
df = df.truncate(after=1010)
df.to_csv('trunc.csv', index=False)