In [1]:
pip install textstat

Collecting textstat
  Downloading textstat-0.7.8-py3-none-any.whl.metadata (15 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.2-py3-none-any.whl.metadata (3.2 kB)
Collecting cmudict (from textstat)
  Downloading cmudict-1.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading textstat-0.7.8-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.1/239.1 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cmudict-1.1.1-py3-none-any.whl (939 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m939.7/939.7 kB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.17.2-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m62.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyphen, cmudict, textstat
Successfully installed cmudict-1.1.1 pyphen-0.17.2 textstat-0.7.8


In [15]:
import pandas as pd
import textstat

def interpret_flesch_reading_ease(score):
    if score >= 90:
        return "Very easy to read (5th grade)."
    elif score >= 80:
        return "Easy to read (6th grade)."
    elif score >= 70:
        return "Fairly easy to read (7th grade)."
    elif score >= 60:
        return "Plain English (8th–9th grade)."
    elif score >= 50:
        return "Fairly difficult (10th–12th grade)."
    elif score >= 30:
        return "Difficult to read (college level)."
    else:
        return "Very difficult (college graduate or higher)."

def interpret_fk_grade_level(score):
    if score < 5:
        return "Elementary school level."
    elif score < 9:
        return "Middle school level."
    elif score < 12:
        return "High school level."
    elif score < 16:
        return "College undergraduate level."
    else:
        return "Graduate/professional level."

def analyze_csv_readability(input_csv_path, output_csv_path, text_column="text"):
    # Load CSV
    df = pd.read_csv(input_csv_path)

    # Prepare new columns
    fre_scores = []
    fre_interps = []
    fkgl_scores = []
    fkgl_interps = []

    # Process each row
    for text in df[text_column]:
        try:
            fre = textstat.flesch_reading_ease(text)
            fkgl = textstat.flesch_kincaid_grade(text)

            fre_scores.append(fre)
            fre_interps.append(interpret_flesch_reading_ease(fre))
            fkgl_scores.append(fkgl)
            fkgl_interps.append(interpret_fk_grade_level(fkgl))
        except Exception as e:
            fre_scores.append(None)
            fre_interps.append("Error")
            fkgl_scores.append(None)
            fkgl_interps.append("Error")

    # Add columns to DataFrame
    df["Flesch_Reading_Ease"] = fre_scores
    df["FRE_Interpretation"] = fre_interps
    df["Flesch_Kincaid_Grade"] = fkgl_scores
    df["FKGL_Interpretation"] = fkgl_interps
    df["analysis"] = [
        f"Flesch Reading Ease: {fre:.2f} {fre_i} \n Flesch-Kincaid Grade Level: {fkgl:.2f} {fkgl_i}"
        if fre is not None and fkgl is not None else "Error computing readability."
        for fre, fre_i, fkgl, fkgl_i in zip(fre_scores, fre_interps, fkgl_scores, fkgl_interps)
    ]
    # Save to new CSV
    df.to_csv(output_csv_path, index=False)
    print(f"Readability analysis written to: {output_csv_path}")

# Example usage
analyze_csv_readability("/content/answer_gemini.csv", "Gemini_readability.csv", text_column="Answer")


Readability analysis written to: Gemini_readability.csv


In [18]:
import pandas as pd

def summarize_readability_from_existing(csv_path, model_name):
    df = pd.read_csv(csv_path)

    # Clean interpretation columns (if needed)
    df["FRE_Interpretation"] = df["FRE_Interpretation"].astype(str).str.strip()
    df["FKGL_Interpretation"] = df["FKGL_Interpretation"].astype(str).str.strip()

    summary = {
        "Model": model_name,
        "Average FRE": df["Flesch_Reading_Ease"].mean(),
        "Average FKGL": df["Flesch_Kincaid_Grade"].mean()
    }

    # Count FRE interpretation buckets
    fre_counts = df["FRE_Interpretation"].value_counts()
    for label in [
        "Very easy to read (5th grade).",
        "Easy to read (6th grade).",
        "Fairly easy to read (7th grade).",
        "Plain English (8th–9th grade).",
        "Fairly difficult (10th–12th grade).",
        "Difficult to read (college level).",
        "Very difficult (college graduate or higher)."
    ]:
        summary[f"# {label}"] = fre_counts.get(label, 0)

    # Count FKGL interpretation buckets
    fkgl_counts = df["FKGL_Interpretation"].value_counts()
    for label in [
        "Elementary school level.",
        "Middle school level.",
        "High school level.",
        "College undergraduate level.",
        "Graduate/professional level."
    ]:
        summary[f"# {label}"] = fkgl_counts.get(label, 0)

    return pd.DataFrame([summary])

# Run for both models
gpt_summary = summarize_readability_from_existing("GPT4o_readability.csv", "GPT-4o")
gem_summary = summarize_readability_from_existing("Gemini_readability.csv", "Gemini")

# Combine and print
summary_df = pd.concat([gpt_summary, gem_summary], ignore_index=True)
print(summary_df)

# Optional: Save to CSV
summary_df.to_csv("readability_summary_existing.csv", index=False)


    Model  Average FRE  Average FKGL  # Very easy to read (5th grade).  \
0  GPT-4o    12.710222     17.822441                                 0   
1  Gemini    11.136885     18.171366                                 1   

   # Easy to read (6th grade).  # Fairly easy to read (7th grade).  \
0                            0                                   0   
1                            0                                   0   

   # Plain English (8th–9th grade).  # Fairly difficult (10th–12th grade).  \
0                                 0                                      0   
1                                 0                                      2   

   # Difficult to read (college level).  \
0                                    52   
1                                   136   

   # Very difficult (college graduate or higher).  # Elementary school level.  \
0                                            1948                           0   
1                                      