In [1]:
import pandas as pd
from transformers import pipeline

# (min_length, max_length) generation bounds handed to the summarization
# pipeline for each supported level.
_SUMMARY_LENGTH_BOUNDS = {
    "easy": (20, 50),
    "normal": (50, 100),
    "difficult": (100, 200),
}

# Already-constructed pipelines keyed by model name, so the model is loaded
# once per kernel session instead of once per summarize_text() call.
_SUMMARIZER_CACHE = {}


def _get_summarizer(model_name="facebook/bart-large-cnn"):
    """Return the summarization pipeline for `model_name`, building it on first use."""
    if model_name not in _SUMMARIZER_CACHE:
        _SUMMARIZER_CACHE[model_name] = pipeline("summarization", model=model_name)
    return _SUMMARIZER_CACHE[model_name]


def summarize_text(text, level="normal"):
    """
    Summarize text into different levels: easy, normal, difficult.
    Uses the Hugging Face summarization pipeline.

    Args:
        text: The text to summarize.
        level: One of 'easy', 'normal', or 'difficult'. The level only selects
            the min/max length bounds passed to the pipeline.

    Returns:
        The 'summary_text' of the first result returned by the pipeline.

    Raises:
        ValueError: If `level` is not one of the supported values.
    """
    # Validate before touching the model so a bad argument fails immediately
    # rather than after an expensive pipeline load.
    if not isinstance(level, str) or level not in _SUMMARY_LENGTH_BOUNDS:
        raise ValueError("Invalid level. Choose from 'easy', 'normal', or 'difficult'.")

    min_length, max_length = _SUMMARY_LENGTH_BOUNDS[level]
    summarizer = _get_summarizer()

    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)
    return summary[0]['summary_text']

def summarize_csv(input_file, output_file):
    """
    Summarize content from a CSV file into three levels: Easy, Normal, and Difficult.
    The CSV file should contain a column named 'Content'.

    Args:
        input_file: Path of the CSV file to read.
        output_file: Path the summary CSV is written to (without the index).

    Raises:
        ValueError: If the input CSV has no 'Content' column.
    """
    # Read the CSV file the caller asked for (previously a hard-coded path was
    # read and `input_file` was silently ignored).
    df = pd.read_csv(input_file)

    if 'Content' not in df.columns:
        raise ValueError("Input CSV file must contain a 'Content' column.")

    summaries = []
    # Rows whose 'Content' cell is missing are skipped: pandas reads them as
    # NaN, which is not text that can be summarized.
    for content in df['Content'].dropna():
        # Generate summaries for each level
        summaries.append({
            "Original Content": content,
            "Easy Summary": summarize_text(content, level="easy"),
            "Normal Summary": summarize_text(content, level="normal"),
            "Difficult Summary": summarize_text(content, level="difficult"),
        })

    # Convert the summaries into a DataFrame
    summary_df = pd.DataFrame(summaries)

    # Save to a new CSV file
    summary_df.to_csv(output_file, index=False)
    print(f"Summaries saved to {output_file}")

# Example usage
# The input path is the file this notebook actually processed; it is now
# passed in explicitly instead of being hard-coded inside summarize_csv().
input_csv = "/content/Conflict and Catastrophe Medicine.csv"  # Path to the input CSV file
output_csv = "summarized_output.csv"  # Path to save the summarized CSV file
summarize_csv(input_csv, output_csv)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Summaries saved to summarized_output.csv


In [2]:
!pip install textstat
!pip install transformers

Collecting textstat
  Downloading textstat-0.7.4-py3-none-any.whl.metadata (14 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.0-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.4-py3-none-any.whl (105 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.1/105.1 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.17.0-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m35.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyphen, textstat
Successfully installed pyphen-0.17.0 textstat-0.7.4


In [3]:
import pandas as pd
from transformers import pipeline
import textstat

# Shared Hugging Face summarization pipeline, created once at module level and
# reused by every summarize_text() call in this cell.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

def summarize_text(text, level="normal"):
    """
    Summarize text based on difficulty level.
    Levels: easy, normal, difficult.

    Args:
        text: The text to summarize.
        level: One of 'easy', 'normal', or 'difficult'. It selects the min/max
            length bounds passed to the module-level `summarizer`.

    Returns:
        The generated summary text. For 'easy' the summary is additionally
        passed through summarize_with_readability(..., "simple").

    Raises:
        ValueError: If `level` is not one of the supported values (previously
            an unknown level silently returned None).
    """
    # (min_length, max_length) per level, kept in one place.
    length_bounds = {
        "easy": (20, 50),
        "normal": (50, 100),
        "difficult": (100, 200),
    }
    if not isinstance(level, str) or level not in length_bounds:
        raise ValueError("Invalid level. Choose from 'easy', 'normal', or 'difficult'.")

    min_length, max_length = length_bounds[level]
    summary = summarizer(text, max_length=max_length, min_length=min_length, do_sample=False)[0]['summary_text']

    if level == "easy":
        # Only the short summary gets the extra "simple" post-processing step.
        return summarize_with_readability(summary, "simple")
    return summary

def summarize_with_readability(text, style="simple"):
    """
    Label text according to the requested readability style.

    Currently a placeholder: the text itself is not rewritten, it is only
    prefixed. "simple" yields "Simple summary: ...", "complex" yields
    "Detailed summary: ...", and any other style returns None.
    """
    # Guard clause: unsupported styles produce no result.
    if style not in ("simple", "complex"):
        return None

    if style == "complex":
        # Placeholder for adding technical terms or detailed phrases
        return f"Detailed summary: {text}"

    # Placeholder for real simplification (jargon removal, shorter sentences)
    return f"Simple summary: {text}"

def calculate_readability(text):
    """
    Compute textstat readability metrics for `text`.

    Returns a dict mapping each metric's display name to the score textstat
    reports for it.
    """
    # (column label, textstat scoring function), evaluated in this order.
    metrics = (
        # Higher Flesch Reading Ease means the text is easier to read.
        ("Flesch Reading Ease", textstat.flesch_reading_ease),
        # The remaining scores are reported exactly as textstat returns them;
        # see the textstat documentation for how to interpret each index.
        ("Gunning Fog Index", textstat.gunning_fog),
        ("SMOG Index", textstat.smog_index),
        ("Automated Readability Index", textstat.automated_readability_index),
    )
    return {label: score(text) for label, score in metrics}

def process_and_summarize_csv(input_file, output_file):
    """
    Process CSV with full content and summarize into three levels: Easy, Normal, and Difficult.

    Each output row holds the original content, its three summaries, and the
    readability scores of the ORIGINAL content (not of the summaries).

    Args:
        input_file: Path of the CSV file to read; must have a 'Content' column.
        output_file: Path the result CSV is written to (without the index).

    Raises:
        ValueError: If the input CSV has no 'Content' column.
    """
    # Read the file the caller asked for (previously a hard-coded path was
    # read and `input_file` was silently ignored).
    df = pd.read_csv(input_file)

    if 'Content' not in df.columns:
        raise ValueError("Input CSV must contain a 'Content' column.")

    summaries = []
    # Rows whose 'Content' cell is missing are skipped: pandas reads them as
    # NaN, which is not text that can be summarized or scored.
    for content in df['Content'].dropna():
        # Generate summaries for each level
        easy_summary = summarize_text(content, level="easy")
        normal_summary = summarize_text(content, level="normal")
        difficult_summary = summarize_text(content, level="difficult")

        # Calculate readability scores
        readability_scores = calculate_readability(content)

        summaries.append({
            "Original Content": content,
            "Easy Summary": easy_summary,
            "Normal Summary": normal_summary,
            "Difficult Summary": difficult_summary,
            **readability_scores
        })

    # Convert to DataFrame
    summary_df = pd.DataFrame(summaries)

    # Save to CSV
    summary_df.to_csv(output_file, index=False)
    print(f"Summaries saved to {output_file}")

# Example usage
# The input path is the file this notebook actually processed; it is now
# passed in explicitly instead of being hard-coded inside the function.
input_csv = "/content/Conflict and Catastrophe Medicine.csv"  # Input CSV file with a 'Content' column
output_csv = "summarized __output.csv"  # Output CSV file to save the summaries
process_and_summarize_csv(input_csv, output_csv)


Summaries saved to summarized __output.csv
