In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))


In [None]:
!pip install openai -q

In [None]:
import os
from tqdm import tqdm
from openai import OpenAI
import pandas as pd
import json

In [None]:
from kaggle_secrets import UserSecretsClient
# Fetch the API key from the Kaggle secrets client (secret label "GPT_key")
# instead of writing the key into the notebook source.
user_secrets = UserSecretsClient()
gpt_key = user_secrets.get_secret("GPT_key")


In [None]:
# Module-level OpenAI client; summarize() issues every request through it.
client = OpenAI(api_key = gpt_key)

In [None]:
def summarize(system, data, model="gpt-4o"):
    """Request structured summaries for a batch of samples from the chat API.

    Args:
        system: Text of the system message.
        data: Text of the user message (the batch of samples to summarize).
        model: Chat model name. Defaults to "gpt-4o", the value that was
            previously hard-coded, so existing two-argument calls are unchanged.

    Returns:
        The raw content string of the first choice. The request asks for
        strict JSON-schema output shaped as
        {"samples": [{"id": <integer>, "summary": <string>}, ...]};
        parsing that string is left to the caller.

    Raises:
        ValueError: If the first choice carries no content (for example when
            the model refused), so the failure is reported here rather than
            as an opaque error when the caller tries to parse None.
    """
    # Schema of one element of "samples": an integer id and a string summary.
    sample_schema = {
        "type": "object",
        "properties": {
            "id": {"type": "integer"},
            "summary": {"type": "string"},
        },
        "required": ["id", "summary"],
        "additionalProperties": False,
    }

    response_format = {
        "type": "json_schema",
        "json_schema": {
            "name": "summarization_response",
            "strict": True,
            "schema": {
                "type": "object",
                "properties": {
                    "samples": {"type": "array", "items": sample_schema},
                },
                "required": ["samples"],
                "additionalProperties": False,
            },
        },
    }

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system},
            {"role": "user", "content": data},
        ],
        response_format=response_format,
    )

    message = response.choices[0].message
    if message.content is None:
        # getattr keeps this working on SDK versions without a `refusal` field.
        refusal = getattr(message, "refusal", None)
        raise ValueError(f"Model returned no content (refusal: {refusal!r})")

    return message.content

In [None]:
# Output-format instruction that is appended to each base prompt to build the
# final system prompts.
output_prompt = ' You will output in JSON, where the samples are gonna be in a list.'

### Bangla prompt

In [None]:
# System prompt for summarizing plain Bengali text.
# NOTE(review): bangla_system_prompt is not referenced by any other cell
# visible in this notebook; llm_prediction() uses the code-mixed prompt.
base_prompt = """
You are an expert in natural language processing and text summarization. Your task is to summarize Bengali text into a concise and meaningful version while preserving the key points and overall meaning.

Follow these steps:
1. Read the input Bengali text carefully.
2. Identify the main ideas, key points, and essential information.
3. Write a summary that is shorter than the original text but retains the core meaning.

You will receive an array of objects, each containing an 'id' and 'text'.
"""

# Final system prompt = task description + output-format instruction.
bangla_system_prompt = base_prompt + output_prompt

### Code-mixed prompt

In [None]:
# System prompt for summarizing code-mixed Bengali text.
# This rebinds base_prompt, which the Bangla prompt cell also assigns, so the
# value of base_prompt depends on which of the two cells ran last.
base_prompt = """
You are an expert in natural language processing and text summarization. Your task is to summarize codemixed Bengali text into a concise and meaningful version while preserving the key points and overall meaning.

Follow these steps:
1. Read the input Bengali text carefully.
2. Identify the main ideas, key points, and essential information.
3. Write a summary that is shorter than the original text but retains the core meaning.

You will receive an array of objects, each containing an 'id' and 'text'.
"""

# Final system prompt = task description + output-format instruction.
codemixed_system_prompt = base_prompt + output_prompt


In [None]:
# Load the experiment CSV. The file's own 'id' column is kept under the name
# 'dataset_id', which frees 'id' for the 1-based running id assigned later.
df = pd.read_csv(
    '/kaggle/input/codemixed-further-experiments-dataset/further_experiments/xl_sum_80.csv'
).rename(columns={'id': 'dataset_id'})

In [None]:
# Display the column index of the loaded frame as a quick schema check.
df.columns

In [None]:
# Renumber the rows from 0 and add a 1-based 'id' column derived from that
# fresh index; the model is asked to return this id with each summary.
df = df.reset_index(drop=True).assign(id=lambda frame: frame.index + 1)

In [None]:
# Directory that receives the per-column result CSVs. It is created up front;
# exist_ok=True makes re-running this cell harmless.
output_dir = 'output/gpt-4o'
os.makedirs(output_dir, exist_ok=True)

### helper functions

In [None]:
def process_text(text):
    """Flatten `text` onto a single line.

    Every newline character is swapped for one space; nothing else is
    altered, so consecutive newlines become consecutive spaces.
    """
    newline, space = '\n', ' '
    return text.replace(newline, space)

In [None]:
# Number of rows sent to the model in a single request.
chunk_size = 10

def process_chunks(column):
    """Cut the notebook-level `df` into consecutive row slices.

    Each slice holds `chunk_size` rows, except possibly the last one, which
    holds whatever remains. `column` is accepted but not used here; the
    slices always contain every column of `df`.

    Returns:
        A list of DataFrame slices in row order.
    """
    starts = range(0, len(df), chunk_size)
    return [df[start:start + chunk_size] for start in starts]

In [None]:
def generate_chunk_list(chunks, column):
    """Turn DataFrame chunks into prompt payloads for the model.

    Args:
        chunks: Iterable of DataFrame slices; each needs an 'id' column and
            the text column named by `column`.
        column: Name of the column holding the text to summarize.

    Returns:
        A list with one dict per chunk:
            'chunk_name'  -- "<first>-<last>", the chunk's first and last
                             index labels, each plus one.
            'user_prompt' -- one "<id>: <single-line text>" line per row,
                             joined with newlines.
    """
    def build_entry(chunk):
        # Index labels are shifted by one to give a 1-based chunk name.
        first_id = chunk.index[0] + 1
        last_id = chunk.index[-1] + 1

        lines = [
            f"{row['id']}: {process_text(row[column])}"
            for _, row in chunk.iterrows()
        ]

        return {
            'chunk_name': f"{first_id}-{last_id}",
            'user_prompt': "\n".join(lines),
        }

    return [build_entry(chunk) for chunk in chunks]

In [None]:
def llm_prediction(chunk_list, system_prompt=None):
    """Summarize every chunk with the model and stack the results.

    Args:
        chunk_list: List of dicts with 'chunk_name' and 'user_prompt' keys.
        system_prompt: System prompt sent with each request. When omitted,
            the notebook-level `codemixed_system_prompt` is used, which was
            the previously hard-coded behavior.

    Returns:
        A DataFrame built from the "samples" entries of all successful
        responses, in chunk order, with a fresh 0..n-1 index. It is an empty
        DataFrame when no chunk succeeded or `chunk_list` is empty.

    A chunk whose request or JSON parsing fails is reported on stdout and
    skipped, so one bad chunk does not abort the whole run.
    """
    if system_prompt is None:
        system_prompt = codemixed_system_prompt

    # Collect per-chunk frames and concatenate once at the end; concatenating
    # inside the loop re-copies all earlier rows on every iteration.
    frames = []
    for chunk in chunk_list:
        try:
            response = summarize(system_prompt, chunk['user_prompt'])
            samples = json.loads(response)['samples']
            frames.append(pd.DataFrame(samples))
        except Exception as e:
            # Best-effort by design: log which chunk failed and carry on.
            print(f"An error occurred in chunk {chunk['chunk_name']}: {e}")

    if not frames:
        return pd.DataFrame()
    return pd.concat(frames, ignore_index=True)

In [None]:
def process_prediction(final_df):
    """Attach model output to the source rows.

    Inner-joins the notebook-level `df` with `final_df` on 'id', so only ids
    present on both sides survive, then discards every row that has a
    missing value in any column.

    Returns:
        The joined, NaN-free DataFrame.
    """
    merged = df.merge(final_df, on='id', how='inner')
    return merged.dropna()

#### classification score

In [None]:
from sklearn.metrics import classification_report
def classification(y_true, y_pred):
    """Print the classification report for the given labels, using 4 digits."""
    print(classification_report(y_true, y_pred, digits=4))

#### generation score

In [None]:
!pip install torch -q
!pip install bert_score -q
!pip install torchmetrics -q
!pip install inltk -q

In [None]:
!pip install rouge_score -q

In [None]:
!pip install --upgrade nltk -q

In [None]:
!pip install evaluate -q

In [None]:
!pip install git+https://github.com/csebuetnlp/normalizer -q

In [None]:
import pandas as pd
from torchmetrics.text.bert import BERTScore
import torch
import bert_score
from bert_score import score
import nltk
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('omw-1.4')
from nltk.translate.meteor_score import meteor_score
from nltk.tokenize import word_tokenize
from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction, closest_ref_length, brevity_penalty
import math
from tqdm import tqdm
# from datasets import load_metric
# rouge_metric = load_metric("rouge")
import evaluate
metric = evaluate.load("rouge")
from normalizer import normalize
from nltk.util import ngrams

In [None]:
import nltk
nltk.download('punkt_tab')

In [None]:
def calculate_bleu(reference_sentence, candidate_sentence):
    """Score one candidate sentence against one reference sentence.

    Args:
        reference_sentence: Reference text (str).
        candidate_sentence: Candidate text (str).

    Returns:
        A tuple (bleu, brevity_penalty, length_ratio) where
        bleu is the smoothed sentence-level BLEU score,
        brevity_penalty is computed from the candidate and reference lengths,
        length_ratio is candidate tokens / reference tokens, or 0.0 when the
        reference tokenizes to nothing (this used to raise ZeroDivisionError).
    """
    reference = [word_tokenize(reference_sentence)]
    candidate = word_tokenize(candidate_sentence)

    # NOTE(review): these n-gram weights do not sum to 1, unlike the uniform
    # (0.25, 0.25, 0.25, 0.25) default. They are kept unchanged so scores stay
    # comparable with earlier runs -- confirm that they are intentional.
    bl = sentence_bleu(
        reference,
        candidate,
        weights=(1, 0.5, 0.33, 0.25),
        smoothing_function=SmoothingFunction().method1,
    )

    hyp_len = len(candidate)
    ref_len = len(reference[0])
    bp = brevity_penalty(closest_ref_length(reference, hyp_len), hyp_len)

    # Guard the ratio against an empty reference.
    ratio = hyp_len / ref_len if ref_len else 0.0

    return bl, bp, ratio

# Smoke test with identical reference and candidate sentences in Bangla.
# Being the last expression of the cell, the tuple returned by
# calculate_bleu() is displayed as the cell output.
reference_sentence = "আপনিও শুরু এখন, অ্যাপ নাম সাইজ কই।"
candidate_sentence = "আপনিও শুরু এখন, অ্যাপ নাম সাইজ কই।"
calculate_bleu(reference_sentence, candidate_sentence)

In [None]:
def run_bleu_script(df, true_col, pred_col):
    """Average sentence-level BLEU statistics over all rows of `df`.

    Args:
        df: DataFrame holding the reference and predicted texts.
        true_col: Name of the reference column.
        pred_col: Name of the prediction column.

    Returns:
        A dict with the mean 'bleu', 'bp' (brevity penalty) and 'ratio'
        (length ratio) over all rows. The same values are printed. For a
        frame with no rows all three are 0.0 (this used to raise
        ZeroDivisionError).
    """
    total_bleu = 0.0
    total_bp = 0.0
    total_ratio = 0.0
    for reference_sentence, candidate_sentence in zip(df[true_col], df[pred_col]):
        # str() so that non-string cells (e.g. NaN) are tokenized, not rejected.
        bleu, bp, ratio = calculate_bleu(str(reference_sentence), str(candidate_sentence))
        total_bleu += bleu
        total_bp += bp
        total_ratio += ratio

    # With no rows the totals are all zero; divide by 1 to report 0.0 means.
    divisor = df.shape[0] if df.shape[0] else 1
    bleu = total_bleu / divisor
    bp = total_bp / divisor
    ratio = total_ratio / divisor
    print(f"bleu: {bleu}, bp: {bp}, ratio: {ratio}")
    return {'bleu': bleu, 'bp': bp, 'ratio': ratio}

In [None]:
# Function to calculate ROUGE-1, ROUGE-2, and ROUGE-L scores for a pair of texts
def calculate_rouge_scores(reference_tokens, system_tokens):
    """Compute ROUGE-1, ROUGE-2 and ROUGE-L for one tokenized text pair.

    ROUGE-1 and ROUGE-2 are computed over the *sets* of distinct unigrams and
    bigrams (a repeated n-gram counts once). ROUGE-L uses the length of the
    longest common subsequence of the two token sequences.

    Args:
        reference_tokens: Tokens of the reference text.
        system_tokens: Tokens of the system (predicted) text.

    Returns:
        A dict with 'Precision', 'Recall' and 'F1' entries for each of
        'ROUGE-1', 'ROUGE-2' and 'ROUGE-L' (keys such as 'ROUGE-2 F1').
        Any ratio whose denominator is zero (empty input, or a text with no
        bigrams) is reported as 0.0 instead of raising.
    """
    reference_tokens = list(reference_tokens)
    system_tokens = list(system_tokens)

    def safe_ratio(numerator, denominator):
        # 0.0 when there is nothing to divide by (e.g. an empty text).
        return numerator / denominator if denominator else 0.0

    def f1(precision, recall):
        # Harmonic mean of precision and recall; 0.0 when both are zero.
        total = precision + recall
        return 2 * precision * recall / total if total else 0.0

    def lcs(X, Y):
        # Classic dynamic-programming longest-common-subsequence length.
        m, n = len(X), len(Y)
        dp = [[0] * (n + 1) for _ in range(m + 1)]

        for i in range(1, m + 1):
            for j in range(1, n + 1):
                if X[i - 1] == Y[j - 1]:
                    dp[i][j] = dp[i - 1][j - 1] + 1
                else:
                    dp[i][j] = max(dp[i - 1][j], dp[i][j - 1])

        return dp[m][n]

    # ROUGE-1: overlap of distinct unigrams.
    reference_unigrams = set(reference_tokens)
    system_unigrams = set(system_tokens)
    overlap_rouge1 = len(reference_unigrams & system_unigrams)
    precision_rouge1 = safe_ratio(overlap_rouge1, len(system_unigrams))
    recall_rouge1 = safe_ratio(overlap_rouge1, len(reference_unigrams))
    f1_rouge1 = f1(precision_rouge1, recall_rouge1)

    # ROUGE-2: overlap of distinct bigrams. zip(tokens, tokens[1:]) yields the
    # consecutive token pairs as tuples.
    reference_bigrams = set(zip(reference_tokens, reference_tokens[1:]))
    system_bigrams = set(zip(system_tokens, system_tokens[1:]))
    overlap_rouge2 = len(reference_bigrams & system_bigrams)
    precision_rouge2 = safe_ratio(overlap_rouge2, len(system_bigrams))
    recall_rouge2 = safe_ratio(overlap_rouge2, len(reference_bigrams))
    # The previous code always added 1 to the F1 denominator here, which
    # deflated every non-zero ROUGE-2 F1 (identical texts scored 0.667).
    f1_rouge2 = f1(precision_rouge2, recall_rouge2)

    # ROUGE-L: longest common subsequence relative to each text's length.
    lcs_length = lcs(reference_tokens, system_tokens)
    precision_rougeL = safe_ratio(lcs_length, len(system_tokens))
    recall_rougeL = safe_ratio(lcs_length, len(reference_tokens))
    f1_rougeL = f1(precision_rougeL, recall_rougeL)

    return {
        'ROUGE-1 Precision': precision_rouge1,
        'ROUGE-1 Recall': recall_rouge1,
        'ROUGE-1 F1': f1_rouge1,
        'ROUGE-2 Precision': precision_rouge2,
        'ROUGE-2 Recall': recall_rouge2,
        'ROUGE-2 F1': f1_rouge2,
        'ROUGE-L Precision': precision_rougeL,
        'ROUGE-L Recall': recall_rougeL,
        'ROUGE-L F1': f1_rougeL,
    }

# Function to calculate the average of ROUGE scores for an array of text pairs
def calculate_average_rouge_scores(reference_texts, system_texts):
    """Average the per-pair ROUGE scores over aligned text lists.

    Args:
        reference_texts: Sequence of reference strings.
        system_texts: Sequence of system strings; element i is scored against
            reference_texts[i]. The number of pairs is len(reference_texts).

    Returns:
        A dict with the mean of each ROUGE-1/2/L precision, recall and F1
        value over all pairs. When there are no pairs every value is 0.0
        (this used to raise ZeroDivisionError).
    """
    metric_names = (
        'ROUGE-1 Precision',
        'ROUGE-1 Recall',
        'ROUGE-1 F1',
        'ROUGE-2 Precision',
        'ROUGE-2 Recall',
        'ROUGE-2 F1',
        'ROUGE-L Precision',
        'ROUGE-L Recall',
        'ROUGE-L F1',
    )
    total_scores = dict.fromkeys(metric_names, 0.0)

    num_pairs = len(reference_texts)
    if num_pairs == 0:
        # Nothing to average; report zeros rather than dividing by zero.
        return total_scores

    for i in range(num_pairs):
        reference_tokens = nltk.word_tokenize(reference_texts[i])
        system_tokens = nltk.word_tokenize(system_texts[i])

        scores = calculate_rouge_scores(reference_tokens, system_tokens)

        for key, value in scores.items():
            total_scores[key] += value

    # Calculate the average scores
    return {key: value / num_pairs for key, value in total_scores.items()}

In [None]:
def calculate_rouge(df, true_col, pred_col):
    """Print and return the average ROUGE scores for two text columns.

    Every cell of both columns is converted with str() and passed through
    `normalize` (imported from the `normalizer` package) before scoring.

    Args:
        df: DataFrame holding the reference and predicted texts.
        true_col: Name of the reference column.
        pred_col: Name of the prediction column.

    Returns:
        The dict of averaged scores that is also printed. Earlier versions
        returned None; the dict is returned so callers can store the numbers,
        in line with run_bleu_script().
    """
    reference_texts = [normalize(str(sentence)) for sentence in df[true_col].tolist()]
    system_texts = [normalize(str(sentence)) for sentence in df[pred_col].tolist()]

    average_scores = calculate_average_rouge_scores(reference_texts, system_texts)
    print("Average ROUGE Scores:")
    print("-" * 30)
    for key, value in average_scores.items():
        print(f"{key}: {value:.4f}")

    return average_scores

In [None]:
def calculate_scores_generation_task(result_df, true_col='summary_x', pred_col='summary_y'):
    """Print the ROUGE and BLEU metrics for a frame of generated summaries.

    Args:
        result_df: DataFrame holding the reference and predicted texts.
        true_col: Reference column. Defaults to 'summary_x'.
        pred_col: Prediction column. Defaults to 'summary_y'.

    The defaults are the previously hard-coded names. '_x' and '_y' are the
    suffixes pandas adds to same-named columns of the left and right frame in
    a merge -- presumably the dataset summary and the model summary from
    process_prediction(); confirm against the dataset's columns.
    """
    calculate_rouge(result_df, true_col, pred_col)
    run_bleu_script(result_df, true_col, pred_col)

## Choose columns

In [None]:
# Input text columns to run through the pipeline, one full pass per column.
columns = [ 'perturbed_text_words','perturbed_text_sentences', 'perturbed_text_salient']

In [None]:
# Full pipeline, once per input column: chunk the data, query the model,
# join the predictions onto the source rows, print the metrics, save a CSV.
for column in columns:
    print(column)
    chunks = process_chunks(column)
    chunk_list = generate_chunk_list(chunks, column)
    # One API request per chunk.
    prediction_df = llm_prediction(chunk_list)
    result_df = process_prediction(prediction_df)
    calculate_scores_generation_task(result_df)

    # One result file per input column, written under output_dir.
    final_output_file = os.path.join(output_dir, f"xl_sum_{column}.csv")
    result_df.to_csv(final_output_file, index=False)
    print(f"Final DataFrame saved to {final_output_file}")
    print()
    