In [None]:
# Load the dataset
#dataset = pd.read_csv("/content/drive/MyDrive/BugSum-master/excel sheet/scores.csv")  # Replace "bug_reports.csv" with your file path

import pandas as pd
import numpy as np

# Tunable parameters for the extractive summary step.
# The dataset itself is read into `df` further down, right before the
# per-bug-report summaries are generated; both values below are passed
# unchanged to beam_search_summary for every bug report.
beam_size = 4  # Number of sentences kept per summary; adjust based on experimentation
word_limit = 150  # Word budget handed to beam_search_summary; adjust based on dataset and preference

# Helper function to calculate word count
def word_count(sentence):
    """Return the number of whitespace-separated tokens in ``sentence``."""
    tokens = sentence.split()
    return len(tokens)

# Function to generate summary for a single bug report
def beam_search_summary(group_df, beam_size, word_limit):
    """Build an extractive summary for one bug report from its best-scored sentences.

    Sentences are visited from highest to lowest ``Combined_Score`` and kept
    greedily: a sentence is added only if it still fits into the remaining
    word budget, and selection stops as soon as ``beam_size`` sentences have
    been kept.

    Args:
        group_df: DataFrame with the rows of a single bug report. It must
            provide the columns ``'Sentence'`` and ``'Combined_Score'``.
        beam_size: Maximum number of sentences in the summary.
        word_limit: Maximum total number of whitespace-separated words in the
            summary.

    Returns:
        The kept sentences joined by single spaces, in descending score
        order. An empty string if no sentence could be kept.
    """
    ranked = group_df.sort_values(by='Combined_Score', ascending=False)

    chosen = []
    # Track the budget in a local so the caller-supplied limit is never altered.
    remaining_words = word_limit

    for sentence in ranked['Sentence']:
        if len(chosen) >= beam_size:
            break
        # Empty CSV cells arrive as NaN (a float), which has no words to count.
        if not isinstance(sentence, str):
            continue
        n_words = len(sentence.split())
        # A sentence that does not fit is skipped outright instead of being
        # kept as a fallback candidate, so the summary can never exceed
        # word_limit; a shorter, lower-scored sentence may still fit later.
        if n_words > remaining_words:
            continue
        chosen.append(sentence)
        remaining_words -= n_words

    return ' '.join(chosen)

# Read the scored sentences and fix the location of the output workbook
df = pd.read_csv('/content/drive/MyDrive/BugSum-master/excel sheet/scores.csv')
output_file_path = '/content/drive/MyDrive/BugSum-master/excel sheet/beam_bug_report_summaries.xlsx'  # Path and file name of the Excel file to write

# Build one summary per bug report: rows are grouped by 'BugNum' and each
# group is summarized with the configured beam_size / word_limit
df_grouped = df.groupby('BugNum').apply(
    lambda report_rows: beam_search_summary(report_rows, beam_size, word_limit)
)

# Turn the BugNum-indexed Series into a two-column DataFrame (BugNum, Summary)
summaries_df = df_grouped.reset_index(name='Summary')

# Preview the first few summaries
print(summaries_df.head())

# Persist all summaries to Excel, without the positional index column
summaries_df.to_excel(output_file_path, index=False)




   BugNum                                            Summary
0       1  please adjust the target milestone, so it does...
1       2  build id: m20110210-1200 > build id: m20110210...
2       3  build id: 2.0.1 one idea to resolve this probl...
3       4  the workaround would require adopters to know ...
4       5  it now modifies the list, releases the lock on...


In [None]:
import pandas as pd
from transformers import BartTokenizer, BartForConditionalGeneration

# Read the scored sentences (update this path to your dataset location)
df = pd.read_csv('/content/drive/MyDrive/BugSum-master/excel sheet/scores.csv')

# Tokenizer and summarization model are loaded from the same pretrained BART checkpoint
bart_checkpoint = 'facebook/bart-large-cnn'
tokenizer = BartTokenizer.from_pretrained(bart_checkpoint)
model = BartForConditionalGeneration.from_pretrained(bart_checkpoint)

def refine_summary_with_bart(selected_sentences):
    """Rewrite a list of extracted sentences as a single summary using BART.

    Uses the module-level ``tokenizer`` and ``model``.

    Args:
        selected_sentences: Iterable of sentence strings. Entries that are
            not strings (e.g. NaN from empty CSV cells) or that contain only
            whitespace are ignored.

    Returns:
        The decoded summary text, or an empty string when there is no usable
        input text to summarize.
    """
    usable_sentences = [
        s for s in selected_sentences if isinstance(s, str) and s.strip()
    ]
    if not usable_sentences:
        # Nothing to summarize; do not run generation on an empty input.
        return ''

    # Combine selected sentences into a single text block
    input_text = ' '.join(usable_sentences)

    # The raw text is encoded without any task prefix: a T5-style
    # "summarize: " prefix is just more document text for BART and was being
    # copied into the generated summaries.
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=1024, truncation=True)

    # Generate summary output with beam search
    summary_ids = model.generate(inputs, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Function to select top sentences based on combined scores and refine the summary with BART
def generate_and_refine_summary(group_df, top_fraction=0.25):
    """Summarize one bug report from its highest-scored sentences via BART.

    Args:
        group_df: DataFrame with the rows of a single bug report; the columns
            ``'Sentence'`` and ``'Combined_Score'`` are read.
        top_fraction: Share of the group's rows to keep, ranked by
            ``'Combined_Score'``. At least one row is always kept.

    Returns:
        The text returned by ``refine_summary_with_bart`` for the kept
        sentences.
    """
    # Number of rows to keep: the fraction is truncated, with a floor of one
    keep_count = int(len(group_df) * top_fraction)
    if keep_count < 1:
        keep_count = 1

    # Best-scored rows first, then cut to the requested count
    ranked = group_df.sort_values(by='Combined_Score', ascending=False)
    best_sentences = ranked.head(keep_count)['Sentence'].tolist()

    return refine_summary_with_bart(best_sentences)

# Produce one BART-refined summary per bug report ('BugNum' group) and
# flatten the result into a two-column DataFrame (BugNum, Refined_Summary)
refined_summaries_df = (
    df.groupby('BugNum')
    .apply(generate_and_refine_summary)
    .reset_index(name='Refined_Summary')
)

# Path and file name of the Excel file that receives 'refined_summaries_df'
output_file_path = '/content/drive/MyDrive/BugSum-master/excel sheet/bart_bug_report_summaries.xlsx'

# Preview the first few refined summaries
print(refined_summaries_df.head())

# Persist all refined summaries to Excel, without the positional index column
refined_summaries_df.to_excel(output_file_path, index=False)




   BugNum                                    Refined_Summary
0       1  summarize: please adjust the target milestone,...
1       2  Bug is similar to what bug# 297039 describes. ...
2       3  A fix to the 2.0.1 and 3.0 streams. If any ser...
3       4   java.io.file is used regardless of whether th...
4       5  i will need the stack trace of both threads un...
