In [1]:
from pathlib import Path

import pandas as pd
from transformers import BartTokenizer, BartForConditionalGeneration

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def generate_and_add_summary_column(
    csv_file_path,
    input_column_name,
    target_column_name,
    model_name,
    output_column_name,
    output_csv_file=None,
    nrows=10,
    tokenizer=None,
    model=None,
):
    """Summarize one text column of a CSV with BART and save the augmented table.

    Reads up to ``nrows`` rows from ``csv_file_path``, generates a summary for
    every value in ``input_column_name``, stores the summaries in a new column
    named ``output_column_name`` and writes the result to ``output_csv_file``.

    Parameters
    ----------
    csv_file_path : str or path-like
        CSV file to read.
    input_column_name : str
        Column holding the text to summarize.
    target_column_name : str
        Currently unused; kept so existing calls keep working.
    model_name : str
        Name passed to ``from_pretrained`` when ``tokenizer`` or ``model`` is
        not supplied.
    output_column_name : str
        Name of the column that receives the generated summaries.
    output_csv_file : str or path-like, optional
        Where to write the result. Defaults to
        ``output_with_generated_summaries.csv`` in the same directory as
        ``csv_file_path`` (pass it explicitly if the input is not a local file).
    nrows : int or None, optional
        Number of rows to read (default 10). ``None`` reads the whole file.
    tokenizer, model : optional
        Already-loaded tokenizer / model to reuse instead of loading them again.

    Returns
    -------
    pandas.DataFrame
        The loaded rows with the extra ``output_column_name`` column.

    Raises
    ------
    KeyError
        If ``input_column_name`` is not a column of the CSV.
    """
    # Load the CSV data into a DataFrame
    df = pd.read_csv(csv_file_path, nrows=nrows)

    # Fail fast on a wrong column name, before the expensive model load.
    if input_column_name not in df.columns:
        raise KeyError(
            f"Column '{input_column_name}' not found in {csv_file_path}; "
            f"available columns: {list(df.columns)}"
        )

    # Initialize the BART tokenizer and model unless the caller supplied them
    if tokenizer is None:
        tokenizer = BartTokenizer.from_pretrained(model_name)
    if model is None:
        model = BartForConditionalGeneration.from_pretrained(model_name)

    generated_summaries = []
    for input_text in df[input_column_name]:
        # Missing or blank cells cannot be tokenized; record an empty summary
        # so the new column stays aligned with the rows.
        if pd.isna(input_text) or not str(input_text).strip():
            generated_summaries.append("")
            continue

        # Tokenize and generate summary
        encoded = tokenizer(
            str(input_text),
            return_tensors="pt",
            max_length=1024,
            truncation=True,
            padding=True,
        )
        summary_ids = model.generate(
            encoded["input_ids"],
            attention_mask=encoded["attention_mask"],
            num_beams=4,
            max_length=150,
            early_stopping=True,
        )

        # Decode and append the generated summary
        generated_summaries.append(
            tokenizer.decode(summary_ids[0], skip_special_tokens=True)
        )

    # Add the generated summaries to the DataFrame as a new column
    df[output_column_name] = generated_summaries

    # Default to writing next to the input file rather than to a path that
    # only exists on one machine.
    if output_csv_file is None:
        output_csv_file = (
            Path(csv_file_path)
            .with_name("output_with_generated_summaries.csv")
            .as_posix()
        )
    df.to_csv(output_csv_file, index=False)

    print(f"Generated summaries added to '{output_column_name}' column and saved to {output_csv_file}")
    return df

In [2]:
# --- Notebook configuration -------------------------------------------------
# Pretrained checkpoint used for summarization.
model_name = "facebook/bart-large-cnn"

# Input data. NOTE(review): absolute, machine-specific path; adjust before
# running this notebook elsewhere.
csv_file_path = "C:/Users/ASUS/Downloads/Assignment/data.csv"

# Column roles: source text, reference summary, and the column that receives
# the model's output.
input_column_name = "Content"
target_column_name = "Summary"
output_column_name = "Generated_Summary"

In [3]:
# Read only the first 10 rows so the exploratory cells below stay fast.
df = pd.read_csv(csv_file_path, nrows=10)


In [4]:
# Inspect the loaded sample: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  10 non-null     int64 
 1   ID          10 non-null     object
 2   Content     10 non-null     object
 3   Summary     10 non-null     object
 4   Dataset     10 non-null     object
dtypes: int64(1), object(4)
memory usage: 532.0+ bytes


In [5]:
# Load the BART tokenizer and the conditional-generation model for the
# checkpoint named by `model_name`; both are reused by the cells below.
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)

In [7]:
# Show the first source document, using the configured input column so this
# cell stays in sync with the configuration cell.
df[input_column_name].iloc[0]

"New York police are concerned drones could become tools for terrorists, and are investigating ways to stop potential attacks. Until now police haven't acknowledged drones as a potential weapon, but the NYPD has now said the technology has advanced enough that someone could use them to carry out an air assault using chemical weapons and firearms. Police want to develop technology which will allow them to take control of drones as well as scan the skies for them before major events. The NYPD says drones carrying explosives are the number one threat as they investigate ways to stop attacks . Deputy Chief Salvatore DiPace, left, was concerned about an incident last year where a drone was landed in front of German Chancellor Angela Merkel and 'could have took the chancellor and her people out' A drone which was flown over a packed football stadium in Manchester, England, just over a week ago, resulting in the suspected pilot being arrested . They are consulting with the military and member

In [8]:
# Show the reference summary for the same row, using the configured target
# column rather than a hard-coded name.
df[target_column_name].iloc[0]

'Police have investigated criminals who have rigged guns to drones .\nAnd are working with the military to develop technology to stop attacks .\nIncidents involving drones have drastically increased in New York City .\nLast year a drone was landed in front of German Chancellor Angela Merkel .'

In [9]:
# Text of the first row, taken from the configured input column so that
# changing `input_column_name` also changes what is fed to the model.
input_text = df[input_column_name].iloc[0]

In [10]:
# Encode the text as PyTorch tensors, truncating to at most 1024 tokens.
# Despite its name, `input_ids` holds the tokenizer's whole output; the token
# ids themselves are read from its "input_ids" entry in the next cell.
input_ids = tokenizer(
    input_text,
    return_tensors="pt",
    max_length=1024,
    truncation=True,
    padding=True,
)

In [11]:
# Generate the summary with 4-beam search, capped at 150 tokens. The attention
# mask produced by the tokenizer is passed explicitly so generation does not
# have to infer it from the token ids.
summary_ids = model.generate(
    input_ids["input_ids"],
    attention_mask=input_ids["attention_mask"],
    num_beams=4,
    max_length=150,
    early_stopping=True,
)


In [12]:
# Decode the first generated sequence back to text, dropping special tokens,
# and print it for comparison with the reference summary shown earlier.
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
print("Generated Summary:", summary)

Generated Summary: New York police are concerned drones could become tools for terrorists. They are investigating ways to stop potential attacks using drones. Police want to develop technology which will allow them to take control of drones as well as scan the skies for them before major events. The NYPD hasn't received any intelligence indicating there is an imminent threat.
