In [7]:
import pandas as pd
import spacy
import random
from textblob import TextBlob
from nltk.corpus import wordnet

In [8]:
df = pd.read_csv("mapped_summaries_l3.csv")

In [9]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Initialize the model and tokenizer
model_name = 'minwhoo/bart-base-negative-claim-generation'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [11]:
filtered_df = df[df["book_num"] < 5]
filtered_df

Unnamed: 0,model_size,book_num,summary_sentence_num,summary_sentence,text_chunk
0,175b,0,0,"Uncle Pros is helping Laurella, his sick niece...","THE BIRTH OF A WOMAN-CHILD\n\n""Whose cradle's ..."
1,175b,0,1,"Laurella explains that her husband, Consadine,...",selfish--said she'd like to know how I was goi...
2,175b,0,2,Pros takes the cradle outside to get the last ...,"rich, broken light from the cavernous fireplac..."
3,175b,0,3,Mavity sends Bud and Mandy Ann to ask her fath...,"\n""Well, you go ask Pap to look in the green c..."
4,175b,0,4,Pros finishes repairing the cradle and places ...,"she wants--ain't ye, Pretty?""\n\nAnd, having m..."
...,...,...,...,...,...
8936,175b,4,1731,"Troy then leaves the room, and Bathsheba burst...","tones of wretchedness and coaxing, ""I only rep..."
8937,175b,4,1732,"Once there, she regained consciousness and ask...",who were as usual gazed upon by the burghers w...
8938,175b,4,1733,Tall answered the door and said that Laban was...,"keep a secret, Coggan?""\n\n""You've proved me, ..."
8939,175b,4,1734,Boldwood stood in the passage for a moment,who were as usual gazed upon by the burghers w...


In [12]:
import torch
from tqdm import tqdm

# Function to transform a sentence using the model
def transform_sentences(sentences):
    batch = tokenizer(sentences, max_length=1024, padding=True, truncation=True, return_tensors="pt")
    with torch.no_grad():
        out = model.generate(batch['input_ids'], num_beams=5)
    return tokenizer.batch_decode(out, skip_special_tokens=True)

# Process sentences in batches
max_tokens = 1024
batch_size = 10 # Adjust this batch size as needed
modified_sentences = []

for start_idx in tqdm(range(0, len(filtered_df), batch_size), total=len(filtered_df)/10, desc="Processing summary claims"):
    batch_sentences = filtered_df['summary_sentence'][start_idx:start_idx + batch_size].tolist()
    modified_sentences.extend(transform_sentences(batch_sentences))

# Add the modified sentences to the dataframe
filtered_df['summary_sentence'] = modified_sentences

Processing summary claims: 895it [17:55,  1.20s/it]                           
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['summary_sentence'] = modified_sentences


In [13]:
df = filtered_df[filtered_df['summary_sentence'].str.endswith('.')]


In [14]:
df.to_csv("negative_claims.csv", index=False)

In [15]:
df

Unnamed: 0,model_size,book_num,summary_sentence_num,summary_sentence,text_chunk
2,175b,0,2,Pros takes the cradle outside to get the first...,"rich, broken light from the cavernous fireplac..."
4,175b,0,4,Pros finishes repair the cradle and removes th...,"she wants--ain't ye, Pretty?""\n\nAnd, having m..."
5,175b,0,5,"Laurella decide to name the baby Johnnie, afte...",hit the name that should 'a' went with the clo...
7,175b,0,7,"Johnnie's mother, Laurella, is absent from the...","walk. Her mother would get up too, and that wa..."
8,175b,0,8,Laurella reluctantly refuses to let Johnnie go...,"and able.""\n\nAnd sighingly--yet light-hearted..."
...,...,...,...,...,...
8930,175b,4,1725,Liddy denies that this is true.,"don't care.""\n\n""Who are you, then, who can so..."
8934,175b,4,1729,"She stops to rest for a moment, and then start...",\n\nON CASTERBRIDGE HIGHWAY\n\n\nFor a conside...
8936,175b,4,1731,"Troy then stays in the room, and Bathsheba bur...","tones of wretchedness and coaxing, ""I only rep..."
8937,175b,4,1732,"Once there, she regained consciousness and ask...",who were as usual gazed upon by the burghers w...
