In [1]:
import pandas as pd
from transformers import AutoTokenizer
from tqdm.contrib.concurrent import process_map
import pickle
import torch
from transformers import pipeline
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import tqdm
from transformers import RobertaTokenizer, RobertaModel
from transformers import get_linear_schedule_with_warmup, AdamW
from tqdm import tqdm
from torch.optim.lr_scheduler import CosineAnnealingLR
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer


2024-07-08 14:23:14.844158: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
#IMPORTING DATASET
fp = '/data/nmamit-interns/grp3/new/train.csv'
df=pd.read_csv(fp, header=None)
col=['polarity', 'review']
df.columns = col
df.head()

Unnamed: 0,polarity,review
0,1,"Unfortunately, the frustration of being Dr. Go..."
1,2,Been going to Dr. Goldberg for over 10 years. ...
2,1,I don't know what Dr. Goldberg was like before...
3,1,I'm writing this review to give you a heads up...
4,2,All the food is great here. But the best thing...


In [5]:
#REMOVING sentences with length>512
mask = df['review'].apply(lambda review: len(review.split()) <= 512)
df = df[mask].reset_index(drop=True)
#USING CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'
#SPLITTING DATASET
from sklearn.model_selection import train_test_split
train_df, val_df=train_test_split(df, test_size=0.2, random_state=42)

#TILL HERE 1

In [6]:
#TEXT CLEANING
def clean_text(text):
    # Convert to lowercase
    text = text.lower()   
    # Remove HTML tags
    text = re.sub(r'<.*?>', '', text)  
    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)    
    # Tokenize
    tokens = word_tokenize(text)    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [word for word in tokens if word not in stop_words]    
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    tokens = [lemmatizer.lemmatize(word) for word in tokens]    
    # Join tokens back into string
    cleaned_text = ' '.join(tokens)    
    return cleaned_text

In [79]:
#CLEANING TEXT
tqdm.pandas()  # Use tqdm for progress bar
train_df['review'] = train_df['review'].progress_apply(clean_text)

100%|█████████████████████████████████████████████████████████████████████████| 439734/439734 [05:09<00:00, 1421.90it/s]


In [7]:
#TOKENIZATION FUNCTION
tok=RobertaTokenizer.from_pretrained('roberta-base')
def batch_tokenize(batch):
    return tok(
        batch,
        add_special_tokens=True,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_tensors='pt'
    )

In [82]:
#TOKENIZATION
ei=[]
for a in tqdm(train_df['review'], desc='Tokenization'):
    e=batch_tokenize(a)
    ei.append(e)

Tokenization: 100%|███████████████████████████████████████████████████████████| 439734/439734 [05:07<00:00, 1428.35it/s]


In [83]:
#YOU HAVE TO EXECUTE THIS EVERYTIME
val_ei=[]
for a in tqdm(val_df['review'], desc='Tokenization'):
    e=batch_tokenize(a)
    val_ei.append(e)

Tokenization: 100%|███████████████████████████████████████████████████████████| 109934/109934 [01:18<00:00, 1404.26it/s]


In [141]:
#SAVING DATA
save_path = '/data/nmamit-interns/grp3/new/train_tokdata.pkl'
with open(save_path, 'wb') as f:
    pickle.dump((ei, train_df['polarity'].tolist()), f)
with open(save_path, 'wb') as f:
    pickle.dump((val_ei, val_df['polarity'].tolist()),f)

In [59]:
#FROM HERE
#Importing tokenized data from Drive
save_path = '/data/nmamit-interns/grp3/new/tokdata.pkl'
with open(save_path, 'rb') as f:
    ei, labels = pickle.load(f)

In [84]:
#Initializing inputs for training
inp=[]
mask=[]
for e in ei:
    inp.append(e['input_ids'])
    mask.append(e['attention_mask'])
labels=torch.tensor(train_df['polarity'].tolist(), dtype=torch.float32)-1


#Initializing inputs for validation
val_inp=[]
val_mask=[]
for e in val_ei:
    val_inp.append(e['input_ids'])
    val_mask.append(e['attention_mask'])
val_labels=torch.tensor(val_df['polarity'].tolist(), dtype=torch.float32)-1


In [85]:
print(len(labels), len(train_df), len(ei), len(lab), len(train_df))

439734 439734 439734 494701 439734


In [13]:
#creating a Dataset
class ReviewDataset(Dataset):
  def __init__(self, review, attention_mask, labels):
    self.review=review
    self.attention_mask=attention_mask
    self.labels=labels
  def __getitem__(self, idx):
    return self.review[idx], self.attention_mask[idx], self.labels[idx]
  def __len__(self):
    return len(self.review)
      
#train_ds=ReviewDataset(inp, mask, labels)


In [7]:
#Creating training and validation training loaders
train_loader=DataLoader(train_ds, batch_size=16, shuffle=True)


NameError: name 'train_ds' is not defined

In [97]:
save_dir = '/data/nmamit-interns/grp3/new/'
tl_path = os.path.join(save_dir, 'train_loader.pkl')
val_path = os.path.join(save_dir, 'val_loader.pkl')

os.makedirs(save_dir, exist_ok=True)

with open(tl_path, 'wb') as f:
    pickle.dump(train_loader, f)



In [98]:
with open(val_path, 'wb') as f:
    pickle.dump(val_loader, f)

In [5]:
import os
save_dir = '/data/nmamit-interns/grp3/new/'
tl_path = os.path.join(save_dir, 'train_loader.pkl')
val_path = os.path.join(save_dir, 'val_loader.pkl')
with open(tl_path, 'rb') as f:
    train_loader = pickle.load(f)
with open(val_path, 'rb') as f:
    val_loader=pickle.load(f)

In [63]:
#DIVIDING TRAINING LOADER BECAUSE TRAINING SIZE TOO LARGE
from torch.utils.data import DataLoader, Dataset, random_split

num_parts = 10
train_ds=train_loader.dataset
dataset_length = len(train_ds)
split_lengths = [dataset_length // num_parts] * num_parts
split_lengths[-1] += dataset_length % num_parts

# Split the dataset
train_subsets = random_split(train_ds, split_lengths)

train_loaders = [DataLoader(subset, batch_size=8, shuffle=True) for subset in train_subsets]

for i, loader in enumerate(train_loaders):
    print(f"DataLoader {i+1} length: {len(loader.dataset)}")

DataLoader 1 length: 43973
DataLoader 2 length: 43973
DataLoader 3 length: 43973
DataLoader 4 length: 43973
DataLoader 5 length: 43973
DataLoader 6 length: 43973
DataLoader 7 length: 43973
DataLoader 8 length: 43973
DataLoader 9 length: 43973
DataLoader 10 length: 43977


In [12]:
from torch import nn
class SentimentClassifier(nn.Module):
    def __init__(self, n_classes=2, dr=0.1):
        super(SentimentClassifier, self).__init__()
        self.roberta=RobertaModel.from_pretrained('roberta-base')
        #for p in self.roberta.parameters():
            #p.requires_grad=False
        self.dropout=nn.Dropout(dr)
        self.lstm=nn.LSTM(input_size=self.roberta.config.hidden_size, hidden_size=256, num_layers=2, batch_first=True, bidirectional=True)
        self.fc=nn.Linear(512,128)
        self.output=nn.Linear(128,n_classes)
        self.softmax=nn.Softmax(dim=1)
    def forward(self, inp, am):
        ro=self.roberta(input_ids=inp, attention_mask=am)
        lhs=ro.last_hidden_state
        x=self.dropout(lhs)
        lstm_out,_=self.lstm(x)
        lstm_out=lstm_out[:,-1,:]
        x=torch.flatten(lstm_out, start_dim=1)
        x=torch.relu(self.fc(x))
        logits=self.output(x)
        output=self.softmax(logits)
        return output
model=SentimentClassifier()


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [154]:
opt=torch.optim.AdamW(model.parameters(), lr=2e-5)
from torch.optim.lr_scheduler import ReduceLROnPlateau

sdr = ReduceLROnPlateau(
    opt,
    mode='min',           # Since we're monitoring loss, we want to minimize it
    factor=0.1,           # Multiply LR by this factor when reducing
    patience=100,         # Number of batches with no improvement after which LR will be reduced
    threshold=1e-3,       # Threshold for measuring the new optimum
    threshold_mode='rel', # Interpret threshold as a relative change
    cooldown=0,           # Number of batches to wait before resuming normal operation after lr has been reduced
    min_lr=1e-6,          # Lower bound on the learning rate
    verbose=True          # Print a message when the LR is reduced
)
loss_fn=torch.nn.CrossEntropyLoss()


In [155]:
import torch
cp = '/data/nmamit-interns/grp3/new/roberta_bilstm.pt'

# Load the checkpoint
checkpoint = torch.load(cp)  # 'cp' is the path to your saved checkpoint file

# Extract the saved data
epoch = checkpoint['epoch']
model_state_dict = checkpoint['model_state_dict']
optimizer_state_dict = checkpoint['optimizer_state_dict']
scheduler_state_dict = checkpoint['scheduler_state_dict']
loss = checkpoint['loss']

# Load the model state
model.load_state_dict(model_state_dict)

# Load the optimizer state
opt.load_state_dict(optimizer_state_dict)

# Load the scheduler state
#scheduler.load_state_dict(scheduler_state_dict)

# Now your model, optimizer, and scheduler are restored to the saved state

In [91]:
model.to('cpu')

SentimentClassifier(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm):

In [156]:
def training(tl,cp):
    lossi=[]
    checkpoint = torch.load(cp)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    avg_loss=0
    for e in range(1):
        model.train()
        tot_loss=0
        tb=tqdm(tl, desc=f"Epoch {e+1} training: ")
        for i,batch in enumerate(tb):
            if i==5497:
                break
            opt.zero_grad()
            inp_id=batch[0].squeeze(1).to(device)
            am=batch[1].squeeze(1).to(device)
            lab=batch[2].to(device)
            output=model(inp_id, am)
            l1_norm = sum(p.abs().sum() for p in model.parameters())
            l2_norm = sum(p.pow(2.0).sum() for p in model.parameters())
            elastic_net_reg = 0.5 * l1_norm + 0.5 * l2_norm
        # Add the regularization term to your existing loss

            loss=loss_fn(output, lab.long())
            loss = loss + 0.2 * elastic_net_reg
            lossi.append(loss)
            tot_loss+=loss
            loss.backward()
            opt.step()
            tb.set_postfix({'loss':f'{loss.item()}'})
        tb.close()
        sdr.step()
        avg_loss=tot_loss/(len(tl)*16)
        print('Loss : ', avg_loss)
        
    return avg_loss

In [163]:
torch.cuda.empty_cache()

In [157]:
for b in train_loaders[0]:
    print(b[0].squeeze(1).shape)
    break

torch.Size([8, 512])


In [160]:
save_path = '/data/nmamit-interns/grp3/new/roberta_bilstm.pt'
l=training(train_loaders[1], save_path)
l.item()
#TILL HERE

Epoch 1 training:   0%|                                               | 6/5497 [00:05<1:29:26,  1.02it/s, loss=835422.0]


KeyboardInterrupt: 

In [164]:
import torch

def print_gpu_memory_usage():
    if torch.cuda.is_available():
        # Get the current GPU device
        device = torch.cuda.current_device()
        
        # Get the total and allocated memory
        total_memory = torch.cuda.get_device_properties(device).total_memory
        allocated_memory = torch.cuda.memory_allocated(device)
        reserved_memory = torch.cuda.memory_reserved(device)
        
        print(f"Total GPU memory: {total_memory / (1024**3):.2f} GB")
        print(f"Allocated GPU memory: {allocated_memory / (1024**3):.2f} GB")
        print(f"Reserved GPU memory: {reserved_memory / (1024**3):.2f} GB")
    else:
        print("No GPU available.")

# Example usage
print_gpu_memory_usage()


Total GPU memory: 79.14 GB
Allocated GPU memory: 34.88 GB
Reserved GPU memory: 36.17 GB


In [None]:
torch.save({
            'epoch':e,
            'model_state_dict':model.state_dict(),
            'optimizer_state_dict':opt.state_dict(),
            'scheduler_state_dict':sdr.state_dict(),
            'loss':l,
        },temp_path)

In [None]:
temp_path= '/data/nmamit-interns/grp3/new/temp_roberta_bilstm.pt'


In [16]:
tqdm.pandas()
val_df['review']=val_df['review'].progress_apply(clean_text)
#YOU HAVE TO EXECUTE THIS EVERYTIME
val_ei=[]
for a in tqdm(val_df['review'], desc='Tokenization'):
    e=batch_tokenize(a)
    val_ei.append(e)
val_inp=[]
val_mask=[]
for e in val_ei:
    val_inp.append(e['input_ids'])
    val_mask.append(e['attention_mask'])
val_labels=torch.tensor(val_df['polarity'].tolist(), dtype=torch.float32)-1
val_ds=ReviewDataset(val_inp, val_mask, val_labels)
val_loader=DataLoader(val_ds, batch_size=16, shuffle=True)

NameError: name 'clean_text' is not defined

In [17]:
#DIVIDING VALIDAITON LOADER
from torch.utils.data import DataLoader, Dataset, random_split

num_parts = 8

dataset_length = len(val_ds)
split_lengths = [dataset_length // num_parts] * num_parts
split_lengths[-1] += dataset_length % num_parts

# Split the dataset
val_subsets = random_split(val_ds, split_lengths)

val_loaders = [DataLoader(subset, batch_size=16, shuffle=True) for subset in val_subsets]

for i, loader in enumerate(val_loaders):
    print(f"DataLoader {i+1} length: {len(loader.dataset)}")

NameError: name 'val_ds' is not defined

In [18]:
#TILL HERE
def evaluate(validation_loader, checkpoint_path=''):
    m = model.to(device)
    m.eval()
    tot_loss = 0
    correct_predictions = 0
    total_predictions = 0

    with torch.no_grad():
        tb = tqdm(validation_loader, desc="Evaluating: ")
        for batch in tb:
            inp_id = batch[0].squeeze(1).to(device)
            am = batch[1].squeeze(1).to(device)
            lab = batch[2].to(device)

            output = m(inp_id, am)
            loss = loss_fn(output, lab.long())
            tot_loss += loss.item()
            
            _, predicted_labels = torch.max(output, 1)
            correct_predictions += (predicted_labels == lab).sum().item()
            total_predictions += lab.size(0)

        avg_loss = tot_loss / len(validation_loader)
        accuracy = correct_predictions / total_predictions

        print(f'Validation Loss: {avg_loss}')
        print(f'Validation Accuracy: {accuracy * 100:.2f}%')

    return avg_loss, accuracy

# Example usage
# avg_loss, accuracy = evaluate(validation_loader, checkpoint_path)
avg_loss, accuracy=evaluate(val_loader)

Evaluating: 100%|███████████████████████████████████████████████████████████████████| 859/859 [1:20:24<00:00,  5.62s/it]

Validation Loss: 0.45928381416378533
Validation Accuracy: 85.40%





In [None]:
#NO USEEEEEEEEEEEEEE
def validation():
    model.eval()
    tot_val_loss=0
    inp_id=batch[1][0].to(torch.long).to(device)
    mask=batch[1][1].to(torch.long).to(device)
    labels=batch[1][2].to(torch.long).to(device)
    outputs=m(inp_id, attention_mask=mask, labels=labels)
    loss=outputs.loss
    tot_val_loss+=loss.item()
    avg_val_loss=tot_val_loss/len(batch[1])
    print(f'Epoch {e+1} val loss: {avg_val_loss}')

In [8]:
#IMPORTING DATASET
fp = '/data/nmamit-interns/grp3/new/test.csv'
test_df=pd.read_csv(fp, header=None)
col=['polarity', 'review']
test_df.columns = col
test_df.head()

Unnamed: 0,polarity,review
0,2,"Contrary to other reviews, I have zero complai..."
1,1,Last summer I had an appointment to get new ti...
2,2,"Friendly staff, same starbucks fair you get an..."
3,1,The food is good. Unfortunately the service is...
4,2,Even when we didn't have a car Filene's Baseme...


In [9]:
#REMOVING sentences with length>512
mask = test_df['review'].apply(lambda review: len(review.split()) <= 512)
test_df = test_df[mask].reset_index(drop=True)
#USING CUDA
device = 'cuda' if torch.cuda.is_available() else 'cpu'
#SPLITTING DATASET
from sklearn.model_selection import train_test_split
_, test_df=train_test_split(test_df, test_size=0.1, random_state=42)

#TILL HERE 1

In [17]:
test_ei=[]
for a in tqdm(test_df['review'], desc='Tokenization'):
    e=batch_tokenize(a)
    test_ei.append(e)

Tokenization: 100%|████████████████████████████████████████████████████████████████| 3732/3732 [00:04<00:00, 905.14it/s]


In [18]:
save_path = '/data/nmamit-interns/grp3/new/test_tokdata.pkl'
with open(save_path, 'wb') as f:
    pickle.dump((test_ei, test_df['polarity'].tolist()), f)

In [19]:
inp=[]
mask=[]
for e in test_ei:
    inp.append(e['input_ids'])
    mask.append(e['attention_mask'])
labels=torch.tensor(test_df['polarity'].tolist(), dtype=torch.float32)-1


In [21]:
test_ds=ReviewDataset(inp, mask, labels)
test_loader=DataLoader(test_ds, batch_size=16, shuffle=True)

In [22]:
save_dir = '/data/nmamit-interns/grp3/new/'
test_path = os.path.join(save_dir, 'test_loader.pkl')

os.makedirs(save_dir, exist_ok=True)

with open(test_path, 'wb') as f:
    pickle.dump(test_loader, f)



In [14]:
import os
save_dir = '/data/nmamit-interns/grp3/new/'
test_path = os.path.join(save_dir, 'test_loader.pkl')
with open(test_path, 'rb') as f:
    test_loader=pickle.load(f)

In [43]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def predict_and_evaluate(test_loader):
    # Load the trained model from the checkpoint
    checkpoint = torch.load(cp)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.to(device)
    model.eval()
    
    all_labels = []
    all_predictions = []
    
    with torch.no_grad():
        tb = tqdm(test_loader, desc="Predicting: ")
        for batch in tb:
            inp_id = batch[0].squeeze(1).to(device)
            am = batch[1].squeeze(1).to(device)
            lab = batch[2].to(device)

            output = model(inp_id, am)
            _, predicted_labels = torch.max(output, 1)
            
            all_labels.extend(lab.cpu().numpy())
            all_predictions.extend(predicted_labels.cpu().numpy())
    
    accuracy = accuracy_score(all_labels, all_predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_predictions, average='weighted')
    
    print(f'Test Accuracy: {accuracy * 100:.2f}%')
    print(f'Test Precision: {precision * 100:.2f}%')
    print(f'Test Recall: {recall * 100:.2f}%')
    print(f'Test F1 Score: {f1 * 100:.2f}%')

    return accuracy, precision, recall, f1


In [44]:
acc, precision, recall, f1=predict_and_evaluate(test_loader)

Predicting: 100%|█████████████████████████████████████████████████████████████████████| 234/234 [03:06<00:00,  1.25it/s]

Test Accuracy: 85.88%
Test Precision: 86.12%
Test Recall: 85.88%
Test F1 Score: 85.84%





In [37]:
index=111
review=test_df.iloc[index]['review']
label=test_df.iloc[index]['polarity']-1
sent='Positive' if label==1 else 'Negative'
print(review)
print('Actual sentiment :',sent )
cleaned_text=clean_text(review)
tokenized_text=batch_tokenize(cleaned_text)
model.to('cpu')
model.eval()
with torch.no_grad():
    outputs = model(tokenized_text['input_ids'], tokenized_text['attention_mask'])

predicted_class = torch.argmax(outputs, dim=1).item()
sentiment = "Positive" if predicted_class == 1 else "Negative"
print(f"The predicted sentiment is: {sentiment}")

Yes, loud music but who cares when it is GREAT music? Looking around, everyone was chair dancing, singing along, having a blast. Food was PERFECT!! Fabulous Vegas dinner experience- can't wait to go back to eat here again!  And yes, only  2 bathrooms, but walk a very short distance and take care of it....
Actual sentiment : Positive
The predicted sentiment is: Positive


In [24]:
wrongs={'review':[],'actual':[],'predicted':[]}
for _,a in tqdm(enumerate(test_df.iterrows()), desc='Checks'):
    review=a[1]['review']
    label=a[1]['polarity']-1
    cleaned_text=clean_text(review)
    tokenized_text=batch_tokenize(cleaned_text)
    model.to('cpu')
    model.eval()
    with torch.no_grad():
        outputs = model(tokenized_text['input_ids'], tokenized_text['attention_mask'])
    predicted_class = torch.argmax(outputs, dim=1).item()
    if predicted_class!=label:
        wrongs['review'].append(review)
        wrongs['actual'].append(label)
        wrongs['predicted'].append(predicted_class)


Checks: 3732it [13:50,  4.50it/s]


In [27]:
import csv
filename = '/data/nmamit-interns/grp3/new/wrongs_output.csv'

# Write the dictionary to a CSV file
with open(filename, 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    
    # Write the header
    writer.writerow(wrongs.keys())
    
    # Write the rows
    writer.writerows(zip(*wrongs.values()))

print(f'Dictionary saved to {filename}')

Dictionary saved to /data/nmamit-interns/grp3/new/wrongs_output.csv


In [31]:
for i in range(1818):
    print('Review : ',wrongs['review'][i])
    print('Actual : ', wrongs['actual'][i], '    Predicted : ',wrongs['predicted'][i])
    print()

Review :  WOW. THE ONLY THING THAT IS KEEPING THESE GUYS OPEN IS THEIR LOCATION AND FOOD NETWORK FAME.\n\nMost of their customers are mainly gonna be tourists and first timers. Located in a busy part of the Fremont Street Experience, the name of the restaurant and the words \"350 Pounds and Over Eat Free\" are definite eye catchers.\n\nThe Heart Attack Grill or \"HAG\" as it's come to be known, has actually lived up to it's namesake with actual heart attack incidents. As a matter of fact, while I am typing this review, I find out the unofficial spokesperson for HAG has died from you guessed it, an apparent heart attack. \nhttp://www.huffingtonpost.com/huff-wires/20130212/nv-heart-attack-grill-heart-attack/?utm_hp_ref=politics&ir=politics\n\nThis place must be bad because I have gone at least 3 paragraphs before even having to mention their food. Ok, quick descriptions. They cook their beef patties and fries with lard. Their milkshakes have actual butter in them. Literally they have a l

In [111]:
text="Despite the CEO's enthusiastic claims of 'record-breaking performance', savvy investors noted the clever accounting tricks used to inflate numbers, leading to a brief stock surge before the inevitable market correction."
cleaned_text=clean_text(text)
tokenized_text=batch_tokenize(cleaned_text)
model.to('cpu')
model.eval()
with torch.no_grad():
    outputs = model(tokenized_text['input_ids'], tokenized_text['attention_mask'])

predicted_class = torch.argmax(outputs, dim=1).item()
sentiment = "Positive" if predicted_class == 1 else "Negative"
print(f"The predicted sentiment is: {sentiment}")

The predicted sentiment is: Negative
