# Requirements

In [None]:
from google.colab.drive import mount
mount("/content/drive")

Mounted at /content/drive


In [None]:
!pip install datasets transformers evaluate --upgrade accelerate

Collecting datasets
  Downloading datasets-2.13.1-py3-none-any.whl (486 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m486.2/486.2 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers
  Downloading transformers-4.31.0-py3-none-any.whl (7.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.4/7.4 MB[0m [31m20.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting evaluate
  Downloading evaluate-0.4.0-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.4/81.4 kB[0m [31m10.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting accelerate
  Downloading accelerate-0.21.0-py3-none-any.whl (244 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.2/244.2 kB[0m [31m28.5 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.7,>=0.3.0 (from datasets)
  Downloading dill-0.3.6-py3-none-any.whl (110 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.5/110.5 kB[0m [31m15.

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, TrainingArguments, AutoModelForTokenClassification, Trainer, AutoModelForSequenceClassification, GPT2LMHeadModel
from evaluate import load
from torch.utils.data import DataLoader, Dataset
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
import torch
import pandas as pd
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import re
from sklearn.metrics import f1_score, precision_score, recall_score

# Aspect Extraction

In [None]:
#Root of the dataset path
root = "/content/drive/MyDrive/NLP Projects"

In [None]:
#Downloading dataset
raw_datasets  = load_dataset("csv", data_files = f"{root}/absa_dataset.csv", split = "train").train_test_split(test_size = 0.2, seed = 42)

Downloading and preparing dataset csv/default to /root/.cache/huggingface/datasets/csv/default-a1a751683233ae68/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/default-a1a751683233ae68/0.0.0/eea64c71ca8b46dd3f537ed218fc9bf495d5707789152eb2764f5c78fa66d59d. Subsequent calls will reuse this data.


In [None]:
# Hugginface
checkpoint = "bert-base-uncased"

In [None]:
# Download Tokenizer
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [None]:
#Function that brings the samples in the data set into the format desired by the model

def tokenize_function(examples):
    tokenized = tokenizer(examples["text"], max_length = 128, padding = "max_length", truncation = True)
    start_list = [start + 1 for start in examples["start"]]
    end_list = [end + 1 for end in examples["end"]]
    return {"input_ids": tokenized.input_ids, "token_type_ids": tokenized.token_type_ids, "attention_mask": tokenized.attention_mask,
            "labels": [[1 if (i >= start and i < end) else 0 for i in range(128)] for start, end in zip(start_list, end_list)],
            "start" : start_list,
            "end" : end_list}

In [None]:
# Adjusting the data setting to the format desired by the model
tokenized_datasets = raw_datasets.map(tokenize_function, batched = True)

Map:   0%|          | 0/2745 [00:00<?, ? examples/s]

Map:   0%|          | 0/687 [00:00<?, ? examples/s]

In [None]:

train_aspect_tokens = [tokenizer.tokenize(text)[tokenized_datasets["train"]["start"][i] - 1: tokenized_datasets["train"]["end"][i] - 1] for i, text in enumerate(raw_datasets["train"]["text"])]
test_aspect_tokens = [tokenizer.tokenize(text)[tokenized_datasets["test"]["start"][i] - 1: tokenized_datasets["test"]["end"][i] - 1] for i, text in enumerate(raw_datasets["test"]["text"])]
aspect_tokens = train_aspect_tokens + test_aspect_tokens

In [None]:
training_args = TrainingArguments("output", evaluation_strategy = "epoch", num_train_epochs = 5, save_strategy = "epoch",
                                  load_best_model_at_end = True)

In [None]:
model = AutoModelForTokenClassification.from_pretrained(checkpoint, num_labels = 2, output_hidden_states = True)

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
def compute_metrics(eval_pred):
    metric = load("f1")
    logits, labels = eval_pred
    predictions = logits[0].argmax(-1).flatten()
    labels = labels.flatten()
    x = metric.compute(predictions = predictions, references = labels, average = None)
    x["f1"] = list(x["f1"])
    return x

In [None]:
trainer = Trainer(model,
                  training_args,
                  train_dataset = tokenized_datasets["train"],
                  eval_dataset = tokenized_datasets["test"],
                  compute_metrics = compute_metrics)

In [None]:
trainer.train()



Epoch,Training Loss,Validation Loss,F1
1,No log,0.012921,"[0.9975691804336306, 0.8139329805996472]"
2,0.019600,0.013753,"[0.9976777821955618, 0.8271128271128272]"
3,0.005000,0.016588,"[0.9977929524303454, 0.836114676936243]"
4,0.005000,0.019167,"[0.997809672027206, 0.8404701931150295]"
5,0.001200,0.021877,"[0.9978213758760605, 0.8403716216216216]"


Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Trainer is attempting to log a value of "[0.9975691804336306, 0.8139329805996472]" of type <class 'list'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9976777821955618, 0.8271128271128272]" of type <class 'list'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9977929524303454, 0.836114676936243]" of type <class 'list'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.997809672027206, 0.8404701931150295]" of type <class 'list'> for key "eval/f1" as a scalar. This invocation of Tensorboard's writer.add_scalar() is incorrect so we dropped this attribute.
Trainer is attempting to log a value of "[0.9978213758760605, 0.840371

TrainOutput(global_step=1720, training_loss=0.007539260855247808, metrics={'train_runtime': 462.9918, 'train_samples_per_second': 29.644, 'train_steps_per_second': 3.715, 'total_flos': 896574496550400.0, 'train_loss': 0.007539260855247808, 'epoch': 5.0})

In [None]:
def calculate_scores(predictions, labels, class_label):
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    for pred, true in zip(predictions, labels):
        pred_entities = extract_entities(pred, class_label)
        true_entities = extract_entities(true, class_label)

        for entity in pred_entities:
            if entity in true_entities:
                true_positives += 1
            else:
                false_positives += 1

        for entity in true_entities:
            if entity not in pred_entities:
                false_negatives += 1

    precision = true_positives / (true_positives + false_positives)
    recall = true_positives / (true_positives + false_negatives)
    f1 = (2 * precision * recall) / (precision + recall)

    return precision, recall, f1


def extract_entities(seq, class_label):
    entities = []
    start = None

    for i in range(len(seq)):
        if seq[i] == class_label:
            if start is None:
                start = i
        else:
            if start is not None:
                entities.append((start, i - 1))
                start = None

    if start is not None:
        entities.append((start, len(seq) - 1))

    return entities

In [None]:
tokenized_datasets = tokenized_datasets.remove_columns(["text"])
tokenized_datasets.set_format("torch")

test_dataloader = DataLoader(tokenized_datasets["test"], batch_size = 32)
scores =  []
for batch in test_dataloader:
    input_ids, attention_mask = batch["input_ids"].to("cuda"), batch["attention_mask"].to("cuda")
    predictions = model(input_ids, attention_mask = attention_mask).logits.argmax(-1)
    precision, recall, f1 = calculate_scores(predictions, batch["labels"], 1)
    scores.append([precision, recall, f1])

In [None]:
scores = np.array(scores).mean(0)

In [None]:
scores

array([0.75020013, 0.721875  , 0.73508541])

In [None]:
model.save_pretrained("/content/drive/MyDrive/emre_asena/bert_aspect")

# Sentiment Analysis v1 (Archive)

In [None]:
# Function that tries to find the aspect using the predictions of the token classification model
def find_aspects(x, texts):
    x = x.cpu().numpy()
    aspects = []
    for xx, text in zip(x, texts):
        num = int.from_bytes(np.packbits(xx), byteorder = "little")
        binary = bin(num)[2:]
        ones_sequences = binary.split('0')
        max_length = len(max(ones_sequences, key=len))
        max_sequences = [i for i, sequence in enumerate(ones_sequences) if len(sequence) == max_length]
        old_length = len(aspects)
        tokenized = tokenizer.tokenize(text)
        for index in max_sequences:
          start_index = sum(len(seq) for seq in ones_sequences[:index]) + index
          end_index = start_index + len(ones_sequences[index])
          if tokenized[start_index - 1 : end_index - 1] in aspect_tokens:
            aspects.append([start_index, end_index, tokenized[start_index - 1 : end_index - 1]])
            break
        if len(aspects) == old_length:
          start_index = sum(len(seq) for seq in ones_sequences[max_sequences[0]]) + max_sequences[0]
          end_index = start_index + len(ones_sequences[max_sequences[0]])
          aspects.append([start_index, end_index, tokenized[start_index - 1 : end_index - 1]])

    return aspects

In [None]:
# Convert the contents of the dataset to torch format and create DataLoader for batch operations
tokenized_datasets.set_format("torch")
train_dataloader = DataLoader(tokenized_datasets["train"], batch_size = 32)
test_dataloader = DataLoader(tokenized_datasets["test"], batch_size = 32)

In [None]:
c = 0
for batch in test_dataloader:
    input_ids, attention_mask = batch["input_ids"].to("cuda"), batch["attention_mask"].to("cuda")
    predictions = model(input_ids, attention_mask = attention_mask).logits.argmax(-1)
    aspect_predictions = find_aspects(predictions, batch["text"])
    for pred, y in zip(aspect_predictions, test_aspect_tokens):
        if pred[2] == y:
            c += 1
c / len(tokenized_datasets["test"])

KeyError: ignored

In [None]:
indices = [[], []]
for i, dataloader in enumerate([train_dataloader, test_dataloader]):
  with open(f"aspect_vectors_{i}","wb+") as f:
    all_batches = list()
    for batch in dataloader:
      input_ids, attention_mask = batch["input_ids"].to("cuda"), batch["attention_mask"].to("cuda")
      output = model(input_ids, attention_mask = attention_mask)
      predictions = output.logits.argmax(-1)
      indices[i].extend([found[:2] for found in find_aspects(predictions, batch['text'])])
      aspect_vectors = list()
      for j,sentence in enumerate(output.hidden_states[-1]): #sentence dediğimiz şey cümledeki tüm kelimelerin tokenları boyutu (128,768) 128 token sayım. her token uzunluğu 768
        aspect_vectors.append(sentence[indices[i][len(indices[i]) + j - 32][0]: indices[i][len(indices[i]) + j - 32][1]].mean(0).tolist())
      all_batches.extend(aspect_vectors)
    np.save(f, np.array(all_batches))

In [None]:
# Boyutları (N, 768), N: örnek sayısı, 768: vektör uzunluğu
x_train = np.load("aspect_vectors_0")
x_test = np.load("aspect_vectors_1")

In [None]:
y_train = np.array(raw_datasets["train"]["sentiment"])
y_test = np.array(raw_datasets["test"]["sentiment"])

In [None]:
def not_found_remover(X, y):
  not_found = list()
  for i in range(len(X)):
    if np.isnan(X[i]).sum() > 0:
      not_found.append(i)
  return np.delete(X, not_found, axis = 0), np.delete(y, not_found), not_found

In [None]:
x_train, y_train, train_not_found = not_found_remover(x_train, y_train)
x_test, y_test, test_not_found = not_found_remover(x_test, y_test)

In [None]:
classifier = LogisticRegression().fit(x_train,y_train)
print("Train Score: ", classifier.score(x_train, y_train))
print("Test Score: ", classifier.score(x_test, y_test))

In [None]:
ANN = MLPClassifier(hidden_layer_sizes = (200,150,80), random_state=1, max_iter=1000, alpha = 0.01, learning_rate_init = 0.01).fit(x_train, y_train)
print("Accuracy on Traing set: ",ANN.score(x_train, y_train))
print("Accuracy on Testing set: ",ANN.score(x_test, y_test))

In [None]:
def predict(x):
    inputs = tokenizer([x], padding = "max_length", max_length = 32, truncation = True, return_tensors = "pt").to("cuda")
    output = model(**inputs)
    aspect_location = find_aspects(output.logits.argmax(-1), [x])[0][:2]
    if not aspect_location[0] or not aspect_location[1] or aspect_location[0] >= aspect_location[1]:
      return None, None
    aspect = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][aspect_location[0]: aspect_location[1]])
    aspect_vector = output.hidden_states[-1][0][aspect_location[0]: aspect_location[1]].mean(0)
    if np.isnan(aspect_vector).sum() > 0:
        return aspect, None
    sentiment = classifier.predict(aspect_vector.cpu().detach().numpy().reshape(1, -1))
    return aspect, sentiment

# Sentiment Analysis v2

In [None]:
tokenized_datasets = tokenized_datasets.rename_columns({"labels": "aspect_labels", "sentiment": "labels"})

In [None]:
sentiment_training_args = TrainingArguments("sentiment_output", evaluation_strategy = "epoch", num_train_epochs = 5, save_strategy = "no",
                                            load_best_model_at_end = False)

In [None]:
# Sentiment analysis model(BERT + Linear Classifier)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels = 3)

In [None]:
def sentiment_compute_metrics(eval_pred):
    p = load("precision")
    r = load("recall")
    f1 = load("f1")
    logits, labels = eval_pred
    predictions = logits.argmax(-1)
    ps = p.compute(predictions = predictions, references = labels, average = "macro")
    rs = r.compute(predictions = predictions, references = labels, average = "macro")
    fs = f1.compute(predictions = predictions, references = labels, average = "macro")
    return {"p": ps, "r": rs, "f": fs}

In [None]:
sentiment_trainer = Trainer(sentiment_model,
                            sentiment_training_args,
                            train_dataset = tokenized_datasets["train"],
                            eval_dataset = tokenized_datasets["test"],
                            compute_metrics = sentiment_compute_metrics)

In [None]:
sentiment_trainer.train()

In [None]:
id2sentiment = {0: "negatif", 1: "nötr", 2: "pozitif"}

In [None]:
def predict(x):
    inputs = tokenizer([x], return_tensors = "pt").to("cuda")
    aspect = model(**inputs).logits.argmax(-1)[0]
    sentiment = id2sentiment[sentiment_model(**inputs).logits.argmax(-1)[0].item()]

In [None]:
predictions, labels = [], []
for batch in test_dataloader:
  input_ids, attention_mask = batch["input_ids"].to("cuda"), batch["attention_mask"].to("cuda")
  prediction = sentiment_model(input_ids, attention_mask = attention_mask).logits.argmax(-1)
  predictions.extend(prediction.cpu().tolist())
  labels.extend(batch["sentiment"].cpu().tolist())

In [None]:
precision = precision_score(predictions, labels, average = "macro")
recall = recall_score(predictions, labels, average = "macro")
f1 = f1_score(predictions, labels, average = "macro")

In [None]:
precision, recall, f1

# GPT

In [None]:
# Dataset class
class SentimentDataset(Dataset):
    def __init__(self, txt_list, label_list, aspect_list, tokenizer, max_length):
        # define variables
        self.input_ids = []
        self.attn_masks = []
        self.labels = []
        self.aspects = []
        # iterate through the dataset
        for txt, label, aspect in zip(txt_list, label_list, aspect_list):
            # prepare the text
            prep_txt = f'<|startoftext|>Review: {txt}<|pad|>Aspect: {aspect}<|pad|>Sentiment: {label}<|endoftext|>'
            print(prep_txt)
            # tokenize
            encodings_dict = tokenizer(prep_txt, truncation=True,
                                       max_length=max_length, padding="max_length")
            # append to list
            self.input_ids.append(torch.tensor(encodings_dict['input_ids']))
            self.attn_masks.append(torch.tensor(encodings_dict['attention_mask']))
            self.labels.append(label)
            self.aspects.append(aspect)

    def __len__(self):
        return len(self.input_ids)

    def __getitem__(self, idx):
        return self.input_ids[idx], self.attn_masks[idx], self.labels[idx],  self.aspects[idx]

# Data load function
def load_sentiment_dataset(df, tokenizer):
    x = df['Sentence'].tolist()
    label = df['polarity'].tolist()
    aspect = df['Aspect Term'].tolist()

    # format into SentimentDataset class
    train_dataset = SentimentDataset(x, label, aspect, tokenizer, max_length=512)
    # return
    return train_dataset, (x, label, aspect)

In [None]:
# checkpoint tanımla
model_name = "/content/drive/MyDrive/emre_asena/results/checkpoint-2700"
torch.manual_seed(42)

# model&tokenizer 
tokenizer = AutoTokenizer.from_pretrained("gpt2", bos_token='<|startoftext|>',
                                          eos_token='<|endoftext|>', pad_token='<|pad|>')
model = GPT2LMHeadModel.from_pretrained(model_name).cuda()
model.resize_token_embeddings(len(tokenizer))

In [None]:
df = pd.read_csv("/content/drive/MyDrive/emre_asena/processed_data.csv")

In [None]:
df = df[df["polarity"] != "neutral"]
df["Sentence"] = df["Sentence"].apply(lambda x: x.lower() if isinstance(x, str) else x)
df["Aspect Term"] = df["Aspect Term"].apply(lambda x: x.lower() if isinstance(x, str) else x)
df = df.drop_duplicates(subset=['Sentence'])
df = df.sample(frac=1, ignore_index= True)

In [None]:
train_df, test_df = train_test_split(df, test_size=0.1, random_state= 42, shuffle=True)

In [None]:
train_dataset, train_dataset_raw = load_sentiment_dataset(train_df,tokenizer)
test_dataset, test_dataset_raw = load_sentiment_dataset(test_df,tokenizer)

In [None]:

training_args = TrainingArguments(output_dir='/content/drive/MyDrive/emre_asena/results', num_train_epochs=4,
                                 load_best_model_at_end=True, save_strategy="epoch", evaluation_strategy="epoch",
                                 per_device_train_batch_size=2, per_device_eval_batch_size=2,
                                 warmup_steps=100, weight_decay=0.01)


trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset, eval_dataset=test_dataset,
                  data_collator=lambda data: {'input_ids': torch.stack([f[0] for f in data]),
                                              'attention_mask': torch.stack([f[1] for f in data]),
                                              'labels': torch.stack([f[0] for f in data])})

In [None]:

trainer.train()

In [None]:
_ = model.eval()

all_preds ,original_label, predicted_label, original_text, predicted_texts, original_aspect, predicted_aspect = [], [], [], [], [], [], []

# tüm test veri kümesinde prediction yap
for text, label, aspect in tqdm(zip(test_dataset_raw[0], test_dataset_raw[1], test_dataset_raw[2])):
    # create prompt (in compliance with the one used during training)
    prompt = f'<|startoftext|>Review: {text}\nAspect:'

    # generate tokens
    generated = tokenizer(f"{prompt}", return_tensors="pt").input_ids.cuda()
    # perform prediction
    sample_outputs = model.generate(generated, do_sample=False, top_k=50, max_length=512, top_p=0.90,
                                    temperature=0, num_return_sequences=0)
    # decode the predicted tokens into texts
    predicted_text  = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
    all_preds.append(predicted_text)
    # extract the predicted sentiment
    try:
        pred_aspect = re.findall("Aspect: (.*)Sentiment", predicted_text)[-1]

    except:
        pred_aspect = "None"

    try:
      pred_sentiment = re.findall("Sentiment: (.*)", predicted_text)[-1]
    except:
      pred_sentiment = "None"

    # append results
    original_label.append(label)
    predicted_label.append(pred_sentiment)
    original_text.append(text)
    predicted_texts.append(predicted_text)
    original_aspect.append(aspect)
    predicted_aspect.append(pred_aspect)

In [None]:
df = pd.DataFrame({'original_text': original_text, 'predicted_label': predicted_label,
                   'predicted output': all_preds,
                    'original_label': original_label, 'predicted_text': predicted_text,
                   'original_aspect': original_aspect, 'predicted_aspect': predicted_aspect})

In [None]:
print("f1_score label micro", f1_score(original_label, predicted_label, average='micro'))
print("f1_score label macro", f1_score(original_label, predicted_label, average='macro'))
print("f1_score aspect micro", f1_score(original_aspect, predicted_aspect, average='micro'))
print("f1_score aspect macro", f1_score(original_aspect, predicted_aspect, average='macro'))
print("precision_score label ", precision_score(df["original_label"], df["predicted_label"],  average='macro'))
print("recall_score label ", recall_score(df["original_label"], df["predicted_label"], average='macro'))
print("precision_score aspect", precision_score(df["original_aspect"], df["predicted_aspect"], average='macro'))
print("recall_score aspect ", recall_score(df["original_aspect"], df["predicted_aspect"], average='macro'))

In [None]:
df.to_csv("/content/drive/MyDrive/emre_asena/results/gpt2_results.csv")

# **Inference**

In [None]:
model_path = "/content/drive/MyDrive/emre_asena/results/checkpoint-2700"
tokenizer = AutoTokenizer.from_pretrained('gpt2')
trained_model = GPT2LMHeadModel.from_pretrained(model_path).cuda()

Downloading (…)lve/main/config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

Downloading (…)olve/main/vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

Downloading (…)olve/main/merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

In [None]:
def predict(text):
    # Prompt'u istenen formata getir
    prompt = f'<|startoftext|>Review: {text}\nAspect:'

    # Prompt'u tokenize et
    generated = tokenizer(prompt, return_tensors="pt").input_ids.cuda()

    # Prediction yap
    sample_outputs = trained_model.generate(generated, do_sample=False, top_k=50, max_length=512, top_p=0.90, temperature=0, num_return_sequences=0)

    # Prediction'ları detokenize ederek text'e çevir
    outputs= []
    for sample in sample_outputs[0]:
        try:
            outputs.append(tokenizer.decode(sample, skip_special_tokens= True))
        except:
            pass
        predicted_text = "".join(outputs)
        predicted_text = predicted_text.replace("<|endoftext|>","").replace("<|pad|>","").replace("<|startoftext|>","")

        # Regex ile text'in içinden modelin Aspect ve Sentiment prediction'larını çıkar
        try:
            pred_aspect = re.findall("Aspect: (.*)Sentiment", predicted_text)[-1]
        except:
            pred_aspect = "None"
        try:
            pred_sentiment = re.findall("Sentiment: (.*)", predicted_text)[-1]
        except:
            pred_sentiment = "None"

    # Prediction'ları ekrana yazdır
    print("\nAspect: ", pred_aspect)
    print("Sentiment: ", pred_sentiment)

In [None]:
#Example prompts
text_list = {0: "Considering the extra price of this device, I think it is not a good choice.",
             1: "I want to buy a Tesla T4 GPU but it is too expensive for me.",
             2: "This restaurant has an excellent view. I think I should come here more often.",
             3: "Desktops are more powerful computers than laptops but the mobility the laptops give us is a great plus."}

text = "The Neapolitan is the best shake period (secret menu!) and the double-double is the perfect meat/burger ratio"
#text = ""
#
predict(text)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Aspect:  shake period
Sentiment:  positive
