In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
def preprocess_fabsa(csv_path):
    """Flatten a FABSA-style CSV into one row per (sentence, aspect, sentiment).

    The CSV must have a ``text`` column and a ``labels`` column. Each ``labels``
    cell is the string form of a Python list whose items look like
    ``"<aspect_main>.<aspect_sub>.<sentiment>"``. Items that do not contain
    exactly two dots are skipped.

    Args:
        csv_path: Path (or any object accepted by ``pd.read_csv``) to the CSV.

    Returns:
        pd.DataFrame with columns ``sentence``, ``aspect`` (``"main.sub"``) and
        ``sentiment`` (int). The columns are present even when no label matched.

    Raises:
        ValueError / SyntaxError: if a ``labels`` cell is not a valid Python
            literal, or a sentiment part is not an integer.
    """
    # Local import so the cell does not depend on the notebook's import cell.
    import ast

    df = pd.read_csv(csv_path)

    all_samples = []

    # zip over the two columns avoids building a Series per row (iterrows).
    for sentence, raw_labels in zip(df['text'], df['labels']):
        # literal_eval only parses literals; unlike eval() it cannot execute
        # code that happens to be stored in the CSV.
        label_list = ast.literal_eval(raw_labels)

        for label in label_list:
            if label.count('.') == 2:
                aspect_main, aspect_sub, sentiment = label.split('.')
                all_samples.append({
                    "sentence": sentence,
                    "aspect": f"{aspect_main}.{aspect_sub}",
                    "sentiment": int(sentiment),
                })

    # Explicit columns keep the schema stable when all_samples is empty.
    return pd.DataFrame(all_samples, columns=["sentence", "aspect", "sentiment"])

# Example: parse the raw FABSA CSV (path is relative to the notebook's working
# directory) into one row per (sentence, aspect, sentiment), then display it.
df = preprocess_fabsa("fabsa_dataset.csv")
df

Unnamed: 0,sentence,aspect,sentiment
0,Literally only place I get my shoes,company-brand.general-satisfaction,1
1,Will not let me login,account-management.account-access,-1
2,"I've already reviewed it, so please let me sig...",account-management.account-access,0
3,Easy to use and price is good,purchase-booking-experience.ease-of-use,1
4,Easy to use and price is good,value.price-value-for-money,1
...,...,...,...
18663,"Now when you look in a city, you only have tou...",purchase-booking-experience.ease-of-use,-1
18664,"Now when you look in a city, you only have tou...",company-brand.general-satisfaction,-1
18665,Great App easy to use. Love the posts from peo...,company-brand.reviews,1
18666,Great App easy to use. Love the posts from peo...,online-experience.app-website,1


In [None]:
# Shift the raw sentiment codes (-1/0/1) to the 0-based class ids 0/1/2 that
# the 3-way softmax classifier is trained on.
label_map = {-1: 0, 0: 1, 1: 2}  # negative, neutral, positive
mapped_labels = df['sentiment'].map(label_map)

# Series.map returns NaN for any value missing from the dict, which would
# silently turn the label column into floats; fail loudly instead.
unmapped = df.loc[mapped_labels.isna(), 'sentiment'].unique()
if len(unmapped):
    raise ValueError(f"Unexpected sentiment values: {unmapped.tolist()}")

df['label'] = mapped_labels.astype('int64')

In [None]:
df

  pd.to_datetime(column, errors="raise")


Unnamed: 0,sentence,aspect,sentiment,label
0,Literally only place I get my shoes,company-brand.general-satisfaction,1,2
1,Will not let me login,account-management.account-access,-1,0
2,"I've already reviewed it, so please let me sig...",account-management.account-access,0,1
3,Easy to use and price is good,purchase-booking-experience.ease-of-use,1,2
4,Easy to use and price is good,value.price-value-for-money,1,2
...,...,...,...,...
18663,"Now when you look in a city, you only have tou...",purchase-booking-experience.ease-of-use,-1,0
18664,"Now when you look in a city, you only have tou...",company-brand.general-satisfaction,-1,0
18665,Great App easy to use. Love the posts from peo...,company-brand.reviews,1,2
18666,Great App easy to use. Love the posts from peo...,online-experience.app-website,1,2


In [None]:
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
df['aspect'].unique()

array(['company-brand.general-satisfaction',
       'account-management.account-access',
       'purchase-booking-experience.ease-of-use',
       'value.price-value-for-money', 'online-experience.app-website',
       'staff-support.attitude-of-staff', 'logistics-rides.speed',
       'staff-support.email', 'company-brand.reviews',
       'company-brand.competitor', 'value.discounts-promotions',
       'staff-support.phone'], dtype=object)

In [None]:
# df['input_text'] = df['sentence'] + ' [SEP] ' + df['aspect']
def insert_asp_markers(sentence, aspect):
    """Wrap the aspect in ``[ASP]``/``[/ASP]`` tags inside the sentence.

    Every occurrence of ``aspect`` found in ``sentence`` is replaced by its
    tagged form. When the aspect does not occur at all, the tagged aspect is
    appended to the sentence after a single space.

    Args:
        sentence: The text to mark up.
        aspect: The aspect string to tag.

    Returns:
        The sentence containing the tagged aspect.
    """
    tagged = f"[ASP]{aspect}[/ASP]"

    # Aspect absent from the text: append the tag at the end.
    if aspect not in sentence:
        return sentence + " " + tagged

    return sentence.replace(aspect, tagged)
# Build the model input column: axis=1 hands each row to the lambda, which tags
# that row's aspect in (or appends it to) the row's sentence.
df['marked_text'] = df.apply(lambda row: insert_asp_markers(row['sentence'], row['aspect']), axis=1)
df

Unnamed: 0,sentence,aspect,sentiment,label,marked_text
0,Literally only place I get my shoes,company-brand.general-satisfaction,1,2,Literally only place I get my shoes [ASP]compa...
1,Will not let me login,account-management.account-access,-1,0,Will not let me login [ASP]account-management....
2,"I've already reviewed it, so please let me sig...",account-management.account-access,0,1,"I've already reviewed it, so please let me sig..."
3,Easy to use and price is good,purchase-booking-experience.ease-of-use,1,2,Easy to use and price is good [ASP]purchase-bo...
4,Easy to use and price is good,value.price-value-for-money,1,2,Easy to use and price is good [ASP]value.price...
...,...,...,...,...,...
18663,"Now when you look in a city, you only have tou...",purchase-booking-experience.ease-of-use,-1,0,"Now when you look in a city, you only have tou..."
18664,"Now when you look in a city, you only have tou...",company-brand.general-satisfaction,-1,0,"Now when you look in a city, you only have tou..."
18665,Great App easy to use. Love the posts from peo...,company-brand.reviews,1,2,Great App easy to use. Love the posts from peo...
18666,Great App easy to use. Love the posts from peo...,online-experience.app-website,1,2,Great App easy to use. Love the posts from peo...


In [None]:
df

Unnamed: 0,sentence,aspect,sentiment,label,marked_text
0,Literally only place I get my shoes,company-brand.general-satisfaction,1,2,Literally only place I get my shoes [ASP]compa...
1,Will not let me login,account-management.account-access,-1,0,Will not let me login [ASP]account-management....
2,"I've already reviewed it, so please let me sig...",account-management.account-access,0,1,"I've already reviewed it, so please let me sig..."
3,Easy to use and price is good,purchase-booking-experience.ease-of-use,1,2,Easy to use and price is good [ASP]purchase-bo...
4,Easy to use and price is good,value.price-value-for-money,1,2,Easy to use and price is good [ASP]value.price...
...,...,...,...,...,...
18663,"Now when you look in a city, you only have tou...",purchase-booking-experience.ease-of-use,-1,0,"Now when you look in a city, you only have tou..."
18664,"Now when you look in a city, you only have tou...",company-brand.general-satisfaction,-1,0,"Now when you look in a city, you only have tou..."
18665,Great App easy to use. Love the posts from peo...,company-brand.reviews,1,2,Great App easy to use. Love the posts from peo...
18666,Great App easy to use. Love the posts from peo...,online-experience.app-website,1,2,Great App easy to use. Love the posts from peo...


In [None]:
# filters='' turns off the Tokenizer's punctuation stripping, so the bracket
# characters of the [ASP]...[/ASP] markers stay part of their tokens; words not
# seen during fitting are mapped to the "<OOV>" token.
# NOTE(review): the vocabulary is fitted on every row of df, including rows the
# later train/test split assigns to the test set — confirm this is intended.
tokenizer = Tokenizer(oov_token="<OOV>", filters='')
tokenizer.fit_on_texts(df['marked_text'])

# Turn each marked sentence into a list of integer word ids, then pad at the end
# (padding='post') so all rows have equal length. No maxlen is given, so the
# width is presumably that of the longest sequence — see pad_sequences docs.
sequences = tokenizer.texts_to_sequences(df['marked_text'])
padded_sequences = pad_sequences(sequences, padding='post')
# Class ids from the 'label' column, aligned row-for-row with padded_sequences.
labels = df['label'].values

In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# split is reproducible.
# NOTE(review): df has one row per (sentence, aspect), so the same sentence can
# land in both train and test with a row-level split — consider a split grouped
# by sentence if that overlap matters for the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=0.2, random_state=42
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dense, Dropout

# +1 because Tokenizer word ids start at 1 (the OOV token is already inside
# word_index); id 0 is left for the padding value written by pad_sequences.
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 100
# Width of the padded id matrix. This name was previously used without being
# defined anywhere, so the cell failed with NameError on a fresh kernel.
max_len = X_train.shape[1]

# Embedding -> BiLSTM (final state only) -> dropout -> dense head with a 3-way
# softmax over the negative / neutral / positive class ids.
# NOTE(review): newer Keras releases flag `input_length` as deprecated; it is
# kept here to preserve the original layer configuration — confirm and drop if
# the installed version warns.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_len),
    Bidirectional(LSTM(64, return_sequences=False)),
    Dropout(0.3),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax')  # 3 classes: neg, neu, pos
])

# Sparse loss because the targets are integer class ids, not one-hot vectors.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.summary()



In [None]:
# Train for 5 epochs in mini-batches of 32, reporting loss/accuracy on the
# held-out split after every epoch.
# NOTE(review): validation_data is the same (X_test, y_test) that is scored as
# the "test" set afterwards, so any tuning based on these curves makes the final
# test accuracy optimistic — consider a separate validation split.
model.fit(X_train, y_train, validation_data=(X_test, y_test),
          batch_size=32, epochs=5)


Epoch 1/5
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m385s[0m 810ms/step - accuracy: 0.8034 - loss: 0.5066 - val_accuracy: 0.9207 - val_loss: 0.2358
Epoch 2/5
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m373s[0m 798ms/step - accuracy: 0.9372 - loss: 0.1886 - val_accuracy: 0.9274 - val_loss: 0.2114
Epoch 3/5
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m386s[0m 806ms/step - accuracy: 0.9499 - loss: 0.1332 - val_accuracy: 0.9234 - val_loss: 0.2119
Epoch 4/5
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m378s[0m 799ms/step - accuracy: 0.9598 - loss: 0.1038 - val_accuracy: 0.9253 - val_loss: 0.2220
Epoch 5/5
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m369s[0m 791ms/step - accuracy: 0.9637 - loss: 0.0937 - val_accuracy: 0.9197 - val_loss: 0.2558


<keras.src.callbacks.history.History at 0x7dd82f6b3790>

In [None]:
# Score the trained model on the held-out split; evaluate returns the loss
# followed by the compiled metric (accuracy), printed here to two decimals.
loss, acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {acc:.2f}")

[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 164ms/step - accuracy: 0.9183 - loss: 0.2547
Test Accuracy: 0.92


In [None]:
def predict_sentiment(text, aspect):
    """Predict the sentiment expressed in ``text`` towards ``aspect``.

    Args:
        text: The sentence to classify.
        aspect: The aspect the sentiment refers to.

    Returns:
        One of ``"Negative"``, ``"Neutral"`` or ``"Positive"``.
    """
    # Build the input the same way the training rows were built. The previous
    # "text [SEP] aspect" form never appeared in the data the tokenizer and
    # model were fitted on.
    combined = insert_asp_markers(text, aspect)
    seq = tokenizer.texts_to_sequences([combined])
    # Pad/truncate to the width of the training matrix. The undefined global
    # `max_len` used before raised NameError on a fresh kernel.
    padded = pad_sequences(seq, maxlen=padded_sequences.shape[1], padding='post')
    pred = model.predict(padded)
    label_map_inv = {0: "Negative", 1: "Neutral", 2: "Positive"}
    # pred has a single row, so the flat argmax is the class id.
    return label_map_inv[int(pred.argmax())]

# NOTE(review): "camera" is not one of the aspect labels listed by
# df['aspect'].unique() above, so this call probes the model with an aspect it
# was presumably never trained on — treat the printed label with caution.
print(predict_sentiment("camera was good but processor was slow", "camera")) # camera here is the aspect we're referring to

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 123ms/step
Positive
