In [1]:
!pip install --q emoji underthesea pyvi==0.1.1

In [2]:
%cd '/content/drive/MyDrive/NLU_NCKH/notebook/'

/content/drive/MyDrive/NLU_NCKH/notebook


# Import libraty

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import re
# from python_calamine.pandas import pandas_monkeypatch

# Load data and format data -> csv

In [4]:
restaurant_train = './Res_ABSA/Train.txt'
restaurant_test = './Res_ABSA/Test.txt'
restaurant_dev = './Res_ABSA/Dev.txt'

aspects = ['SERVICE#GENERAL',
  'LOCATION#GENERAL',
  'RESTAURANT#GENERAL',
  'AMBIENCE#GENERAL',
  'FOOD#PRICES',
  'RESTAURANT#MISCELLANEOUS',
  'DRINKS#PRICES',
  'FOOD#QUALITY',
  'FOOD#STYLE&OPTIONS',
  'RESTAURANT#PRICES',
  'DRINKS#QUALITY',
  'DRINKS#STYLE&OPTIONS']

def aspect_polarity(label, aspects):
    ap_stm = re.findall('{(.+?), (\w+)}', label)
    aspects = [aspect for aspect, pol in ap_stm]
    polaritys = [pol for aspect, pol in ap_stm]
    polaritys = [0 if polarity == 'negative' else 1 if polarity == 'neutral' else 2 for polarity in polaritys]
    return aspects, polaritys

def init_data(df):
    X = df.pop('review')
    y = df.replace({'negative': 1,
                    'neutral': 2,
                    'positive': 3}).astype(np.uint8)

    print('X.shape:', X.shape, 'y.shape:', y.shape)
    return X, y

def txt2df(filepath, aspect):
    with open(filepath, 'r', encoding='utf-8-sig') as txt:
        data = txt.read().split('\n')

    df = pd.DataFrame()
    df['review'] = [review for review in data[1::4]]
    df['labels'] = [aspect for aspect in data[2::4]]
    df['aspects'], df['polaritys'] = zip(*[aspect_polarity(label, aspect) for label in data[2::4]])

    return df


train = txt2df(restaurant_train, aspects)
dev = txt2df(restaurant_dev, aspects)
test = txt2df(restaurant_test, aspects)

# Xtrain, ytrain = init_data(train)
# Xdev,   ydev   = init_data(dev)
# Xtest,  ytest  = init_data(test)

In [5]:
train.head()

Unnamed: 0,review,labels,aspects,polaritys
0,Giá 53k size vừa.,"{DRINKS#PRICES, neutral}, {DRINKS#STYLE&OPTION...","[DRINKS#PRICES, DRINKS#STYLE&OPTIONS]","[1, 1]"
1,Nhưng nói chung cũng hơi đắt.,"{RESTAURANT#PRICES, negative}",[RESTAURANT#PRICES],[0]
2,Mình ăn rất hôi mùi dầu.,"{FOOD#QUALITY, negative}",[FOOD#QUALITY],[0]
3,Mình ăn chưa baoh thấy mùi hôi hải sản.,"{FOOD#QUALITY, positive}",[FOOD#QUALITY],[2]
4,3 dĩa vs 2 lon Revive mà có 190k thui(.,"{RESTAURANT#PRICES, positive}",[RESTAURANT#PRICES],[2]


# Preprocessing

In [6]:
# https://github.com/nguyenvanhieuvn/text-classification-tutorial/blob/master/text_classification_tutorial.ipynb
# https://nguyenvanhieu.vn/phan-loai-van-ban-tieng-viet

import regex as re
import string
import emoji

#from vncorenlp import VnCoreNLP
import underthesea
from nltk import flatten


# Remove HTML code
def remove_HTML(text):
    return re.sub(r'<[^>]*>', '', text)

# Standardize unicode
def convert_unicode(text):
    char1252 = 'à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ'
    charutf8 = 'à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ'
    char1252 = char1252.split('|')
    charutf8 = charutf8.split('|')

    dic = {}
    for i in range(len(char1252)): dic[char1252[i]] = charutf8[i]
    return re.sub(
        r'à|á|ả|ã|ạ|ầ|ấ|ẩ|ẫ|ậ|ằ|ắ|ẳ|ẵ|ặ|è|é|ẻ|ẽ|ẹ|ề|ế|ể|ễ|ệ|ì|í|ỉ|ĩ|ị|ò|ó|ỏ|õ|ọ|ồ|ố|ổ|ỗ|ộ|ờ|ớ|ở|ỡ|ợ|ù|ú|ủ|ũ|ụ|ừ|ứ|ử|ữ|ự|ỳ|ý|ỷ|ỹ|ỵ|À|Á|Ả|Ã|Ạ|Ầ|Ấ|Ẩ|Ẫ|Ậ|Ằ|Ắ|Ẳ|Ẵ|Ặ|È|É|Ẻ|Ẽ|Ẹ|Ề|Ế|Ể|Ễ|Ệ|Ì|Í|Ỉ|Ĩ|Ị|Ò|Ó|Ỏ|Õ|Ọ|Ồ|Ố|Ổ|Ỗ|Ộ|Ờ|Ớ|Ở|Ỡ|Ợ|Ù|Ú|Ủ|Ũ|Ụ|Ừ|Ứ|Ử|Ữ|Ự|Ỳ|Ý|Ỷ|Ỹ|Ỵ',
        lambda x: dic[x.group()], text
    )


# Standardize accent typing
vowels_to_ids = {}
vowels_table = [
    ['a', 'à', 'á', 'ả', 'ã', 'ạ', 'a' ],
    ['ă', 'ằ', 'ắ', 'ẳ', 'ẵ', 'ặ', 'aw'],
    ['â', 'ầ', 'ấ', 'ẩ', 'ẫ', 'ậ', 'aa'],
    ['e', 'è', 'é', 'ẻ', 'ẽ', 'ẹ', 'e' ],
    ['ê', 'ề', 'ế', 'ể', 'ễ', 'ệ', 'ee'],
    ['i', 'ì', 'í', 'ỉ', 'ĩ', 'ị', 'i' ],
    ['o', 'ò', 'ó', 'ỏ', 'õ', 'ọ', 'o' ],
    ['ô', 'ồ', 'ố', 'ổ', 'ỗ', 'ộ', 'oo'],
    ['ơ', 'ờ', 'ớ', 'ở', 'ỡ', 'ợ', 'ow'],
    ['u', 'ù', 'ú', 'ủ', 'ũ', 'ụ', 'u' ],
    ['ư', 'ừ', 'ứ', 'ử', 'ữ', 'ự', 'uw'],
    ['y', 'ỳ', 'ý', 'ỷ', 'ỹ', 'ỵ', 'y' ]
]

for i in range(len(vowels_table)):
    for j in range(len(vowels_table[i]) - 1):
        vowels_to_ids[vowels_table[i][j]] = (i, j)


def is_valid_vietnamese_word(word):
    chars = list(word)
    vowel_indexes = -1
    for index, char in enumerate(chars):
        x, y = vowels_to_ids.get(char, (-1, -1))
        if x != -1:
            if vowel_indexes == -1: vowel_indexes = index
            else:
                if index - vowel_indexes != 1: return False
                vowel_indexes = index
    return True


def standardize_word_typing(word):
    if not is_valid_vietnamese_word(word): return word
    chars = list(word)
    dau_cau = 0
    vowel_indexes = []
    qu_or_gi = False

    for index, char in enumerate(chars):
        x, y = vowels_to_ids.get(char, (-1, -1))
        if x == -1: continue
        elif x == 9:  # check qu
            if index != 0 and chars[index - 1] == 'q':
                chars[index] = 'u'
                qu_or_gi = True
        elif x == 5:  # check gi
            if index != 0 and chars[index - 1] == 'g':
                chars[index] = 'i'
                qu_or_gi = True

        if y != 0:
            dau_cau = y
            chars[index] = vowels_table[x][0]

        if not qu_or_gi or index != 1:
            vowel_indexes.append(index)

    if len(vowel_indexes) < 2:
        if qu_or_gi:
            if len(chars) == 2:
                x, y = vowels_to_ids.get(chars[1])
                chars[1] = vowels_table[x][dau_cau]
            else:
                x, y = vowels_to_ids.get(chars[2], (-1, -1))
                if x != -1: chars[2] = vowels_table[x][dau_cau]
                else: chars[1] = vowels_table[5][dau_cau] if chars[1] == 'i' else vowels_table[9][dau_cau]
            return ''.join(chars)
        return word

    for index in vowel_indexes:
        x, y = vowels_to_ids[chars[index]]
        if x == 4 or x == 8:  # ê, ơ
            chars[index] = vowels_table[x][dau_cau]
            return ''.join(chars)

    if len(vowel_indexes) == 2:
        if vowel_indexes[-1] == len(chars) - 1:
            x, y = vowels_to_ids[chars[vowel_indexes[0]]]
            chars[vowel_indexes[0]] = vowels_table[x][dau_cau]
        else:
            x, y = vowels_to_ids[chars[vowel_indexes[1]]]
            chars[vowel_indexes[1]] = vowels_table[x][dau_cau]
    else:
        x, y = vowels_to_ids[chars[vowel_indexes[1]]]
        chars[vowel_indexes[1]] = vowels_table[x][dau_cau]
    return ''.join(chars)


def standardize_sentence_typing(text):
    words = text.lower().split()
    for index, word in enumerate(words):
        cw = re.sub(r'(^\p{P}*)([p{L}.]*\p{L}+)(\p{P}*$)', r'\1/\2/\3', word).split('/')
        if len(cw) == 3: cw[1] = standardize_word_typing(cw[1])
        words[index] = ''.join(cw)
    return ' '.join(words)


# Normalize acronyms
# !wget https://gist.githubusercontent.com/nguyenvanhieuvn/7d9441c10b3c2739499fc5a4d9ea06fb/raw/df939245b3e841b62af115be4dcb3516dadc9fc5/teencode.txt
replace_list = {
    'ô kêi': 'ok', 'okie': 'ok', 'o kê': 'ok', 'okey': 'ok', 'ôkê': 'ok', 'oki': 'ok', 'oke': 'ok', 'okay': 'ok', 'okê': 'ok',
    'tks': 'cảm ơn', 'thks': 'cảm ơn', 'thanks': 'cảm ơn', 'ths': 'cảm ơn', 'thank': 'cảm ơn',
    'kg': 'không', 'not': 'không', 'k': 'không', 'kh': 'không', 'kô': 'không', 'hok': 'không', 'ko': 'không', 'khong': 'không', 'kp': 'không phải',
    'he he': 'tích cực', 'hehe': 'tích cực', 'hihi': 'tích cực', 'haha': 'tích cực', 'hjhj': 'tích cực', 'thick': 'tích cực',
    'cc': 'tiêu cực', 'huhu': 'tiêu cực', 'cute': 'dễ thương',

    'sz': 'cỡ', 'size': 'cỡ',
    'wa': 'quá', 'wá': 'quá', 'qá': 'quá',
    'đx': 'được', 'dk': 'được', 'dc': 'được', 'đk': 'được', 'đc': 'được',
    'vs': 'với', 'j': 'gì', '“': ' ', 'time': 'thời gian', 'm': 'mình', 'mik': 'mình', 'r': 'rồi', 'bjo': 'bao giờ', 'very': 'rất',

    'authentic': 'chuẩn chính hãng', 'aut': 'chuẩn chính hãng', 'auth': 'chuẩn chính hãng', 'date': 'hạn sử dụng', 'hsd': 'hạn sử dụng',
    'store': 'cửa hàng', 'sop': 'cửa hàng', 'shopE': 'cửa hàng', 'shop': 'cửa hàng',
    'sp': 'sản phẩm', 'product': 'sản phẩm', 'hàg': 'hàng',
    'ship': 'giao hàng', 'delivery': 'giao hàng', 'síp': 'giao hàng', 'order': 'đặt hàng',

    'gud': 'tốt', 'wel done': 'tốt', 'good': 'tốt', 'gút': 'tốt', 'tot': 'tốt', 'nice': 'tốt', 'perfect': 'rất tốt',
    'quality': 'chất lượng', 'chất lg': 'chất lượng', 'chat': 'chất', 'excelent': 'hoàn hảo', 'bt': 'bình thường',
    'sad': 'tệ', 'por': 'tệ', 'poor': 'tệ', 'bad': 'tệ',
    'beautiful': 'đẹp tuyệt vời', 'dep': 'đẹp',
    'xau': 'xấu', 'sấu': 'xấu',

    'thik': 'thích', 'iu': 'yêu', 'fake': 'giả mạo',
    'quickly': 'nhanh', 'quick': 'nhanh', 'fast': 'nhanh',
    'fresh': 'tươi', 'delicious': 'ngon',

    'dt': 'điện thoại', 'fb': 'facebook', 'face': 'facebook', 'ks': 'khách sạn', 'nv': 'nhân viên',
    'nt': 'nhắn tin', 'ib': 'nhắn tin', 'tl': 'trả lời', 'trl': 'trả lời', 'rep': 'trả lời',
    'fback': 'feedback', 'fedback': 'feedback',
    'sd': 'sử dụng', 'sài': 'xài',

    '^_^': 'tích cực', ':)': 'tích cực', ':(': 'tiêu cực',
    '❤️': 'tích cực', '👍': 'tích cực', '🎉': 'tích cực', '😀': 'tích cực', '😍': 'tích cực', '😂': 'tích cực', '🤗': 'tích cực', '😙': 'tích cực', '🙂': 'tích cực',
    '😔': 'tiêu cực', '😓': 'tiêu cực',
    '⭐': 'star', '*': 'star', '🌟': 'star',
}

with open('/content/drive/MyDrive/NLU_NCKH/notebook/Res_ABSA/teencode.txt', encoding='utf-8') as f:
    for pair in f.readlines():
        key, value = pair.split('\t')
        replace_list[key] = value.strip()


def normalize_acronyms(text):
    words = []
    for word in text.strip().split():
        # word = word.strip(string.punctuation)
        if word.lower() not in replace_list.keys(): words.append(word)
        else: words.append(replace_list[word.lower()])
    return emoji.demojize(' '.join(words)) # Remove Emojis

# Remove unnecessary characters
def remove_unnecessary_characters(text):
    text = re.sub(r'[^\s\wáàảãạăắằẳẵặâấầẩẫậéèẻẽẹêếềểễệóòỏõọôốồổỗộơớờởỡợíìỉĩịúùủũụưứừửữựýỳỷỹỵđÁÀẢÃẠĂẮẰẲẴẶÂẤẦẨẪẬÉÈẺẼẸÊẾỀỂỄỆÓÒỎÕỌÔỐỒỔỖỘƠỚỜỞỠỢÍÌỈĨỊÚÙỦŨỤƯỨỪỬỮỰÝỲỶỸỴĐ_]', ' ', text)
    text = re.sub(r'\s+', ' ', text).strip() # Remove extra whitespace
    return text

def preprocess_fn(text, word_tokenize = None):
    text = text.strip()
    #tokens = sent.split()
    #for token in tokens:
        #for t in token:
            #if t.isnumeric() or t.isdigit():
                #tokens.remove(token)
                #break
    text = remove_HTML(text)
    text = convert_unicode(text)
    text = standardize_sentence_typing(text)
    text = normalize_acronyms(text)

    tokens = text.split()
    if word_tokenize is None:
        text= underthesea.word_tokenize(" ".join(tokens), format="text")
    else:
        #for using VnCoreNLP
        text = " ".join(word_tokenize.tokenize(" ".join(tokens))[0])


    text = remove_unnecessary_characters(text)
    return text.lower()

No module named 'fasttext'


## Save data preprocesed

In [7]:
# Save data was preprocessed
train['review'] = train['review'].apply(preprocess_fn)
dev['review'] = dev['review'].apply(preprocess_fn)
test['review'] = test['review'].apply(preprocess_fn)

# train.to_csv('res_data_preprocesed/train_res_MT.csv', index=False)
# dev.to_csv('res_data_preprocesed/dev_res_MT.csv', index=False)
# test.to_csv('res_data_preprocesed/test_res_MT.csv', index=False)

# Train

In [49]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm

class AspectDetector:
    def __init__(self, aspects):
        self.aspect_detectors = {a: LinearSVC() for a in aspects}
        self.polarity_detectors = {a: LinearSVC() for a in aspects}
        self.vectorizer = TfidfVectorizer(ngram_range=(1, 3))

    def fit(self, X, y_aspects, y_polarity):
        X_vec = self.vectorizer.fit_transform(X)
        # Train aspect detectors
        for a in tqdm(self.aspect_detectors, desc="Training Aspect Detector"):
          y_aspect = [1 if a in y_i else 0 for y_i in y_aspects]
          self.aspect_detectors[a].fit(X_vec, y_aspect)

        # Train polarity detectors
        for a in tqdm(self.polarity_detectors, desc="Training Polarity Detector"):
            y_polarity_a = []
            for i in range(len(y_aspects)):
                if a in y_aspects[i]:
                    idx = y_aspects[i].index(a)
                    y_polarity_a.append(y_polarity[i][idx])
                else:
                    y_polarity_a.append(1)
            self.polarity_detectors[a].fit(X_vec, y_polarity_a)

    def predict(self, X):
        X_vec = self.vectorizer.transform(X)
        y_pred_aspects = []
        y_pred_polarity = []
        y_combine = []
        for x in tqdm(X_vec, desc="Predicting", total=X_vec.shape[0]):
            aspects = []
            polarity = []
            aspects_polarity = []
            for a, d in self.aspect_detectors.items():
                if d.predict(x)[0] == 1:
                    aspects.append(a)
                    pol_pred = self.polarity_detectors[a].predict(x)[0]
                    polarity.append(pol_pred)
                    aspects_polarity.append(a + "#" + ("positive" if pol_pred == 0 else "neutral" if pol_pred == 1 else "negative"))
            if not aspects:
                aspects = ['RESTAURANT#GENERAL']
                polarity = [1]
                aspects_polarity = ["RESTAURANT#GENERAL#neutral"]
            y_pred_aspects.append(aspects)
            y_pred_polarity.append(polarity)
            y_combine.append(aspects_polarity)
        return y_pred_aspects, y_pred_polarity, y_combine


aspects = list(set(a for sublist in train['aspects'] for a in sublist))

# Create train, test
X_train = train['review']
y_train_aspects = train['aspects']
y_train_polarity = train['polaritys']
X_test = test['review']
y_test_aspects = test['aspects']
y_test_polarity = test['polaritys']

aspects = list(set(a for sublist in train['aspects'] for a in sublist))

# Initialize and train the aspect detector
aspect_detector = AspectDetector(aspects)
aspect_detector.fit(X_train, y_train_aspects, y_train_polarity)

Training Aspect Detector: 100%|██████████| 12/12 [00:01<00:00,  7.37it/s]
Training Polarity Detector: 100%|██████████| 12/12 [00:04<00:00,  2.48it/s]


In [50]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import MultiLabelBinarizer

y_pred_aspects, y_pred_polarity, y_combine = aspect_detector.predict(X_test)
# Convert to uppercase for consistency
y_pred_aspects_cv = [[aspect.upper() for aspect in y] for y in y_pred_aspects]

def evaluate(y_test, y_pred, task="AD"):
    """
    y_test: grouth_true test, DataFrame
    y_pred: grouth_true predict, DataFrame
    """
    # Binarize the output
    mlb = MultiLabelBinarizer()
    y_test_bin = mlb.fit_transform(y_test)
    y_pred_bin = mlb.transform(y_pred)

    precision = precision_score(y_test_bin, y_pred_bin, average='micro')
    recall = recall_score(y_test_bin, y_pred_bin, average='micro')
    f1 = f1_score(y_test_bin, y_pred_bin, average='micro')
    accuracy = accuracy_score(y_test_bin, y_pred_bin)
    if task == "AD":
      report = classification_report(y_test_bin, y_pred_bin, zero_division=1, target_names=mlb.classes_)
    else:
      report = classification_report(y_test_bin, y_pred_bin)
    print(f'Precision: {precision:.4f}')
    print(f'Recall: {recall:.4f}')
    print(f'F1-score: {f1:.4f}')
    print(f'Accuracy: {accuracy:.4f}')
    print(f'Report:\n{report}')

print("\nEvaluate on task Aspect Detection")
evaluate(y_test_aspects, y_pred_aspects_cv)

Predicting: 100%|██████████| 1938/1938 [00:03<00:00, 643.68it/s]


Evaluate on task Aspect Detection
Precision: 0.7362
Recall: 0.7143
F1-score: 0.7251
Accuracy: 0.5501
Report:
                          precision    recall  f1-score   support

        AMBIENCE#GENERAL       0.94      0.81      0.87       227
           DRINKS#PRICES       0.83      0.11      0.19        47
          DRINKS#QUALITY       0.72      0.74      0.73       203
    DRINKS#STYLE&OPTIONS       0.71      0.56      0.62       129
             FOOD#PRICES       0.62      0.23      0.34       112
            FOOD#QUALITY       0.77      0.83      0.80       554
      FOOD#STYLE&OPTIONS       0.74      0.74      0.74       437
        LOCATION#GENERAL       0.97      0.69      0.81       104
      RESTAURANT#GENERAL       0.42      0.71      0.53       251
RESTAURANT#MISCELLANEOUS       0.91      0.48      0.62       145
       RESTAURANT#PRICES       0.77      0.67      0.72       117
         SERVICE#GENERAL       0.92      0.85      0.88       303

               micro avg      




In [51]:
print("Evaluate on task Aspect Polarity")
evaluate(y_test_polarity, y_pred_polarity, task="AS")

Evaluate on task Aspect Polarity
Precision: 0.6427
Recall: 0.6595
F1-score: 0.6510
Accuracy: 0.5046
Report:
              precision    recall  f1-score   support

           0       0.86      0.35      0.49       402
           1       0.43      0.79      0.56       619
           2       0.84      0.70      0.76      1205

   micro avg       0.64      0.66      0.65      2226
   macro avg       0.71      0.61      0.61      2226
weighted avg       0.73      0.66      0.66      2226
 samples avg       0.65      0.67      0.64      2226



In [52]:
y_test_combine = []
for aspects, polaritys in zip(y_test_aspects, y_test_polarity):
    a_p = [
        aspect + "#" + ("positive" if pol == 0 else "neutral" if pol == 1 else "negative")
         for aspect, pol in zip(aspects, polaritys)
        ]
    y_test_combine.append(a_p)

print("Evaluate combine on task Aspect Polarity")
evaluate(y_test_combine, y_combine)

Evaluate combine on task Aspect Polarity
Precision: 0.5198
Recall: 0.5044
F1-score: 0.5120
Accuracy: 0.3633
Report:
                                   precision    recall  f1-score   support

        AMBIENCE#GENERAL#negative       0.86      0.74      0.80       160
         AMBIENCE#GENERAL#neutral       0.16      0.23      0.19        30
        AMBIENCE#GENERAL#positive       0.86      0.32      0.47        37
           DRINKS#PRICES#negative       1.00      0.00      0.00        10
            DRINKS#PRICES#neutral       0.80      0.12      0.22        32
           DRINKS#PRICES#positive       0.00      0.00      0.00         5
          DRINKS#QUALITY#negative       0.56      0.57      0.57       129
           DRINKS#QUALITY#neutral       0.30      0.48      0.37        48
          DRINKS#QUALITY#positive       0.00      0.00      0.00        26
    DRINKS#STYLE&OPTIONS#negative       1.00      0.07      0.13        57
     DRINKS#STYLE&OPTIONS#neutral       0.43      0.68    