# Opinion Triplet Extraction || Aspect Sentiment Triplet Extraction
<a href="https://colab.research.google.com/github/gamapradipta/aste/blob/development/src/notebook/demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Preparation

In [1]:
# !rm -r aste

In [None]:
# Clone the `development` branch of the project; git creates the checkout in ./aste.
!git clone -b development https://github.com/gamapradipta/aste

In [None]:
# Show the working-tree status of the ./aste checkout without changing directory.
!git -C aste status

In [None]:
# Update the existing ./aste checkout in place (useful when re-running the notebook).
!git -C aste pull

In [5]:
import sys

# Put the cloned project's directories at the front of the module search
# path. Every entry is inserted at index 0, so the entry listed last here
# ends up first in sys.path.
for project_dir in (
    '/content/aste/src/model',
    '/content/aste/src',
    '/content/aste/data',
    '/content/aste',
):
  sys.path.insert(0, project_dir)

In [None]:
# Install the Hugging Face `transformers` package (provides BertTokenizer, imported below).
# NOTE(review): no version is pinned, so the installed release depends on when this runs.
!pip install transformers

In [None]:
import tensorflow as tf
# List the GPUs TensorFlow can see; `gpu` is reused by the training cell to
# decide whether to pin training to '/device:GPU:0'.
gpu = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu))

## Import and Load Data

### Import & Configuration

In [None]:
import os
import json
import argparse
from transformers import BertTokenizer

from model.data import SentenceExample, create_inputs_targets, create_sentence_example, BaseSentence
from model.config import Config
from model.model import ASTE

# Set TensorFlow's global random seed (`tf` is imported in an earlier cell).
tf.random.set_seed(1234)

#@title Configuration { display-mode: "form" }
# The `#@param` markers are Colab form-field annotations; each must stay on
# the same line as the value it controls.
# model_name   : free-text prefix of the saved model's name.
# fine_tuned   : copied to config.fine_tuned; also selects the
#                "finetuned"/"featextract" part of the save name.
# bert_version : pretrained checkpoint id passed to BertTokenizer.from_pretrained.
# data_cleaned : selects the processed/ vs interim/ data directory and the
#                "cleaned"/"uncleaned" part of the save name.
# max_len      : copied to config.max_len and appended to the save name.
model_name = "model_888" #@param {type:"string"}
fine_tuned = True #@param {type:"boolean"}
bert_version = "indobenchmark/indobert-base-p1" #@param ["bert-base-multilingual-cased", "indobenchmark/indobert-base-p1", "bert-base-multilingual-uncased"]
data_cleaned = False #@param {type:"boolean"}
max_len =  170#@param {type:"number"}

# Label used in the save name for each selectable checkpoint; checkpoints not
# listed here are reported as "unknown" when the name is built.
model_type = {
    "bert-base-multilingual-cased" : "multilingual",
    "bert-base-multilingual-uncased" : "multilingual",
    "indobenchmark/indobert-base-p1" : "monolingual"
}

def check_fine_tuned(fine_tuned):
  """Return the save-name tag for the BERT training mode.

  A truthy ``fine_tuned`` gives ``"finetuned"``; any falsy value gives
  ``"featextract"``.
  """
  return "finetuned" if fine_tuned else "featextract"

def check_data_cleaned(data_cleaned):
  """Return the save-name tag for the dataset variant.

  A truthy ``data_cleaned`` gives ``"cleaned"``; any falsy value gives
  ``"uncleaned"``.
  """
  return "cleaned" if data_cleaned else "uncleaned"

# Build the saved-model name from the run settings, in this order:
# name, data variant, checkpoint family, training mode, max length.
save_name_parts = (
    model_name,
    check_data_cleaned(data_cleaned),
    model_type.get(bert_version, "unknown"),
    check_fine_tuned(fine_tuned),
    max_len,
)
model_save_name = "{}_{}_{}_{}_{}".format(*save_name_parts)
print(model_save_name)


# Copy the chosen settings onto a fresh Config, then load the tokenizer that
# matches the configured checkpoint.
config = Config()
for option, value in (
    ("max_len", max_len),
    ("fine_tuned", fine_tuned),
    ("bert_version", bert_version),
):
  setattr(config, option, value)
tokenizer = BertTokenizer.from_pretrained(config.bert_version)

### Data

#### Prep Data

In [None]:
%%bash
# Abort on the first failing command so a failed `cd` or parse step is not
# hidden by the copies at the end of the cell.
set -e

# NOTE(review): parse.py is run from src/data with relative raw/, interim/ and
# processed/ paths, while the copies below and the notebook's loaders use
# /content/aste/data/... - confirm both refer to the same directories.
cd /content/aste/src/data

# Step 1: run parse.py in `parse_all` mode on the hotel dataset, raw/ -> interim/.
python parse.py \
--dataset hotel \
--input raw/ \
--output interim/ \
--mode parse_all

# Step 2: run parse.py in `remove_unvalid_data_json` mode, interim/ -> processed/.
python parse.py \
--dataset hotel \
--input interim/ \
--output processed/ \
--mode remove_unvalid_data_json


# The test and validation files are copied from interim/ as they are. The
# trailing slash makes cp fail if processed/hotel is not an existing directory
# instead of silently creating a regular file with that name.
cp /content/aste/data/interim/hotel/test.json /content/aste/data/processed/hotel/
cp /content/aste/data/interim/hotel/validation.json /content/aste/data/processed/hotel/

#### Load Data

In [11]:
# Cleaned runs read from processed/, all other runs read from interim/.
BASE_DATA_DIR = (
    '/content/aste/data/processed/hotel/'
    if data_cleaned
    else '/content/aste/data/interim/hotel/'
)

In [None]:
# Path of the training file (despite the *_DIR name this is a file path).
TRAIN_DATA_DIR = os.path.join(BASE_DATA_DIR, 'train.json')  

# Build the examples from the file, then convert them to inputs and targets.
# Both helpers come from model.data; without `include_token_ranges` the second
# call is unpacked into exactly two values.
train_examples = create_sentence_example(TRAIN_DATA_DIR, tokenizer, config)
X_train, y_train = create_inputs_targets(train_examples)

# Sanity check: shapes of the first two input arrays and the first target array.
print(X_train[0].shape, X_train[1].shape)
print(y_train[0].shape)

In [None]:
# Path of the test file (despite the *_DIR name this is a file path).
TEST_DATA_DIR = os.path.join(BASE_DATA_DIR, 'test.json')

# Same pipeline as for training, but `include_token_ranges=True` makes the
# helper return a third value, which the evaluation cell passes to aste.evaluate.
test_examples = create_sentence_example(TEST_DATA_DIR, tokenizer, config)
X_test, y_test, token_ranges_test = create_inputs_targets(test_examples, include_token_ranges=True)

# Sanity check: shapes of the first two input arrays and the first target array.
print(X_test[0].shape, X_test[1].shape)
print(y_test[0].shape)

In [None]:
# Path of the validation file (despite the *_DIR name this is a file path).
VALID_DATA_DIR = os.path.join(BASE_DATA_DIR, 'validation.json')

# `include_token_ranges=True` makes the helper return a third value, which the
# evaluation cell passes to aste.evaluate. X_valid / y_valid are also handed
# to aste.train as validation data.
valid_examples = create_sentence_example(VALID_DATA_DIR, tokenizer, config)
X_valid, y_valid, token_ranges_valid = create_inputs_targets(valid_examples, include_token_ranges=True)

# Sanity check: shapes of the first two input arrays and the first target array.
print(X_valid[0].shape, X_valid[1].shape)
print(y_valid[0].shape)

## Additional Functions for Visualization

In [15]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

def get_text_color(n):
  """Return the matplotlib colour code used to print tag value ``n``.

  1 -> 'r', 2 -> 'b', 3, 4 and 5 -> 'm', 0 -> 'k'. Any other value
  returns None.
  """
  # Pairs are tried in this order using ``==``, so numeric types that compare
  # equal to these integers are matched as well.
  color_by_tag = ((1, 'r'), (2, 'b'), (3, 'm'), (4, 'm'), (5, 'm'), (0, 'k'))
  for tag_value, color in color_by_tag:
    if n == tag_value:
      return color
  return None

def plot_tag(tag, token=None, max=None, triu=True):
  """Draw a tag grid and print each cell's value on top of it.

  Args:
    tag: 2-D array of tag values. It is sliced as ``tag[:max, :max]``, so it
      must support NumPy-style two-axis slicing.
    token: optional sequence of axis labels; when given, its first ``max``
      entries label both axes.
    max: number of leading rows/columns to show. ``None``, ``0`` or a value
      larger than the length of the first row shows the whole grid.
    triu: when true, only the upper triangle (diagonal included) is kept and
      labelled.

  The figure is displayed with ``plt.show()``; nothing is returned.
  """
  width = len(tag[0])
  # ``max`` shadows the builtin (the name is kept for callers), so the clamped
  # size is held in a local alias.
  limit = min(max, width) if max else width

  show_tag = tag[:limit, :limit]
  if token:
    show_token = token[:limit]
  # One-colour map: every cell gets a white background and only the text
  # colour distinguishes the tag values.
  cmap = matplotlib.colors.ListedColormap(['white'], name='colors', N=None)

  if triu:
    show_tag = np.triu(show_tag)

  _, ax = plt.subplots(figsize=(limit / 2, limit / 2))
  ax.imshow(show_tag, cmap=cmap)

  if token:
    tick_positions = np.arange(len(show_token))
    ax.set_xticks(tick_positions)
    ax.set_yticks(tick_positions)
    ax.set_xticklabels(show_token)
    ax.set_yticklabels(show_token)
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

  # Rows and columns are bounded separately so a non-square slice cannot index
  # past the last row.
  n_rows, n_cols = len(show_tag), len(show_tag[0])
  for i in range(n_rows):
    # With ``triu`` the cells left of the diagonal are skipped.
    for j in range(i if triu else 0, n_cols):
      ax.text(j, i, show_tag[i, j],
              ha="center", va="center",
              color=get_text_color(show_tag[i, j]), weight='bold')
  plt.show()

## Model 

### Model Init

In [None]:
# Create the ASTE wrapper from the notebook's config and initialise it; after
# this, the underlying model is reachable as `aste.model`.
aste = ASTE(config)
aste.init_model()

In [None]:
# Print the summary of the freshly initialised model.
aste.model.summary()

### Model Training

In [None]:
#@title HYPERPARAM TRAINING
batch_size =  10#@param {type:"integer"}
epochs =  10#@param {type:"integer"}

import contextlib

import tensorflow as tf

# Training arguments are written once so the GPU and CPU paths cannot drift apart.
train_kwargs = dict(
    batch_size=batch_size,
    verbose=1,
    epochs=epochs,
    X_val=X_valid,
    y_val=y_valid,
)

# Pin training to the first GPU when one was detected by the GPU probe cell;
# otherwise use a no-op context and leave device placement to TensorFlow.
device_scope = tf.device('/device:GPU:0') if gpu else contextlib.nullcontext()
with device_scope:
  aste.train(X_train, y_train, **train_kwargs)

### Save Model

In [None]:
# NOTE(review): the repository is cloned to /content/aste (lowercase) but this
# path is under /content/ASTE, a different directory on a case-sensitive
# filesystem - confirm that is intended.
BASE_MODEL_PATH = "/content/ASTE/saved_model/"
# Resulting location: <BASE_MODEL_PATH>/<model_save_name>/model
MODEL_PATH = os.path.join(BASE_MODEL_PATH, model_save_name, "model")

aste.save_model(MODEL_PATH)

### Load Model

In [None]:
# Same path as in the Save Model cell, declared again so this cell does not
# depend on that one; `model_save_name` and `aste` must still exist.
# NOTE(review): /content/ASTE differs in case from the cloned /content/aste - confirm.
BASE_MODEL_PATH = "/content/ASTE/saved_model/"
MODEL_PATH = os.path.join(BASE_MODEL_PATH, model_save_name, "model")

# The model is initialised again before the saved state is loaded into it.
aste.init_model()
aste.load_model(MODEL_PATH)
aste.model.summary()

### Model Evaluation

#### Validation

In [None]:
# Evaluate on the validation split: all inputs, the first target array and the
# first token-range entry.
aste.evaluate(X_valid, y_valid[0], token_ranges_valid[0])

#### Test

In [None]:
# Evaluate on the test split: all inputs, the first target array and the
# first token-range entry.
aste.evaluate(X_test, y_test[0], token_ranges_test[0])

# DEMO

In [None]:
#@title DEMO
Ulasan = "kamar mandi sangat bersih tetapi tamannya sangat kotor" #@param ["Hotel bersih , fasilitas lengkap , tapi sarapan kurang enak", "kamarnya luas dan bersih , tapi toilet dan taman tidak bersih", "cukup bersih dan wifi okelah .", "bagus tapi mahal kamarnya", "bersih dan luas kamarnya , tapi toiletnya kotor dan menjijikan", "kamar sangat bersih dan nyaman , hotel bersih , akan lebih baik ada hair dryer di kamar"] {allow-input: true}
# `Ulasan` ("review" in Indonesian) is the sentence typed or picked in the form;
# it is wrapped in the dict that BaseSentence is constructed from.
sentence_pack = {
    "sentence" : Ulasan
}
aste.model.run_eagerly=True
# NOTE(review): only the construction of `example` is inside the '/GPU:0'
# scope; both prediction calls below run outside it.
with tf.device('/GPU:0') :
  example = BaseSentence(sentence_pack, tokenizer, config)

triples, aspects, sentiments = aste.predict_one(example, example.token_ranges, triple_only=False)

# Print each result list under its heading, followed by a separator line.
separator = "---------------------------------------------"
for heading, items in (
    ("TRIPLES", triples),
    ("ASPECTS", aspects),
    ("SENTIMENTS", sentiments),
):
  print(heading)
  for item in items:
    print(item)
  print(separator)

# First element of the model's prediction for the same sentence; kept in
# `pred` for the tag-visualization cell.
pred = aste.predict(example.get_X(), logits=False)[0]



In [None]:
#@title DEMO TAG VISUALIZATION
# NOTE(review): `max_len` here rebinds the global of the same name set in the
# Configuration cell (there it is the model's maximum length, here it is the
# number of rows/columns to plot, 0 = all). Re-run the Configuration cell
# before using `max_len` for anything else.
max_len =  0#@param {type:"integer"}
with_token = True #@param {type:"boolean"}
triu = True #@param {type:"boolean"}
include_pad = False #@param {type:"boolean"}

token = tokenizer.convert_ids_to_tokens(example.input_ids) if with_token else None

# Slice bounds used to drop everything outside the sentence's token ranges.
# NOTE(review): the start is the LAST index of the first token range; if that
# range spans several positions the leading ones are cut off - confirm whether
# `token_ranges[0][0]` was intended.
first, last = example.token_ranges[0][-1],  example.token_ranges[-1][-1]+1

if include_pad:
  # Show the full prediction grid, padding positions included.
  show_pred, show_token = pred, token
else:
  show_pred = pred[first:last, first:last]
  show_token = token[first:last] if with_token else None


plot_tag(show_pred, show_token, max_len, triu=triu)

# Terima Kasih :)