# 使用crawl-300d的pretrain model來做 LSTM

In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import re

### 對處理好的laptop、restaurant的train、test資料作前處理

In [2]:
# Split each sentence into the part up to the aspect and the part from the aspect on.
def split_text(df):
    """Add normalised left/right context columns around each row's aspect term.

    Each ``text`` is cut at the FIRST occurrence of ``aspect``:

    * ``left_text``  = everything before the aspect + the aspect itself
    * ``right_text`` = the aspect itself + everything after it
    * ``left_right_text`` = ``left_text + ' ' + right_text`` (fed to the tokenizer)

    Both halves are lower-cased, hyphens become spaces and the characters
    ``. , ! " ( ) # % & / : ? ~`` are removed.

    The previous implementation used ``text.split(aspect)`` and took element 1
    as the right side, so when the aspect string occurred more than once the
    tail of the sentence after the second occurrence was silently dropped.

    NOTE: because this changes the produced text for such rows, any artefact
    derived from the old output (tokenizer vocabulary, cached embedding
    matrix) has to be regenerated.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain string columns ``text`` and ``aspect``. Modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the three new columns.

    Raises
    ------
    IndexError
        If a row's aspect does not occur in its text.
    """
    # Compile once instead of once per row.
    hyphen = re.compile('-')
    punctuation = re.compile('[.,!"()#%&/:?~]')

    def _normalise(fragment):
        # lower-case -> hyphens to spaces -> strip punctuation
        fragment = fragment.lower()
        fragment = hyphen.sub(' ', fragment)
        return punctuation.sub('', fragment)

    left_texts = []
    right_texts = []

    # Iterate positionally so the function does not depend on a 0..n-1 index.
    rows = zip(df['text'], df['aspect'])
    for position, (text, aspect) in enumerate(tqdm(rows, total=len(df))):
        before, found, after = text.partition(aspect)  # cut at first occurrence only
        if not found:
            raise IndexError(
                'aspect %r not found in text at row position %d' % (aspect, position)
            )
        left_texts.append(_normalise(before + aspect))
        right_texts.append(_normalise(aspect + after))

    df['left_text'] = left_texts
    df['right_text'] = right_texts
    # Concatenation of both halves; used to fit the tokenizer.
    df['left_right_text'] = [
        left + ' ' + right for left, right in zip(left_texts, right_texts)
    ]

    return df

In [3]:
# Load the four pre-processed splits.
laptop_train = pd.read_csv('dataset/laptop_train_processed.csv', encoding='utf-8')
restaurant_train = pd.read_csv('dataset/restaurant_train_processed.csv', encoding='utf-8')
laptop_test = pd.read_csv('dataset/laptop_test_processed.csv', encoding='utf-8')
restaurant_test = pd.read_csv('dataset/restaurant_test_processed.csv', encoding='utf-8')

# DataFrame.append was removed in pandas 2.0. pd.concat(..., ignore_index=True)
# is the equivalent of append(...) followed by reset_index(drop=True).

# Training rows: laptop first, then restaurant.
train_data = pd.concat([laptop_train, restaurant_train], ignore_index=True)

# Test rows: laptop first, then restaurant.
test_data = pd.concat([laptop_test, restaurant_test], ignore_index=True)

# All rows: training block followed by test block, so the tokenizer sees every sentence.
data = pd.concat([train_data, test_data], ignore_index=True)

# Add left_text / right_text / left_right_text columns.
data = split_text(data)

print('訓練資料集:', len(train_data))
print('測試資料集:', len(test_data))
print('所有資料集:', len(data))
data.head(10)

100%|██████████| 7673/7673 [00:02<00:00, 2763.08it/s]

訓練資料集: 5915
測試資料集: 1758
所有資料集: 7673





Unnamed: 0,text,aspect,polarity,left_text,right_text,left_right_text
0,I charge it at night and skip taking the cord ...,cord,neutral,i charge it at night and skip taking the cord,cord with me because of the good battery life,i charge it at night and skip taking the cord ...
1,I charge it at night and skip taking the cord ...,battery life,positive,i charge it at night and skip taking the cord ...,battery life,i charge it at night and skip taking the cord ...
2,The tech guy then said the service center does...,service center,negative,the tech guy then said the service center,service center does not do 1 to 1 exchange and...,the tech guy then said the service center serv...
3,The tech guy then said the service center does...,"""sales"" team",negative,the tech guy then said the service center does...,sales team which is the retail shop which i bo...,the tech guy then said the service center does...
4,The tech guy then said the service center does...,tech guy,neutral,the tech guy,tech guy then said the service center does not...,the tech guy tech guy then said the service ce...
5,"it is of high quality, has a killer GUI, is ex...",quality,positive,it is of high quality,quality has a killer gui is extremely stable i...,it is of high quality quality has a killer gui...
6,"it is of high quality, has a killer GUI, is ex...",GUI,positive,it is of high quality has a killer gui,gui is extremely stable is highly expandable i...,it is of high quality has a killer gui gui is ...
7,"it is of high quality, has a killer GUI, is ex...",applications,positive,it is of high quality has a killer gui is extr...,applications is easy to use and is absolutely ...,it is of high quality has a killer gui is extr...
8,"it is of high quality, has a killer GUI, is ex...",use,positive,it is of high quality has a killer gui is extr...,use and is absolutely gorgeous,it is of high quality has a killer gui is extr...
9,Easy to start up and does not overheat as much...,start up,positive,easy to start up,start up and does not overheat as much as othe...,easy to start up start up and does not overhea...


In [4]:
# Print one sample row: original text, then both halves, then their concatenation,
# separated by blank lines.
n = 3
preview_columns = ['text', 'left_text', 'right_text', 'left_right_text']
print(*(data.loc[n, column] for column in preview_columns), sep='\n\n')

The tech guy then said the service center does not do 1-to-1 exchange and I have to direct my concern to the "sales" team, which is the retail shop which I bought my netbook from.

the tech guy then said the service center does not do 1 to 1 exchange and i have to direct my concern to the sales team

sales team which is the retail shop which i bought my netbook from

the tech guy then said the service center does not do 1 to 1 exchange and i have to direct my concern to the sales team sales team which is the retail shop which i bought my netbook from


In [5]:
# Turn the textual polarity into an integer class id.
polarity_to_label = {'negative': 0, 'neutral': 1, 'positive': 2}
data['label'] = data['polarity'].map(polarity_to_label).astype(int)

data.head(10)

Unnamed: 0,text,aspect,polarity,left_text,right_text,left_right_text,label
0,I charge it at night and skip taking the cord ...,cord,neutral,i charge it at night and skip taking the cord,cord with me because of the good battery life,i charge it at night and skip taking the cord ...,1
1,I charge it at night and skip taking the cord ...,battery life,positive,i charge it at night and skip taking the cord ...,battery life,i charge it at night and skip taking the cord ...,2
2,The tech guy then said the service center does...,service center,negative,the tech guy then said the service center,service center does not do 1 to 1 exchange and...,the tech guy then said the service center serv...,0
3,The tech guy then said the service center does...,"""sales"" team",negative,the tech guy then said the service center does...,sales team which is the retail shop which i bo...,the tech guy then said the service center does...,0
4,The tech guy then said the service center does...,tech guy,neutral,the tech guy,tech guy then said the service center does not...,the tech guy tech guy then said the service ce...,1
5,"it is of high quality, has a killer GUI, is ex...",quality,positive,it is of high quality,quality has a killer gui is extremely stable i...,it is of high quality quality has a killer gui...,2
6,"it is of high quality, has a killer GUI, is ex...",GUI,positive,it is of high quality has a killer gui,gui is extremely stable is highly expandable i...,it is of high quality has a killer gui gui is ...,2
7,"it is of high quality, has a killer GUI, is ex...",applications,positive,it is of high quality has a killer gui is extr...,applications is easy to use and is absolutely ...,it is of high quality has a killer gui is extr...,2
8,"it is of high quality, has a killer GUI, is ex...",use,positive,it is of high quality has a killer gui is extr...,use and is absolutely gorgeous,it is of high quality has a killer gui is extr...,2
9,Easy to start up and does not overheat as much...,start up,positive,easy to start up,start up and does not overheat as much as othe...,easy to start up start up and does not overhea...,2


In [6]:
# Longest left_text / right_text over all rows, measured in whitespace-separated words.
per_row_longest = (
    max(len(left.split()), len(right.split()))
    for left, right in zip(data['left_text'], data['right_text'])
)
max_count = max(per_row_longest, default=0)
print('left_text與right_text最多的字數:', max_count)

left_text與right_text最多的字數: 72


### 對文字做encoding

In [7]:
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

Using TensorFlow backend.


In [8]:
max_words = 7000 # vocabulary cap handed to the Tokenizer as num_words
max_seq_length = 80 # length every token sequence is padded to
embedding_dim = 300 # dimensionality of each word vector

In [9]:
# Fit the tokenizer on the combined left+right text of every row.
tokenizer = Tokenizer(num_words=max_words)
corpus = data['left_right_text'].to_numpy()
tokenizer.fit_on_texts(corpus)

# word_index is the word -> integer token id dictionary built from that corpus.
word_index = tokenizer.word_index
print(f'Found {len(word_index)} unique tokens.')

Found 6557 unique tokens.


In [10]:
# Show the first ten entries of word_index as "word : token id".
for word, token_id in list(word_index.items())[:10]:
    print(word, ':', token_id)

the : 1
and : 2
a : 3
to : 4
is : 5
i : 6
of : 7
for : 8
food : 9
it : 10


In [11]:
# Convert both halves to token id sequences and inspect one row.
n = 0  # row to display
left_text = data['left_text'].to_numpy()
right_text = data['right_text'].to_numpy()
left_text_seq = tokenizer.texts_to_sequences(left_text)
right_text_seq = tokenizer.texts_to_sequences(right_text)

before_reversal = (
    data.loc[n, 'left_text'],
    data.loc[n, 'right_text'],
    left_text_seq[n],
    right_text_seq[n],
    type(right_text_seq),
)
for shown in before_reversal:
    print(shown)

# Reverse every right-hand sequence so it is read from the end of the
# sentence back towards the aspect.
print('right text 倒過來')
right_text_seq = [tokens[::-1] for tokens in right_text_seq]

for shown in (left_text_seq[n], right_text_seq[n], type(right_text_seq)):
    print(shown)

i charge it at night and skip taking the cord
cord with me because of the good battery life
[6, 353, 10, 31, 267, 2, 1779, 899, 1, 1146]
[1146, 12, 56, 94, 7, 1, 26, 49, 90]
<class 'list'>
right text 倒過來
[6, 353, 10, 31, 267, 2, 1779, 899, 1, 1146]
[90, 49, 26, 1, 7, 94, 56, 12, 1146]
<class 'list'>


In [12]:
# Right-pad (or trim) one token sequence to a fixed length.
def text_seq_padding(text_seq, max_len=None):
    """Return ``text_seq`` as an integer array of exactly ``max_len`` entries.

    Shorter sequences get zeros appended on the right. Longer sequences keep
    their LAST ``max_len`` tokens; in this notebook both the left sequence and
    the reversed right sequence end at the aspect term, so the tail is the
    part closest to the aspect.

    Compared with the previous version this always returns an ``ndarray``
    (a sequence already at the limit used to come back as a plain list),
    keeps an integer dtype for empty input (``np.pad`` on ``[]`` yields
    floats) and never returns more than ``max_len`` entries, so stacking the
    results cannot produce a ragged array.

    Parameters
    ----------
    text_seq : sequence of int
        Token ids of one sentence fragment.
    max_len : int, optional
        Target length. Defaults to the module-level ``max_seq_length``.

    Returns
    -------
    numpy.ndarray
        1-D integer array of length ``max_len``.
    """
    if max_len is None:
        max_len = max_seq_length

    tokens = np.asarray(text_seq, dtype=int)
    surplus = len(tokens) - max_len
    if surplus > 0:
        # Slice by start offset (not tokens[-max_len:]) so max_len == 0 works too.
        return tokens[surplus:]
    return np.pad(tokens, (0, -surplus), mode='constant', constant_values=0)
# Pad every left / right token sequence with trailing zeros and collect
# each side into a single array (one row per sentence).
left_text_seq = np.array([text_seq_padding(tokens) for tokens in left_text_seq])
right_text_seq = np.array([text_seq_padding(tokens) for tokens in right_text_seq])

n = 0  # row to display
for padded_side in (left_text_seq, right_text_seq):
    print(padded_side[n])

[   6  353   10   31  267    2 1779  899    1 1146    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0]
[  90   49   26    1    7   94   56   12 1146    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0]


### 使用預先處理的詞向量 (crawl 300 dim)
#### https://fasttext.cc/docs/en/english-vectors.html

In [13]:
# import os
# import sys

In [14]:
# # 載入詞向量
# embeddings_index = {}
# file = open('dataset/crawl-300d-2M.vec', 'r', encoding='utf-8')
# for line in tqdm(file):
#     values = line.split()
#     word = values[0]
#     coefs = np.asarray(values[1:], dtype='float32')
#     embeddings_index[word] = coefs
# file.close()

# print('Found %s word vectors.' % len(embeddings_index))

In [15]:
# UNK = embeddings_index['UNK'] # unknown token
# print(UNK)

In [16]:
# # 根據得到的字典生成word_index裡每個字的詞向量
# real_word = 0
# embedding_matrix = np.zeros((len(word_index)+1, embedding_dim))       #預設一個全部都是0的matrix，總共有每一個unique token的數量
# for word, i in word_index.items():                                    #dict的index從1開始，所以np.zeros()數量要 +1
#     embedding_vector = embeddings_index.get(word)
#     if embedding_vector is not None:
#         embedding_matrix[i] = embedding_vector         #將找到的embedding vector丟到他位置的matrix, 如果找不到一樣維持0
#         real_word = real_word + 1 # 看真正有找到的詞有幾個
#     else:
#         embedding_matrix[i] = UNK
# print(embedding_matrix.shape)
# print(embedding_matrix)
# print('總共不重複的字數:', len(word_index))
# print('在字典裡找到的字數:', real_word)
# # embedding_matrix就是把word_index裡面的每個字所代表word embedding對應變成一個matrix (每個字的word embedding)

In [26]:
# Load the pre-computed embedding matrix (row i = vector of the word with token id i).
embedding_matrix = np.load('dataset/embedding_matrix.npy')

# The Embedding layers below are built as (len(word_index) + 1, embedding_dim)
# and initialised with this matrix, so a cached file produced from a different
# vocabulary must be rejected here rather than deep inside Keras.
# NOTE(review): equal shape does not prove the rows follow the current
# word_index order - regenerate the file whenever the preprocessing changes.
expected_shape = (len(word_index) + 1, embedding_dim)
if embedding_matrix.shape != expected_shape:
    raise ValueError(
        'embedding_matrix has shape %s but the tokenizer expects %s; '
        'regenerate dataset/embedding_matrix.npy' % (embedding_matrix.shape, expected_shape)
    )

print(type(embedding_matrix))
print(embedding_matrix.shape)
print(embedding_matrix)

<class 'numpy.ndarray'>
(6558, 300)
[[ 0.          0.          0.         ...  0.          0.
   0.        ]
 [ 0.0231      0.017       0.0157     ...  0.0744     -0.1118
   0.0963    ]
 [-0.1081      0.0191      0.0354     ...  0.1104      0.0475
  -0.0599    ]
 ...
 [ 0.16580001 -0.0169     -0.4138     ...  0.0933     -0.1168
  -0.1777    ]
 [-0.1179      0.0726     -0.005      ...  0.2079      0.0322
  -0.26879999]
 [ 0.24439999  0.1206      0.1123     ... -0.147      -0.0186
  -0.3204    ]]


### 確認資料、並切割成train、test

In [17]:
# Row 0 of the combined frame; the list indexer returns a one-row DataFrame.
data.loc[[0]]

Unnamed: 0,text,aspect,polarity,left_text,right_text,left_right_text,label
0,I charge it at night and skip taking the cord ...,cord,neutral,i charge it at night and skip taking the cord,cord with me because of the good battery life,i charge it at night and skip taking the cord ...,1


In [18]:
# First laptop test row: the test block starts right after the training rows
# (derived from the split size instead of the hard-coded 5915).
data.loc[[len(train_data)]]

Unnamed: 0,text,aspect,polarity,left_text,right_text,left_right_text,label
5915,"Boot time is super fast, around anywhere from ...",Boot time,positive,boot time,boot time is super fast around anywhere from 3...,boot time boot time is super fast around anywh...,2


In [19]:
# First restaurant test row: it follows the training rows and the laptop test
# rows (derived from the split sizes instead of the hard-coded 6553).
data.loc[[len(train_data) + len(laptop_test)]]

Unnamed: 0,text,aspect,polarity,left_text,right_text,left_right_text,label
6553,The bread is top notch as well.,bread,positive,the bread,bread is top notch as well,the bread bread is top notch as well,2


In [20]:
# Check that the DataFrame rows and the padded token sequences line up for the
# first laptop test row and the first restaurant test row.
# Offsets are derived from the split sizes rather than hard-coded.
laptop_test_start = len(train_data)
restaurant_test_start = laptop_test_start + len(laptop_test)

for position, start in enumerate((laptop_test_start, restaurant_test_start)):
    if position:
        print()  # blank line between the two samples
    print(data.loc[start, 'left_text'])
    print(data.loc[start, 'right_text'])
    print(left_text_seq[start])
    print(right_text_seq[start])

boot time
boot time is super fast around anywhere from 35 seconds to 1 minute
[500  98   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0]
[1318  434    4 1017 2018   44  844  261  139  532    5   98  500    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0    0    0    0    0
    0    0    0    0    0    0    0    0    0    0]

the bread
bread is top notch as well
[  1 309   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
   0 

In [21]:
# One-hot encode the integer labels into a 2-D array, since the model is
# compiled with categorical_crossentropy.
# dtype=int pins numeric 0/1 columns; pandas >= 2.0 would otherwise return booleans.
Y = pd.get_dummies(data['label'], dtype=int).to_numpy()
print('Shape of Y:', Y.shape)
for i in range(10):
    print(data.loc[i, 'label'], Y[i])
#[1 0 0] = negative
#[0 1 0] = neutral
#[0 0 1] = positive

Shape of Y: (7673, 3)
1 [0 1 0]
2 [0 0 1]
0 [1 0 0]
0 [1 0 0]
1 [0 1 0]
2 [0 0 1]
2 [0 0 1]
2 [0 0 1]
2 [0 0 1]
2 [0 0 1]


In [22]:
# Cut the encoded arrays back into train / test. The training rows come first
# in `data`, so the boundary is the size of the training frame (was the
# hard-coded 5915).
n_train = len(train_data)

X_left_train = left_text_seq[:n_train]
X_right_train = right_text_seq[:n_train]
Y_train = Y[:n_train]
X_left_test = left_text_seq[n_train:]
X_right_test = right_text_seq[n_train:]
Y_test = Y[n_train:]
print(len(X_left_train), len(X_right_train), len(Y_train))
print(len(X_left_test), len(X_right_test), len(Y_test))

5915 5915 5915
1758 1758 1758


In [23]:
# Compare the raw polarity with the integer label and the one-hot row for the
# first 20 laptop test rows and the first 20 restaurant test rows.
# Offsets are derived from the split sizes (were hard-coded 5915 / 6553 / 638).
n_train = len(train_data)
n_laptop_test = len(laptop_test)

print('laptop_test', '         ','restaurant_test')
for i in range(20):
    print(
        laptop_test.loc[i, 'polarity'], data.loc[n_train + i, 'label'], Y_test[i],
        '  ',
        restaurant_test.loc[i, 'polarity'],
        data.loc[n_train + n_laptop_test + i, 'label'],
        Y_test[n_laptop_test + i],
    )
    

laptop_test           restaurant_test
positive 2 [0 0 1]    positive 2 [0 0 1]
negative 0 [1 0 0]    positive 2 [0 0 1]
positive 2 [0 0 1]    positive 2 [0 0 1]
negative 0 [1 0 0]    positive 2 [0 0 1]
negative 0 [1 0 0]    positive 2 [0 0 1]
negative 0 [1 0 0]    positive 2 [0 0 1]
positive 2 [0 0 1]    positive 2 [0 0 1]
negative 0 [1 0 0]    positive 2 [0 0 1]
neutral 1 [0 1 0]    positive 2 [0 0 1]
positive 2 [0 0 1]    positive 2 [0 0 1]
positive 2 [0 0 1]    neutral 1 [0 1 0]
positive 2 [0 0 1]    positive 2 [0 0 1]
positive 2 [0 0 1]    positive 2 [0 0 1]
positive 2 [0 0 1]    positive 2 [0 0 1]
positive 2 [0 0 1]    negative 0 [1 0 0]
positive 2 [0 0 1]    positive 2 [0 0 1]
negative 0 [1 0 0]    neutral 1 [0 1 0]
negative 0 [1 0 0]    neutral 1 [0 1 0]
positive 2 [0 0 1]    positive 2 [0 0 1]
positive 2 [0 0 1]    positive 2 [0 0 1]


## Model

In [24]:
import tensorflow as tf

from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import LSTM, Activation, Dense, Dropout, Input, Embedding, Flatten, InputLayer, Bidirectional, concatenate, add, average, Reshape
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing import sequence
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

### 把兩邊input merge起來，有加上dropout的模型

In [28]:
# Branch 1 (fed with the left sequences): token ids -> embedding initialised
# from embedding_matrix -> LSTM summary vector.
input_layer_1 = Input(shape = (max_seq_length,), dtype='int64')
embedding_1 = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], mask_zero=True, trainable=True)(input_layer_1)
lstm_hidden_1 = LSTM(512, return_sequences=False, dropout=0.3)(embedding_1)
# Alternative tried:
# lstm_hidden_1 = Bidirectional(LSTM(512, return_sequences=False, dropout=0.4))(embedding_1)

# Branch 2 (fed with the reversed right sequences): same structure, separate weights.
input_layer_2 = Input(shape = (max_seq_length,), dtype='int64')
embedding_2 = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], mask_zero=True, trainable=True)(input_layer_2)
lstm_hidden_2 = LSTM(512, return_sequences=False, dropout=0.3)(embedding_2)
# Alternative tried:
# lstm_hidden_2 = Bidirectional(LSTM(512, return_sequences=False, dropout=0.4))(embedding_2)

# Concatenate both summaries and classify into the 3 polarity classes.
merge = concatenate([lstm_hidden_1, lstm_hidden_2])
# dropout = Dropout(0.2)(merge)
hidden_1 = Dense(128, activation='relu')(merge)
dropout_1 = Dropout(0.2)(hidden_1)
hidden_2 = Dense(64, activation='relu')(dropout_1)
dropout_2 = Dropout(0.2)(hidden_2)
output = Dense(3, activation='softmax')(dropout_2)
model = Model(inputs=[input_layer_1, input_layer_2], outputs=output)
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()
# `lr` is the deprecated spelling of `learning_rate`.
adam = Adam(learning_rate=1e-3)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])
# Stop after 8 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=8, verbose=1, restore_best_weights=True)

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 80)]         0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 80)]         0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 80, 300)      1967400     input_3[0][0]                    
__________________________________________________________________________________________________
embedding_2 (Embedding)         (None, 80, 300)      1967400     input_4[0][0]                    
______________________________________________________________________________________________

In [29]:
# Train the concatenate model.
# NOTE(review): the test arrays are used as validation data, and early stopping
# restores the weights with the best val_loss - so the model is selected on the
# same data it is later evaluated on. Consider a separate validation split.
model_fit = model.fit(
    x=[X_left_train, X_right_train],
    y=Y_train,
    batch_size=64,
    epochs=30,
    validation_data=([X_left_test, X_right_test], Y_test),
    callbacks=[early_stopping],
)

Train on 5915 samples, validate on 1758 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 00010: early stopping


### 把兩邊input add or average起來，有加上dropout的模型

In [None]:
# Branch 1 (fed with the left sequences): token ids -> embedding initialised
# from embedding_matrix -> LSTM summary vector.
input_layer_1 = Input(shape = (max_seq_length,), dtype='int64')
embedding_1 = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], mask_zero=True, trainable=True)(input_layer_1)
lstm_hidden_1 = LSTM(512, return_sequences=False, dropout=0.3)(embedding_1)
# Alternative tried:
# lstm_hidden_1 = Bidirectional(LSTM(512, return_sequences=False, dropout=0.4))(embedding_1)

# Branch 2 (fed with the reversed right sequences): same structure, separate weights.
input_layer_2 = Input(shape = (max_seq_length,), dtype='int64')
embedding_2 = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], mask_zero=True, trainable=True)(input_layer_2)
lstm_hidden_2 = LSTM(512, return_sequences=False, dropout=0.3)(embedding_2)
# Alternative tried:
# lstm_hidden_2 = Bidirectional(LSTM(512, return_sequences=False, dropout=0.4))(embedding_2)

# Element-wise average of both summaries (add is the commented alternative),
# then classify into the 3 polarity classes.
# added = add([lstm_hidden_1, lstm_hidden_2])
averaged = average([lstm_hidden_1, lstm_hidden_2])
# dropout = Dropout(0.2)(averaged)
hidden_1 = Dense(128, activation='relu')(averaged)
dropout_1 = Dropout(0.2)(hidden_1)
hidden_2 = Dense(64, activation='relu')(dropout_1)
dropout_2 = Dropout(0.2)(hidden_2)
output = Dense(3, activation='softmax')(dropout_2)
model1 = Model(inputs=[input_layer_1, input_layer_2], outputs=output)
# Summarise model1 (this used to print the summary of `model` by mistake).
# summary() prints the table itself and returns None, so it is not wrapped in print().
model1.summary()
# `lr` is the deprecated spelling of `learning_rate`.
adam = Adam(learning_rate=1e-3)
model1.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])
# Stop after 8 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=8, verbose=1, restore_best_weights=True)

In [None]:
# Train the averaging model.
# NOTE(review): the test arrays are used as validation data, and early stopping
# restores the weights with the best val_loss - so the model is selected on the
# same data it is later evaluated on. Consider a separate validation split.
model1_fit = model1.fit(
    x=[X_left_train, X_right_train],
    y=Y_train,
    batch_size=64,
    epochs=30,
    validation_data=([X_left_test, X_right_test], Y_test),
    callbacks=[early_stopping],
)

### 把兩邊input 丟到另一個LSTM，有加上dropout的模型

In [None]:
# Branch 1 (fed with the left sequences): token ids -> embedding initialised
# from embedding_matrix -> LSTM summary vector.
input_layer_1 = Input(shape = (max_seq_length,), dtype='int64')
embedding_1 = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], mask_zero=True, trainable=True)(input_layer_1)
lstm_hidden_1 = LSTM(512, return_sequences=False, dropout=0.3)(embedding_1)
# Alternative tried:
# lstm_hidden_1 = Bidirectional(LSTM(512, return_sequences=False, dropout=0.4))(embedding_1)

# Branch 2 (fed with the reversed right sequences): same structure, separate weights.
input_layer_2 = Input(shape = (max_seq_length,), dtype='int64')
embedding_2 = Embedding(len(word_index) + 1, embedding_dim, weights=[embedding_matrix], mask_zero=True, trainable=True)(input_layer_2)
lstm_hidden_2 = LSTM(512, return_sequences=False, dropout=0.3)(embedding_2)
# Alternative tried:
# lstm_hidden_2 = Bidirectional(LSTM(512, return_sequences=False, dropout=0.4))(embedding_2)

# Treat the two 512-d summaries as a 2-step sequence and run a third LSTM over it.
merge = concatenate([lstm_hidden_1, lstm_hidden_2])
merge = Reshape((2, 512))(merge)
print(merge.shape)
lstm_hidden_3 = LSTM(256, return_sequences=False)(merge)
# dropout = Dropout(0.2)(merge)
hidden_1 = Dense(128, activation='relu')(lstm_hidden_3)
dropout_1 = Dropout(0.2)(hidden_1)
hidden_2 = Dense(64, activation='relu')(dropout_1)
dropout_2 = Dropout(0.2)(hidden_2)
output = Dense(3, activation='softmax')(dropout_2)
model2 = Model(inputs=[input_layer_1, input_layer_2], outputs=output)
# Summarise model2 (this used to print the summary of `model` by mistake).
# summary() prints the table itself and returns None, so it is not wrapped in print().
model2.summary()
# `lr` is the deprecated spelling of `learning_rate`.
adam = Adam(learning_rate=1e-3)
model2.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])
# Stop after 8 epochs without val_loss improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', patience=8, verbose=1, restore_best_weights=True)

In [None]:
# Train the stacked-LSTM model. The history is stored under its own name; it
# used to be assigned to `model1_fit`, overwriting model1's training history.
# NOTE(review): the test arrays are used as validation data, and early stopping
# restores the weights with the best val_loss - so the model is selected on the
# same data it is later evaluated on. Consider a separate validation split.
model2_fit = model2.fit([X_left_train, X_right_train],Y_train, batch_size=64,epochs=30,
                      validation_data=([X_left_test, X_right_test],Y_test), callbacks=[early_stopping])

### 看confusion matrix

In [30]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

### model

In [31]:
# Integer labels of the whole test block, which follows the training rows in `data`
# (boundary derived from the split size instead of the hard-coded 5915).
n_train = len(train_data)
Y_label = data['label'].to_numpy()[n_train:]

# Accuracy, confusion matrix and per-class report on the full test set.
predictions = model.predict([X_left_test, X_right_test])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(Y_label, predictions))
print(confusion_matrix(Y_label, predictions))
print(classification_report(Y_label, predictions))

0.7417519908987485
[[201  41  82]
 [ 71 123 171]
 [ 53  36 980]]
              precision    recall  f1-score   support

           0       0.62      0.62      0.62       324
           1       0.61      0.34      0.44       365
           2       0.79      0.92      0.85      1069

    accuracy                           0.74      1758
   macro avg       0.68      0.62      0.64      1758
weighted avg       0.72      0.74      0.72      1758



In [32]:
# Integer labels of the laptop test rows: they directly follow the training rows
# (bounds derived from the split sizes instead of the hard-coded 5915 / 6553 / 638).
n_train = len(train_data)
n_laptop_test = len(laptop_test)
laptop_label = data['label'].to_numpy()[n_train:n_train + n_laptop_test]

# Accuracy, confusion matrix and per-class report on the laptop test rows.
predictions = model.predict([X_left_test[:n_laptop_test], X_right_test[:n_laptop_test]])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(laptop_label, predictions))
print(confusion_matrix(laptop_label, predictions))
print(classification_report(laptop_label, predictions))

0.6849529780564263
[[ 75  29  24]
 [ 41  68  60]
 [ 23  24 294]]
              precision    recall  f1-score   support

           0       0.54      0.59      0.56       128
           1       0.56      0.40      0.47       169
           2       0.78      0.86      0.82       341

    accuracy                           0.68       638
   macro avg       0.63      0.62      0.62       638
weighted avg       0.67      0.68      0.67       638



In [33]:
# Integer labels of the restaurant test rows: they follow the training rows and
# the laptop test rows (bounds derived instead of the hard-coded 6553 / 638).
n_train = len(train_data)
n_laptop_test = len(laptop_test)
restaurant_label = data['label'].to_numpy()[n_train + n_laptop_test:]

# Accuracy, confusion matrix and per-class report on the restaurant test rows.
predictions = model.predict([X_left_test[n_laptop_test:], X_right_test[n_laptop_test:]])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(restaurant_label, predictions))
print(confusion_matrix(restaurant_label, predictions))
print(classification_report(restaurant_label, predictions))

0.7741071428571429
[[126  12  58]
 [ 30  55 111]
 [ 30  12 686]]
              precision    recall  f1-score   support

           0       0.68      0.64      0.66       196
           1       0.70      0.28      0.40       196
           2       0.80      0.94      0.87       728

    accuracy                           0.77      1120
   macro avg       0.73      0.62      0.64      1120
weighted avg       0.76      0.77      0.75      1120



### model1

In [None]:
# Integer labels of the whole test block, which follows the training rows in `data`
# (boundary derived from the split size instead of the hard-coded 5915).
n_train = len(train_data)
Y_label = data['label'].to_numpy()[n_train:]

# Accuracy, confusion matrix and per-class report of model1 on the full test set.
predictions = model1.predict([X_left_test, X_right_test])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(Y_label, predictions))
print(confusion_matrix(Y_label, predictions))
print(classification_report(Y_label, predictions))

In [None]:
# Integer labels of the laptop test rows: they directly follow the training rows
# (bounds derived from the split sizes instead of the hard-coded 5915 / 6553 / 638).
n_train = len(train_data)
n_laptop_test = len(laptop_test)
laptop_label = data['label'].to_numpy()[n_train:n_train + n_laptop_test]

# Accuracy, confusion matrix and per-class report of model1 on the laptop test rows.
predictions = model1.predict([X_left_test[:n_laptop_test], X_right_test[:n_laptop_test]])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(laptop_label, predictions))
print(confusion_matrix(laptop_label, predictions))
print(classification_report(laptop_label, predictions))

In [None]:
# Integer labels of the restaurant test rows: they follow the training rows and
# the laptop test rows (bounds derived instead of the hard-coded 6553 / 638).
n_train = len(train_data)
n_laptop_test = len(laptop_test)
restaurant_label = data['label'].to_numpy()[n_train + n_laptop_test:]

# Accuracy, confusion matrix and per-class report of model1 on the restaurant test rows.
predictions = model1.predict([X_left_test[n_laptop_test:], X_right_test[n_laptop_test:]])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(restaurant_label, predictions))
print(confusion_matrix(restaurant_label, predictions))
print(classification_report(restaurant_label, predictions))

### model2

In [None]:
# Integer labels of the whole test block, which follows the training rows in `data`
# (boundary derived from the split size instead of the hard-coded 5915).
n_train = len(train_data)
Y_label = data['label'].to_numpy()[n_train:]

# Accuracy, confusion matrix and per-class report of model2 on the full test set.
predictions = model2.predict([X_left_test, X_right_test])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(Y_label, predictions))
print(confusion_matrix(Y_label, predictions))
print(classification_report(Y_label, predictions))

In [None]:
# Integer labels of the laptop test rows: they directly follow the training rows
# (bounds derived from the split sizes instead of the hard-coded 5915 / 6553 / 638).
n_train = len(train_data)
n_laptop_test = len(laptop_test)
laptop_label = data['label'].to_numpy()[n_train:n_train + n_laptop_test]

# Accuracy, confusion matrix and per-class report of model2 on the laptop test rows.
predictions = model2.predict([X_left_test[:n_laptop_test], X_right_test[:n_laptop_test]])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(laptop_label, predictions))
print(confusion_matrix(laptop_label, predictions))
print(classification_report(laptop_label, predictions))

In [None]:
# Integer labels of the restaurant test rows: they follow the training rows and
# the laptop test rows (bounds derived instead of the hard-coded 6553 / 638).
n_train = len(train_data)
n_laptop_test = len(laptop_test)
restaurant_label = data['label'].to_numpy()[n_train + n_laptop_test:]

# Accuracy, confusion matrix and per-class report of model2 on the restaurant test rows.
predictions = model2.predict([X_left_test[n_laptop_test:], X_right_test[n_laptop_test:]])  # (n_samples, 3) softmax outputs
predictions = np.argmax(predictions, axis=1)  # index of the largest output per sample = class id
print(accuracy_score(restaurant_label, predictions))
print(confusion_matrix(restaurant_label, predictions))
print(classification_report(restaurant_label, predictions))