# Baseline

In [1]:
import warnings

# Globally silence every warning category for the rest of the session so that
# library deprecation notices do not clutter the cell outputs.
warnings.simplefilter('ignore')

In [2]:
import sys
# Put the parent directory first on the module search path. Presumably this is
# what makes the `senti_analysis` package imported further down resolvable when
# the notebook is run from its own folder — confirm against the repo layout.
sys.path.insert(0, '..')

In [3]:
import os
import json
import jieba
import gensim
import collections
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf

In [4]:
%matplotlib inline

In [5]:
# Eager execution is already the default on TensorFlow 2.x, where the top-level
# `tf.enable_eager_execution` alias no longer exists. Only switch it on when it
# is not active yet, and do so through the compat.v1 endpoint, which is
# available on late 1.x releases as well as 2.x.
if not tf.executing_eagerly():
    tf.compat.v1.enable_eager_execution()

In [26]:
from senti_analysis import config
from senti_analysis import preprocess
from senti_analysis.baseline import train_data

In [7]:
# Load the training and validation splits from the project's baseline module.
# The call also prints the shape of each returned array (see the output below).
x_train, y_train, x_val, y_val = train_data()

x_train shape:(105000, 1500), y_train shape: (105000,)
x_val shape:(15000, 1500), y_val shape: (15000,)


In [41]:
# Load the project's tokenizer; its `word_index` mapping is passed to the
# embedding-matrix builder in a later cell.
tokenizer = preprocess.load_tokenizer()

In [29]:
# Load a previously saved gensim Word2Vec model. The path is relative, so it is
# resolved against the kernel's current working directory.
# NOTE(review): gensim's load is pickle-based — only load files from a trusted source.
wv_model = gensim.models.Word2Vec.load('w2v.model')

In [31]:
# Build the embedding weight matrix from the tokenizer vocabulary and the
# word2vec model. Presumably row i holds the vector of the word whose tokenizer
# index is i — confirm in preprocess.initializer_embedding_matrix.
embedding_matrix = preprocess.initializer_embedding_matrix(tokenizer.word_index, wv_model)

In [46]:
from tensorflow.keras.initializers import Constant

In [49]:
def get_model(embedding_matrix, name='baseline_model', num_class=4, max_sequence_length=None):
    """
    Build the baseline classifier: frozen pre-trained embeddings -> two stacked
    GRU layers -> softmax output.

    :param embedding_matrix: 2-D array of shape (vocab_size, embedding_dim) used as
        the fixed (non-trainable) weights of the embedding layer.
    :param name: name given to the Keras model.
    :param num_class: number of output classes; defaults to 4, the value that was
        previously hard-coded.
    :param max_sequence_length: length of the (padded) input sequences; defaults
        to ``config.MAX_SEQUENCE_LENGTH`` when left as None.
    :return: an uncompiled ``tf.keras.Model``
    :raises ValueError: if ``embedding_matrix`` is not 2-D or ``num_class`` < 1.
    """
    if max_sequence_length is None:
        max_sequence_length = config.MAX_SEQUENCE_LENGTH

    if len(embedding_matrix.shape) != 2:
        raise ValueError('embedding_matrix must be 2-D (vocab_size, embedding_dim), '
                         'got shape {}'.format(tuple(embedding_matrix.shape)))
    if num_class < 1:
        raise ValueError('num_class must be a positive integer, got {}'.format(num_class))

    vocab_size, embedding_dim = embedding_matrix.shape

    inputs = tf.keras.layers.Input(shape=(max_sequence_length,))
    # Reference the initializer through `tf` so the function does not depend on
    # a separate `from ... import Constant` cell having been executed first.
    embedding = tf.keras.layers.Embedding(
        vocab_size, embedding_dim,
        embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
        input_length=max_sequence_length,
        trainable=False)(inputs)

    # NOTE(review): 'relu' replaces the GRU's default bounded 'tanh' activation;
    # on long sequences an unbounded recurrent activation can be numerically
    # unstable — confirm this choice is intentional.
    # The first GRU returns the full sequence so the second GRU can consume it.
    hidden = tf.keras.layers.GRU(64, activation='relu', return_sequences=True)(embedding)
    hidden = tf.keras.layers.GRU(32, activation='relu')(hidden)
    outputs = tf.keras.layers.Dense(num_class, activation='softmax')(hidden)

    model = tf.keras.Model(inputs=inputs, outputs=outputs, name=name)

    return model


In [50]:
# Instantiate the baseline network, seeding its embedding layer with the
# pre-trained matrix built above.
model = get_model(embedding_matrix)

In [51]:
# Print the layer-by-layer summary to check output shapes and the split between
# trainable and frozen (embedding) parameters.
model.summary()

Model: "baseline_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_9 (InputLayer)         [(None, 1500)]            0         
_________________________________________________________________
embedding_8 (Embedding)      (None, 1500, 100)         25315500  
_________________________________________________________________
gru_7 (GRU)                  (None, 1500, 64)          31680     
_________________________________________________________________
gru_8 (GRU)                  (None, 32)                9312      
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 132       
Total params: 25,356,624
Trainable params: 41,124
Non-trainable params: 25,315,500
_________________________________________________________________


In [52]:
# The sparse variant of categorical cross-entropy takes integer class ids
# rather than one-hot vectors, which fits the 1-D label arrays whose shapes
# were printed when the data was loaded.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['acc'])

In [55]:
# Training call, currently disabled. Uncomment to fit on the training split;
# the validation split is then evaluated at the end of every epoch.
# history = model.fit(x_train, y_train, batch_size=32, epochs=10, verbose=1,
#                         validation_data=(x_val, y_val))