In [104]:
import pandas as pd
import numpy as np
import random

import tensorflow as tf

from tensorflow.keras.layers import Dense, InputLayer, GlobalMaxPool1D, Dropout, Conv1D, MaxPool1D, Flatten, Embedding
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import relu, sigmoid
from tensorflow.keras.optimizers import Adam, SGD

from sklearn.model_selection import StratifiedKFold
from sklearn import metrics
from sklearn.model_selection import train_test_split

In [109]:
def configure_nn(data,
                layers=None, 
                dropout_rate=0,
                kernel_size=10,
                stride=10,
                pool_size=2,
                optimizer='Adam',
                loss='binary_crossentropy',
                 kernel_initializer='lecun_normal',
                 kernel_regularizer=tf.keras.regularizers.L2(0.01)
                ):
    """ Layers argument shape:
    [[number of nodes, activate function], 
    [number of nodes, activate function],
    ...]
    
    """
    input_len = data.shape[1] - 1 
    model = Sequential()
    model.add(InputLayer(input_len,))
    model.add(Embedding(10000, 20))
    model.add(Conv1D(32, 3, padding='same', activation='relu'))
    model.add(MaxPool1D(3))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPool1D(pool_size=3))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(GlobalMaxPool1D())
    model.add(Flatten())
    
    if dropout_rate > 0:
        model.add(Dropout(dropout_rate))
    
    if layers != None:
        for node in layers[1:]:
            model.add(Dense(node[0], activation=node[1], kernel_initializer=kernel_initializer, 
                            kernel_regularizer=kernel_regularizer))
    model.add(Dense(10, activation='sigmoid'))
    model.add(Dense(1, activation='sigmoid', kernel_initializer=kernel_initializer, 
                    kernel_regularizer=kernel_regularizer))
    
    model.compile(loss=loss, 
                 optimizer=optimizer,
                 metrics=['accuracy'])
    
    return model

In [110]:
model = configure_nn(data)

In [111]:
model.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_18 (Embedding)     (None, 38, 20)            200000    
_________________________________________________________________
conv1d_25 (Conv1D)           (None, 38, 32)            1952      
_________________________________________________________________
max_pooling1d_22 (MaxPooling (None, 12, 32)            0         
_________________________________________________________________
conv1d_26 (Conv1D)           (None, 10, 64)            6208      
_________________________________________________________________
max_pooling1d_23 (MaxPooling (None, 3, 64)             0         
_________________________________________________________________
conv1d_27 (Conv1D)           (None, 1, 128)            24704     
_________________________________________________________________
global_max_pooling1d_3 (Glob (None, 128)             

In [79]:
data = pd.read_csv('../tokenized_data.csv')

In [59]:
pd.re

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,sentiment
0,0,0,0,0,0,1189,369,3135,38,325,82,1037,553,2424,9704,1
1,0,0,0,0,0,0,0,22,293,3,12,63,1,810,561,1
2,0,0,0,0,0,0,0,0,23,313,3,13,333,774,6227,0
3,0,0,0,0,0,0,0,0,0,0,0,379,293,1167,3643,1
4,0,0,0,0,0,0,0,0,0,0,8,72,18,4949,687,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159995,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
159996,0,0,0,0,1,2178,27,794,263,270,9,36,52,3523,624,0
159997,0,0,0,0,0,0,0,0,901,297,137,318,5,781,31,0
159998,0,1251,475,1010,1626,2231,348,1051,558,202,98,339,4,6843,50,0


In [88]:
features = data.columns.tolist()
features.remove('sentiment')

In [89]:
X_train, X_test, y_train, y_test = train_test_split(data[features], data['sentiment'], test_size=0.33, random_state=42)

In [100]:
model.fit(X_train, y_train, epochs=100, verbose=1,
         validation_data=(X_test, y_test))

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
 514/3350 [===>..........................] - ETA: 19s - loss: 0.5308 - accuracy: 0.7037

KeyboardInterrupt: 