load all needed lib

In [76]:
import pandas as pd
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
import os
from sklearn.model_selection import train_test_split

In [77]:
# load a list of files into a 3D array of [samples, timesteps, features]
def load_file(filepath):
	dataframe = pd.read_csv(filepath)
	return dataframe.values

In [78]:
dirs = os.listdir('./ready')
d = dict(enumerate(dirs))
d_swap = {v: k for k, v in d.items()}
d_swap

{'hot': 0, 'I': 1, 'like': 2, 'weather': 3, 'you': 4}

In [79]:
x = []
y = []
dirs = os.listdir('./ready')
maxtimestep = 0

for dir in dirs:
    files = os.listdir('./ready/'+dir)
    for file in files:
        loaded = load_file('./ready/'+dir+'/'+file)
        # get the maximum time step shape for padding
        if loaded.shape[0]>maxtimestep:
            maxtimestep = loaded.shape[0]
        
        # remove the first(time index) and last column(nan)
        loaded = loaded[:,1:-1]
        x.append(loaded)
        y.append(d_swap[dir])

for i in range(len(x)):
    if len(x[i]) < maxtimestep:
        difference = maxtimestep - len(x[i])
        d1 = difference//2
        d2 = difference - d1
        x[i] = np.concatenate([np.zeros((d1,30)),x[i], np.zeros((d2,30))], axis=0)
        
x = np.array(x)
y = np.array(y)
print(x.shape, y.shape)



(370, 115, 30) (370,)


In [80]:
# load data set and split into training and testing inputs (X) and outputs (y)

# nornmalize data
from sklearn.preprocessing import StandardScaler
scalar = StandardScaler()
tran_x = []
for i in range(len(x)):
    scalar = scalar.fit(x[i])
    tran_x.append(scalar.transform(x[i]))
x = np.array(tran_x)

trainX, testX, trainy, testy = train_test_split(x,y, train_size=0.8, random_state=1111)
trainy = keras.utils.to_categorical(trainy)
testy = keras.utils.to_categorical(testy)
print(trainX.shape, testX.shape, trainy.shape, testy.shape)

(296, 115, 30) (74, 115, 30) (296, 5) (74, 5)


In [81]:
def train_model5(trainX, trainy, testX, testy):
	verbose, epochs, batch_size = 1, 10, 32
	n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
	model = Sequential()
	model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps,n_features)))
	# model.add(keras.layers.LSTM(64, input_shape=(n_timesteps,n_features)))
	model.add(Dropout(0.5))
	model.add(MaxPooling1D(pool_size=3))
	model.add(Flatten())
	model.add(Dense(100, activation='relu'))
	model.add(Dense(n_outputs, activation='softmax'))
	model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

	# Define the early stopping callback
	early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='min')
	# fit network
	model.fit(trainX, trainy, epochs=epochs, validation_split=0.2, batch_size=batch_size, verbose=verbose, callbacks=[early_stop])
	return model

In [82]:
model = train_model5(trainX,trainy,testX,testy)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [83]:
model.summary()
model.save('./')

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_6 (Conv1D)           (None, 113, 64)           5824      
                                                                 
 dropout_6 (Dropout)         (None, 113, 64)           0         
                                                                 
 max_pooling1d_6 (MaxPooling  (None, 37, 64)           0         
 1D)                                                             
                                                                 
 flatten_6 (Flatten)         (None, 2368)              0         
                                                                 
 dense_12 (Dense)            (None, 100)               236900    
                                                                 
 dense_13 (Dense)            (None, 5)                 505       
                                                      



INFO:tensorflow:Assets written to: ./assets


INFO:tensorflow:Assets written to: ./assets


In [84]:
# evaluating test set
print(testX.shape,testy.shape)
model.evaluate(testX, testy, batch_size=32)

(74, 115, 30) (74, 5)


[0.009110916405916214, 1.0]