In [24]:
import csv
import numpy
import os
import pandas as pd
from numpy import mean
from numpy import std
from numpy import dstack
from pandas import read_csv
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import TimeDistributed
# from tensorflow.keras.layers.convolutional import Conv1D
# from tensorflow.keras.layers.convolutional import MaxPooling1D
# from tensorflow.keras.layers import ConvLSTM2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler

In [25]:
train_original = pd.read_csv("train.csv", header = None)
test = pd.read_csv("test.csv", header = None)
# Splitting training data into train and validation set
X = train_original.iloc[:, :961]
y = train_original.iloc[:,961] # also the class labels of the training set (unmodified)

In [18]:
def loadTrainingData():    
    reader = csv.reader(open("train.csv", "r"), delimiter=",")
    dataList = list(reader)
    OriginalDataTable = numpy.array(dataList).astype("float")
    
    XdataLoaded = loadDataInto3DArray(OriginalDataTable)
    YdataLoaded = numpy.transpose(OriginalDataTable[:, 961])
    
    X_train, X_test, y_train, y_test = train_test_split(
    XdataLoaded, YdataLoaded, test_size=0.33, random_state=42)
    
    # zero-offset class values
    y_train = y_train - 1
    y_test = y_test - 1
    # one hot encode y
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
    
    #print(y_train)
        
    return X_train, X_test, y_train, y_test

In [26]:
def randomOversampling():
    ros = RandomOverSampler(random_state = 42)
    X_ros, y_ros = ros.fit_resample(X,y)
    train_X_ros, validation_X_ros, train_y_ros, validation_y_ros = train_test_split(X_ros, y_ros, test_size = 0.2, random_state = 1, stratify = y_ros)

    return train_X_ros, validation_X_ros, train_y_ros, validation_y_ros
    

In [20]:
def loadPredictionData():    
    reader = csv.reader(open("test.csv", "r"), delimiter=",")
    dataList = list(reader)
    OriginalDataTable = numpy.array(dataList).astype("float")
    
    XdataLoaded = loadDataInto3DArray(OriginalDataTable)
    
    #print(XdataLoaded.shape)
        
    return XdataLoaded

In [21]:
def loadDataInto3DArray(OriginalDataTable):
    XdataLoaded = list()
    for x in range(0, 60):
        Xarray = OriginalDataTable[:,(1+x):(902+x):60]
        XdataLoaded.append(Xarray)
    
    XdataLoaded = dstack(XdataLoaded)
    
    return XdataLoaded
    

In [22]:
# fit and evaluate a model
def evaluate_model(trainX, testX, trainy,  testy):
    verbose, epochs, batch_size = 0, 15, 64
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    model = Sequential()
    model.add(LSTM(300, return_sequences=True, input_shape=(n_timesteps,n_features)))
    model.add(LSTM(300))
    model.add(Dropout(0.5))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    model.save('lstm_model.h5')
    return accuracy

In [53]:
def evaluate_model_WithConvLSTM(trainX, testX, trainy,  testy):
    # define model
    verbose, epochs, batch_size = 0, 25, 64
    n_timesteps, n_features, n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]
    # reshape into subsequences (samples, time steps, rows, cols, channels)
    n_steps, n_length = 2, 8
    trainX = trainX.reshape((trainX.shape[0], n_steps, 1, n_length, n_features))
    testX = testX.reshape((testX.shape[0], n_steps, 1, n_length, n_features))
    # define model
    model = Sequential()
    model.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='relu', input_shape=(n_steps, 1, n_length, n_features)))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    # fit network
    model.fit(trainX, trainy, epochs=epochs, batch_size=batch_size, verbose=verbose)
    # evaluate model
    _, accuracy = model.evaluate(testX, testy, batch_size=batch_size, verbose=0)
    model.save('lstm_modelConcLSTM.h5')
    return accuracy

In [9]:
def makePredictionsWithLSTM():
    X = loadPredictionData()
    model = load_model('lstm_model.h5')
    # make predictions
    yHat = model.predict(X, verbose=0)
    yHat = numpy.argmax(yHat, axis=1)
    yHat = yHat + 1
    print(yHat.shape)
    numpy.savetxt("predictionsWithLSTM.csv", numpy.dstack((numpy.arange(0, yHat.size),yHat))[0],"%d,%d",header="Id,Category", comments='')

In [55]:
def makePredictionsWithConvLSTM():
    X = loadPredictionData()
    model = load_model('lstm_model.h5')
    # make predictions
    yHat = model.predict(X, verbose=0)
    yHat = numpy.argmax(yHat, axis=1)
    yHat = yHat + 1
    print(yHat.shape)
    numpy.savetxt("predictionsWithConvLSTM.csv", numpy.dstack((numpy.arange(0, yHat.size),yHat))[0],"%d,%d",header="Id,Category", comments='')

In [14]:
def startTheClassification():
    X_train, X_test, y_train, y_test = randomOversampling()
    score = evaluate_model(X_train, X_test, y_train, y_test)
    print(score)

In [57]:
def startTheClassificationWithConvLSTM():
    X_train, X_test, y_train, y_test = loadTrainingData()
    score = evaluate_model_WithConvLSTM(X_train, X_test, y_train, y_test)
    print(score)

In [27]:
startTheClassification()

IndexError: tuple index out of range

In [58]:
startTheClassificationWithConvLSTM()

(6289, 16, 60)
(3099, 16, 60)
(6289, 49)
(3099, 49)
0.2491126169876423


In [None]:
makePredictionsWithLSTM()

In [None]:
makePredictionsWithConvLSTM()