In [1]:
import pandas as pd
import numpy as np
from tensorflow import keras
train_total = pd.read_csv("train.csv", header = None)

# Feature engineering

## Split data into train and development dataset

In [2]:
from sklearn.model_selection import KFold
import numpy as np
from sklearn import utils
shuffle_data = utils.shuffle(train_total)
# k fold cross validation
feature = shuffle_data.iloc[:,1:-1]
label = shuffle_data.iloc[:,-1:]
kf = KFold(n_splits=10,shuffle=True)
count = 1
for train_index, dev_index in kf.split(shuffle_data):
    train_feature, dev_feature = np.array(feature.iloc[train_index]), np.array(feature.iloc[dev_index])
    train_label, dev_label = np.array(label.iloc[train_index].values.ravel()), np.array(label.iloc[dev_index].values.ravel())

## Handling of unbalanced data by oversampling

In [3]:
from imblearn.over_sampling import SMOTE
train_feature, train_label = SMOTE().fit_resample(train_feature,train_label)

## reshape the format of data that can be used in deep learning model

In [4]:
def reshape_feature(feature):
    reshape_f = []
    for f in feature:
        reshape_f.append(f.reshape(16,60))
    reshape_f = np.array(reshape_f)
    return reshape_f
def generate_label(label):
    new_label = []
    for l in label:
        new_label.append(l-1)
    return np.array(new_label)

In [5]:
reshape_train_feature = reshape_feature(train_feature)
reshape_dev_feature = reshape_feature(dev_feature)
reshape_train_label = keras.utils.to_categorical(generate_label(train_label))
reshape_dev_label = keras.utils.to_categorical(generate_label(dev_label))

# Recurrent Neural Network(RNN)

In [17]:
from tensorflow.keras import initializers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dropout, LSTM, Dense,Activation,TimeDistributed,Masking,Concatenate
from tensorflow.keras.callbacks import EarlyStopping,CSVLogger,ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras import backend as K
from tensorflow.python.keras.layers import Layer
from tensorflow.python.keras import layers

In [18]:
class MeanOverTime(Layer):
    """Average the score of every step"""
    def __init__(self, **kwargs):
        self.supports_masking = True

        super(MeanOverTime, self).__init__(**kwargs)

    def call(self, inputs, mask=None):
        x = K.sum(inputs, axis=1)
        mask = K.cast(mask, K.floatx())
        mask = K.sum(mask, axis=1, keepdims=True)
        output = x / mask
        return output

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[2])

    def compute_mask(self, x, mask):
        return None

    def get_config(self):
        config = {}
        base_config = super(MeanOverTime, self).get_config()
        return dict(list(base_config.items()))

In [19]:
class Noise(Layer):
    """Add Guassian Noise"""

    def __init__(self, sigma, **kwargs):
        self.supports_masking = True
        self.sigma = sigma
        self.uses_learning_phase = True
        super(Noise, self).__init__(**kwargs)

    def compute_mask(self, input, mask):
        return mask

    def call(self, x, mask=None, training=None):
        m = K.not_equal(x, 0.)
        noise_x = x + K.random_normal(shape=K.shape(x),
                                      mean=0.,
                                      stddev=self.sigma)
        noise_x = noise_x * K.cast(m, K.floatx())

        return K.in_train_phase(noise_x, x, training=training)

    def get_config(self):
        config = {'sigma': self.sigma}
        base_config = super(Noise, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [20]:
def create_model(input_shape, num_class):

    init = initializers.Orthogonal(gain=0.001)
    sequence_input =Input(shape=input_shape)
    mask = Masking(mask_value=0.)(sequence_input)
    X = Noise(0.075)(mask)

    X = LSTM(100,recurrent_activation='sigmoid',return_sequences=True,implementation=2,recurrent_initializer=init)(X)
    X = Dropout(0.5)(X)
    X = LSTM(100,recurrent_activation='sigmoid',return_sequences=True,implementation=2,recurrent_initializer=init)(X)
    X = Dropout(0.5)(X)
    X = LSTM(100,recurrent_activation='sigmoid',return_sequences=True,implementation=2,recurrent_initializer=init)(X)
    X = Dropout(0.5)(X)
    X = TimeDistributed(Dense(num_class))(X)
    X = MeanOverTime()(X)
    X = Activation('softmax')(X)
    
    model=Model(sequence_input,X)
    return model

In [21]:
rnn = create_model((16,60), 49)

In [22]:
optimizer = Adam(lr=0.001, clipnorm=1.)

early_stop = EarlyStopping(monitor='val_accuracy', patience=15, mode='auto')
reduce_lr = ReduceLROnPlateau(monitor='val_accuracy', factor=0.1, patience=5, mode='auto', cooldown=3., verbose=1)
callbacks_list = [early_stop, reduce_lr]

In [23]:
rnn.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

In [24]:
rnn.fit(reshape_train_feature, reshape_train_label, validation_data=(reshape_dev_feature, reshape_dev_label), epochs=200,
          batch_size=256, callbacks=callbacks_list, verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200

Epoch 00037: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200

Epoch 00045: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200

Epoch 00052: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 53/200
Epoch 54/200
Epoch 55/200


<tensorflow.python.keras.callbacks.History at 0x1cb5779c688>

# Support Vector Machines(SVM)

In [25]:
from sklearn.preprocessing import StandardScaler
def generate_feature(data):
    scalar = StandardScaler()
    final_feature = []
    for row in data:
        final_feature.append(process_data(row))
    return scalar.fit_transform(np.array(final_feature))

def process_data(data):
    processed_data = []
    time_data = data.reshape(16,60)
    mean = []
    median = []
    std = []
    for i in range(60):
        mean.append(np.mean(time_data[:,i]))
        median.append(np.median(time_data[:,i]))
        std.append(np.std(time_data[:,i]))

    processed_data = mean + median + std
    return processed_data

In [26]:
svm_train_feature = generate_feature(train_feature)
svm_dev_feature = generate_feature(dev_feature)

In [27]:
def accuracy(predict, gold):
    acc = 0
    for pos in range(len(predict)):
        if(predict[pos] ==  gold[pos]):
            acc += 1
    accuracy = acc / len(predict)
    return accuracy

In [28]:
from sklearn import svm
SVM = svm.SVC()
SVM.fit(svm_train_feature, train_label)
predict = SVM.predict(svm_dev_feature)
opredict = SVM.predict(svm_train_feature)

In [29]:
print(accuracy(predict, dev_label))
print(accuracy(opredict, train_label))

0.3720682302771855
0.72770113632045


# Convolution Neural Network(CNN)

In [6]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
import torch
import torch.nn as nn
from transformers import AdamW
import random
import numpy as np
from numpy import argmax

In [7]:
batch_size = 64

cnn_train_label = train_label[:] - 1
train_tensor = torch.Tensor(reshape_train_feature)
train_label_tensor = torch.LongTensor(np.array(cnn_train_label))
train_data = TensorDataset(train_tensor,train_label_tensor)
train_loader = DataLoader(train_data, sampler=RandomSampler(train_data), batch_size=batch_size)
 
cnn_dev_label = dev_label[:] - 1
dev_tensor = torch.Tensor(reshape_dev_feature)
dev_label_tensor = torch.LongTensor(np.array(cnn_dev_label))
dev_t_data = TensorDataset(dev_tensor, dev_label_tensor)
dev_loader = DataLoader(dev_t_data, shuffle=False, batch_size=batch_size)


## Construction of network

In [8]:
class Conv_MLP(nn.Module):
    def __init__(self, input_dim, output_dim, kernel_size, dropout):
        super().__init__()
        self.conv1 = nn.Conv1d(input_dim, 300, kernel_size=kernel_size)
        self.maxpool1 = nn.MaxPool1d(kernel_size=14)
        self.relu = nn.ReLU()
        self.layer = nn.Linear(300, 100)
        self.predictor = nn.Linear(100, output_dim)
        self.dropout = nn.Dropout(dropout)
    def forward(self, data):
        data = data.permute(0, 2, 1)
        conv_layer = self.conv1(data)
        out = self.relu(conv_layer)
        out = self.maxpool1(out).squeeze(2)
        out = self.layer(out)
        out = self.relu(out)
        out = self.dropout(out)
        result = self.predictor(out)
        return result

In [9]:
input_dim = 60
output_dim = 49
kernel_size = 3
dropout_rate = 0.1
cnn = Conv_MLP(input_dim, output_dim,kernel_size, dropout_rate)
cnn.cuda()

Conv_MLP(
  (conv1): Conv1d(60, 300, kernel_size=(3,), stride=(1,))
  (maxpool1): MaxPool1d(kernel_size=14, stride=14, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (layer): Linear(in_features=300, out_features=100, bias=True)
  (predictor): Linear(in_features=100, out_features=49, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

In [10]:
# optimizer 
learning_rate = 1e-3
optimizer = AdamW(cnn.parameters(), lr = learning_rate, eps=1e-8)
# loss
criterion = nn.CrossEntropyLoss()
criterion.cuda()

CrossEntropyLoss()

In [11]:
def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

In [12]:
# Train the Model
epochs = 100
seed_val = 36
best_valid_score = 0

random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)

training_stats = [] # used to store the training information


for epoch_i in range(epochs):
    
    print("")
    print("======== Epoch {:} / {:} ========".format(epoch_i + 1, epochs))
    print("Training...")
    
    total_train_loss = 0
    
    cnn.train()
    
    for step, batch in enumerate(train_loader):
        
        if (step + 1) % 10 == 0 and not step == 0:
            print("Batch {} of {}".format(step+1, len(train_loader)))
        
        batch_input_data = batch[0].cuda()
        batch_labels = batch[1].cuda()


        optimizer.zero_grad()
        logits = cnn(batch_input_data)
        loss = criterion(logits, batch_labels)
        total_train_loss += loss.item()
        loss.backward()
        
        optimizer.step()
        
    
    avg_train_loss = total_train_loss / len(train_loader)
    
    print("")
    print(" Average Training Loss is {:2f}".format(avg_train_loss))
    
    # Now perform validation
    
    print("")
    print("Running Validation...")
    
    cnn.eval()
    
    total_eval_accuracy = 0
    total_eval_loss = 0
    
    for batch in dev_loader:
        dev_input_data = batch[0].cuda()
        dev_labels = batch[1].cuda()
        
        with torch.no_grad():
            logits = cnn(dev_input_data)
            
            loss = criterion(logits, dev_labels)

        total_eval_loss += loss.item()
        preds = logits.detach().cpu().numpy()
        
        label_ids = dev_labels.cpu().numpy()
        
        total_eval_accuracy += flat_accuracy(preds, label_ids)

    avg_val_loss = total_eval_loss / len(dev_loader)
    avg_val_accuracy = total_eval_accuracy / len(dev_loader)
    training_stats.append(avg_val_loss)
    print("Validation loss :{}".format(avg_val_loss))
    print("Accuracy is {}".format(avg_val_accuracy))
    
    if avg_val_accuracy > best_valid_score:
        best_valid_score = avg_val_accuracy
        torch.save(cnn.state_dict(), 'model.pt')
    
    
print("")
print("Training Complete!...")
print("best validation score", best_valid_score)


Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 3.511074

Running Validation...
Validation loss :3.2817334175109862
Accuracy is 0.15590277777777778

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 41

Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 1.540021

Running Validation...
Validation loss :3.344744396209717
Accuracy is 0.2873015873015873

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 1.484202

R

Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 1.207837

Running Validation...
Validation loss :3.6811784426371257
Accuracy is 0.3296626984126984

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Bat

Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 1.080103

Running Validation...
Validation loss :3.5988982200622557
Accuracy is 0.3697420634920635

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 

Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 1.023679

Running Validation...
Validation loss :4.050914510091146
Accuracy is 0.3697420634920635

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 0.958569

Running Validation...
Validation loss :4.19981349309

Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 0.877722

Running Validation...
Validation loss :4.024225775400797
Accuracy is 0.3474206349206349

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batc

Batch 410 of 412

 Average Training Loss is 0.874347

Running Validation...
Validation loss :4.187334966659546
Accuracy is 0.35406746031746034

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 0.942872

Running Validation...
Validation loss :4.4152694384257
Accuracy is 0.367609126984127

Training...
Batch 10 of 412
Batch

Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 0.915341

Running Validation...
Validation loss :4.1399356047312414
Accuracy is 0.35615079365079366

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Ba

Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 0.817860

Running Validation...
Validation loss :5.0459115664164225
Accuracy is 0.3535714285714286

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batc

Batch 410 of 412

 Average Training Loss is 0.798525

Running Validation...
Validation loss :5.3887608528137205
Accuracy is 0.36929563492063494

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 0.841189

Running Validation...
Validation loss :5.617785390218099
Accuracy is 0.35094246031746035

Training...
Batch 10 of 412


Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Batch 320 of 412
Batch 330 of 412
Batch 340 of 412
Batch 350 of 412
Batch 360 of 412
Batch 370 of 412
Batch 380 of 412
Batch 390 of 412
Batch 400 of 412
Batch 410 of 412

 Average Training Loss is 0.794793

Running Validation...
Validation loss :5.084347279866536
Accuracy is 0.33690476190476193

Training...
Batch 10 of 412
Batch 20 of 412
Batch 30 of 412
Batch 40 of 412
Batch 50 of 412
Batch 60 of 412
Batch 70 of 412
Batch 80 of 412
Batch 90 of 412
Batch 100 of 412
Batch 110 of 412
Batch 120 of 412
Batch 130 of 412
Batch 140 of 412
Batch 150 of 412
Batch 160 of 412
Batch 170 of 412
Batch 180 of 412
Batch 190 of 412
Batch 200 of 412
Batch 210 of 412
Batch 220 of 412
Batch 230 of 412
Batch 240 of 412
Batch 250 of 412
Batch 260 of 412
Batch 270 of 412
Batch 280 of 412
Batch 290 of 412
Batch 300 of 412
Batch 310 of 412
Bat

In [13]:
predictions = []
model = Conv_MLP(input_dim, output_dim,kernel_size, dropout_rate)
model.load_state_dict(torch.load('model.pt'))
model.eval()

# test_nor_data = scalar.fit_transform(test_data)

# test_nor_data = test_nor_data.reshape((-1, 16 ,60))

# test_tensor = torch.Tensor(test_nor_data)


# test_t_data = TensorDataset(test_tensor)

# test_loader = DataLoader(test_t_data, shuffle=False, batch_size=2959)

# mylist = []
# for ele in test_loader:
#     output = model(ele[0])
#     preds = output.detach().cpu().numpy()
#     mylist.append(np.argmax(preds, axis=1).flatten())

Conv_MLP(
  (conv1): Conv1d(60, 300, kernel_size=(3,), stride=(1,))
  (maxpool1): MaxPool1d(kernel_size=14, stride=14, padding=0, dilation=1, ceil_mode=False)
  (relu): ReLU()
  (layer): Linear(in_features=300, out_features=100, bias=True)
  (predictor): Linear(in_features=100, out_features=49, bias=True)
  (dropout): Dropout(p=0.1, inplace=False)
)

# Using bagging to get the final result

In [47]:
models = [rnn, SVM, cnn]
model_name = ["rnn", "svm", "cnn"]
model_name = np.array(model_name)

In [49]:
test = pd.read_csv('test.csv', header=None)
test_feature = np.array(test.iloc[:,1:])

In [50]:
test_feature.shape

(2959, 960)

In [69]:
def final_prediction(models, model_name, test_feature):
    reshape_test_feature = reshape_feature(test_feature)
    all_predictions = []
    for i in range(len(models)):
        if(model_name[i] == "rnn"):
            result = models[i].predict(reshape_test_feature)
            final_result = []
            for i in result:
                final_result.append(i.tolist().index(max(i))+1)
            all_predictions.append(final_result)
        elif(model_name[i] == "svm"):
            svm_test_feature = generate_feature(test_feature)
            result = models[i].predict(svm_test_feature)
            all_predictions.append(result)
        elif(model_name[i] == "cnn"):
            model = Conv_MLP(input_dim, output_dim,kernel_size, dropout_rate)
            model.load_state_dict(torch.load('model.pt'))
            test_tensor = torch.Tensor(reshape_test_feature)
            test_t_data = TensorDataset(test_tensor)
            test_loader = DataLoader(test_t_data, shuffle=False, batch_size=2959)
            final_result = []
            for ele in test_loader:
                output = model(ele[0])
                preds = output.detach().cpu().numpy()
                final_result.append(np.argmax(preds, axis=1).flatten())
            print(final_result)
            all_predictions.append(final_result[0])
    prediction = bagging(all_predictions)
    return prediction

def bagging(labels):
    final_predict = []
    for i in range(len(labels[0])):
        one_row = []
        for l in labels:
            one_row.append(l[i])
        maxlabel = max(one_row, key = one_row.count)
        final_predict.append(maxlabel)
    return final_predict

In [None]:
prediction = final_prediction(models, model_name, test_feature)

In [None]:
import csv
def out_file(result):
    out_f = open("predict_f.csv", 'w', newline='')
    writer = csv.writer(out_f)
    head = ["ID", "Category"]
    writer.writerow(head)
    count = 0
    for i in result:
        writer.writerow([count, i+1])
        count += 1

In [None]:
out_file(prediction)