In [1]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv1D, MaxPooling1D, LSTM
from keras.utils import np_utils
from keras.preprocessing import sequence
from keras import optimizers, metrics, regularizers
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


In [2]:
# Read the raw inputs from the working directory: the two feature arrays
# (train first, then test) and the CSV holding the training labels.
train_dataX, test_dataX = [np.load(path) for path in ('train_data.npy', 'test_data.npy')]
train_datay = pd.read_csv('train_labels.csv')

In [3]:
# Pre-processing
# Drop the 'Id' column so that only the label data remains. The keyword form
# replaces the positional axis argument (`drop('Id', 1)`), which newer pandas
# releases no longer accept.
train_datay = train_datay.drop(columns='Id')

# View every sample as a 210 x 210 matrix.
train_dataX = train_dataX.reshape(-1, 210, 210)
test_dataX = test_dataX.reshape(-1, 210, 210)

# One-hot encode the labels into two classes.
Y_train = np_utils.to_categorical(train_datay, 2)

# Splitting data for validation
# A fixed seed keeps both splits (and every score computed from them)
# identical across kernel restarts.
SPLIT_SEED = 42

# First split: 90% of the samples for fitting, 10% held back.
X_train, X_test, y_train, y_test = train_test_split(
    train_dataX, Y_train, test_size=0.10, shuffle=True, random_state=SPLIT_SEED)

# Second split: the held-back 10% is divided 80/20 into X_test / X_valid.
X_test, X_valid, y_test, y_valid = train_test_split(
    X_test, y_test, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

# Drop the references to the full-size originals; only the splits are used from here on.
del train_dataX, train_datay

In [7]:
# Stacked LSTM classifier
# Two recurrent layers that both return their full output sequence, flattened
# into a single vector and fed to a 2-way softmax. The first layer declares the
# per-sample input shape (210, 210).
model = Sequential([
    LSTM(210, dropout=0.05, recurrent_dropout=0.35,
         return_sequences=True, input_shape=(210, 210)),
    LSTM(64, dropout=0.05, recurrent_dropout=0.35,
         return_sequences=True),
    Flatten(),
    Dense(2, activation='softmax'),
])

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 4)                 3440      
_________________________________________________________________
dense_1 (Dense)              (None, 2)                 10        
Total params: 3,450
Trainable params: 3,450
Non-trainable params: 0
_________________________________________________________________


In [8]:
# Compile the model
# Both optimizers are instantiated with a learning rate of 0.01, but only Adam
# is handed to compile(); `sgd` stays available for experiments.
sgd = optimizers.SGD(lr=0.01)
adam = optimizers.Adam(lr=0.01)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [10]:
# Train for 10 epochs in mini-batches of 500 samples, scoring on
# (X_test, y_test) after every epoch.
# `epochs` is the Keras 2 name of this argument. The former `nb_epoch`
# spelling is deprecated and is not accepted by later releases.
model.fit(X_train, y_train,
          batch_size=500, epochs=10, verbose=1,
          validation_data=(X_test, y_test))

  This is separate from the ipykernel package so we can avoid doing imports until


Train on 3599 samples, validate on 320 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2451b6e8630>

In [11]:
from sklearn.metrics import roc_auc_score

# Class probabilities for the split that was also passed to fit() as
# validation_data. `predict` already returns the softmax outputs;
# `predict_proba` was a deprecated alias for it that newer Keras releases
# have removed.
Y_test_predClass = model.predict(X_test)

# y_test is one-hot encoded (two columns), so both arguments are 2-D here.
# NOTE(review): with 2-D targets scikit-learn averages the per-column AUCs —
# confirm this is the intended score.
roc = roc_auc_score(y_test, Y_test_predClass)
print("ROC:" + str(round(roc,4)))

[[0.80634665 0.19365336]
 [0.8092882  0.19071181]
 [0.80880314 0.19119683]
 ...
 [0.80720437 0.19279568]
 [0.80846393 0.19153605]
 [0.80845386 0.19154611]]
(80,)


In [None]:
# Run the trained network on the unlabeled test inputs and show the result.
Y_test_pred = model.predict(x=test_dataX)
print(Y_test_pred)

In [12]:
# Write the submission file: the row index is exported as the 'Id' column and
# the first prediction column as 'Label'.
# The slice must be `[:, 0]` (all rows, first column); the previous `[;,0]`
# spelling was a syntax error, so this cell could not run.
# NOTE(review): column 0 is the model output for class index 0 — confirm that
# this is the column the submission is expected to contain.
pd.DataFrame(Y_test_pred[:, 0]).to_csv('submission30.csv', header=["Label"], index_label='Id')

# Discard the trained model once its predictions have been saved.
del model

**Using an Embedding layer to embed the data**

In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import LSTM
from keras.layers.embeddings import Embedding
from keras.utils import np_utils
from keras.preprocessing import sequence
from keras import optimizers, metrics, regularizers
from sklearn.metrics import accuracy_score

Using TensorFlow backend.


In [29]:
# Read the raw inputs from the working directory: the two feature arrays
# (train first, then test) and the CSV holding the training labels.
train_dataX, test_dataX = [np.load(path) for path in ('train_data.npy', 'test_data.npy')]
train_datay = pd.read_csv('train_labels.csv')

In [30]:
# Pre-processing
# Drop the 'Id' column so that only the label data remains. The keyword form
# replaces the positional axis argument (`drop('Id', 1)`), which newer pandas
# releases no longer accept.
train_datay = train_datay.drop(columns='Id')

# Flatten every sample into one sequence of 44100 values (210 * 210).
train_dataX = train_dataX.reshape(-1, 44100)
test_dataX = test_dataX.reshape(-1, 44100)

# One-hot encode the labels into two classes.
Y_train = np_utils.to_categorical(train_datay, 2)
print(train_dataX.shape)

# Splitting data for validation
# A fixed seed keeps both splits (and every score computed from them)
# identical across kernel restarts.
SPLIT_SEED = 42

# First split: 90% of the samples for fitting, 10% held back.
X_train, X_test, y_train, y_test = train_test_split(
    train_dataX, Y_train, test_size=0.10, shuffle=True, random_state=SPLIT_SEED)

# Second split: the held-back 10% is divided 80/20 into X_test / X_valid.
X_test, X_valid, y_test, y_valid = train_test_split(
    X_test, y_test, test_size=0.2, shuffle=True, random_state=SPLIT_SEED)

# Drop the references to the full-size originals; only the splits are used from here on.
del train_dataX, train_datay

(3999, 44100)


In [34]:
# Embedding + LSTM classifier
# Each of the 44100 positions of a flattened sample is looked up in an
# embedding table with 10 rows of 4 values, the resulting sequence is
# summarized by a single LSTM, and a small dense head produces the 2-way
# softmax.
# NOTE(review): an embedding table with 10 rows presumably requires the inputs
# to be integer ids in [0, 10) — confirm the loaded data satisfies this.
# NOTE(review): the LSTM has to step through 44100 timesteps per sample, which
# is likely to make each epoch very slow — confirm this is acceptable.
model = Sequential([
    Embedding(10, 4, input_length=44100),
    LSTM(64, dropout=0.1, recurrent_dropout=0.1),
    Dense(16, activation='relu'),
    Dropout(0.25),
    Dense(2, activation='softmax'),
])

model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_9 (Embedding)      (None, 44100, 4)          40        
_________________________________________________________________
lstm_8 (LSTM)                (None, 64)                17664     
_________________________________________________________________
dense_11 (Dense)             (None, 16)                1040      
_________________________________________________________________
dropout_6 (Dropout)          (None, 16)                0         
_________________________________________________________________
dense_12 (Dense)             (None, 2)                 34        
Total params: 18,778
Trainable params: 18,778
Non-trainable params: 0
_________________________________________________________________


In [35]:
# Compile the model
# Both optimizers are instantiated with a learning rate of 0.01; this run
# trains with SGD, and `adam` stays available for experiments.
sgd = optimizers.SGD(lr=0.01)
adam = optimizers.Adam(lr=0.01)
# Binary cross-entropy is the loss in use; categorical cross-entropy was the
# alternative tried for this model.
model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train for 10 epochs in mini-batches of 250 samples, scoring on
# (X_test, y_test) after every epoch.
# `epochs` is the Keras 2 name of this argument. The former `nb_epoch`
# spelling is deprecated and is not accepted by later releases.
model.fit(X_train, y_train,
          batch_size=250, epochs=10, verbose=1,
          validation_data=(X_test, y_test))

Instructions for updating:
Use tf.cast instead.


  This is separate from the ipykernel package so we can avoid doing imports until


Train on 3599 samples, validate on 320 samples
Epoch 1/10


In [None]:
# Model outputs for the unlabeled test inputs (used for the submission file).
Y_test_pred = model.predict(test_dataX)

# Predicted class index for every sample of the held-out validation split.
# Taking the argmax over the softmax outputs is what `predict_classes` did for
# a multi-unit output; that helper has been removed from newer Keras releases.
Y_valid_pred = np.argmax(model.predict(X_valid), axis=-1)

# y_valid is one-hot encoded, so convert it back to class indices before
# comparing; passing the one-hot matrix together with 1-D predictions makes
# accuracy_score raise on the mismatched target formats.
accuracy = accuracy_score(np.argmax(y_valid, axis=-1), Y_valid_pred)
print('accuracy score:' + str(accuracy))

print(Y_test_pred)
print(Y_valid_pred.shape)

In [None]:
# Discard the trained model; only its saved predictions are needed below.
del model

# Write the submission directly in its final layout, so no manual editing of
# the CSV is needed: the row index is exported as the 'Id' column and only the
# first prediction column is kept, under the header 'Label'. This matches the
# export used for the first model in this notebook.
# NOTE(review): column 0 is the model output for class index 0 — confirm that
# this is the column the submission is expected to contain.
pd.DataFrame(Y_test_pred[:, 0]).to_csv('submission30.csv', header=["Label"], index_label='Id')