In [46]:
import matplotlib.pyplot as plt 
import numpy as np
import random
import keras
from sklearn.model_selection import KFold

In [47]:
# Load training and testing text file
Data_train = np.loadtxt('training_data.txt', skiprows = 1)
X_train = Data_train[:, 1:]
y_train = Data_train[:, 0]
X_test = np.loadtxt("test_data.txt", skiprows = 1)
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)

X_train shape: (20000, 1000)
y_train shape: (20000,)
X_test shape: (10000, 1000)


In [48]:
# Normalize training and testing data
for i in range(len(X_train[0])):
    col = X_train[:, i]
    mean = np.mean(col)
    std = np.std(col)
    X_train[:, i] = (col - mean) / std
    X_test[:, i] = (X_test[:, i] - mean) / std

In [51]:
from sklearn.model_selection import KFold
kf = KFold(n_splits=3, shuffle=True, random_state=0)
in_index, out_index = list(kf.split(X_train))[0]
print("IN:", in_index, "OUT:", out_index)
X_in, X_out = X_train[in_index], X_train[out_index]
y_in, y_out = y_train[in_index], y_train[out_index]
print(X_in.shape, X_out.shape)

IN: [    0     1     2 ..., 19996 19997 19998] OUT: [    8     9    12 ..., 19990 19995 19999]
(13333, 1000) (6667, 1000)


In [52]:
out_index_1, out_index_2 = list(kf.split(X_out))[0]
print("OUT1:", out_index_1, "OUT2:", out_index_2)
X_out_1, X_out_2 = X_out[out_index_1], X_out[out_index_2]
y_out_1, y_out = y_out[out_index_1], y_out[out_index_2]
print(X_out_1.shape, X_out_2.shape)

OUT1: [   0    2    3 ..., 6663 6664 6666] OUT2: [   1    4    6 ..., 6660 6662 6665]
(4444, 1000) (2223, 1000)


In [55]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten, BatchNormalization
from keras import regularizers

# Build sequential model
model = Sequential()

# Hidden layers
model.add(Dense(100, activation="sigmoid", input_shape=(1000,)))
model.add(Dropout(0.2))
model.add(Dense(100, activation="sigmoid"))
model.add(Dropout(0.2))
model.add(Dense(50, activation="sigmoid"))

# Output layer
model.add(Dense(1, activation="sigmoid"))
# Print a summary
model.summary()
# Compile
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_53 (Dense)             (None, 100)               100100    
_________________________________________________________________
dropout_25 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_54 (Dense)             (None, 100)               10100     
_________________________________________________________________
dropout_26 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_55 (Dense)             (None, 50)                5050      
_________________________________________________________________
dense_56 (Dense)             (None, 1)                 51        
Total params: 115,301
Trainable params: 115,301
Non-trainable params: 0
_________________________________________________________________


In [56]:
fit = model.fit(X_train, y_train, batch_size=32, epochs=4, validation_split=0.2, shuffle=True, verbose=1)

Train on 16000 samples, validate on 4000 samples
Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [58]:
fit = model.fit(X_in, y_in, batch_size=32, epochs=5, shuffle=True, verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [57]:
def make_submission_file(pred, filename):
    super_threshold_indices = pred >= 0.5
    pred.fill(0)
    pred[super_threshold_indices] = 1

    submission = [[i+1, int(pred[i])] for i in range(len(pred))]
    submission.insert(0, ['Id','Prediction'])
    with open(filename, 'w') as f:
        for line in submission:
            f.write(','.join(map(str, line)) + '\n')

In [60]:
# Predict X_out_1 and X_out_2
pred1 = model.predict(X_out_1)
pred2 = model.predict(X_out_2)
make_submission_file(pred1, "out1_neuralnet.csv")
make_submission_file(pred2, "out2_neuralnet.csv")

In [61]:
# Train on the entire training dataset


# Build sequential model
model = Sequential()

# Hidden layers
model.add(Dense(100, activation="sigmoid", input_shape=(1000,)))
model.add(Dropout(0.2))
model.add(Dense(100, activation="sigmoid"))
model.add(Dropout(0.2))
model.add(Dense(50, activation="sigmoid"))

# Output layer
model.add(Dense(1, activation="sigmoid"))
# Print a summary
model.summary()
# Compile
model.compile(loss='binary_crossentropy',optimizer='adam', metrics=['accuracy'])
# Fit
fit = model.fit(X_train, y_train, batch_size=32, epochs=5, shuffle=True, verbose=1)

# Make predictions
pred = model.predict(X_test)
make_submission_file(pred, "test_neuralnet.csv")

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_57 (Dense)             (None, 100)               100100    
_________________________________________________________________
dropout_27 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_58 (Dense)             (None, 100)               10100     
_________________________________________________________________
dropout_28 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_59 (Dense)             (None, 50)                5050      
_________________________________________________________________
dense_60 (Dense)             (None, 1)                 51        
Total params: 115,301
Trainable params: 115,301
Non-trainable params: 0
_________________________________________________________________
Epoc

In [62]:
print(pred[0:5])

[[ 1.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]]
