In [223]:
from __future__ import absolute_import
from __future__ import print_function

import numpy as np
import pandas as pd
np.random.seed(1337)  # for reproducibility

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.layers.normalization import BatchNormalization
from keras.layers.advanced_activations import PReLU
from keras.optimizers import SGD
from keras.utils import np_utils, generic_utils

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler


import math
import ast

import h5py
import sys

'''
    This demonstrates how to reach a score of 0.4890 (local validation)
    on the Kaggle Otto challenge, with a deep net using Keras.
    Compatible Python 2.7-3.4. Requires Scikit-Learn and Pandas.
    Recommended to run on GPU: 
        Command: THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python kaggle_otto_nn.py
        On EC2 g2.2xlarge instance: 19s/epoch. 6-7 minutes total training time.
    Best validation score at epoch 21: 0.4881 
    Try it at home:
        - with/without BatchNormalization (BatchNormalization helps!)
        - with ReLU or with PReLU (PReLU helps!)
        - with smaller layers, larger layers
        - with more layers, fewer layers
        - with different optimizers (SGD+momentum+decay is probably better than Adam!)
    Get the data from Kaggle: https://www.kaggle.com/c/otto-group-product-classification-challenge/data
'''




def load_data(path, train=True):
    """Read a CSV file into a float32 feature matrix plus one companion column.

    Args:
        path: location of the CSV file, passed to ``pd.read_csv``.
        train: when True the rows are shuffled (global NumPy RNG), the first
            column is dropped, the last column is returned as the labels and
            the columns in between as features. When False every column
            after the first is a feature and the first column is returned
            as string ids.

    Returns:
        ``(X, labels)`` when ``train`` is True, otherwise ``(X, ids)``.
    """
    table = pd.read_csv(path).values.copy()

    if not train:
        features = table[:, 1:].astype(np.float32)
        row_ids = table[:, 0].astype(str)
        return features, row_ids

    # Shuffle whole rows in place so features and labels stay aligned.
    np.random.shuffle(table)  # https://youtu.be/uyUXoap67N8
    features = table[:, 1:-1].astype(np.float32)
    targets = table[:, -1]
    return features, targets


def preprocess_data(X, scaler=None):
    """Transform X with a scaler, fitting a new StandardScaler if none is given.

    Args:
        X: feature matrix handed to ``scaler.fit`` / ``scaler.transform``.
        scaler: an already-fitted object exposing ``transform``. When it is
            ``None`` a ``StandardScaler`` is created and fitted on ``X``.

    Returns:
        ``(X_transformed, scaler)`` so the same scaler can be reused on
        other data.
    """
    # Compare against None explicitly: a truthiness test would discard a
    # caller-supplied scaler that happens to be falsy (e.g. one defining
    # __len__ that returns 0) and silently refit a new one.
    if scaler is None:
        scaler = StandardScaler()
        scaler.fit(X)
    X = scaler.transform(X)
    return X, scaler


def preprocess_labels(labels, encoder=None, categorical=True):
    """Encode labels as int32 class indices, optionally expanded by to_categorical.

    Args:
        labels: raw labels handed to ``encoder.fit`` / ``encoder.transform``.
        encoder: an already-fitted object exposing ``transform``. When it is
            ``None`` a ``LabelEncoder`` is created and fitted on ``labels``.
        categorical: when True the int32 indices are passed through
            ``np_utils.to_categorical`` before being returned.

    Returns:
        ``(y, encoder)`` so the same encoder can be reused later.
    """
    # Explicit None check: a supplied encoder must never be replaced just
    # because it evaluates as falsy.
    if encoder is None:
        encoder = LabelEncoder()
        encoder.fit(labels)
    y = encoder.transform(labels).astype(np.int32)
    if categorical:
        y = np_utils.to_categorical(y)
    return y, encoder


def make_submission(y_prob, ids, encoder, fname):
    """Write predicted probabilities to ``fname`` as a comma-separated file.

    The first line is ``id`` followed by every entry of ``encoder.classes_``.
    Each following line is one id from ``ids`` followed by the matching row
    of ``y_prob``, each value rendered with ``str``.

    Args:
        y_prob: iterable of rows; each row must provide ``tolist()``.
        ids: iterable of string ids, paired with the rows of ``y_prob``.
        encoder: object whose ``classes_`` supplies the column names.
        fname: path of the file to create or overwrite.
    """
    with open(fname, 'w') as out:
        header_cells = [str(label) for label in encoder.classes_]
        out.write('id,' + ','.join(header_cells) + '\n')
        for row_id, row in zip(ids, y_prob):
            cells = [row_id]
            cells.extend(str(p) for p in row.tolist())
            out.write(','.join(cells) + '\n')
    print("Wrote submission to file {}.".format(fname))

In [4]:
# Load the training CSV, then fit the scaler and the label encoder on it.
X, labels = load_data('train.csv', train=True)
X, scaler = preprocess_data(X)
y, encoder = preprocess_labels(labels)

# Load the test CSV and transform it with the scaler fitted on the training data.
X_test, ids = load_data('test.csv', train=False)
X_test, _ = preprocess_data(X_test, scaler)

# preprocess_labels passes the labels through np_utils.to_categorical by default;
# the second axis of that result is used as the class count.
nb_classes = y.shape[1]
print(nb_classes, 'classes')

# Number of feature columns in the training matrix.
dims = X.shape[1]
print(dims, 'dims')


print("Building model...")

9 classes
93 dims
Building model...


In [50]:
input_array = np.loadtxt("input_array.csv")
output_array = np.loadtxt("output_array.csv")

# Column 0 of output_array is used as the class label (named "floor" here).
floor_labels = output_array[:, 0].astype(int)
lowest_floor = floor_labels.min()
num_floors = int(np.ptp(floor_labels) + 1)

# One-hot encode the labels by picking rows of an identity matrix.
# The labels are offset by their minimum so that a label range that does not
# start at 0 still indexes rows 0..num_floors-1 (indexing with the raw label
# would raise IndexError or wrap around on negative values).
iden_mat = np.eye(num_floors, dtype=int)
binarized_output_array = [iden_mat[floor - lowest_floor] for floor in floor_labels]

In [79]:
# Linearly rescale every entry of input_array into
# [-mean_center_scale, +mean_center_scale] using the global min and range.
mean_center_scale = 0.5
input_min = np.min(input_array)
input_range = float(np.ptp(input_array))
scaled_inputs = 2*mean_center_scale*(input_array - input_min)/input_range
X = -mean_center_scale + scaled_inputs

# Targets: the one-hot rows collected earlier, stacked into a single array.
y = np.asarray(binarized_output_array)

# Width of the one-hot encoding.
print(y.shape[1])

4


In [251]:
# One-hidden-layer network: Dense -> tanh -> Dropout -> Dense -> softmax.
# Dense is called as Dense(input_dim, output_dim, ...), matching its use elsewhere in this file.
num_hidden_units = 250
# Take the output width from the one-hot targets instead of hard-coding 4,
# so the model still matches y when the number of classes changes.
num_classes = y.shape[1]

model = Sequential()
model.add(Dense(X.shape[1], num_hidden_units, init='glorot_normal'))
model.add(Activation('tanh'))
model.add(Dropout(0.5))

# Earlier experiments, left disabled by the author: PReLU / BatchNormalization
# after the first Dense layer, and extra 512-unit
# Dense -> PReLU -> BatchNormalization -> Dropout(0.5) blocks.

model.add(Dense(num_hidden_units, num_classes, init='glorot_normal'))
model.add(Activation('softmax'))

In [252]:
# NOTE(review): `sgd` is constructed here but is not passed to compile() below;
# the model is compiled with the "adam" optimizer string.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer="adam")

# model.compile(loss='categorical_crossentropy', optimizer="adam")

In [284]:
print("Training model...")

# Alternative fit settings, not currently passed to fit():
# batch_size=128, validation_split=0.15

# Fit on all of X / y for 10 epochs; no validation data is given to this call.
model.fit(X, y, nb_epoch=10, show_accuracy=True)
# model.train_on_batch(X, y)

Training model...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1b095690>

In [276]:
# Predict on X, the same name that is passed to model.fit, and print the result.
proba = model.predict_proba(X)
print(proba)
# make_submission(proba, ids, encoder, fname='keras-otto.csv')

[[  9.29960823e-04   4.55543153e-03   8.81465149e-01   1.13049459e-01]
 [  9.29960823e-04   4.55543153e-03   8.81465149e-01   1.13049459e-01]
 [  4.16154954e-04   2.88406250e-03   9.81641298e-01   1.50584843e-02]
 ..., 
 [  3.03535377e-04   9.99585184e-01   8.93365070e-05   2.19437944e-05]
 [  3.03535377e-04   9.99585184e-01   8.93365070e-05   2.19437944e-05]
 [  1.08939053e-03   9.98473812e-01   3.69948793e-04   6.68487662e-05]]


In [26]:
# Show the predicted row for the first sample.
print(proba[0])

[  2.42564689e-06   1.98431562e-01   2.21497101e-01   5.78015024e-01
   3.34626706e-11   2.87196675e-06   2.05075527e-03   2.28725031e-07
   3.04342314e-08]


In [151]:
# Write the model's weights to the file "test3.hdf5".
model.save_weights("test3.hdf5")

In [123]:
# Open a weights file read-only for inspection.
# NOTE(review): this opens 'test.hdf5' while the save cell writes "test3.hdf5" — confirm the intended file.
# NOTE(review): no f.close() appears in the visible cells; close the handle when done.
f = h5py.File('test.hdf5','r')

In [124]:
# Display the repr of the open HDF5 file object.
f

<HDF5 file "test.hdf5" (mode r)>

In [125]:
# Walk the top-level entries of the HDF5 file: print each entry's name, then
# the object stored under it (despite its name, `fkey` is the object, not a key).
# .items() replaces the Python-2-only .iteritems(), so the cell also runs on
# Python 3, in line with the compatibility stated at the top of the file.
for fin, fkey in f.items():
    print(fin)
    print(fkey)
    

layer_0
<HDF5 group "/layer_0" (2 members)>
layer_1
<HDF5 group "/layer_1" (0 members)>
layer_2
<HDF5 group "/layer_2" (2 members)>
layer_3
<HDF5 group "/layer_3" (0 members)>


In [111]:
# f.name is evaluated but not displayed, because only the last expression of a cell is shown.
f.name
# Display the 'layer_0' group. The stray trailing '.' after the subscript was
# removed: it made the whole cell a SyntaxError.
f['layer_0']

<HDF5 group "/layer_0" (2 members)>

In [112]:
# Call print with the name of every group and dataset visited in the file.
f.visit(print)

layer_0
layer_0/param_0
layer_0/param_1
layer_1
layer_2
layer_2/param_0
layer_2/param_1
layer_3


In [113]:
# For every visited entry, print its name together with the object's repr
# (the repr of a dataset shows its shape and dtype).
f.visititems(lambda name,obj:print(name, obj))

layer_0 <HDF5 group "/layer_0" (2 members)>
layer_0/param_0 <HDF5 dataset "param_0": shape (541, 250), type "<f8">
layer_0/param_1 <HDF5 dataset "param_1": shape (250,), type "<f8">
layer_1 <HDF5 group "/layer_1" (0 members)>
layer_2 <HDF5 group "/layer_2" (2 members)>
layer_2/param_0 <HDF5 dataset "param_0": shape (250, 4), type "<f8">
layer_2/param_1 <HDF5 dataset "param_1": shape (4,), type "<f8">
layer_3 <HDF5 group "/layer_3" (0 members)>


In [115]:
# create a sparse array from the sample/matrix group
data = f['layer_0/param_1'].value
# indices = f['sample/matrix/indices'].value
# indptr = f['sample/matrix/indptr'].value
# sample = sp.csr_matrix((data,indices,indptr))
# display as a dense array
# print(sample.A)
print(data)

[ -1.90197961e-03   1.18766944e-02   2.40259374e-03  -3.56830892e-03
  -1.29862597e-02  -1.64156936e-05  -1.34127999e-04  -4.55241356e-03
   2.21322759e-03  -4.47395665e-03  -2.22056617e-03  -5.70677936e-03
  -6.10961176e-04   6.19648562e-03   1.88218657e-03  -9.00871802e-03
   8.28027491e-04  -1.91418119e-04  -1.75501312e-03   6.71617942e-05
   3.61379984e-03  -5.52694515e-04   4.73527548e-03  -8.57505648e-03
  -1.73284098e-03   1.29134331e-03  -1.18414024e-03  -3.27039478e-03
  -1.60713380e-03   1.78908248e-03   6.67603857e-04   1.02940916e-03
   3.39556222e-04  -9.01556729e-04   4.98067540e-03  -9.90481648e-04
   7.43468951e-03  -6.67229799e-03  -7.03490331e-03   3.01043442e-03
   2.42478874e-04   6.63993751e-03  -5.18203854e-03   3.39509949e-03
   1.44741881e-03   3.82907100e-03   3.23554482e-03   1.45010590e-03
   6.01245762e-03   6.71767367e-03  -3.17853818e-03   1.40380215e-03
   1.13291908e-03  -4.50581454e-03   9.93930846e-04  -1.38145228e-03
  -8.67824368e-05   1.04442368e-03

In [197]:
# Number of weight arrays returned by the first layer's get_weights().
len(model.layers[0].get_weights())

2

In [198]:
# Dedicated RandomState with a fixed seed, used for the manual weight initialisation.
rng = np.random.RandomState(1234)

In [211]:
#initializing the weights

# Layer sizes of the hand-rolled network.
num_input = 541
num_output = 4
num_hidden = 1000

# Weight matrices: uniform in [-limit, limit) with limit = sqrt(6 / (fan_in + fan_out)).
# The draw order (w1, w2, b1, b2) is kept so the values produced by rng are unchanged.
w1_limit = np.sqrt(6./(num_input + num_hidden))
w1 = rng.uniform(low=-w1_limit, high=w1_limit, size=(num_input, num_hidden))

w2_limit = np.sqrt(6./(num_hidden + num_output))
w2 = rng.uniform(low=-w2_limit, high=w2_limit, size=(num_hidden, num_output))

# Bias row vectors: uniform in [-1, 1).
b1 = rng.uniform(low=-1., high=1., size=(1, num_hidden))
b2 = rng.uniform(low=-1., high=1., size=(1, num_output))

In [208]:
# Select one row of input_array to feed through the hand-rolled forward pass.
# NOTE(review): the name `input` shadows the Python builtin of the same name in this kernel.
index = 300
input = np.asarray(input_array[index])

In [203]:
# Load the input and output arrays from their text files.
# NOTE(review): the same two loads appear in an earlier cell of this notebook.
input_array = np.loadtxt("input_array.csv")
output_array = np.loadtxt("output_array.csv")

In [218]:
import time

In [285]:
start_time = time.time()

# Rescale the selected sample into [-mean_center_scale, +mean_center_scale]
# using the sample's own min and range.
# NOTE(review): the training inputs X are scaled with the min/range of the whole
# input_array, not per sample — confirm which scaling is intended here.
mean_center_scale = 0.5
input = (-mean_center_scale + 2*mean_center_scale*(input - np.min(input))/float(np.ptp(input)))

# Hidden layer: affine transform followed by tanh.
z1 = input.dot(w1) + b1
a1 = np.tanh(z1)

# Output layer scores.
z2 = a1.dot(w2) + b2

# Softmax. Subtracting the largest score before exponentiating leaves the
# probabilities unchanged but keeps np.exp from overflowing on large scores.
exp_scores = np.exp(z2 - np.max(z2))
probs = exp_scores/np.sum(exp_scores)
print(np.argmax(probs))


end_time = time.time()
print("time taken to execute the code:", (end_time - start_time), "secs")

2
time taken to execute the code: 0.0398778915405 secs


2

In [215]:
# Print the output_array row for the same index as the sample used in the forward pass
# (presumably to compare its first column with the printed argmax — confirm).
index = 300
print(output_array[index])

[   2.  791.  139.]


In [235]:
# Write the w1 matrix to "weight1.csv" as text, using np.savetxt's default formatting.
np.savetxt("weight1.csv", w1)