# Clasificación de Puerta (Cerrada o Abierta)

In [1]:
import pandas as pd
import numpy as np

from sklearn.cross_validation import KFold, train_test_split
from sklearn.metrics import log_loss, confusion_matrix
from sklearn.utils import shuffle

import matplotlib.pyplot as plt

from keras import backend as K
from keras.callbacks import EarlyStopping, Callback
from keras.utils import np_utils
from keras import optimizers
from keras import regularizers
from keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from keras import optimizers
from keras.models import Sequential, model_from_json
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, Activation, Dropout, Flatten, Dense, Reshape, InputLayer

Using TensorFlow backend.


## Parameters

In [2]:
# Run identifier; it becomes the suffix of the submission file name ('Results{i}.csv').
i = 4
# True  -> hold out part of the training data and validate on it.
# False -> train on all labelled data and score the test set.
testing = False

# Training hyperparameters
batch_size = 32
epochs = 150

# Image dimensions (img_width * img_height == 38000 pixel columns per row)
img_width = 190
img_height = 200
# NOTE: not referenced in the visible cells; the tensors below are built with a single channel.
num_channels = 3

## Read Data

In [3]:
# train.csv carries the 'estado' label; test.csv is the set scored for the submission.
df_train, df_test = (
    pd.read_csv(csv_path) for csv_path in ('data/train.csv', 'data/test.csv')
)

In [4]:
# Encode the target as 1 for 'cerrada' (closed) and 0 for anything else.
# The guard makes this cell safe to re-run: comparing already-encoded integers
# against the string 'cerrada' would silently turn every label into 0.
if not pd.api.types.is_numeric_dtype(df_train['estado']):
    df_train['estado'] = (df_train['estado'] == 'cerrada').astype(int)
df_train.head(2)

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X37993,X37994,X37995,X37996,X37997,X37998,X37999,X38000,estado,hora
0,140,138,135,136,136,133,135,136,135,135,...,126,127,126,126,126,126,126,126,1,2015-07-23 05:40:19
1,140,138,136,137,136,135,136,137,137,137,...,126,126,126,126,126,126,126,126,1,2015-07-23 05:40:36


In [5]:
# Move 'id' into the index so it is available later as df_test.index when the
# submission is built. Guarded so the cell can be re-run: once 'id' is the
# index it is no longer a column, and a second set_index('id') raises KeyError.
if 'id' in df_test.columns:
    df_test = df_test.set_index('id')
df_test.head(2)

Unnamed: 0_level_0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,X37992,X37993,X37994,X37995,X37996,X37997,X37998,X37999,X38000,hora
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,153,154,156,153,150,151,149,150,149,147,...,223,223,223,222,222,221,220,220,219,2015-07-30 09:51:32
2,151,151,153,150,148,149,146,147,148,145,...,218,219,219,219,217,218,218,217,216,2015-07-30 09:51:48


In [6]:
# Feature columns: everything except the label ('estado') and the timestamp ('hora').
x_columnas = [col for col in df_train.columns if col not in ['estado', 'hora']]


def _to_image_tensor(frame):
    """Reshape rows of flattened pixels to (n, img_width, img_height, 1) and divide by 255.

    The float literal 255.0 forces true division even where `/` on an integer
    array would be floor division (Python 2 semantics), which would collapse
    the pixel values to 0/1. Assumes 8-bit pixel values (0-255).

    NOTE(review): Keras' channels_last layout is (rows, cols, channels), i.e.
    (height, width, channels); here the first image axis is img_width. Confirm
    this matches the order in which the pixels were flattened.
    """
    return frame.values.reshape(len(frame), img_width, img_height, 1) / 255.0


if testing:
    # Validation mode: hold out 20% of the labelled data.
    X = df_train[x_columnas].copy()
    y = df_train['estado'].copy()
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
else:
    # Submission mode: train on all labelled rows, predict on the test file.
    X_train = df_train[x_columnas].copy()
    y_train = df_train['estado'].copy()
    X_test = df_test[x_columnas].copy()

X_train = _to_image_tensor(X_train)
X_test = _to_image_tensor(X_test)

In [7]:
def build_model_A():
    """Build, compile and train the door-state CNN, then return the fitted model.

    Layer stack:
        Conv2D(50, 3x3) -> ReLU -> MaxPooling2D(2x2) -> Dropout(0.5)
        Conv2D(32, 3x3) -> ReLU -> MaxPooling2D(2x2)
        Flatten -> ReLU -> Dropout(0.5) -> Dense(1) -> sigmoid

    The model is compiled with SGD (lr=0.01, decay=0.01) and binary
    cross-entropy loss, and reports accuracy and binary cross-entropy.

    Reads the notebook-level globals ``X_train``, ``y_train``, ``img_width``,
    ``img_height``, ``epochs``, ``batch_size`` and ``testing``. When
    ``testing`` is true it also reads ``X_test`` / ``y_test`` and evaluates
    on them after every epoch.

    Returns:
        The trained ``Sequential`` model.
    """
    np.random.seed(128212)

    model = Sequential()

    # Convolutional stage 1
    model.add(Convolution2D(50, (3, 3), input_shape=(img_width, img_height, 1)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.5))

    # Convolutional stage 2
    model.add(Convolution2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    # Classifier head
    model.add(Flatten())
    # NOTE: this ReLU receives max-pooled ReLU outputs, which are already
    # non-negative, so it does not change the values. It appears to be left
    # over from a hidden Dense layer that was removed; it is kept so the layer
    # stack (and model.summary()) stays unchanged.
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    sgd = optimizers.SGD(lr=0.01, decay=0.01)
    model.compile(optimizer=sgd,
                  loss='binary_crossentropy',
                  metrics=['accuracy', 'binary_crossentropy'])

    fit_kwargs = dict(epochs=epochs, batch_size=batch_size, verbose=2)
    if testing:
        # Keras documents validation_data as a tuple (x_val, y_val); a list is
        # not reliably unpacked as (x, y) by newer Keras versions.
        fit_kwargs['validation_data'] = (X_test, y_test)
    model.fit(X_train, y_train, **fit_kwargs)

    return model

In [8]:
# Build, compile and fit the CNN. build_model_A() calls model.fit internally,
# so running this cell performs the full training run.
model = build_model_A()

Epoch 1/150
30s - loss: 0.7667 - acc: 0.5475 - binary_crossentropy: 0.7667
Epoch 2/150
29s - loss: 0.6467 - acc: 0.6100 - binary_crossentropy: 0.6467
Epoch 3/150
29s - loss: 0.6241 - acc: 0.6275 - binary_crossentropy: 0.6241
Epoch 4/150
35s - loss: 0.6009 - acc: 0.6675 - binary_crossentropy: 0.6009
Epoch 5/150
34s - loss: 0.5969 - acc: 0.6500 - binary_crossentropy: 0.5969
Epoch 6/150
35s - loss: 0.5697 - acc: 0.7225 - binary_crossentropy: 0.5697
Epoch 7/150
36s - loss: 0.5382 - acc: 0.7525 - binary_crossentropy: 0.5382
Epoch 8/150
35s - loss: 0.5002 - acc: 0.7975 - binary_crossentropy: 0.5002
Epoch 9/150
34s - loss: 0.4833 - acc: 0.8025 - binary_crossentropy: 0.4833
Epoch 10/150
28s - loss: 0.4559 - acc: 0.8225 - binary_crossentropy: 0.4559
Epoch 11/150
28s - loss: 0.4474 - acc: 0.7950 - binary_crossentropy: 0.4474
Epoch 12/150
28s - loss: 0.4205 - acc: 0.8150 - binary_crossentropy: 0.4205
Epoch 13/150
28s - loss: 0.3871 - acc: 0.8475 - binary_crossentropy: 0.3871
Epoch 14/150
28s - lo

44s - loss: 0.1344 - acc: 0.9575 - binary_crossentropy: 0.1344
Epoch 110/150
43s - loss: 0.1431 - acc: 0.9500 - binary_crossentropy: 0.1431
Epoch 111/150
45s - loss: 0.1338 - acc: 0.9575 - binary_crossentropy: 0.1338
Epoch 112/150
42s - loss: 0.1318 - acc: 0.9600 - binary_crossentropy: 0.1318
Epoch 113/150
37s - loss: 0.1304 - acc: 0.9550 - binary_crossentropy: 0.1304
Epoch 114/150
33s - loss: 0.1177 - acc: 0.9650 - binary_crossentropy: 0.1177
Epoch 115/150
35s - loss: 0.1256 - acc: 0.9600 - binary_crossentropy: 0.1256
Epoch 116/150
33s - loss: 0.1322 - acc: 0.9550 - binary_crossentropy: 0.1322
Epoch 117/150
33s - loss: 0.1313 - acc: 0.9500 - binary_crossentropy: 0.1313
Epoch 118/150
33s - loss: 0.1352 - acc: 0.9600 - binary_crossentropy: 0.1352
Epoch 119/150
35s - loss: 0.1451 - acc: 0.9450 - binary_crossentropy: 0.1451
Epoch 120/150
32s - loss: 0.1249 - acc: 0.9600 - binary_crossentropy: 0.1249
Epoch 121/150
33s - loss: 0.1279 - acc: 0.9675 - binary_crossentropy: 0.1279
Epoch 122/150

In [9]:
# Print each layer's output shape and parameter count.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 188, 198, 50)      500       
_________________________________________________________________
activation_1 (Activation)    (None, 188, 198, 50)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 94, 99, 50)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 94, 99, 50)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 92, 97, 32)        14432     
_________________________________________________________________
activation_2 (Activation)    (None, 92, 97, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 46, 48, 32)        0         
__________

In [10]:
if not testing:
    # Sigmoid output for every test image. Training labels are encoded as
    # 1 for 'cerrada', so each score is the predicted probability of that class.
    # predict() is used instead of predict_proba(): on a Sequential model the
    # latter only wrapped predict() and is removed in later Keras releases.
    # The one visible difference is that predict() prints no progress bar by default.
    scores = model.predict(X_test, batch_size=batch_size)

    # scores has shape (n_samples, 1). ravel().tolist() yields one plain Python
    # float per row, so the CSV holds the same values as the nested-list flatten it replaces.
    predictions = pd.DataFrame({'id': df_test.index,
                                'estado': scores.ravel().tolist()})
    predictions[['id', 'estado']].to_csv('Results{i}.csv'.format(i=i), index=False)

