# Dogs vs Cats Classification

<b>Problem Statement</b>: The train folder contains 25,000 images of dogs and cats. Each image in this folder has the label as part of the filename. The test folder contains 12,500 images, named according to a numeric id. For each image in the test set, you should predict a probability that the image is a dog (1 = dog, 0 = cat).

In [None]:
import numpy as np
import pandas as pd
import cv2
from os import listdir
from sklearn.cross_validation import train_test_split
from keras.models import Sequential
from keras.layers import Dense

In [None]:
# Root folder of the dataset; later cells read the "train/" sub-folder from it.
DIR_DATASET_ROOT = "dataset/"

# Fix NumPy's global random state so the random image sample drawn later is
# the same on every run.
np.random.seed(seed=1)

### Load Train dataset

In [None]:
# Draw a random sample of training images, load each one as a grayscale array
# and collect (ID, image, label) rows into `df`.
train_dir = DIR_DATASET_ROOT + "train/"
files = listdir(train_dir)
# Sample WITHOUT replacement so the same image cannot be loaded twice, and
# never request more images than the folder actually contains.
files = np.random.choice(files, min(50, len(files)), replace=False)   # choose any random N images

data = []
for file in files:
    img = cv2.imread(train_dir + file)
    if img is None:
        # cv2.imread returns None (it does not raise) when a file cannot be
        # decoded; skip it instead of crashing inside cvtColor.
        print("Skipping unreadable image: " + file)
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # Label convention from the problem statement: 1 = dog, 0 = cat.
    label = 1 if "dog" in file else 0
    # ID is the second dot-separated field of the file name
    # (assumes names look like "<class>.<number>.<ext>").
    # NOTE(review): a cat and a dog image with the same number get the same ID,
    # so the index is not guaranteed unique - confirm whether that matters.
    data.append((file.split('.')[1], img, label))

df = pd.DataFrame(data, columns=["ID", "img", "label"]).set_index(["ID"])

### Get Features

In [None]:
%%time
# Newer OpenCV builds expose SIFT as cv2.SIFT_create; older contrib builds
# only have cv2.xfeatures2d.SIFT_create. Use whichever this build provides.
if hasattr(cv2, "SIFT_create"):
    sift = cv2.SIFT_create()
else:
    sift = cv2.xfeatures2d.SIFT_create()

def get_descriptors(img):
    """Return the SIFT descriptors computed for *img*.

    The result has one row per detected keypoint and 128 columns. When SIFT
    finds no keypoints, OpenCV reports the descriptors as None; an empty
    (0, 128) array is returned instead so callers can always iterate over
    the result.
    """
    descriptors = sift.detectAndCompute(img, None)[1]
    if descriptors is None:
        return np.empty((0, 128), dtype=np.float32)
    return descriptors

# One descriptor array per image, stored alongside the image itself.
df["descriptors"] = df["img"].map(get_descriptors)

### Label Features

In [None]:
# Flatten the per-image descriptor arrays into one row per descriptor. Each row
# carries the label of the image it came from and that image's ID ("fk_ID").
features = []
for index, label, descriptors in zip(df.index, df["label"], df["descriptors"]):
    if descriptors is None:
        # No keypoints were found for this image, so it contributes no rows.
        continue
    for desc in descriptors:
        features.append((desc, label, index))

features = pd.DataFrame(features, columns=["descriptor", "label", "fk_ID"])

### Train-Test Split

In [None]:
# Turn the descriptor column into a 2-D feature matrix (one descriptor per row)
# and the label column into the matching 1-D target vector.
X = np.array(features["descriptor"].tolist())
y = np.array(features["label"].tolist())

# Hold out 20% of the descriptors for evaluation.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

### Train-Test Model

#### Sequential Neural Network with Dense layer

##### Train Model

In [None]:
# Small fully connected binary classifier that takes one 128-value descriptor
# per sample: a 128-unit tanh hidden layer followed by a single sigmoid output.
hidden_layer = Dense(128, input_dim=128, init='normal', activation="tanh")
output_layer = Dense(1, init='normal', activation="sigmoid")
nn = Sequential([hidden_layer, output_layer])

nn.compile(
    loss="binary_crossentropy",
    optimizer='sgd',
    metrics=['accuracy'],
)
nn.summary()

# Train on the training split, then score the held-out split. The evaluate
# call is kept as the cell's last expression so the notebook displays its result.
nn.fit(X_train, y_train, batch_size=32, nb_epoch=10, verbose=1)
nn.evaluate(X_test, y_test)

##### Predict 