# Keras Functional API for Multi-Input Neural Networks
This notebook will demonstrate how to load numerical, categorical, and image data, preprocessit, and use it for a multi-input model for Keras.

Examples of Data Types:
    * Numeric/Continuous Values: age, heart rate, blood pressure
    * Categorical Values: gender, ethnicity
    * Image Data: MRI, X-Ray

In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import LabelBinarizer, MinMaxScaler
from sklearn.model_selection import train_test_split

import glob
import cv2
import os
import argparse
import locale

from keras.models import Sequential, Model
from keras.layers.normalization import BatchNormalization
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.layers.core import Activation, Dropout, Dense
from keras.optimizers import Adam
from keras.layers import concatenate
from keras.layers import Flatten, Input

In [8]:
# Set the path to the directory of the dataset
Data_Directory = '/Users/edenmolina/Documents/AI-Playground/Houses Dataset'

In [13]:
# This function will load the data and return only zipcodes with <25 houses
def load_house_attributes(inputPath, cols):
    """
    inputPath: string path to the text file containing the house data
    cols: array of column names
    """
    # Set column names and load the text data
    cols = cols
    df = pd.read_csv(inputPath, sep=" ", header=None, names=cols)

    # determine (1) the unique zip codes and (2) the number of data
    # points with each zip code
    zipcodes = df["zipcode"].value_counts().keys().tolist()
    counts = df["zipcode"].value_counts().tolist()

    # loop over each of the unique zip codes and their corresponding
    # count
    for (zipcode, count) in zip(zipcodes, counts):
        # the zip code counts for our housing dataset is *extremely*
        # unbalanced (some only having 1 or 2 houses per zip code)
        # so let's sanitize our data by removing any houses with less
        # than 25 houses per zip code
        if count < 25:
            idxs = df[df["zipcode"] == zipcode].index
            df.drop(idxs, inplace=True)

    # return dataframe with houses with less than 25 houses per zipcode
    return df

In [116]:
# This function will process the normalize the data and one hot encode the zipcodes and create training and testing data
def process_house_attributes(df, train, test, continuous_columns):
    # Initiates the column names of the continuous data
    continuous = continuous_columns
    print ('Continuous COl', continuous)
    # Perform min-max scaling so each feature value is [0, 1]
    cs = MinMaxScaler()
    trainContinuous = cs.fit_transform(train[continuous])
    testContinuous = cs.transform(test[continuous])
    
    # One-hot encode the zip code categorical data
    zipBinarizer = LabelBinarizer().fit(df['zipcode'])
    trainCategorical = zipBinarizer.transform(train['zipcode'])
    testCategorical = zipBinarizer.transform(test['zipcode'])
    
    # Stack the data together
    trainX = np.hstack([trainCategorical, trainContinuous])
    testX = np.hstack([testCategorical, trainContinuous])
    
    return (trainX, testX)

In [104]:
# This function will load the images, generate a montage image from the four images
# images will be arranged from bathroom, bedroom, kichen, frontal
# append all home montage from four photos
def load_house_images(df, inputPath):
    """
    df: pandas dataframe of house data
    inputPath: path to the dataset
    """
    # initialize our images
    images = []
    
    # Loop over the indecies of the houses
    for i in df.index.values:
        # find four images for the house and sort by file path
        # ensure that the four images are ALWAYS in the same order
        basePath = os.path.sep.join([inputPath, "{}_*".format(i + 1)])

        housePaths = sorted(list(glob.glob(basePath)))

        # Initialize list of input images along with the output image
        # Images are in color, there fore there are 3 channels
        inputImages = []
        outputImage = np.zeros((64, 64, 3), dtype = 'uint8')

        # Loop over the input house paths
        for housePath in housePaths:
            image = cv2.imread(housePath)

            # resize image
            image = cv2.resize(image, (32, 32))
            inputImages.append(image)

            # tile the four images in the output image
            # first image goes in top-right
            # second image goes in top left
            # third image goes in bottom right
            # fourth goes in bottom left

        outputImage[0:32, 0:32] = inputImages[0]
        outputImage[0:32, 32:64] = inputImages[1]
        outputImage[32:64, 32:64] = inputImages[2]
        outputImage[32:64, 0:32] = inputImages[3]

        # add tiled image to the set of images the network will be trained on
        images.append(outputImage)

    return np.array(images)

# Construct Multi-Layer Perceptron (MLP) and Convolutional Neural Network (Covnet)

In [22]:
def create_mlp(dim, regress = False):
    """
    Create layers of Multi-Layer Perceptron
    can return tensor dim 8 or can regress to output scalar
    """
    # Define MLP as 2 dense layers
    model = Sequential()
    model.add(Dense(8, input_dim = dim, activation = 'relu'))
    model.add(Dense(4, activation = 'relu'))
    
    # Check if regression node should be added
    if regress:
        model.add(Dense(1, activation='linear'))
        
    # Return the Model
    return model

In [23]:
def create_cnn(width, height, depth, filters = (16, 32, 64), regress = False):
    """
    Handles the image data
    width: int width of the input image in pixels
    height: int height of the input image in pixels
    depth: int number of channels in image (for RGB it is 3)
    filters: tuple of progressively larger filters to the network can learn to discriminate features
    regress: bool inidicating whether or not a fully connected linear activation layer will be appended to the CNN for regression
    """
    # Initialize input shape and channel dimension
    # Assumes a 'channels last' ordering for the TensorFlow backend
    inputShape = (height, width, depth)
    chanDim = -1
    
    # Model Input
    inputs = Input(shape = inputShape)
    
    # Loop over the filters
    for (i, f) in enumerate(filters):
        # If this is the first convolutional layer then set the input accordingly
        if i == 0:
            x = inputs
        
        # Conv > Relu > BN > Pool
        x = Conv2D(f, (3, 3), padding = 'same')(x)
        x = Activation('relu')(x)
        x = BatchNormalization(axis = chanDim)(x)
        x = MaxPooling2D(pool_size = (2, 2))(x)
        
        # Flatten the volume
        # FC > Relu > BN > Dropout
        x = Flatten()(x)
        x = Dense(16)(x)
        x = Activation('relu')(x)
        x = BatchNormalization(axis = chanDim)(x)
        x = Dropout(.5)(x)
        
        # Apply another FC layer to match the nodes outputted by the MLP
        x = Dense(4)(x)
        x = Activation('relu')(x)
        
        # check to see if the regression node should be added
        if regress:
            x = Dense(1, activation="linear")(x)
            
        # construct the CNN
        model = Model(inputs, x)
        
        # Return CNN
        return model

--------

In [118]:
# Firectory of the text file and columns of the data
inputPath = '%s/HousesInfo.txt'%Data_Directory
cols = ['bedrooms', 'bathrooms', 'area', 'zipcode', 'price']
continuous_cols = ['bedrooms', 'bathrooms', 'area']

In [62]:
df = load_house_attributes(inputPath, cols)
df.head()

Unnamed: 0,bedrooms,bathrooms,area,zipcode,price
30,5,3.0,2520,93446,789000.0
32,3,2.0,1802,93446,365000.0
39,3,3.0,2146,93446,455000.0
80,4,2.5,2464,91901,599000.0
81,2,2.0,1845,91901,529800.0


In [105]:
images = load_house_images(df, Data_Directory)

In [107]:
# Normalize the images
images = images / 255.0

In [110]:
print("[INFO] processing data...")
# Split into train and test data
(trainAttrX, testAttrX, trainImagesX, testImagesX) = train_test_split(df, images, test_size=0.25, random_state=42)
print ("Done!")

[INFO] processing data...
Done!


In [111]:
# find the largest house price in the training set and use it to
# scale our house prices to the range [0, 1] (will lead to better
# training and convergence)
maxPrice = trainAttrX["price"].max()
trainY = trainAttrX["price"] / maxPrice
testY = testAttrX["price"] / maxPrice

In [117]:
(trainAttrX, testAttrX) = process_house_attributes(df, trainAttrX, testAttrX, continuous_cols)

Continuous COl ['bedrooms', 'bathrooms', 'area']


  return self.partial_fit(X, y)


ValueError: all the input array dimensions except for the concatenation axis must match exactly