*AMPARO ALÍAS CUESTA*

*--------------------------------------------------------------------------------------------------------------------*

En este notebook se construye y compila el modelo mlp que procesará la data numérica y categórica. 
<br> No hace falta ejecutarlo ya que al final se guarda el modelo y desde el notebook principal se carga directamente, ya entrenado.
<br>*--------------------------------------------------------------------------------------------------------------------*

### Required imports

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

pd.set_option('display.max_columns', None)
import warnings
warnings.filterwarnings("ignore")

In [2]:
import os 
path = '../'
print(path)
import sys 
sys.path.insert (0, path) 

../


In [3]:
import utils 
from utils import get_data_ready

### Load and split data into train/test

In [4]:
data = pd.read_csv('../airbnb-listings.csv', sep = ';')
loaded_images = np.load('../images_branch/images.npy')
was_loaded = np.load('../images_branch/was_loaded.npy')

data = data[was_loaded == 1]
loaded_images = loaded_images[was_loaded == 1]

data.reset_index(drop=True, inplace = True)

In [12]:
(trainNum, testNum, trainImages, testImages) = train_test_split(data, loaded_images, test_size=0.2, random_state=2)


### Process NUMERICAL data
(According to **numerical_data_processing** notebook -- decisions done based only on training data)

In [13]:
X_train, y_train, X_test, y_test = get_data_ready(trainNum, testNum)


### Save the data to avoid conflict in the main file

In [14]:
np.save('../data/numerical/X_train.npy', X_train)
np.save('../data/numerical/X_test.npy', X_test)

y_train.to_csv('../data/numerical/y_train.csv', sep=';', decimal='.', index=True)
y_test.to_csv('../data/numerical/y_test.csv', sep=';', decimal='.', index=True)

In [15]:
y_train = y_train.values
y_test = y_test.values

In [16]:
maxPrice = y_train.max()
y_train = y_train / maxPrice
y_test = y_test / maxPrice

### MLP MODEL

In [17]:
# import the necessary packages
from tensorflow.keras.models import Sequential, Model, save_model
from tensorflow.keras.layers import BatchNormalization, Conv2D, MaxPooling2D, Activation, Dropout, Dense, Flatten, Input
from tensorflow.keras.optimizers import Adam


def create_mlp(dim):   

    model = Sequential()

    model.add(Dense(64, input_dim=dim))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    model.add(Dense(32))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    
    model.add(Dense(1))
    model.add(Activation('linear'))
    
    return model


In [20]:
# create our MLP and then compile the model using mean absolute percentage error as our loss, implying that we seek to minimize
# the absolute percentage difference between our price *predictions* and the *actual prices*

model = create_mlp(X_train.shape[1])
opt = Adam(lr=0.001, decay=1e-3/ 200)

'''model.compile(
    optimizer=opt, 
    loss='categorical_crossentropy', 
    metrics=['accuracy']
)'''

model.compile(loss="mean_absolute_error", optimizer=opt)

# train the model
print("[INFO] training model...")

# simple early stopping
es = EarlyStopping(monitor='val_loss', verbose=1)

model.fit(
    X_train, 
    y_train, 
    epochs=100, 
    validation_data=(X_test, y_test), 
    batch_size=32,
    callbacks=[es]
)


[INFO] training model...
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 00014: early stopping


<tensorflow.python.keras.callbacks.History at 0x7fccad887390>

In [21]:
model.save('MLP_model_reg.h5')  # creates a HDF5 file 'my_model.h5'
#del model  # deletes the existing model

In [22]:
'''Preds = model.evaluate(x = X_test, y = y_test)
print()
print ("Loss = " + str(preds[0]))
print ("Test Accuracy = " + str(preds[1]))'''

'Preds = model.evaluate(x = X_test, y = y_test)\nprint()\nprint ("Loss = " + str(preds[0]))\nprint ("Test Accuracy = " + str(preds[1]))'

In [23]:
preds = model.evaluate(x = X_test, y = y_test)
print()
print("Loss = " + str(preds))


Loss = 0.07971682399511337


In [24]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 64)                1792      
_________________________________________________________________
batch_normalization_6 (Batch (None, 64)                256       
_________________________________________________________________
activation_9 (Activation)    (None, 64)                0         
_________________________________________________________________
dropout_6 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_10 (Dense)             (None, 32)                2080      
_________________________________________________________________
batch_normalization_7 (Batch (None, 32)                128       
_________________________________________________________________
activation_10 (Activation)   (None, 32)               