In [1]:
import numpy as np  # explicit: np.isin is used below and must not depend on load_data's star import
import pandas as pd
from load_data import *
from matplotlib import pyplot as plt
%matplotlib inline
import tabnet
import tensorflow as tf

import matplotlib.pyplot as plt
import os
import seaborn as sns
%matplotlib inline
from sklearn.model_selection import KFold

In [5]:
def transform(ds):
    """Split one dataset element into a (named features, target) pair.

    Parameters
    ----------
    ds : mapping
        Element with a ``"features"`` entry and a ``"price"`` entry.

    Returns
    -------
    tuple
        ``(x, y)`` where ``x`` is a dict pairing each name in the module-level
        ``col_names`` with the matching piece returned by ``tf.unstack`` (the
        pairing is purely positional), and ``y`` is ``ds["price"]`` unchanged.
    """
    unstacked = tf.unstack(ds["features"])
    target = ds["price"]

    # zip() stops at the shorter sequence, so surplus names or values are dropped.
    named_features = {name: value for name, value in zip(col_names, unstacked)}
    return named_features, target

def R_squared(y, y_pred):
    """Coefficient of determination, ``1 - SS_res / SS_tot``, over all elements.

    Both inputs are flattened and ``y`` is cast to the dtype of ``y_pred``
    before any arithmetic. Without that, a target of shape ``(n,)`` and a
    prediction of shape ``(n, 1)`` would be broadcast to ``(n, n)`` by the
    subtraction and the residual would be wrong without any error.

    Parameters
    ----------
    y : tensor-like
        Observed target values.
    y_pred : tensor-like
        Predicted values; must hold as many elements as ``y``.

    Returns
    -------
    Scalar tensor holding ``1 - sum((y - y_pred)^2) / sum((y - mean(y))^2)``.
    When every value in ``y`` is identical the denominator is zero, so the
    result is not a finite number.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y = tf.cast(y, y_pred.dtype)

    # Compare element by element regardless of a trailing singleton axis.
    y = tf.reshape(y, [-1])
    y_pred = tf.reshape(y_pred, [-1])

    residual = tf.reduce_sum(tf.square(tf.subtract(y, y_pred)))
    total = tf.reduce_sum(tf.square(tf.subtract(y, tf.reduce_mean(y))))
    return tf.subtract(1.0, tf.math.divide(residual, total))

In [7]:
X_train, X_test, X_val, y_train, y_test, y_val = load_data(for_dendro = False)

# A column is treated as binary when every distinct value it holds is 0 or 1.
# The check runs once per column and is stored as a plain Python bool, so the
# two lists below are exact complements and no bitwise operator is applied to
# a boolean.
is_binary = {col: bool(np.isin(X_train[col].unique(), [0, 1]).all()) for col in X_train}
bin_col = [col for col in X_train if is_binary[col]]
num_col = [col for col in X_train if not is_binary[col]]

# NOTE(review): transform() pairs col_names with the feature values by
# position, and those values follow the column order of X_train. Listing the
# binary columns first only labels values correctly if X_train already stores
# every binary column before every numeric one - confirm. The order is left
# untouched here because previously saved weights may depend on it.
col_names = bin_col + num_col

# Not used in the cells visible here; kept in case a later cell reads it.
train_size = int(X_train.shape[0] * 0.9)
# One batch is about a tenth of the training rows. The floor of 1 keeps
# Dataset.batch() valid when there are fewer than ten rows.
batch_size = max(1, int(X_train.shape[0] * 0.1))

# Training pipeline: shuffle with a fixed seed, name the features, then batch.
data_train = tf.data.Dataset.from_tensor_slices({"features": X_train, "price": y_train})
data_train = data_train.shuffle(6000, seed = 13)
train_dataset = data_train.take(len(X_train)).map(transform).batch(batch_size)

# Despite its name, test_dataset is built from the validation split
# (X_val / y_val). It is not shuffled.
data_test = tf.data.Dataset.from_tensor_slices({"features": X_val, "price": y_val})
test_dataset = data_test.take(len(X_val)).map(transform).batch(batch_size)


# One numeric feature column per entry of col_names, in the same order.
feature_columns = [tf.feature_column.numeric_column(name) for name in col_names]


# Hyperparameters passed to TabNetRegression / fit() below.
od = 110   # output_dim
fd = 114   # feature_dim
nds = 2    # num_decision_steps
rf = 2.5   # relaxation_factor
epo = 1    # epochs for the fit() call

# Learning rate starts at 0.01 and decays continuously (staircase=False) by a
# factor of 0.95 per 100 optimizer steps.
lr = tf.keras.optimizers.schedules.ExponentialDecay(0.01, decay_steps=100, decay_rate=0.95, staircase=False)
#lr = 0.01
optimizer = tf.keras.optimizers.Adam(lr)

model = tabnet.TabNetRegression(feature_columns, num_regressors=1,
                                output_dim=od, feature_dim=fd, num_groups=1,
                                num_decision_steps=nds, relaxation_factor=rf)

# The model has a single output (num_regressors=1), so it takes a single loss.
# Keras expects one entry per model output when `loss` is a list, so a second
# entry ("mae") cannot act as an additional loss here. MAE is still reported
# through `metrics`.
model.compile(optimizer, loss="mse", metrics=[R_squared, "mse", "mae"])

hist_model = model.fit(train_dataset, epochs=epo,
                        validation_data=test_dataset, verbose=1)

# The weights produced by the fit above are replaced by the saved checkpoint.
# NOTE(review): presumably the short fit only serves to create the model's
# variables before loading - confirm.
model.load_weights("TabNet_GS/best_model5.hdf5")

------------------------------
Fit and Transform data...
------------------------------
58 amenities have been removed due to close zero-variance.
Text, OpenStreet and image data loaded.
44 binary variables have been removed due to close zero-variance.
Imputation done. No NaN's are left in the data.
PCA's built and correlated features dropped.
Due to insignificant t-tests we drop:
['host_is_superhost', 'Extra pillows and blankets', 'Luggage dropoff allowed', 'Free_parking', 'host_name_sounds_west', 'host_name_sounds_rare', 'neighbourhood_cleansed_Dn Laoghaire-Rathdown']
------------------------------
Transform data...
------------------------------
58 amenities have been removed due to close zero-variance.
Text, OpenStreet and image data loaded.
44 binary variables have been removed due to close zero-variance.
Imputation done. No NaN's are left in the data.
PCA's built and correlated features dropped.
Due to insignificant t-tests we drop:
['host_is_superhost', 'Extra pillows and blanke

In [9]:
# Push one training batch through the model before reading its masks.
# NOTE(review): presumably the forward pass is what populates
# model.tabnet.feature_selection_masks - confirm against the tabnet package.
x, y = next(iter(train_dataset))
_ = model(x)

writer = tf.summary.create_file_writer("TabNet_GS_logs")
with writer.as_default():
    # One image summary per feature-selection mask, numbered from 1.
    step_masks = model.tabnet.feature_selection_masks
    for mask_number, mask in enumerate(step_masks, start=1):
        print("Saving mask {} of shape {}".format(mask_number, mask.shape))
        tf.summary.image('mask_at_iter_{}'.format(mask_number), step=0, data=mask, max_outputs=1)
        writer.flush()

    # The aggregate mask is written as one more image summary.
    agg_mask = model.tabnet.aggregate_feature_selection_mask
    print("Saving aggregate mask of shape", agg_mask.shape)
    tf.summary.image("Aggregate Mask", step=0, data=agg_mask, max_outputs=1)
    writer.flush()

writer.close()

Saving mask 1 of shape (1, 477, 66, 1)
Saving aggregate mask of shape (1, 477, 66, 1)
