In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
import string
import re
import collections
from sklearn import preprocessing

%matplotlib inline

In [2]:
# READ DATA 
train_df = pd.read_json('train.json.zip')
test_df = pd.read_json('test.json.zip')

## TRAIN DATA FEATURE ENGINEERING

In [3]:
# convert TARGET to the numeric
train_df['interest_level'] = train_df['interest_level'].apply(lambda x: 0 if x=='low' 
                                                      else 1 if x=='medium' 
                                                      else 2) 
# REMOVE UNNECESSARY WORDS FROM DESCRIPTION
train_df['description'] = train_df['description'].apply(lambda x: x.replace("<br />", ""))
train_df['description'] = train_df['description'].apply(lambda x: x.replace("br", ""))
train_df['description'] = train_df['description'].apply(lambda x: x.replace("<p><a", ""))

#basic features
train_df['rooms'] = train_df['bedrooms'] + train_df['bathrooms'] 

# count of photos #
train_df["num_photos"] = train_df["photos"].apply(len)

# count of "features" #
train_df["num_features"] = train_df["features"].apply(len)

# count of words present in description column #
train_df["num_description_words"] = train_df["description"].apply(lambda x: len(x.split(" ")))

# description contains email
regex = r'[\w\.-]+@[\w\.-]+'
train_df['has_email'] = train_df['description'].apply(lambda x: 1 if re.findall(regex, x) else 0)

# description contains phone
train_df['has_phone'] = train_df['description'].apply(lambda x:re.sub('['+string.punctuation+']', '', x).split())\
        .apply(lambda x: [s for s in x if s.isdigit()])\
        .apply(lambda x: len([s for s in x if len(str(s))==10]))\
        .apply(lambda x: 1 if x>0 else 0)

# CONVERT LOWER ALL OF WORDS
train_df[["features"]] = train_df[["features"]].apply(
    lambda _: [list(map(str.strip, map(str.lower, x))) for x in _])

## TEST DATA FEATURE ENGINEERING

In [4]:
# REMOVE UNNECESSARY WORDS FROM DESCRIPTION
test_df['description'] = test_df['description'].apply(lambda x: x.replace("<br />", ""))
test_df['description'] = test_df['description'].apply(lambda x: x.replace("br", ""))
test_df['description'] = test_df['description'].apply(lambda x: x.replace("<p><a", ""))

#basic features
test_df['rooms'] = test_df['bedrooms'] + test_df['bathrooms'] 

# count of photos #
test_df["num_photos"] = test_df["photos"].apply(len)

# count of "features" #
test_df["num_features"] = test_df["features"].apply(len)

# count of words present in description column #
test_df["num_description_words"] = test_df["description"].apply(lambda x: len(x.split(" ")))

# description contains email
regex = r'[\w\.-]+@[\w\.-]+'
test_df['has_email'] = test_df['description'].apply(lambda x: 1 if re.findall(regex, x) else 0)

# description contains phone
test_df['has_phone'] = test_df['description'].apply(lambda x:re.sub('['+string.punctuation+']', '', x).split())\
        .apply(lambda x: [s for s in x if s.isdigit()])\
        .apply(lambda x: len([s for s in x if len(str(s))==10]))\
        .apply(lambda x: 1 if x>0 else 0)

# CONVERT LOWER ALL OF WORDS
test_df[["features"]] = test_df[["features"]].apply(
    lambda _: [list(map(str.strip, map(str.lower, x))) for x in _])

## MOST FREQUENT FEATURES EXTRACTION

In [5]:
feature_value_train = train_df['features'].tolist()
feature_value_test = test_df['features'].tolist()

feature_value_train
feature_value_test

feature_lst_train = []
feature_lst_test = []

for i in range(len(feature_value_train)):
    feature_lst_train += feature_value_train[i]
    
for i in range(len(feature_value_test)):
    feature_lst_test += feature_value_test[i]

uniq_feature_train = list(set(feature_lst_train))
uniq_feature_test = list(set(feature_lst_test))


# see the frequency of each feature
def most_common(lst):
    features = collections.Counter(lst)
    feature_value = features.keys()
    frequency = features.values()
    data = [('feature_value', feature_value),
            ('frequency', frequency),]    
    df = pd.DataFrame.from_dict(dict(data))
    return df.sort_values(by = 'frequency', ascending = False)

df_features_train = most_common(feature_lst_train)
df_features_test = most_common(feature_lst_test)


def newColumn(name, df, series):
    feature = pd.Series(0,df.index,name = name)# data : 0
    for row,word in enumerate(series):
        if name in word:
            feature.iloc[row] = 1
    df[name] = feature # feature : series ; value in series : 1 or 0
    return df

# select features based on frequency
facilities = ['elevator', 'cats allowed', 'hardwood floors', 'dogs allowed', 'doorman', 'dishwasher', 'no fee', 'laundry in building', 'fitness center']
for name in facilities:
    train_df = newColumn(name, train_df, train_df['features'])
    test_df = newColumn(name, test_df, test_df['features'])

## LABEL ENCODING

In [6]:
categorical = ["display_address", "manager_id", "building_id", "street_address"]
for f in categorical:
        if train_df[f].dtype=='object':
            #print(f)
            lbl = preprocessing.LabelEncoder()
            lbl.fit(list(train_df[f].values) + list(test_df[f].values))
            train_df[f] = lbl.transform(list(train_df[f].values))
            test_df[f] = lbl.transform(list(test_df[f].values))

In [7]:
train_df['price'] = np.log10(train_df['price'])
test_df['price'] = np.log10(test_df['price'])

In [8]:
train_df['price']

4         3.380211
6         3.579784
9         3.543447
10        3.477121
15        3.446382
            ...   
124000    3.447158
124002    3.379306
124004    3.267172
124008    3.622732
124009    3.631444
Name: price, Length: 49352, dtype: float64

## DROP UNNECESSARY COLUMNS

In [9]:
# TRAINING DATASET
train_df.drop('created', axis=1, inplace=True)
train_df.drop('description', axis=1, inplace=True)
train_df.drop('features', axis=1, inplace=True)
train_df.drop('photos', axis=1, inplace=True)


# TEST DATASET
test_df.drop('created', axis=1, inplace=True)
test_df.drop('description', axis=1, inplace=True)
test_df.drop('features', axis=1, inplace=True)
test_df.drop('photos', axis=1, inplace=True)

# REGRESSION FOR PRICE

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import optuna
import math
from keras import callbacks

X = train_df.drop(['price'], axis = 1)
y = train_df.price
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = .3,
                                                    random_state = 5)

In [11]:
class Optimizer:
    def __init__(self, metric, trials=100):
        self.metric = metric
        self.trials = trials
        
    def objective(self, trial):
        model = create_model(trial)
        model.fit(X, y)
        preds = model.predict(X_test)
        return mean_absolute_error(y_test, preds)
            
    def optimize(self):
        study = optuna.create_study(direction="minimize")
        study.optimize(self.objective, n_trials=self.trials)
        return study

In [12]:
from keras.layers import Dense
from keras.models import Sequential

def create_model(trial):
    model = Sequential()
    model.add(Dense
                 (
                    units = trial.suggest_int("units", 32, 256),
                    kernel_initializer=trial.suggest_categorical("kernel_initializer", ["glorot_uniform", "normal"]),
                    input_dim = X.shape[1], 
                    activation= trial.suggest_categorical("activation", ["relu", "silu", "leaky_relu"])
                 )
             )
    model.add(Dense(1))
    model.compile(
        loss="mean_absolute_error"
    )
    return model

optimizer = Optimizer('mae')
keras_study = optimizer.optimize()

print("Number of finished trials: ", len(keras_study.trials))
print("Best trial:")
keras_trial = keras_study.best_trial

print("  Value: {}".format(keras_trial.value))
print("  Params: ")
for key, value in keras_trial.params.items():
    print("    {}: {}".format(key, value))


[32m[I 2021-09-28 17:53:36,049][0m A new study created in memory with name: no-name-a331b82c-4720-4590-9c92-fed25c0a8156[0m




[32m[I 2021-09-28 17:53:39,674][0m Trial 0 finished with value: 16515.011623708084 and parameters: {'units': 177, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 0 with value: 16515.011623708084.[0m




[32m[I 2021-09-28 17:53:41,675][0m Trial 1 finished with value: 90180.31850626413 and parameters: {'units': 203, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 0 with value: 16515.011623708084.[0m




[32m[I 2021-09-28 17:53:43,949][0m Trial 2 finished with value: 80783.38150766604 and parameters: {'units': 243, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 0 with value: 16515.011623708084.[0m




[32m[I 2021-09-28 17:53:45,938][0m Trial 3 finished with value: 9333.99197716002 and parameters: {'units': 143, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 3 with value: 9333.99197716002.[0m




[32m[I 2021-09-28 17:53:47,941][0m Trial 4 finished with value: 75658.22424878449 and parameters: {'units': 216, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 3 with value: 9333.99197716002.[0m




[32m[I 2021-09-28 17:53:49,956][0m Trial 5 finished with value: 7424.058743536404 and parameters: {'units': 214, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 5 with value: 7424.058743536404.[0m




[32m[I 2021-09-28 17:53:52,075][0m Trial 6 finished with value: 9722.378897348204 and parameters: {'units': 145, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 5 with value: 7424.058743536404.[0m




[32m[I 2021-09-28 17:53:53,991][0m Trial 7 finished with value: 10187.92243860196 and parameters: {'units': 83, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 5 with value: 7424.058743536404.[0m




[32m[I 2021-09-28 17:53:55,911][0m Trial 8 finished with value: 40956.81029221663 and parameters: {'units': 68, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 5 with value: 7424.058743536404.[0m




[32m[I 2021-09-28 17:53:57,861][0m Trial 9 finished with value: 5143.597371840069 and parameters: {'units': 94, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:53:59,795][0m Trial 10 finished with value: 7741.619826115889 and parameters: {'units': 37, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:01,743][0m Trial 11 finished with value: 5721.640498166707 and parameters: {'units': 109, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:04,035][0m Trial 12 finished with value: 54769.80959808292 and parameters: {'units': 109, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:05,980][0m Trial 13 finished with value: 17666.23294827709 and parameters: {'units': 114, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:07,933][0m Trial 14 finished with value: 18301.660984604212 and parameters: {'units': 70, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:09,873][0m Trial 15 finished with value: 26945.909688945576 and parameters: {'units': 33, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:11,832][0m Trial 16 finished with value: 40552.65903491385 and parameters: {'units': 114, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:13,855][0m Trial 17 finished with value: 10569.08211923935 and parameters: {'units': 149, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:15,812][0m Trial 18 finished with value: 11579.790785106967 and parameters: {'units': 92, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:17,833][0m Trial 19 finished with value: 19686.915943272368 and parameters: {'units': 53, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:19,835][0m Trial 20 finished with value: 47414.51819475656 and parameters: {'units': 168, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:21,871][0m Trial 21 finished with value: 93590.47520912152 and parameters: {'units': 252, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:23,842][0m Trial 22 finished with value: 105114.55650692731 and parameters: {'units': 127, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:25,853][0m Trial 23 finished with value: 9696.88126831215 and parameters: {'units': 186, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:27,803][0m Trial 24 finished with value: 19785.04346982452 and parameters: {'units': 92, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:29,824][0m Trial 25 finished with value: 41307.75690472844 and parameters: {'units': 224, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:31,799][0m Trial 26 finished with value: 6426.2445702727155 and parameters: {'units': 131, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:33,784][0m Trial 27 finished with value: 43416.01004904071 and parameters: {'units': 127, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:35,717][0m Trial 28 finished with value: 61678.71735229826 and parameters: {'units': 101, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:37,867][0m Trial 29 finished with value: 14453.937165137655 and parameters: {'units': 162, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 5143.597371840069.[0m




[32m[I 2021-09-28 17:54:40,029][0m Trial 30 finished with value: 4632.522870892131 and parameters: {'units': 135, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 30 with value: 4632.522870892131.[0m




[32m[I 2021-09-28 17:54:42,542][0m Trial 31 finished with value: 10980.277434570655 and parameters: {'units': 130, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 30 with value: 4632.522870892131.[0m




[32m[I 2021-09-28 17:54:44,574][0m Trial 32 finished with value: 2928.144715932625 and parameters: {'units': 75, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 32 with value: 2928.144715932625.[0m




[32m[I 2021-09-28 17:54:46,608][0m Trial 33 finished with value: 6419.799125528503 and parameters: {'units': 79, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 32 with value: 2928.144715932625.[0m




[32m[I 2021-09-28 17:54:48,616][0m Trial 34 finished with value: 41298.14299345514 and parameters: {'units': 59, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 32 with value: 2928.144715932625.[0m




[32m[I 2021-09-28 17:54:50,682][0m Trial 35 finished with value: 26940.275950269523 and parameters: {'units': 100, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 32 with value: 2928.144715932625.[0m




[32m[I 2021-09-28 17:54:52,916][0m Trial 36 finished with value: 32329.118003190426 and parameters: {'units': 189, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 32 with value: 2928.144715932625.[0m




[32m[I 2021-09-28 17:54:54,906][0m Trial 37 finished with value: 2051.6157451483236 and parameters: {'units': 52, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:54:56,873][0m Trial 38 finished with value: 5329.171856996109 and parameters: {'units': 46, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:54:58,905][0m Trial 39 finished with value: 14832.836937347824 and parameters: {'units': 83, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:00,956][0m Trial 40 finished with value: 11058.084726264162 and parameters: {'units': 66, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:03,018][0m Trial 41 finished with value: 5299.957124428625 and parameters: {'units': 45, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:05,066][0m Trial 42 finished with value: 22548.955066453003 and parameters: {'units': 45, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:07,110][0m Trial 43 finished with value: 16829.103555201757 and parameters: {'units': 59, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:09,142][0m Trial 44 finished with value: 26058.72848482055 and parameters: {'units': 77, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:11,109][0m Trial 45 finished with value: 4744.706757013969 and parameters: {'units': 45, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:13,124][0m Trial 46 finished with value: 3023.8700002230544 and parameters: {'units': 70, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 37 with value: 2051.6157451483236.[0m




[32m[I 2021-09-28 17:55:15,105][0m Trial 47 finished with value: 1082.722668145338 and parameters: {'units': 37, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:17,054][0m Trial 48 finished with value: 2771.9410263718573 and parameters: {'units': 36, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:19,426][0m Trial 49 finished with value: 19559.083900413898 and parameters: {'units': 38, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:21,439][0m Trial 50 finished with value: 3539.899575330315 and parameters: {'units': 57, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:23,383][0m Trial 51 finished with value: 16250.40572353195 and parameters: {'units': 32, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:25,354][0m Trial 52 finished with value: 20497.44996769659 and parameters: {'units': 57, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:27,375][0m Trial 53 finished with value: 15371.60851471764 and parameters: {'units': 72, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:29,379][0m Trial 54 finished with value: 5020.3144121020405 and parameters: {'units': 52, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:31,300][0m Trial 55 finished with value: 22809.7591546609 and parameters: {'units': 65, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:33,262][0m Trial 56 finished with value: 13319.44896950012 and parameters: {'units': 40, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:35,253][0m Trial 57 finished with value: 7522.018213229435 and parameters: {'units': 51, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:37,288][0m Trial 58 finished with value: 5665.0814336141175 and parameters: {'units': 63, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:39,323][0m Trial 59 finished with value: 61021.58478819799 and parameters: {'units': 83, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:41,238][0m Trial 60 finished with value: 4092.831026987899 and parameters: {'units': 32, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:43,138][0m Trial 61 finished with value: 14209.514560250278 and parameters: {'units': 32, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:45,041][0m Trial 62 finished with value: 5957.685105244416 and parameters: {'units': 73, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 47 with value: 1082.722668145338.[0m




[32m[I 2021-09-28 17:55:46,964][0m Trial 63 finished with value: 1044.2942941830224 and parameters: {'units': 53, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:55:48,886][0m Trial 64 finished with value: 1568.733906626859 and parameters: {'units': 56, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:55:50,803][0m Trial 65 finished with value: 32413.610445812927 and parameters: {'units': 51, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:55:52,752][0m Trial 66 finished with value: 25610.937852538875 and parameters: {'units': 90, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:55:54,639][0m Trial 67 finished with value: 3410.3496567841744 and parameters: {'units': 41, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:55:56,567][0m Trial 68 finished with value: 4635.826816703268 and parameters: {'units': 67, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:55:58,499][0m Trial 69 finished with value: 15543.562669573568 and parameters: {'units': 52, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:56:00,436][0m Trial 70 finished with value: 1888.8348079191933 and parameters: {'units': 75, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:56:02,820][0m Trial 71 finished with value: 3448.349923564614 and parameters: {'units': 63, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:56:04,717][0m Trial 72 finished with value: 33658.23314232321 and parameters: {'units': 78, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:56:06,636][0m Trial 73 finished with value: 5696.682805361001 and parameters: {'units': 87, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:56:08,591][0m Trial 74 finished with value: 39163.27128353772 and parameters: {'units': 101, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:56:10,512][0m Trial 75 finished with value: 24233.888692781005 and parameters: {'units': 71, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 63 with value: 1044.2942941830224.[0m




[32m[I 2021-09-28 17:56:12,427][0m Trial 76 finished with value: 844.9858726442183 and parameters: {'units': 42, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:14,340][0m Trial 77 finished with value: 1541.7290582079538 and parameters: {'units': 41, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:16,277][0m Trial 78 finished with value: 8483.10794473189 and parameters: {'units': 44, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:18,271][0m Trial 79 finished with value: 1540.1521487327038 and parameters: {'units': 36, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:20,212][0m Trial 80 finished with value: 16019.953358552064 and parameters: {'units': 49, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:22,122][0m Trial 81 finished with value: 9527.928814822539 and parameters: {'units': 39, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:24,035][0m Trial 82 finished with value: 35620.44610788046 and parameters: {'units': 58, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:25,934][0m Trial 83 finished with value: 21660.368629915865 and parameters: {'units': 37, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:27,850][0m Trial 84 finished with value: 8098.180757526298 and parameters: {'units': 37, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:29,757][0m Trial 85 finished with value: 5172.574298725549 and parameters: {'units': 47, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:31,691][0m Trial 86 finished with value: 23359.957063373593 and parameters: {'units': 57, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:33,604][0m Trial 87 finished with value: 16033.852183286865 and parameters: {'units': 39, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:35,502][0m Trial 88 finished with value: 12312.979130937416 and parameters: {'units': 44, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:37,442][0m Trial 89 finished with value: 4568.773469425719 and parameters: {'units': 54, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:39,367][0m Trial 90 finished with value: 23936.22120631501 and parameters: {'units': 61, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:41,263][0m Trial 91 finished with value: 7921.239030455557 and parameters: {'units': 36, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:43,687][0m Trial 92 finished with value: 8220.341535484136 and parameters: {'units': 47, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:45,584][0m Trial 93 finished with value: 5019.033403170801 and parameters: {'units': 53, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:47,556][0m Trial 94 finished with value: 2125.3077831993783 and parameters: {'units': 43, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:49,454][0m Trial 95 finished with value: 17891.298006097823 and parameters: {'units': 43, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:51,410][0m Trial 96 finished with value: 9571.66137961033 and parameters: {'units': 35, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:53,769][0m Trial 97 finished with value: 30763.805999193533 and parameters: {'units': 225, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:55,723][0m Trial 98 finished with value: 14620.5028092472 and parameters: {'units': 41, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 76 with value: 844.9858726442183.[0m




[32m[I 2021-09-28 17:56:57,790][0m Trial 99 finished with value: 20931.296469822406 and parameters: {'units': 49, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 76 with value: 844.9858726442183.[0m


Number of finished trials:  100
Best trial:
  Value: 844.9858726442183
  Params: 
    units: 42
    kernel_initializer: normal
    activation: relu


In [13]:
keras_params = keras_study.best_params

model2 = Sequential()
model2.add(Dense(**keras_params))
model2.add(Dense(**keras_params))
model2.add(Dense(**keras_params))
model2.add(Dense(1, activation= keras_params['activation']))

model2.compile(loss="mean_absolute_error")
earlystopping = callbacks.EarlyStopping(monitor ="loss", 
                                        mode ="min", patience = 10, 
                                        restore_best_weights = True)
model2.fit(X_train, y_train, validation_data = (X_test, y_test), epochs=100, batch_size=10, callbacks =[earlystopping])


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100


<keras.callbacks.History at 0x270b0629fd0>

In [14]:
print(model2.get_config())

{'name': 'sequential_100', 'layers': [{'class_name': 'InputLayer', 'config': {'batch_input_shape': (None, 25), 'dtype': 'float64', 'sparse': False, 'ragged': False, 'name': 'dense_200_input'}}, {'class_name': 'Dense', 'config': {'name': 'dense_200', 'trainable': True, 'dtype': 'float32', 'units': 42, 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'RandomNormal', 'config': {'mean': 0.0, 'stddev': 0.05, 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}}, {'class_name': 'Dense', 'config': {'name': 'dense_201', 'trainable': True, 'dtype': 'float32', 'units': 42, 'activation': 'relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'RandomNormal', 'config': {'mean': 0.0, 'stddev': 0.05, 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_re

* BEFORE PREDICTION INVERSE LOG10

In [15]:
train_df['price'] = 10 ** train_df['price']
test_df['price'] = 10 ** test_df['price']

In [16]:
train_df['price']

4         2400.0
6         3800.0
9         3495.0
10        3000.0
15        2795.0
           ...  
124000    2800.0
124002    2395.0
124004    1850.0
124008    4195.0
124009    4280.0
Name: price, Length: 49352, dtype: float64

In [17]:
y_test = 10 ** y_test

In [18]:
score = model2.evaluate(X_test, y_test, verbose=0)
print(score)

3963.275390625
