In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
import string
import re
import collections
from sklearn import preprocessing

%matplotlib inline

In [2]:
# READ DATA 
train_df = pd.read_json('train.json.zip')
test_df = pd.read_json('test.json.zip')

## TRAIN DATA FEATURE ENGINEERING

In [3]:
# convert TARGET to the numeric
train_df['interest_level'] = train_df['interest_level'].apply(lambda x: 0 if x=='low' 
                                                      else 1 if x=='medium' 
                                                      else 2) 
# REMOVE UNNECESSARY WORDS FROM DESCRIPTION
train_df['description'] = train_df['description'].apply(lambda x: x.replace("<br />", ""))
train_df['description'] = train_df['description'].apply(lambda x: x.replace("br", ""))
train_df['description'] = train_df['description'].apply(lambda x: x.replace("<p><a", ""))

#basic features
train_df['rooms'] = train_df['bedrooms'] + train_df['bathrooms'] 

# count of photos #
train_df["num_photos"] = train_df["photos"].apply(len)

# count of "features" #
train_df["num_features"] = train_df["features"].apply(len)

# count of words present in description column #
train_df["num_description_words"] = train_df["description"].apply(lambda x: len(x.split(" ")))

# description contains email
regex = r'[\w\.-]+@[\w\.-]+'
train_df['has_email'] = train_df['description'].apply(lambda x: 1 if re.findall(regex, x) else 0)

# description contains phone
train_df['has_phone'] = train_df['description'].apply(lambda x:re.sub('['+string.punctuation+']', '', x).split())\
        .apply(lambda x: [s for s in x if s.isdigit()])\
        .apply(lambda x: len([s for s in x if len(str(s))==10]))\
        .apply(lambda x: 1 if x>0 else 0)

# CONVERT LOWER ALL OF WORDS
train_df[["features"]] = train_df[["features"]].apply(
    lambda _: [list(map(str.strip, map(str.lower, x))) for x in _])

## TEST DATA FEATURE ENGINEERING

In [4]:
# REMOVE UNNECESSARY WORDS FROM DESCRIPTION
test_df['description'] = test_df['description'].apply(lambda x: x.replace("<br />", ""))
test_df['description'] = test_df['description'].apply(lambda x: x.replace("br", ""))
test_df['description'] = test_df['description'].apply(lambda x: x.replace("<p><a", ""))

#basic features
test_df['rooms'] = test_df['bedrooms'] + test_df['bathrooms'] 

# count of photos #
test_df["num_photos"] = test_df["photos"].apply(len)

# count of "features" #
test_df["num_features"] = test_df["features"].apply(len)

# count of words present in description column #
test_df["num_description_words"] = test_df["description"].apply(lambda x: len(x.split(" ")))

# description contains email
regex = r'[\w\.-]+@[\w\.-]+'
test_df['has_email'] = test_df['description'].apply(lambda x: 1 if re.findall(regex, x) else 0)

# description contains phone
test_df['has_phone'] = test_df['description'].apply(lambda x:re.sub('['+string.punctuation+']', '', x).split())\
        .apply(lambda x: [s for s in x if s.isdigit()])\
        .apply(lambda x: len([s for s in x if len(str(s))==10]))\
        .apply(lambda x: 1 if x>0 else 0)

# CONVERT LOWER ALL OF WORDS
test_df[["features"]] = test_df[["features"]].apply(
    lambda _: [list(map(str.strip, map(str.lower, x))) for x in _])

## MOST FREQUENT FEATURES EXTRACTION

In [5]:
feature_value_train = train_df['features'].tolist()
feature_value_test = test_df['features'].tolist()

feature_value_train
feature_value_test

feature_lst_train = []
feature_lst_test = []

for i in range(len(feature_value_train)):
    feature_lst_train += feature_value_train[i]
    
for i in range(len(feature_value_test)):
    feature_lst_test += feature_value_test[i]

uniq_feature_train = list(set(feature_lst_train))
uniq_feature_test = list(set(feature_lst_test))


# see the frequency of each feature
def most_common(lst):
    features = collections.Counter(lst)
    feature_value = features.keys()
    frequency = features.values()
    data = [('feature_value', feature_value),
            ('frequency', frequency),]    
    df = pd.DataFrame.from_dict(dict(data))
    return df.sort_values(by = 'frequency', ascending = False)

df_features_train = most_common(feature_lst_train)
df_features_test = most_common(feature_lst_test)


def newColumn(name, df, series):
    feature = pd.Series(0,df.index,name = name)# data : 0
    for row,word in enumerate(series):
        if name in word:
            feature.iloc[row] = 1
    df[name] = feature # feature : series ; value in series : 1 or 0
    return df

# select features based on frequency
facilities = ['elevator', 'cats allowed', 'hardwood floors', 'dogs allowed', 'doorman', 'dishwasher', 'no fee', 'laundry in building', 'fitness center']
for name in facilities:
    train_df = newColumn(name, train_df, train_df['features'])
    test_df = newColumn(name, test_df, test_df['features'])

## LABEL ENCODING

In [6]:
categorical = ["display_address", "manager_id", "building_id", "street_address"]
for f in categorical:
        if train_df[f].dtype=='object':
            #print(f)
            lbl = preprocessing.LabelEncoder()
            lbl.fit(list(train_df[f].values) + list(test_df[f].values))
            train_df[f] = lbl.transform(list(train_df[f].values))
            test_df[f] = lbl.transform(list(test_df[f].values))

In [7]:
train_df['price'] = np.log10(train_df['price'])
test_df['price'] = np.log10(test_df['price'])

In [8]:
train_df['price']

4         3.380211
6         3.579784
9         3.543447
10        3.477121
15        3.446382
            ...   
124000    3.447158
124002    3.379306
124004    3.267172
124008    3.622732
124009    3.631444
Name: price, Length: 49352, dtype: float64

## DROP UNNECESSARY COLUMNS

In [9]:
# TRAINING DATASET
train_df.drop('created', axis=1, inplace=True)
train_df.drop('description', axis=1, inplace=True)
train_df.drop('features', axis=1, inplace=True)
train_df.drop('photos', axis=1, inplace=True)


# TEST DATASET
test_df.drop('created', axis=1, inplace=True)
test_df.drop('description', axis=1, inplace=True)
test_df.drop('features', axis=1, inplace=True)
test_df.drop('photos', axis=1, inplace=True)

# REGRESSION FOR PRICE

In [10]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer, mean_absolute_error, mean_squared_error
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
import xgboost as xgb
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
import optuna
import math
from keras import callbacks

X = train_df.drop(['price'], axis = 1)
y = train_df.price
X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size = .3,
                                                    random_state = 5)

In [42]:
class Optimizer:
    def __init__(self, metric, trials=100):
        self.metric = metric
        self.trials = trials
        
    def objective(self, trial):
        model = create_model(trial)
        model.fit(X, y)
        preds = model.predict(X_test)
        return mean_absolute_error(y_test, preds)
            
    def optimize(self):
        study = optuna.create_study(direction="minimize")
        study.optimize(self.objective, n_trials=self.trials)
        return study

In [44]:
from keras.layers import Dense
from keras.models import Sequential

def create_model(trial):
    model = Sequential()
    model.add(Dense
                 (
                    units = trial.suggest_int("units", 32, 256),
                    kernel_initializer=trial.suggest_categorical("kernel_initializer", ["glorot_uniform", "normal"]),
                    input_dim = X.shape[1], 
                    activation= trial.suggest_categorical("activation", ["relu", "silu", "leaky_relu"])
                 )
             )
    model.add(Dense(1))
    model.compile(
        loss="mean_absolute_error"
    )
    return model

optimizer = Optimizer('mae')
keras_study = optimizer.optimize()

print("Number of finished trials: ", len(keras_study.trials))
print("Best trial:")
keras_trial = keras_study.best_trial

print("  Value: {}".format(keras_trial.value))
print("  Params: ")
for key, value in keras_trial.params.items():
    print("    {}: {}".format(key, value))


[32m[I 2021-09-26 23:01:54,119][0m A new study created in memory with name: no-name-d757bb40-b1de-49ef-a61d-711f48f79137[0m




[32m[I 2021-09-26 23:01:56,242][0m Trial 0 finished with value: 109892.2112862182 and parameters: {'units': 244, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 0 with value: 109892.2112862182.[0m




[32m[I 2021-09-26 23:01:58,223][0m Trial 1 finished with value: 47298.854153235814 and parameters: {'units': 60, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 1 with value: 47298.854153235814.[0m




[32m[I 2021-09-26 23:02:00,553][0m Trial 2 finished with value: 28083.607334083517 and parameters: {'units': 205, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 2 with value: 28083.607334083517.[0m




[32m[I 2021-09-26 23:02:02,527][0m Trial 3 finished with value: 27677.069129818778 and parameters: {'units': 113, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 3 with value: 27677.069129818778.[0m




[32m[I 2021-09-26 23:02:04,632][0m Trial 4 finished with value: 10620.214864392181 and parameters: {'units': 235, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 4 with value: 10620.214864392181.[0m




[32m[I 2021-09-26 23:02:06,568][0m Trial 5 finished with value: 26777.459983294564 and parameters: {'units': 79, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 4 with value: 10620.214864392181.[0m




[32m[I 2021-09-26 23:02:08,618][0m Trial 6 finished with value: 66183.90256279147 and parameters: {'units': 165, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 4 with value: 10620.214864392181.[0m




[32m[I 2021-09-26 23:02:10,801][0m Trial 7 finished with value: 23048.96007853678 and parameters: {'units': 36, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 4 with value: 10620.214864392181.[0m




[32m[I 2021-09-26 23:02:13,007][0m Trial 8 finished with value: 50274.90584323944 and parameters: {'units': 222, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 4 with value: 10620.214864392181.[0m




[32m[I 2021-09-26 23:02:15,019][0m Trial 9 finished with value: 1620.1521040673035 and parameters: {'units': 126, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:17,009][0m Trial 10 finished with value: 3196.915696922174 and parameters: {'units': 149, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:18,989][0m Trial 11 finished with value: 53483.02607268611 and parameters: {'units': 148, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:20,924][0m Trial 12 finished with value: 42822.90947220531 and parameters: {'units': 123, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:23,039][0m Trial 13 finished with value: 48771.33100185334 and parameters: {'units': 181, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:25,092][0m Trial 14 finished with value: 10665.989237825037 and parameters: {'units': 108, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:27,461][0m Trial 15 finished with value: 39527.51456091471 and parameters: {'units': 182, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:29,904][0m Trial 16 finished with value: 37477.52734104818 and parameters: {'units': 136, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:32,205][0m Trial 17 finished with value: 8717.301833797637 and parameters: {'units': 93, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:34,937][0m Trial 18 finished with value: 14572.354040707947 and parameters: {'units': 148, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:37,438][0m Trial 19 finished with value: 10990.262234598938 and parameters: {'units': 194, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:40,153][0m Trial 20 finished with value: 37174.136983170036 and parameters: {'units': 78, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:42,132][0m Trial 21 finished with value: 34194.260730114904 and parameters: {'units': 95, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:44,167][0m Trial 22 finished with value: 40155.98934348849 and parameters: {'units': 129, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:46,285][0m Trial 23 finished with value: 11618.102280441839 and parameters: {'units': 162, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:48,254][0m Trial 24 finished with value: 32052.81245217859 and parameters: {'units': 95, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:50,269][0m Trial 25 finished with value: 20646.31365603451 and parameters: {'units': 45, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:52,462][0m Trial 26 finished with value: 47159.47937550676 and parameters: {'units': 74, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:54,571][0m Trial 27 finished with value: 2677.7883754591157 and parameters: {'units': 101, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:56,707][0m Trial 28 finished with value: 53233.63019525425 and parameters: {'units': 159, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:02:58,764][0m Trial 29 finished with value: 2832.147012829713 and parameters: {'units': 121, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:00,750][0m Trial 30 finished with value: 77824.61144704827 and parameters: {'units': 114, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:02,838][0m Trial 31 finished with value: 60302.82085446799 and parameters: {'units': 138, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:05,005][0m Trial 32 finished with value: 36505.39875825236 and parameters: {'units': 124, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:07,076][0m Trial 33 finished with value: 8200.379273568149 and parameters: {'units': 103, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:09,038][0m Trial 34 finished with value: 64715.699028856754 and parameters: {'units': 64, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:11,189][0m Trial 35 finished with value: 7785.245067161382 and parameters: {'units': 255, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:13,281][0m Trial 36 finished with value: 5805.4039184760395 and parameters: {'units': 121, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:15,482][0m Trial 37 finished with value: 55738.50027022427 and parameters: {'units': 176, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:17,533][0m Trial 38 finished with value: 13545.8156703678 and parameters: {'units': 148, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:19,431][0m Trial 39 finished with value: 15801.18075488801 and parameters: {'units': 88, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:21,650][0m Trial 40 finished with value: 38002.23222367601 and parameters: {'units': 208, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:23,737][0m Trial 41 finished with value: 23429.404030207555 and parameters: {'units': 116, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:26,510][0m Trial 42 finished with value: 29510.08943382835 and parameters: {'units': 132, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:28,564][0m Trial 43 finished with value: 43936.109306468024 and parameters: {'units': 104, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:30,853][0m Trial 44 finished with value: 3366.293300924587 and parameters: {'units': 118, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:33,499][0m Trial 45 finished with value: 30212.56328244799 and parameters: {'units': 157, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:35,925][0m Trial 46 finished with value: 26345.440928255644 and parameters: {'units': 139, 'kernel_initializer': 'glorot_uniform', 'activation': 'relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:38,623][0m Trial 47 finished with value: 49785.82880099305 and parameters: {'units': 173, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:40,983][0m Trial 48 finished with value: 40335.25894136014 and parameters: {'units': 111, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:43,019][0m Trial 49 finished with value: 26824.80601554606 and parameters: {'units': 145, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:44,948][0m Trial 50 finished with value: 21601.06762402046 and parameters: {'units': 89, 'kernel_initializer': 'glorot_uniform', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:47,051][0m Trial 51 finished with value: 5833.862995323053 and parameters: {'units': 121, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:49,150][0m Trial 52 finished with value: 14646.127188589824 and parameters: {'units': 127, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:51,239][0m Trial 53 finished with value: 13985.934113424457 and parameters: {'units': 103, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:53,326][0m Trial 54 finished with value: 37623.9025405028 and parameters: {'units': 119, 'kernel_initializer': 'glorot_uniform', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:55,483][0m Trial 55 finished with value: 2140.598466449491 and parameters: {'units': 153, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:57,464][0m Trial 56 finished with value: 23388.74108264829 and parameters: {'units': 153, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:03:59,672][0m Trial 57 finished with value: 42531.7659826884 and parameters: {'units': 170, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:01,720][0m Trial 58 finished with value: 24092.31928324012 and parameters: {'units': 193, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:03,700][0m Trial 59 finished with value: 13943.976285015577 and parameters: {'units': 140, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:05,665][0m Trial 60 finished with value: 30550.45984306953 and parameters: {'units': 134, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:07,749][0m Trial 61 finished with value: 2125.3120416031843 and parameters: {'units': 109, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:10,272][0m Trial 62 finished with value: 21559.032786739543 and parameters: {'units': 107, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:12,328][0m Trial 63 finished with value: 24135.07238124556 and parameters: {'units': 84, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:14,392][0m Trial 64 finished with value: 29054.636414491808 and parameters: {'units': 99, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:16,528][0m Trial 65 finished with value: 35650.25176600681 and parameters: {'units': 129, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:18,599][0m Trial 66 finished with value: 61081.06148129327 and parameters: {'units': 165, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:20,602][0m Trial 67 finished with value: 3594.7975325298867 and parameters: {'units': 70, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:22,623][0m Trial 68 finished with value: 24242.230586749887 and parameters: {'units': 153, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:24,564][0m Trial 69 finished with value: 16374.110236003035 and parameters: {'units': 53, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:26,644][0m Trial 70 finished with value: 1745.5875914571418 and parameters: {'units': 111, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:28,715][0m Trial 71 finished with value: 6270.111524938897 and parameters: {'units': 112, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:30,779][0m Trial 72 finished with value: 32806.205725888365 and parameters: {'units': 97, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:32,863][0m Trial 73 finished with value: 25799.028310091562 and parameters: {'units': 116, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:34,977][0m Trial 74 finished with value: 45375.703764057005 and parameters: {'units': 125, 'kernel_initializer': 'normal', 'activation': 'silu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:36,921][0m Trial 75 finished with value: 2254.9123505499783 and parameters: {'units': 81, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:38,869][0m Trial 76 finished with value: 8942.651455044936 and parameters: {'units': 73, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:40,807][0m Trial 77 finished with value: 32843.1154902206 and parameters: {'units': 83, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:42,731][0m Trial 78 finished with value: 2419.5355976587107 and parameters: {'units': 92, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:44,671][0m Trial 79 finished with value: 21979.64445480802 and parameters: {'units': 65, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:46,597][0m Trial 80 finished with value: 6223.2640693685735 and parameters: {'units': 78, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:48,516][0m Trial 81 finished with value: 11266.196008280216 and parameters: {'units': 91, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:50,465][0m Trial 82 finished with value: 2925.924254244004 and parameters: {'units': 109, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:52,408][0m Trial 83 finished with value: 20103.410048016645 and parameters: {'units': 100, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:54,826][0m Trial 84 finished with value: 14185.210070358089 and parameters: {'units': 108, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:56,765][0m Trial 85 finished with value: 34509.46088783665 and parameters: {'units': 87, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:04:58,693][0m Trial 86 finished with value: 7997.3665749577885 and parameters: {'units': 95, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:00,646][0m Trial 87 finished with value: 7887.7335690993605 and parameters: {'units': 109, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:02,585][0m Trial 88 finished with value: 3501.301207270068 and parameters: {'units': 81, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:04,541][0m Trial 89 finished with value: 3202.3760511625355 and parameters: {'units': 103, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:06,570][0m Trial 90 finished with value: 46923.00006615752 and parameters: {'units': 130, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:08,566][0m Trial 91 finished with value: 42403.09573049968 and parameters: {'units': 143, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:10,526][0m Trial 92 finished with value: 40074.057464626036 and parameters: {'units': 123, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:12,498][0m Trial 93 finished with value: 27131.168302820854 and parameters: {'units': 113, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:14,506][0m Trial 94 finished with value: 25488.605815748466 and parameters: {'units': 133, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:16,534][0m Trial 95 finished with value: 25653.360966352117 and parameters: {'units': 150, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:18,458][0m Trial 96 finished with value: 23495.465788193767 and parameters: {'units': 92, 'kernel_initializer': 'normal', 'activation': 'relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:20,484][0m Trial 97 finished with value: 22026.639798888336 and parameters: {'units': 143, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:22,466][0m Trial 98 finished with value: 46455.297788575794 and parameters: {'units': 119, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m




[32m[I 2021-09-26 23:05:24,480][0m Trial 99 finished with value: 2437.450050522355 and parameters: {'units': 136, 'kernel_initializer': 'normal', 'activation': 'leaky_relu'}. Best is trial 9 with value: 1620.1521040673035.[0m


Number of finished trials:  100
Best trial:
  Value: 1620.1521040673035
  Params: 
    units: 126
    kernel_initializer: normal
    activation: leaky_relu


In [45]:
keras_params = keras_study.best_params

model2 = Sequential()
model2.add(Dense(**keras_params))
model2.add(Dense(**keras_params))
model2.add(Dense(**keras_params))
model2.add(Dense(1, activation= keras_params['activation']))

model2.compile(loss="mean_absolute_error")
earlystopping = callbacks.EarlyStopping(monitor ="loss", 
                                        mode ="min", patience = 10, 
                                        restore_best_weights = True)
model2.fit(X_train, y_train, validation_data = (X_test, y_test), epochs=100, batch_size=10, callbacks =[earlystopping])


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100


<keras.callbacks.History at 0x19997c96850>

In [46]:
print(model2.get_config())

{'name': 'sequential_219', 'layers': [{'class_name': 'InputLayer', 'config': {'batch_input_shape': (None, 25), 'dtype': 'float64', 'sparse': False, 'ragged': False, 'name': 'dense_452_input'}}, {'class_name': 'Dense', 'config': {'name': 'dense_452', 'trainable': True, 'dtype': 'float32', 'units': 126, 'activation': 'leaky_relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'RandomNormal', 'config': {'mean': 0.0, 'stddev': 0.05, 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': None, 'bias_regularizer': None, 'activity_regularizer': None, 'kernel_constraint': None, 'bias_constraint': None}}, {'class_name': 'Dense', 'config': {'name': 'dense_453', 'trainable': True, 'dtype': 'float32', 'units': 126, 'activation': 'leaky_relu', 'use_bias': True, 'kernel_initializer': {'class_name': 'RandomNormal', 'config': {'mean': 0.0, 'stddev': 0.05, 'seed': None}}, 'bias_initializer': {'class_name': 'Zeros', 'config': {}}, 'kernel_regularizer': 