In [1]:
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, OrdinalEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

from keras.models import Sequential, load_model
from keras import layers
from keras import Model
from keras.callbacks import ModelCheckpoint, EarlyStopping

2022-06-22 07:15:11.991168: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-06-22 07:15:11.991211: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
from tensorflow.python.client import device_lib

# Ask TensorFlow which compute devices it can see and print the full listing.
local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 4355666906832260455
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4143382528
locality {
  bus_id: 1
  links {
  }
}
incarnation: 3651569386379864042
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 2060, pci bus id: 0000:2d:00.0, compute capability: 7.5"
]


In [2]:
# Load the rental-offer features, keep only the columns used in this notebook,
# and drop every row that has a missing value in any of them.
FEATURES_CSV = "./immo_features.csv"
FEATURE_COLUMNS = [
    "noRooms", "balcony", "regio1", "regio2", "cellar", "condition",
    "regio3", "garden", "livingSpace", "lift", "baseRent",
]

rental_offer = pd.read_csv(FEATURES_CSV)[FEATURE_COLUMNS].dropna()

# Persist the cleaned frame back to the same file. index=False matters here:
# writing the index would add an unnamed leading column to the file on every
# run, which the next read_csv would then load as an extra "Unnamed: 0" column.
rental_offer.to_csv(FEATURES_CSV, index=False)

rental_offer.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200361 entries, 0 to 200360
Data columns (total 11 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   noRooms      200361 non-null  float64
 1   balcony      200361 non-null  bool   
 2   regio1       200361 non-null  object 
 3   regio2       200361 non-null  object 
 4   cellar       200361 non-null  bool   
 5   condition    200361 non-null  object 
 6   regio3       200361 non-null  object 
 7   garden       200361 non-null  bool   
 8   livingSpace  200361 non-null  float64
 9   lift         200361 non-null  bool   
 10  baseRent     200361 non-null  float64
dtypes: bool(4), float64(3), object(4)
memory usage: 11.5+ MB


In [3]:
# Model inputs: every retained column except the target (baseRent) and the
# finer-grained region columns (regio2, regio3).
input_columns = [
    "noRooms", "balcony", "regio1", "cellar",
    "condition", "garden", "livingSpace", "lift",
]
learning_frame = rental_offer[input_columns]
learning_frame.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200361 entries, 0 to 200360
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype  
---  ------       --------------   -----  
 0   noRooms      200361 non-null  float64
 1   balcony      200361 non-null  bool   
 2   regio1       200361 non-null  object 
 3   cellar       200361 non-null  bool   
 4   condition    200361 non-null  object 
 5   garden       200361 non-null  bool   
 6   livingSpace  200361 non-null  float64
 7   lift         200361 non-null  bool   
dtypes: bool(4), float64(2), object(2)
memory usage: 6.9+ MB


In [4]:
# Regression target: the baseRent column of the cleaned offers.
target = rental_offer.loc[:, "baseRent"]
target.info()

<class 'pandas.core.series.Series'>
RangeIndex: 200361 entries, 0 to 200360
Series name: baseRent
Non-Null Count   Dtype  
--------------   -----  
200361 non-null  float64
dtypes: float64(1)
memory usage: 1.5 MB


In [6]:
# Everything except the two string-valued columns is treated as numeric input
# (this includes the boolean flags).
string_columns = ["regio1", "condition"]
num_attribs = learning_frame.drop(columns=string_columns)
num_attribs.head()

Unnamed: 0,noRooms,balcony,cellar,garden,livingSpace,lift
0,4.0,False,True,True,86.0,False
1,3.0,True,False,False,89.0,False
2,3.0,True,True,False,83.8,True
3,3.0,True,False,False,84.97,False
4,2.0,True,False,False,53.43,False


In [9]:
# Column groups routed to each preprocessing step.
# list() over a DataFrame yields its column labels.
num_attribs = list(num_attribs)
cat_attribs = ["regio1"]
ord_attribs = ["condition"]

# Numeric (and boolean) columns are standardised.
num_pipeline = Pipeline(steps=[("std_scaler", StandardScaler())])

# One transformer per column group; columns not listed here are dropped by
# ColumnTransformer's default remainder behaviour.
column_steps = [
    ("num", num_pipeline, num_attribs),
    ("cat", OneHotEncoder(), cat_attribs),
    ("ord", OrdinalEncoder(), ord_attribs),
]
full_pipe = ColumnTransformer(transformers=column_steps)

In [10]:
# Fit the preprocessing pipeline on the feature frame and encode it in one go,
# then cast the result to float32.
# NOTE(review): the pipeline is fitted on all rows before the train/test split
# further down, so scaling statistics include the held-out rows.
encoded_features = full_pipe.fit_transform(learning_frame)
learning_prepared = encoded_features.astype(np.float32)
learning_prepared

array([[ 0.4542822 , -1.3336794 ,  0.6953863 , ...,  0.        ,
         0.        ,  9.        ],
       [ 0.11347608,  0.7498054 , -1.4380496 , ...,  0.        ,
         0.        ,  7.        ],
       [ 0.11347608,  0.7498054 ,  0.6953863 , ...,  0.        ,
         0.        ,  0.        ],
       ...,
       [ 0.4542822 ,  0.7498054 ,  0.6953863 , ...,  0.        ,
         0.        ,  9.        ],
       [-0.22733003,  0.7498054 ,  0.6953863 , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.11347608,  0.7498054 ,  0.6953863 , ...,  0.        ,
         0.        ,  4.        ]], dtype=float32)

In [11]:
# Re-display the prepared feature matrix. It is already a NumPy ndarray, so no
# `.toarray()` conversion is needed (the error shown below this cell comes from
# calling that method on an ndarray).
learning_prepared 

AttributeError: 'numpy.ndarray' object has no attribute 'toarray'

In [12]:
# Hold out a test set (default 25 % of the rows). The seed is fixed so that the
# split, and every metric computed from it, is reproducible across kernel
# restarts.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    learning_prepared, target, random_state=42
)

In [13]:
# Carve a validation set out of the remaining training rows. The seed is fixed
# so the train/validation partition is the same on every run.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42
)

In [12]:
class WideAndDeepModel(Model):
    """Wide & Deep regression network written as a subclassed Keras model.

    The input is sent through two dense hidden layers (the "deep" path) and is
    also concatenated, unchanged, with the second hidden layer's activations
    (the "wide" path). A single linear unit produces the prediction.

    Parameters
    ----------
    units : int, default 30
        Number of units in each of the two hidden layers.
    activation : str, default "relu"
        Activation used by both hidden layers.
    **kwargs
        Forwarded to ``keras.Model.__init__`` (e.g. ``name``).
    """

    def __init__(self, units=30, activation="relu", **kwargs):
        super().__init__(**kwargs)
        self.hidden1 = layers.Dense(units, activation=activation)
        self.hidden2 = layers.Dense(units, activation=activation)
        self.conc = layers.Concatenate()
        # Deliberately not called `self.output`: `output` is a read-only
        # property on Keras layers/models, so assigning to it raises
        # AttributeError.
        self.main_output = layers.Dense(1)

    def call(self, inputs):
        """Run the forward pass and return the single-unit prediction tensor."""
        hidden1 = self.hidden1(inputs)
        hidden2 = self.hidden2(hidden1)
        # Wide path: join the raw inputs with the deep representation.
        conc = self.conc([inputs, hidden2])
        return self.main_output(conc)

In [14]:
# Functional-API wide & deep network: two dense hidden layers whose output is
# concatenated with the raw input before a single linear output unit.
# NOTE(review): 8641 units per hidden layer is very wide for this input; the
# log below this cell shows a large-allocation warning. Confirm it is intended.
HIDDEN_UNITS = 8641

feature_shape = X_train.shape[1:]
input_ = layers.Input(shape=feature_shape)
hidden1 = layers.Dense(units=HIDDEN_UNITS, activation="relu")(input_)
hidden2 = layers.Dense(units=HIDDEN_UNITS, activation="relu")(hidden1)
conc = layers.Concatenate()([input_, hidden2])
output = layers.Dense(units=1)(conc)

model = Model(inputs=[input_], outputs=[output])

2022-06-22 07:17:16.667457: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-06-22 07:17:16.667497: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-06-22 07:17:16.667521: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (codespaces-b618c3): /proc/driver/nvidia/version does not exist
2022-06-22 07:17:16.667964: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-22 07:17:17.030362: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 2

In [15]:
# Configure training: Adam optimiser minimising mean squared error.
model.compile(optimizer="adam", loss="mean_squared_error")

In [None]:
# Train before evaluating: without a fit step, the test MSE would be that of
# randomly initialised weights. The validation split drives early stopping,
# and the best weights seen on it are restored when training ends.
# NOTE(review): epochs and patience are starting values — tune as needed.
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    epochs=50,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
)

# Mean squared error on the held-out test set.
mse_test = model.evaluate(X_test, y_test)

In [None]:
# Rows located in Schleswig-Holstein. Boolean indexing is used instead of
# `.where(...).dropna()`: `.where` replaces every non-matching row with NaN,
# which upcasts the boolean columns, and then relies on dropna to remove them.
in_schleswig_holstein = learning_frame["regio1"] == "Schleswig_Holstein"
test_frame = learning_frame.loc[in_schleswig_holstein].dropna()

# Which `condition` categories occur in that state?
test_frame["condition"].unique()

In [None]:
# Feature rows for offers in Lübeck. `learning_frame` does not contain the
# `regio2` column, so the mask is built from `rental_offer`, which shares its
# index with `learning_frame`.
in_luebeck = rental_offer["regio2"] == "Lübeck"
l_frame = learning_frame.loc[in_luebeck].dropna()
l_frame

In [None]:
# A single hand-written offer to run through the preprocessing pipeline.
# Category strings must match the training data exactly; the state name was
# previously misspelled as "Schlewsig_Holstein".
# NOTE(review): confirm "need_for_renovation" is one of the values that occur
# in the training data's `condition` column.
test_data = {
    "noRooms": 7,
    "balcony": True,
    "regio1": "Schleswig_Holstein",
    "regio2": "Lübeck",
    "cellar": False,
    "condition": "need_for_renovation",
    "regio3": "Innenstadt",
    "garden": False,
    "livingSpace": 112.0,
    "lift": False,
}
test_frame = pd.DataFrame(data=test_data, index=[1])

In [None]:
# Encode the sample with the ALREADY-FITTED pipeline. Calling fit_transform
# here would refit the scaler and encoders on this single row, discarding the
# statistics and categories learned from the training data.
# With the encoders' default settings, transform raises on a category that was
# not seen during fitting, so the sample's strings must match the training
# data. The float32 cast mirrors how the training matrix was prepared.
test_prepared = full_pipe.transform(test_frame).astype(np.float32)