In [None]:
import tensorflow as tf
#import numpy as np
import pandas as pd
from tensorflow import feature_column
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split

In [None]:
# Load the raw real-estate table. The original cell evaluated df.head() in
# the middle (its result was discarded) and then echoed the entire frame;
# show only a short preview instead.
df = pd.read_csv("Real estate.csv")
df.head()

In [None]:
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in print() only appended a stray "None" line to the output.
df.info()
# Leave describe() as the cell's last expression so it is rendered as a table.
df.describe()


In [None]:


# Clean the frame in one re-runnable chain.
#  * errors='ignore' lets the cell be executed again after 'No' is already gone
#    (the previous in-place drop raised KeyError on a second run).
#  * dropna() must come BEFORE the casts: astype('int') raises on NaN, and
#    astype('str') turns NaN into the literal string 'nan', which a later
#    dropna() would no longer remove.
#  * transactionDate is truncated to its integer part and then stored as text;
#    latitude/longitude are stored as text as well, because later cells build
#    string vocabularies from these three columns.
df = (
    df.drop(columns='No', errors='ignore')
      .dropna()
      .astype({'transactionDate': 'int'})
      .astype({'transactionDate': 'str', 'latitude': 'str', 'longitude': 'str'})
)
df.head()


In [None]:
# Show the column labels of the cleaned frame; later cells refer to columns by these exact names.
df.columns

In [None]:

# Hold out 20% of the rows for testing, then 20% of the remainder for
# validation. A fixed seed keeps the three splits identical across
# "Restart Kernel -> Run All"; without it every run trained and evaluated on
# different rows.
SPLIT_SEED = 42
train, test = train_test_split(df, test_size=0.2, random_state=SPLIT_SEED)
train, val = train_test_split(train, test_size=0.2, random_state=SPLIT_SEED)
print(len(train), 'train examples')
print(len(val), 'validation examples')
print(len(test), 'test examples')

# info() prints by itself and returns None; print(df.info()) emitted an extra "None".
df.info()

In [None]:
def df_to_dataset(dataframe, shuffle=True, batch_size=32,
                  label_col='house_price_of_unit_area'):
    """Convert a pandas DataFrame into a batched ``tf.data.Dataset``.

    Args:
        dataframe: Source frame. It is copied first, so the caller's frame
            keeps its label column.
        shuffle: When true, shuffle with a buffer as large as the frame.
        batch_size: Number of rows per emitted batch.
        label_col: Name of the column to split off as the target. Defaults to
            the column this notebook predicts, so existing calls are unchanged.

    Returns:
        A dataset yielding ``(features, labels)`` batches, where ``features``
        is a dict keyed by the remaining column names.

    Raises:
        KeyError: If ``label_col`` is not a column of ``dataframe``.
    """
    features = dataframe.copy()
    labels = features.pop(label_col)
    ds = tf.data.Dataset.from_tensor_slices((dict(features), labels))
    if shuffle:
        ds = ds.shuffle(buffer_size=len(features))
    return ds.batch(batch_size)

In [None]:
# One tf.data pipeline per split. Only the training split is shuffled; the
# validation and test pipelines keep the row order of their frames.
batch_size = 16
train_ds = df_to_dataset(train, shuffle=True, batch_size=batch_size)
val_ds, test_ds = (
    df_to_dataset(split, shuffle=False, batch_size=batch_size)
    for split in (val, test)
)



In [None]:
# Pull a single batch from the training pipeline and print what it contains:
# the feature names, one example feature tensor, and the matching targets.
for feature_batch, label_batch in train_ds.take(1):
    print('Every feature:', list(feature_batch))
    print('A batch of', feature_batch['houseAge'])
    print('A batch of targets:', label_batch)


In [None]:
# Column names grouped by the kind of feature column they are meant for.

# Numeric columns
num_col = [
    'houseAge',
    'distanceToTheNearestMRTStation',
    'number_of_convenience_stores',
]

# Bucketized columns
buk_col = [
    'number_of_convenience_stores',
    'houseAge',
]

# Categorical columns
cate_col = ['transactionDate']

# Embedding columns
emb_col = ['latitude', 'longitude']

In [None]:
def get_scal(feature, data=None):
    """Build a min-max scaling function for one column.

    Args:
        feature: Name of the column whose range defines the scaling.
        data: Frame to read the column's minimum and maximum from. When
            omitted, the notebook-level ``train`` split is used, which is what
            existing ``get_scal(col)`` calls rely on.

    Returns:
        A one-argument function mapping ``x`` to ``(x - min) / (max - min)``.
        For a constant column (``max == min``) the divisor falls back to 1, so
        the function returns ``x - min`` instead of dividing by zero.
    """
    if data is None:
        data = train  # training split defined earlier in the notebook

    # Read the range once, when the scaler is built, instead of re-scanning
    # the column every time the returned function is called.
    mini = float(data[feature].min())
    maxi = float(data[feature].max())
    span = maxi - mini
    if span == 0:
        span = 1.0

    def minmax(x):
        return (x - mini) / span

    return minmax

In [None]:
# Start the feature-column list with one numeric column per entry in num_col,
# each normalised by the scaler that get_scal builds for that column.
feature_columns = [
    feature_column.numeric_column(col, normalizer_fn=get_scal(col))
    for col in num_col
]

In [None]:

# Unscaled numeric sources for the two bucketized features.
houseAge = feature_column.numeric_column("houseAge")
number_of_convenience_stores = feature_column.numeric_column("number_of_convenience_stores")

# Bucketize each source at fixed boundaries.
houseAge_bucket = feature_column.bucketized_column(
    houseAge,
    boundaries=[0, 9, 18, 27, 36, 45],
)
number_of_convenience_stores_buckets = feature_column.bucketized_column(
    number_of_convenience_stores,
    boundaries=[0, 2, 4, 6, 8, 10],
)

# Register both, house age first, in the shared feature-column list.
feature_columns += [houseAge_bucket, number_of_convenience_stores_buckets]




In [None]:
# One-hot encode every column in cate_col, using the distinct values found in
# df as the vocabulary.
for name in cate_col:
    vocabulary = df[name].unique()
    cat_c = feature_column.categorical_column_with_vocabulary_list(
        key=name,
        vocabulary_list=vocabulary,
    )
    one_hot = feature_column.indicator_column(cat_c)
    feature_columns.append(one_hot)

In [None]:
# Give every column in emb_col a 50-dimensional embedding over the distinct
# values found in df.
for col_name in emb_col:
    vocabulary = df[col_name].unique()
    cat_c = feature_column.categorical_column_with_vocabulary_list(
        key=col_name,
        vocabulary_list=vocabulary,
    )
    embeding = feature_column.embedding_column(
        categorical_column=cat_c,
        dimension=50,
    )
    feature_columns.append(embeding)

In [None]:


# Cross the two bucketized columns into 1000 hash buckets and wrap the cross
# in an indicator column before adding it to the feature-column list.
crossed_feature = feature_column.indicator_column(
    feature_column.crossed_column(
        [houseAge_bucket, number_of_convenience_stores_buckets],
        hash_bucket_size=1000,
    )
)
feature_columns.append(crossed_feature)

In [None]:
# Report how many feature columns have been collected so far. The label used
# to read "coumns" and, with print's own separator, carried a doubled space.
print('Total number of feature columns:', len(feature_columns))

In [None]:
# Wrap the collected feature columns in a DenseFeatures layer for use as the
# model's first layer.
feature_layer = layers.DenseFeatures(feature_columns)

In [None]:
# Regression network on top of the feature layer.
#  * The output unit is linear. The previous sigmoid squashed every prediction
#    into (0, 1), and nothing in this notebook rescales the
#    'house_price_of_unit_area' label into that interval, so the network could
#    not reach the targets it was trained against.
#  * 'accuracy' is a classification metric; mean absolute error is reported
#    instead, in the same units as the label.
model = tf.keras.Sequential([
    feature_layer,
    layers.Dense(16, kernel_regularizer=tf.keras.regularizers.l2(0.01), activation='relu'),
    layers.Dense(64, kernel_regularizer=tf.keras.regularizers.l2(0.01), activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(1),  # no activation -> unbounded linear output
])
model.compile(optimizer='adam',
              loss='mse',
              metrics=['mae'])

history = model.fit(train_ds, validation_data=val_ds, epochs=200)