In [1]:
GOOGLE_APPLICATION_CREDENTIALS="[JSON-KEY].json"

In [2]:
import tensorflow as tf
import tensorboard as tb

from google.cloud import bigquery
from google.oauth2 import service_account
from sklearn.model_selection import train_test_split

In [3]:
def getData() -> tuple:
    """
        This method fetches data from a BigQuery table and returns it in two dataframes for training and target
        variables.

        Args:
            None
        
        Returns:
            (pandas.DataFrame, pandas.DataFrame): pandas.Dataframe tuple with the values to feed the model
    """
    credential = service_account.Credentials.from_service_account_file(GOOGLE_APPLICATION_CREDENTIALS)
    bq_client = bigquery.Client(credentials=credential)

    query_X = """SELECT
                bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront, view,
                condition, sqft_above, sqft_basement,yr_built, yr_renovated
            FROM `[DATABASE]`"""

    query_y = """SELECT price FROM `[DATABASE]`"""

    # query_job = bq_client.query(query=query)
    query_result_X = bq_client.query(query=query_X).result()
    query_result_y = bq_client.query(query=query_y).result()

    return query_result_X.to_dataframe(), query_result_y.to_dataframe()

In [4]:
X, y = getData()
X.head()

Unnamed: 0,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,condition,sqft_above,sqft_basement,yr_built,yr_renovated
0,0.0,0.0,4810,28008,2.0,0,0,3,4810,0,1990,2009
1,0.0,0.0,3064,4764,3.5,0,2,3,3064,0,1990,2009
2,4.0,2.0,2680,18768,1.0,0,0,5,2680,0,1965,0
3,4.0,2.0,2190,14439,1.0,0,0,4,1180,1010,1977,0
4,4.0,2.0,1670,9987,1.0,0,0,3,1670,0,1967,2011


In [5]:
y.head()

Unnamed: 0,price
0,1295648.0
1,1095000.0
2,360000.0
3,280000.0
4,204700.0


In [6]:
price_predictor = tf.keras.Sequential([
    tf.keras.layers.Input(12, name="input_layer"),
    tf.keras.layers.Dense(units=12, name="second_layer", activation="relu"),
    tf.keras.layers.Dense(units=12, name="third_layer", activation="relu"),
    tf.keras.layers.Dense(units=1)
])

price_predictor.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.5, momentum=0.3),
                        loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                        metrics=tf.keras.metrics.MeanSquaredError())

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, shuffle=True)

In [8]:
y_train.dtypes

price    float64
dtype: object

In [9]:
type(X_train)

pandas.core.frame.DataFrame

In [10]:
# The model was not fitting the pd.DataFrame
X_tensor_train = tf.constant(value=X_train, dtype=tf.float64)
y_tensor_train = tf.constant(value=y_train, dtype=tf.float64)
X_tensor_test = tf.constant(value=X_test, dtype=tf.float64)
y_tensor_test = tf.constant(value=y_test, dtype=tf.float64)

In [11]:
# In production the folders to hold the training and evaluation files must be generated at each run
# Tensorboard does not deal well with more than onde file per folder according to documentation
tensorboard_callback = tf.keras.callbacks.TensorBoard("logs", histogram_freq=1)

In [12]:
price_predictor.fit(X_tensor_train, y_tensor_train,
                    validation_data=(X_tensor_test, y_tensor_test), callbacks=[tensorboard_callback])



<keras.callbacks.History at 0x21c3c4d4c10>

Iniciar o tensorboard depois do treinamento

In [13]:
tb.notebook.list()

Known TensorBoard instances:
  - port 6006: logdir C:/Users/gustavo.o.gois/OneDrive - Accenture/Documents/JupyterNotebooks/HousePrice/logs (started 4:47:55 ago; pid 23260)
  - port 6006: logdir logs (started 1:33:15 ago; pid 25508)
  - port 6006: logdir {logs} (started 19:52:11 ago; pid 28492)
  - port 6006: logdir logs/train (started 4:56:04 ago; pid 32212)
  - port 6006: logdir {logs/train} (started 19:28:12 ago; pid 32224)
  - port 6006: logdir ./ (started 4:39:42 ago; pid 36124)
  - port 6006: logdir ./logs/train/ (started 4:40:01 ago; pid 37120)
  - port 6006: logdir ./logs/train/ (started 4:46:59 ago; pid 5572)


In [14]:
price_predictor.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 second_layer (Dense)        (None, 12)                156       
                                                                 
 third_layer (Dense)         (None, 12)                156       
                                                                 
 dense (Dense)               (None, 1)                 13        
                                                                 
Total params: 325
Trainable params: 325
Non-trainable params: 0
_________________________________________________________________


In [15]:
X_tensor_test[0]

<tf.Tensor: shape=(12,), dtype=float64, numpy=
array([3.000e+00, 1.000e+00, 1.140e+03, 5.000e+03, 1.000e+00, 0.000e+00,
       0.000e+00, 3.000e+00, 1.140e+03, 0.000e+00, 1.960e+03, 2.012e+03])>

In [16]:
y_tensor_test[0]

<tf.Tensor: shape=(1,), dtype=float64, numpy=array([276000.])>

?

In [49]:
price_predictor.predict([[3, 1, 1.14, 5, 1, 0, 0, 3, 1.14, 0, 1.96, 2.012]])



array([[nan]], dtype=float32)

In [50]:
price_predictor.predict(X_tensor_test)



array([[nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
       [nan],
      