## Regressor using tf.estimator.DNNRegressor
https://www.gcptutorials.com/post/stock-price-prediction-with-tensorflow


In [1]:
import tempfile
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

In [3]:
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import median_absolute_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error

In [4]:
import warnings
warnings.filterwarnings('ignore')

In [5]:
tf.get_logger().setLevel('ERROR')

## Get Training Data

In [6]:
# Load Data
california_house_price = fetch_california_housing()
df = pd.DataFrame(
    california_house_price.data,
    columns=california_house_price.feature_names,
)
df.head(10)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25
5,4.0368,52.0,4.761658,1.103627,413.0,2.139896,37.85,-122.25
6,3.6591,52.0,4.931907,0.951362,1094.0,2.128405,37.84,-122.25
7,3.12,52.0,4.797527,1.061824,1157.0,1.788253,37.84,-122.25
8,2.0804,42.0,4.294118,1.117647,1206.0,2.026891,37.84,-122.26
9,3.6912,52.0,4.970588,0.990196,1551.0,2.172269,37.84,-122.25


In [7]:
X = df
y = pd.DataFrame()
y['PRICE'] = pd.DataFrame(california_house_price.target)

In [8]:
# train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

## Step 1: Create an input function
An input function is a function that returns a tf.data.Dataset object which outputs the following two-element tuple:

features — A Python dictionary in which:
(a)Each key is the name of a feature.
(b)Each value is an array containing all of that feature’s values.

label — An array containing the values of the label for every example.
We’re using pandas for building input pipeline

In [9]:
def input_fn(df_features, df_labels, batch_size=256, training_mode=True):
    # Convert the inputs Dataframes to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(df_features), df_labels))
    # Shuffle and repeat if you are in training mode.
    if training_mode:
        dataset = dataset.shuffle(1000).repeat()

    return dataset.batch(batch_size)

## Step 2: Define the model’s feature columns
A feature column is an object describing how the model should use raw input data from the features dictionary. When you build an Estimator model, we pass it a list of feature columns that describe each of the features you want the model to use. The tf.feature_column module provides many options for representing data to the model.

In [10]:
# Feature columns describe how to use the input.
feature_columns = []
for key in X_train.keys():
    feature_columns.append(tf.feature_column.numeric_column(key=key))

## Step 3: Instantiate the Estimator
The Iris problem is a classic classification problem. Fortunately, TensorFlow provides several pre-made classifier Estimators, including:

a. tf.estimator.DNNClassifier for deep models that perform multi-class classification.
b. tf.estimator.DNNLinearCombinedClassifier for wide & deep models.
c. tf.estimator.LinearClassifier for classifiers based on linear models.

For the Iris problem, tf.estimator.DNNClassifier seems like the best choice. Here’s how we instantiated this Estimator:

In [11]:
# Build a DNN with 3 hidden layers with 30 nodes each.
regressor_dir = tempfile.mkdtemp()
regressor = tf.estimator.DNNRegressor(
    model_dir=regressor_dir,
    feature_columns=feature_columns,
    optimizer='Adagrad', # ('Adagrad', 'Adam', 'Ftrl', 'RMSProp', SGD')
    activation_fn=tf.nn.relu,
    loss_reduction=tf.losses.Reduction.SUM_OVER_BATCH_SIZE,
    # Three hidden layers of 30 nodes each.
    hidden_units=[30, 30, 30])

## Step 4: Train and Evaluate

In [12]:
# Train the Model.
regressor.train(
    input_fn=lambda: input_fn(X_train, y_train, training_mode=True),
    steps=5000)

<tensorflow_estimator.python.estimator.canned.dnn.DNNRegressorV2 at 0x14d037730>

In [13]:
# Evaluates the accuracy of the trained model on the test data
eval_result = regressor.evaluate(input_fn=lambda: input_fn(X_test, y_test, training_mode=False))
for key, value in eval_result.items():
    print(key, ":", value)

average_loss : 1.1234211
label/mean : 2.050632
loss : 1.1330909
prediction/mean : 2.0510778
global_step : 5000


In [14]:
## Define Prediction input data function
def prediction_input_fn(features, batch_size=256):
    # Convert the inputs to a Dataset without labels.
    dataset = tf.data.Dataset.from_tensor_slices(dict(features))

    return dataset.batch(batch_size)

In [15]:
pred = regressor.predict(input_fn=lambda: prediction_input_fn(X_test))
predictions = np.array([p['predictions'][0] for p in pred])

In [16]:
print(f'R-Squared: {np.round(r2_score(y_test, predictions), decimals=3)*100}%')
print(f'MSE: {np.round(mean_squared_error(y_test, predictions), decimals=3)*100}%')

print("The Explained Variance: %.2f" % explained_variance_score(y_test, predictions))
print("The Mean Absolute Error: %.2f" % mean_absolute_error(y_test, predictions))
print("The Median Absolute Error: %.2f" % median_absolute_error(y_test, predictions))

R-Squared: 15.6%
MSE: 112.3%
The Explained Variance: 0.16
The Mean Absolute Error: 0.84
The Median Absolute Error: 0.70


## Step 5: Predict

In [17]:
# Generate predictions from the model
pred = regressor.predict(input_fn=lambda: prediction_input_fn(X_test))

In [18]:
predictions = np.array([p['predictions'][0] for p in pred])
df_predictions = pd.DataFrame(predictions)
df_predictions

Unnamed: 0,0
0,1.736858
1,2.119574
2,2.208238
3,2.498385
4,2.131596
...,...
4123,2.969947
4124,1.585386
4125,2.019677
4126,2.332325


In [None]:
# Get predictions and their probabilities
for pred_val in predictions:
    print(pred_val)