In [22]:
# Python ≥3.5 is required
import re
import sys
assert sys.version_info >= (3, 5)


def _major_minor(version):
    """Return the leading ``(major, minor)`` integers of a version string.

    Version strings must not be compared directly: string comparison is
    lexicographic, so ``"0.9" >= "0.20"`` and ``"10.0" < "2.0"`` are both
    true. Integer tuples give the intended numeric ordering. Anything after
    the minor number (patch level, ``rc1``, ``.dev0`` ...) is ignored, and a
    missing minor number counts as 0.

    Raises:
        ValueError: if ``version`` does not start with a number.
    """
    match = re.match(r"(\d+)(?:\.(\d+))?", version)
    if match is None:
        raise ValueError("Unrecognized version string: {!r}".format(version))
    return int(match.group(1)), int(match.group(2) or 0)


# Scikit-Learn ≥0.20 is required
import sklearn
assert _major_minor(sklearn.__version__) >= (0, 20)

# TensorFlow ≥2.0 is required
import tensorflow as tf
from tensorflow import keras
assert _major_minor(tf.__version__) >= (2, 0)

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "data"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure as ``<fig_id>.<fig_extension>`` in IMAGES_PATH.

    Args:
        fig_id: file name without extension; also echoed in the progress message.
        tight_layout: when true, call ``plt.tight_layout()`` before saving.
        fig_extension: file extension, also passed to ``savefig`` as the format.
        resolution: value passed to ``savefig`` as ``dpi``.
    """
    filename = "".join((fig_id, ".", fig_extension))
    target = os.path.join(IMAGES_PATH, filename)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

In [23]:
X = tf.range(10)
dataset = tf.data.Dataset.from_tensor_slices(X)
dataset

<TensorSliceDataset shapes: (), types: tf.int32>

In [24]:
dataset = tf.data.Dataset.range(10)
for item in dataset:
    print(item)

tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)
tf.Tensor(3, shape=(), dtype=int64)
tf.Tensor(4, shape=(), dtype=int64)
tf.Tensor(5, shape=(), dtype=int64)
tf.Tensor(6, shape=(), dtype=int64)
tf.Tensor(7, shape=(), dtype=int64)
tf.Tensor(8, shape=(), dtype=int64)
tf.Tensor(9, shape=(), dtype=int64)


In [25]:
dataset = dataset.repeat(3).batch(7)
for item in dataset:
    print(item)

tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int64)
tf.Tensor([7 8 9 0 1 2 3], shape=(7,), dtype=int64)
tf.Tensor([4 5 6 7 8 9 0], shape=(7,), dtype=int64)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int64)
tf.Tensor([8 9], shape=(2,), dtype=int64)


In [26]:
dataset = dataset.map(lambda x: x * 2)

In [27]:
for item in dataset:
    print(item)

tf.Tensor([ 0  2  4  6  8 10 12], shape=(7,), dtype=int64)
tf.Tensor([14 16 18  0  2  4  6], shape=(7,), dtype=int64)
tf.Tensor([ 8 10 12 14 16 18  0], shape=(7,), dtype=int64)
tf.Tensor([ 2  4  6  8 10 12 14], shape=(7,), dtype=int64)
tf.Tensor([16 18], shape=(2,), dtype=int64)


In [29]:
dataset = tf.data.Dataset.range(10)
# filter() keeps the elements for which the predicate is true. Every element
# of range(10) is already < 10, so on this dataset the filter lets all of
# them through.
dataset = dataset.filter(lambda x: x < 10)  # keep only items < 10

# take(3) limits the iteration to the first three elements.
for item in dataset.take(3):
    print(item)

tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(1, shape=(), dtype=int64)
tf.Tensor(2, shape=(), dtype=int64)


In [30]:
tf.random.set_seed(42)

dataset = tf.data.Dataset.range(10).repeat(3)
dataset = dataset.shuffle(buffer_size=5, seed=42).batch(7)
for item in dataset:
    print(item)

tf.Tensor([0 1 6 5 7 3 9], shape=(7,), dtype=int64)
tf.Tensor([8 2 1 0 4 6 4], shape=(7,), dtype=int64)
tf.Tensor([7 2 5 9 2 1 3], shape=(7,), dtype=int64)
tf.Tensor([4 3 8 7 9 5 0], shape=(7,), dtype=int64)
tf.Tensor([8 6], shape=(2,), dtype=int64)


In [31]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
# Split off the test set first; the 1-D target is reshaped into one column.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
# Split the remainder again into training and validation sets.
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

# Fit the scaler on the training features only and keep the per-feature mean
# and scale it learned.
scaler = StandardScaler()
scaler.fit(X_train)
X_mean = scaler.mean_
X_std = scaler.scale_

Downloading Cal. housing from https://ndownloader.figshare.com/files/5976036 to /root/scikit_learn_data


In [32]:
housing.data.shape

(20640, 8)

In [33]:
housing.target.shape

(20640,)

In [34]:
housing.target

array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894])

In [35]:
# reshape(rows, cols): -1 lets NumPy infer the number of rows and 1 forces a
# single column, so the 1-D target of shape (20640,) becomes (20640, 1).
housing.target.reshape(-1, 1)

array([[4.526],
       [3.585],
       [3.521],
       ...,
       [0.923],
       [0.847],
       [0.894]])

In [36]:
 a = housing.target.reshape(-1, 1)
 a.shape

(20640, 1)

In [37]:
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target.reshape(-1, 1), random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

In [38]:
X_train_full.shape, X_test.shape, y_train_full.shape, y_test.shape

((15480, 8), (5160, 8), (15480, 1), (5160, 1))

In [39]:
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

((11610, 8), (3870, 8), (11610, 1), (3870, 1))

In [40]:
X_train[:4]

array([[ 3.52140000e+00,  1.50000000e+01,  3.04994451e+00,
         1.10654828e+00,  1.44700000e+03,  1.60599334e+00,
         3.76300000e+01, -1.22430000e+02],
       [ 5.32750000e+00,  5.00000000e+00,  6.49005964e+00,
         9.91053678e-01,  3.46400000e+03,  3.44333996e+00,
         3.36900000e+01, -1.17390000e+02],
       [ 3.10000000e+00,  2.90000000e+01,  7.54237288e+00,
         1.59152542e+00,  1.32800000e+03,  2.25084746e+00,
         3.84400000e+01, -1.22980000e+02],
       [ 7.17360000e+00,  1.20000000e+01,  6.28900256e+00,
         9.97442455e-01,  1.05400000e+03,  2.69565217e+00,
         3.35500000e+01, -1.17700000e+02]])

In [41]:
scaler = StandardScaler()
scaler.fit(X_train)

X_mean = scaler.mean_
X_std = scaler.scale_

In [42]:
# https://stackoverflow.com/questions/59101623/how-to-use-fit-and-transform-for-training-and-testing-data-with-standardscaler
normalized_X_train = scaler.transform(X_train)
normalized_X_train

array([[-0.19397883, -1.07781319, -0.94338545, ..., -0.57291624,
         0.92926047, -1.42215523],
       [ 0.75198318, -1.868895  ,  0.40547793, ...,  0.20516532,
        -0.91654738,  1.09666969],
       [-0.41469108,  0.02970134,  0.81808819, ..., -0.29983271,
         1.30872858, -1.697027  ],
       ...,
       [-1.22332336,  0.50435042, -0.51600328, ...,  0.1344908 ,
        -0.71978613,  1.14664638],
       [-0.93556989,  1.8491895 , -0.10878451, ..., -0.01354498,
         0.52168361, -0.10277075],
       [ 0.89585991,  0.1879177 ,  0.29947528, ..., -0.17823425,
         1.12133692, -1.30720885]])

In [43]:
X_mean, X_std

(array([ 3.89175860e+00,  2.86245478e+01,  5.45593655e+00,  1.09963474e+00,
         1.42428122e+03,  2.95886657e+00,  3.56464315e+01, -1.19584363e+02]),
 array([1.90927329e+00, 1.26409177e+01, 2.55038070e+00, 4.65460128e-01,
        1.09576000e+03, 2.36138048e+00, 2.13456672e+00, 2.00093304e+00]))

In [44]:
def save_to_multiple_csv_files(data, name_prefix, header=None, n_parts=10):
    """Split ``data`` row-wise into ``n_parts`` CSV files and return their paths.

    The files are written to ``datasets/housing`` (created if missing) and
    named ``my_<name_prefix>_<NN>.csv``, where ``NN`` is the zero-padded part
    index. Rows are distributed with ``np.array_split``, so part sizes differ
    by at most one row.

    Args:
        data: indexable sequence of rows (e.g. a 2-D NumPy array); each row is
            an iterable of column values.
        name_prefix: string inserted into every file name.
        header: optional header line written at the top of every part
            (without the trailing newline); skipped when ``None``.
        n_parts: number of files to produce.

    Returns:
        List of the file paths written, in part order.
    """
    housing_dir = os.path.join("datasets", "housing")
    os.makedirs(housing_dir, exist_ok=True)
    path_format = os.path.join(housing_dir, "my_{}_{:02d}.csv")

    def format_value(value):
        # Since NumPy 2.0, repr() of a NumPy scalar is e.g. "np.float64(1.5)",
        # which would corrupt the CSV. str() gives the bare shortest
        # round-trip text ("1.5") on both old and new NumPy versions.
        if isinstance(value, np.generic):
            return str(value)
        return repr(value)

    filepaths = []
    row_count = len(data)
    for file_idx, row_indices in enumerate(np.array_split(np.arange(row_count), n_parts)):
        part_csv = path_format.format(name_prefix, file_idx)
        filepaths.append(part_csv)
        with open(part_csv, "wt", encoding="utf-8") as f:
            if header is not None:
                f.write(header)
                f.write("\n")
            for row_idx in row_indices:
                f.write(",".join(format_value(col) for col in data[row_idx]))
                f.write("\n")
    return filepaths

In [45]:
train_data = np.c_[X_train, y_train]
valid_data = np.c_[X_valid, y_valid]
test_data = np.c_[X_test, y_test]
header_cols = housing.feature_names + ["MedianHouseValue"]
header = ",".join(header_cols)

train_filepaths = save_to_multiple_csv_files(train_data, "train", header, n_parts=20)
valid_filepaths = save_to_multiple_csv_files(valid_data, "valid", header, n_parts=10)
test_filepaths = save_to_multiple_csv_files(test_data, "test", header, n_parts=10)

In [46]:
train_filepaths[0]

'datasets/housing/my_train_00.csv'

In [47]:
import pandas as pd
pd.read_csv(train_filepaths[0]).head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedianHouseValue
0,3.5214,15.0,3.049945,1.106548,1447.0,1.605993,37.63,-122.43,1.442
1,5.3275,5.0,6.49006,0.991054,3464.0,3.44334,33.69,-117.39,1.687
2,3.1,29.0,7.542373,1.591525,1328.0,2.250847,38.44,-122.98,1.621
3,7.1736,12.0,6.289003,0.997442,1054.0,2.695652,33.55,-117.7,2.621
4,2.0549,13.0,5.312457,1.085092,3297.0,2.244384,33.93,-116.93,0.956


In [48]:
with open(train_filepaths[0]) as f:
    for i in range(5):
        print(f.readline(), end="")

MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedianHouseValue
3.5214,15.0,3.0499445061043287,1.106548279689234,1447.0,1.6059933407325193,37.63,-122.43,1.442
5.3275,5.0,6.490059642147117,0.9910536779324056,3464.0,3.4433399602385686,33.69,-117.39,1.687
3.1,29.0,7.5423728813559325,1.5915254237288134,1328.0,2.2508474576271187,38.44,-122.98,1.621
7.1736,12.0,6.289002557544757,0.9974424552429667,1054.0,2.6956521739130435,33.55,-117.7,2.621


In [49]:
train_filepaths

['datasets/housing/my_train_00.csv',
 'datasets/housing/my_train_01.csv',
 'datasets/housing/my_train_02.csv',
 'datasets/housing/my_train_03.csv',
 'datasets/housing/my_train_04.csv',
 'datasets/housing/my_train_05.csv',
 'datasets/housing/my_train_06.csv',
 'datasets/housing/my_train_07.csv',
 'datasets/housing/my_train_08.csv',
 'datasets/housing/my_train_09.csv',
 'datasets/housing/my_train_10.csv',
 'datasets/housing/my_train_11.csv',
 'datasets/housing/my_train_12.csv',
 'datasets/housing/my_train_13.csv',
 'datasets/housing/my_train_14.csv',
 'datasets/housing/my_train_15.csv',
 'datasets/housing/my_train_16.csv',
 'datasets/housing/my_train_17.csv',
 'datasets/housing/my_train_18.csv',
 'datasets/housing/my_train_19.csv']

In [50]:
filepath_dataset = tf.data.Dataset.list_files(train_filepaths, seed=42)

In [51]:
for filepath in filepath_dataset:
    print(filepath)

tf.Tensor(b'datasets/housing/my_train_15.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_08.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_03.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_01.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_10.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_05.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_19.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_16.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_02.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_09.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_00.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_07.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_12.csv', shape=(), dtype=string)
tf.Tensor(b'datasets/housing/my_train_04.csv', shape=(), dtype=string)
tf.Ten

In [52]:
# Peek at the raw lines of a single CSV file, skipping its header row.
# `filepath` is the last path yielded by the loop over `filepath_dataset`.
# The loop below uses its own variable so that `filepath` keeps pointing at a
# file: rebinding it to a CSV line would make re-running this cell try to
# open a line of data as if it were a file.
lines_dataset = tf.data.TextLineDataset(filepath).skip(1)
for line in lines_dataset:
    print(line)

tf.Tensor(b'4.6477,38.0,5.03728813559322,0.911864406779661,745.0,2.5254237288135593,32.64,-117.07,1.504', shape=(), dtype=string)
tf.Tensor(b'3.0,39.0,4.628930817610063,0.949685534591195,1036.0,3.257861635220126,33.92,-118.28,1.1', shape=(), dtype=string)
tf.Tensor(b'2.1875,39.0,5.217391304347826,1.0499194847020934,1467.0,2.36231884057971,40.42,-120.65,0.643', shape=(), dtype=string)
tf.Tensor(b'4.3523,5.0,6.425531914893617,1.0212765957446808,376.0,2.6666666666666665,39.25,-121.2,1.882', shape=(), dtype=string)
tf.Tensor(b'2.6623,32.0,5.116083916083916,1.1146853146853146,1452.0,2.0307692307692307,38.63,-121.37,1.207', shape=(), dtype=string)
tf.Tensor(b'5.7309,36.0,4.938053097345133,0.8805309734513275,662.0,2.9292035398230087,37.64,-122.08,1.779', shape=(), dtype=string)
tf.Tensor(b'2.0625,44.0,4.056022408963585,1.1008403361344539,1066.0,2.9859943977591037,37.01,-121.57,1.703', shape=(), dtype=string)
tf.Tensor(b'2.9861,33.0,4.198858230256898,1.0532825880114176,2239.0,2.130352045670789

In [53]:
# https://www.tensorflow.org/api_docs/python/tf/data/TextLineDataset
# Map every file path to a dataset of that file's text lines (skip(1) drops
# the header row) and interleave the lines of n_readers files at a time.
n_readers = 5
dataset = filepath_dataset.interleave(
    lambda filepath: tf.data.TextLineDataset(filepath).skip(1),
    cycle_length=n_readers)

In [54]:
for line in dataset.take(5):
    print(line.numpy())

b'4.6477,38.0,5.03728813559322,0.911864406779661,745.0,2.5254237288135593,32.64,-117.07,1.504'
b'8.72,44.0,6.163179916317992,1.0460251046025104,668.0,2.794979079497908,34.2,-118.18,4.159'
b'3.8456,35.0,5.461346633416459,0.9576059850374065,1154.0,2.8778054862842892,37.96,-122.05,1.598'
b'3.3456,37.0,4.514084507042254,0.9084507042253521,458.0,3.2253521126760565,36.67,-121.7,2.526'
b'3.6875,44.0,4.524475524475524,0.993006993006993,457.0,3.195804195804196,34.04,-118.15,1.625'


In [55]:
# for understanding only
# record_defaults holds one entry per CSV column. Each entry determines the
# dtype of the parsed field (see the dtypes in the output below) and is the
# value used when the field is empty. The last entry is an empty tensor, so
# that column has no default value to fall back on.
record_defaults=[0, np.nan, tf.constant(np.nan, dtype=tf.float64), "Hello", tf.constant([])]
parsed_fields = tf.io.decode_csv('1,2,3,4,5', record_defaults)
parsed_fields

[<tf.Tensor: shape=(), dtype=int32, numpy=1>,
 <tf.Tensor: shape=(), dtype=float32, numpy=2.0>,
 <tf.Tensor: shape=(), dtype=float64, numpy=3.0>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'4'>,
 <tf.Tensor: shape=(), dtype=float32, numpy=5.0>]

In [56]:
parsed_fields = tf.io.decode_csv(',,,,5', record_defaults)
parsed_fields

[<tf.Tensor: shape=(), dtype=int32, numpy=0>,
 <tf.Tensor: shape=(), dtype=float32, numpy=nan>,
 <tf.Tensor: shape=(), dtype=float64, numpy=nan>,
 <tf.Tensor: shape=(), dtype=string, numpy=b'Hello'>,
 <tf.Tensor: shape=(), dtype=float32, numpy=5.0>]

In [57]:
try:
    parsed_fields = tf.io.decode_csv(',,,,', record_defaults)
except tf.errors.InvalidArgumentError as ex:
    print(ex)

Field 4 is required but missing in record 0! [Op:DecodeCSV]


In [58]:
# END of for understanding only
try:
    parsed_fields = tf.io.decode_csv('1,2,3,4,5,6,7', record_defaults)
except tf.errors.InvalidArgumentError as ex:
    print(ex)

Expect 5 fields but have 7 in record 0 [Op:DecodeCSV]


In [59]:
X_train.shape[-1]

8

In [60]:
n_inputs = X_train.shape[-1]

@tf.function
def preprocess(line):
    """Parse one CSV line into a ``(scaled_features, target)`` pair.

    The first ``n_inputs`` columns are the features; they default to 0.0 when
    empty and are standardized with ``X_mean`` / ``X_std``. The last column is
    the target; its default is an empty tensor, so it carries no fallback
    value. The target is returned as a 1-element tensor.
    """
    feature_defaults = [0.] * n_inputs
    target_default = tf.constant([], dtype=tf.float32)
    columns = tf.io.decode_csv(
        line, record_defaults=feature_defaults + [target_default])
    features = tf.stack(columns[:n_inputs])  # every column except the last
    target = tf.stack(columns[n_inputs:])    # only the last column
    return (features - X_mean) / X_std, target

In [61]:
preprocess(b'4.2083,44.0,5.3232,0.9171,846.0,2.3370,37.47,-122.2,2.782')

(<tf.Tensor: shape=(8,), dtype=float32, numpy=
 array([ 0.16579157,  1.216324  , -0.05204565, -0.39215982, -0.5277444 ,
        -0.2633488 ,  0.8543046 , -1.3072058 ], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([2.782], dtype=float32)>)

In [62]:
# [<tf.Tensor: shape=(), dtype=float32, numpy=4.2083>,
# <tf.Tensor: shape=(), dtype=float32, numpy=44.0>,
# <tf.Tensor: shape=(), dtype=float32, numpy=5.3232>,
# <tf.Tensor: shape=(), dtype=float32, numpy=0.9171>,
# <tf.Tensor: shape=(), dtype=float32, numpy=846.0>,
# <tf.Tensor: shape=(), dtype=float32, numpy=2.337>,
# <tf.Tensor: shape=(), dtype=float32, numpy=37.47>,
# <tf.Tensor: shape=(), dtype=float32, numpy=-122.2>,
# <tf.Tensor: shape=(), dtype=float32, numpy=2.782>]

# x = 4.2083, ..., -122.2
# y = 2.782

In [63]:
def csv_reader_dataset(filepaths, repeat=1, n_readers=5,
                       n_read_threads=None, shuffle_buffer_size=10000,
                       n_parse_threads=5, batch_size=32):
    """Build a batched dataset of ``preprocess``-ed records from CSV files.

    Pipeline, in order: list the files, repeat the file list, interleave the
    header-less lines of ``n_readers`` files at a time, shuffle the lines,
    parse them with ``preprocess``, batch, and prefetch one batch.

    Args:
        filepaths: file paths or patterns passed to ``Dataset.list_files``.
        repeat: count passed to ``Dataset.repeat`` on the list of files.
        n_readers: ``cycle_length`` of the interleave step.
        n_read_threads: ``num_parallel_calls`` of the interleave step.
        shuffle_buffer_size: buffer size of the shuffle step.
        n_parse_threads: ``num_parallel_calls`` of the parsing ``map`` step.
        batch_size: number of records per batch.

    Returns:
        The resulting ``tf.data`` dataset.
    """
    def read_lines(filepath):
        # One dataset of text lines per file, minus its header row.
        return tf.data.TextLineDataset(filepath).skip(1)

    return (
        tf.data.Dataset.list_files(filepaths)
        .repeat(repeat)
        .interleave(read_lines, cycle_length=n_readers,
                    num_parallel_calls=n_read_threads)
        .shuffle(shuffle_buffer_size)
        .map(preprocess, num_parallel_calls=n_parse_threads)
        .batch(batch_size)
        .prefetch(1)
    )

In [64]:
tf.random.set_seed(42)

train_set = csv_reader_dataset(train_filepaths, batch_size=3)
for X_batch, y_batch in train_set.take(2):
    print("X =", X_batch)
    print("y =", y_batch)
    print()

X = tf.Tensor(
[[ 0.5804519  -0.20762321  0.05616303 -0.15191229  0.01343246  0.00604472
   1.2525111  -1.3671792 ]
 [ 5.818099    1.8491895   1.1784915   0.28173092 -1.2496178  -0.3571987
   0.7231292  -1.0023477 ]
 [-0.9253566   0.5834586  -0.7807257  -0.28213993 -0.36530012  0.27389365
  -0.76194876  0.72684526]], shape=(3, 8), dtype=float32)
y = tf.Tensor(
[[1.752]
 [1.313]
 [1.535]], shape=(3, 1), dtype=float32)

X = tf.Tensor(
[[-0.8324941   0.6625668  -0.20741376 -0.18699841 -0.14536144  0.09635526
   0.9807942  -0.67250353]
 [-0.62183803  0.5834586  -0.19862501 -0.3500319  -1.1437552  -0.3363751
   1.107282   -0.8674123 ]
 [ 0.8683102   0.02970133  0.3427381  -0.29872298  0.7124906   0.28026953
  -0.72915536  0.86178064]], shape=(3, 8), dtype=float32)
y = tf.Tensor(
[[0.919]
 [1.028]
 [2.182]], shape=(3, 1), dtype=float32)



In [65]:
train_set = csv_reader_dataset(train_filepaths, repeat=None)
valid_set = csv_reader_dataset(valid_filepaths)
test_set = csv_reader_dataset(test_filepaths)

In [66]:
train_set

<PrefetchDataset shapes: ((None, 8), (None, 1)), types: (tf.float32, tf.float32)>

In [67]:
for item in train_set.take(1):
    print(item)

(<tf.Tensor: shape=(32, 8), dtype=float32, numpy=
array([[ 1.18324661e+00, -2.86731392e-01,  2.56954998e-01,
        -9.14653018e-02,  6.74161077e-01,  5.36658242e-02,
        -7.43209183e-01,  7.11849034e-01],
       [-4.45226371e-01,  1.84918952e+00, -3.20666254e-01,
        -1.40449286e-01, -1.06119268e-01, -6.69142455e-02,
        -6.91677988e-01,  7.31840193e-01],
       [ 3.09196889e-01,  5.04350424e-01,  2.08594278e-01,
        -2.77027190e-01,  6.08453274e-01,  2.73698270e-01,
        -8.46275151e-01,  7.81819880e-01],
       [-1.28795540e+00,  1.45364857e+00, -5.05224824e-01,
         2.03960374e-01, -4.95803148e-01,  4.35151726e-01,
        -7.66634524e-01,  6.56878173e-01],
       [-6.46088064e-01, -1.07781315e+00, -3.59055459e-01,
         9.48920622e-02,  1.03099108e+00, -2.29778379e-01,
        -7.24471331e-01,  9.76728678e-01],
       [ 1.76200092e+00, -6.82272315e-01,  7.48218775e-01,
        -2.33296052e-01, -6.32694423e-01, -3.28950375e-01,
        -1.32412410e+00,  1

In [68]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

model = keras.models.Sequential([
    keras.layers.Dense(30, activation="relu", input_shape=X_train.shape[1:]),
    keras.layers.Dense(1),
])

In [69]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 30)                270       
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 31        
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________


In [70]:
# Use `learning_rate`: the `lr` alias is deprecated and is rejected by newer
# Keras releases.
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))

In [71]:
batch_size = 32
model.fit(train_set, steps_per_epoch=len(X_train) // batch_size, epochs=20,
          validation_data=valid_set)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fc14edd4b00>

In [72]:
model.evaluate(test_set, steps=len(X_test) // batch_size)



0.4223783016204834

In [226]:
new_set = test_set.map(lambda X, y: X) # we could instead just pass test_set, Keras would ignore the labels
# X_new is only used for its length, to derive the number of prediction steps.
X_new = X_test
model.predict(new_set, steps=len(X_new) // batch_size)


array([[2.2855759 ],
       [2.2665212 ],
       [1.5062004 ],
       ...,
       [0.53062433],
       [3.975136  ],
       [0.9724442 ]], dtype=float32)

In [227]:
for item in new_set.take(1):
    print(item)

tf.Tensor(
[[ 0.16338226 -0.44494775  0.27062452 -0.26945964 -0.36712533 -0.01291615
   1.4164782  -0.85741866]
 [-0.41867167 -1.1569214   0.9045907   0.78342044 -0.57154965 -0.15582308
   0.7699782   0.01217846]
 [-1.1086725   1.216324   -0.4535269  -0.32382935 -0.83985656  0.30489177
  -0.72447133  1.1416489 ]
 [-0.1615581   1.6909732  -0.4381055  -0.2916515  -0.3424849  -0.25024372
   0.999532   -1.4571373 ]
 [-0.21456262  0.34613404 -0.12912637 -0.03085851 -0.2786023  -0.02853999
  -0.86501473  0.8118047 ]
 [ 0.42536664 -0.36583957 -0.39545774 -0.2140565  -0.859934   -0.17871939
  -0.89780813  0.88177174]
 [-0.3361272   0.74167496  0.20470892 -0.26127434 -1.0807853  -0.1361508
   2.2972195  -2.271756  ]
 [ 0.58946055  0.42524222  0.15634334 -0.20662263 -0.53687054 -0.02800608
   1.0838584  -1.2422374 ]
 [ 1.1141629   1.0581076   0.09665916 -0.14156413 -0.5578605  -0.19691937
  -0.7853724   0.60190356]
 [ 0.17223383  0.8998913  -0.11414039 -0.35571003 -0.4492601   0.19270627
  -0.78

In [228]:
for item in test_set.take(1):
    print(item)

(<tf.Tensor: shape=(32, 8), dtype=float32, numpy=
array([[ 0.02526688,  0.26702586,  0.09454363, -0.26460737, -0.45838618,
        -0.10464293, -0.71510154,  0.7868148 ],
       [-0.11468165, -0.2867314 , -1.0918899 ,  0.29259846, -0.759547  ,
        -0.68454146,  1.0135876 , -1.4121602 ],
       [ 0.10440702,  1.1372159 , -0.6544708 , -0.31728342, -0.53687054,
        -0.18987034,  0.8121414 , -1.2172476 ],
       [ 0.8226907 , -1.5524622 ,  0.6050607 , -0.11830544,  1.8486884 ,
         0.103531  , -0.8415911 ,  1.0916729 ],
       [-0.53374165,  0.34613404, -0.5336829 , -0.04246955, -0.1736523 ,
        -0.38351378,  0.44672647, -1.1572781 ],
       [ 2.2755468 , -1.3942459 ,  0.22913691,  0.01118025, -0.97583526,
        -0.04063872,  0.8730424 , -1.4571373 ],
       [-0.24043639,  0.5043504 ,  0.16758156, -0.13050707, -0.28864098,
         0.05035667, -0.4480686 ,  0.7218465 ],
       [-1.0805466 ,  1.8491895 , -0.36819142, -0.03761702, -1.0123396 ,
        -0.6085963 ,  1.243141

In [229]:
# Prediction
filepath = '/content/my_test_00.csv'
x = lambda filepath: tf.data.TextLineDataset(filepath).skip(1)
a = x(filepath)
for filepath in a:
    print(filepath)

tf.Tensor(b'1.6812,25.0,4.192200557103064,1.0222841225626742,1392.0,3.8774373259052926,36.06,-119.01,0.477', shape=(), dtype=string)
tf.Tensor(b'2.5313,30.0,5.039383561643835,1.1934931506849316,1565.0,2.6797945205479454,35.14,-119.46,0.458', shape=(), dtype=string)
tf.Tensor(b'3.4801,52.0,3.977154724818276,1.185877466251298,1310.0,1.3603322949117342,37.8,-122.44,5.00001', shape=(), dtype=string)
tf.Tensor(b'5.7376,17.0,6.163636363636364,1.02020202020202,1705.0,3.4444444444444446,34.28,-118.72,2.186', shape=(), dtype=string)
tf.Tensor(b'3.725,34.0,5.492990654205608,1.02803738317757,1063.0,2.4836448598130842,36.62,-121.93,2.78', shape=(), dtype=string)
tf.Tensor(b'4.7147,12.0,5.251482799525504,0.9750889679715302,2400.0,2.8469750889679717,34.08,-117.61,1.587', shape=(), dtype=string)
tf.Tensor(b'5.0839,36.0,6.221719457013575,1.0950226244343892,670.0,3.0316742081447963,33.89,-118.02,1.982', shape=(), dtype=string)
tf.Tensor(b'3.6908,38.0,4.962825278810409,1.0483271375464684,1011.0,3.758364

In [244]:
test_set = csv_reader_dataset(['/content/my_test_00.csv'])

In [250]:
new_set = test_set.take(3).map(lambda X, y: X) # we could instead just pass test_set, Keras would ignore the labels
model.predict(new_set)

array([[0.39459643]], dtype=float32)

In [251]:
for item in test_set.take(3):
    print(item)

(<tf.Tensor: shape=(1, 8), dtype=float32, numpy=
array([[-1.157801  , -0.2867314 , -0.49550867, -0.16618097, -0.02946015,
         0.38899735,  0.1937491 ,  0.28704795]], dtype=float32)>, <tf.Tensor: shape=(1, 1), dtype=float32, numpy=array([[0.477]], dtype=float32)>)
