In [1]:
# change these to try this notebook out# chang 
BUCKET = 'atos-asl'
PROJECT = 'qwiklabs-gcp-aebfb78fe0f1b1d1'
REGION = 'us-central1'

In [2]:
import os
os.environ['PROJECT'] = PROJECT
os.environ['BUCKET'] = BUCKET
os.environ['REGION'] = REGION

In [3]:
%%bash
gcloud config set project $PROJECT
gcloud config set compute/region $REGION

Updated property [core/project].
Updated property [compute/region].


In [51]:
len([[0.0] for i in range(0, 24)])

24

In [64]:
%%bash
mkdir ./train
touch ./train/__init__.py

In [73]:
%%writefile ./train/model.py
#!/usr/bin/env python

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

TIMESERIES_COL = 'price'
N_OUTPUTS = 1  # in each sequence, 1-49 are features, and 50 is label
SEQ_LEN = None
DEFAULTS =None
N_INPUTS = None

CSV_COLUMNS = [ 'prediction_date',
               'avg_wind_speed_100m',
               'avg_wind_direction_100m',
               'avg_temperature',
               'avg_air_density',
               'avg_pressure',
               'avg_precipitation',
               'avg_wind_gust',
               'avg_radiation',
               'avg_wind_speed',
               'avg_wind_direction',
               'price',
               'key'
              ]

LABEL_COLUMN = 'price'
NUMBER_OF_BUCKET = 42 # Number of week # Number of days: 289

# Set default values for each CSV column


def init(hparams):
    global SEQ_LEN, DEFAULTS, N_INPUTS
    SEQ_LEN = hparams['sequence_length']
    DEFAULTS = [[0.0] for x in range(0, SEQ_LEN)]
    N_INPUTS = SEQ_LEN - N_OUTPUTS


def linear_model(features, mode, params):
    X = features[TIMESERIES_COL]
    predictions = tf.layers.dense(X, 1, activation=None)
    return predictions


def dnn_model(features, mode, params):
    X = features[TIMESERIES_COL]
    h1 = tf.layers.dense(X, 10, activation=tf.nn.relu)
    h2 = tf.layers.dense(h1, 3, activation=tf.nn.relu)
    predictions = tf.layers.dense(h2, 1, activation=None)  # linear output: regression
    return predictions


def cnn_model(features, mode, params):
    X = tf.reshape(features[TIMESERIES_COL],
                   [-1, N_INPUTS, 1])  # as a 1D "sequence" with only one time-series observation (height)
    c1 = tf.layers.conv1d(X, filters=N_INPUTS // 2,
                          kernel_size=3, strides=1,
                          padding='same', activation=tf.nn.relu)
    p1 = tf.layers.max_pooling1d(c1, pool_size=2, strides=2)

    c2 = tf.layers.conv1d(p1, filters=N_INPUTS // 2,
                          kernel_size=3, strides=1,
                          padding='same', activation=tf.nn.relu)
    p2 = tf.layers.max_pooling1d(c2, pool_size=2, strides=2)

    outlen = p2.shape[1] * p2.shape[2]
    c2flat = tf.reshape(p2, [-1, outlen])
    h1 = tf.layers.dense(c2flat, 3, activation=tf.nn.relu)
    predictions = tf.layers.dense(h1, 1, activation=None)  # linear output: regression
    return predictions


def rnn_model(features, mode, params):
    CELL_SIZE = N_INPUTS // 3  # size of the internal state in each of the cells

    # 1. dynamic_rnn needs 3D shape: [BATCH_SIZE, N_INPUTS, 1]
    x = tf.reshape(features[TIMESERIES_COL], [-1, N_INPUTS, 1])

    # 2. configure the RNN
    cell = tf.nn.rnn_cell.GRUCell(CELL_SIZE)
    outputs, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

    # 3. pass rnn output through a dense layer
    h1 = tf.layers.dense(state, N_INPUTS // 2, activation=tf.nn.relu)
    predictions = tf.layers.dense(h1, 1, activation=None)  # (?, 1)
    return predictions


# 2-layer RNN
def rnn2_model(features, mode, params):
    # dynamic_rnn needs 3D shape: [BATCH_SIZE, N_INPUTS, 1]
    x = tf.reshape(features[TIMESERIES_COL], [-1, N_INPUTS, 1])

    # 2. configure the RNN
    cell1 = tf.nn.rnn_cell.GRUCell(N_INPUTS * 2)
    cell2 = tf.nn.rnn_cell.GRUCell(N_INPUTS // 2)
    cells = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])
    outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)
    # 'state' is now a tuple containing the final state of each cell layer
    # we use state[1] below to extract the final state of the final layer
    
    # 3. pass rnn output through a dense layer
    h1 = tf.layers.dense(state[1], cells.output_size // 2, activation=tf.nn.relu)
    predictions = tf.layers.dense(h1, 1, activation=None)  # (?, 1)
    return predictions


# create N-1 predictions
def rnnN_model(features, mode, params):
    # dynamic_rnn needs 3D shape: [BATCH_SIZE, N_INPUTS, 1]
    x = tf.reshape(features[TIMESERIES_COL], [-1, N_INPUTS, 1])

    # 2. configure the RNN
    cell1 = tf.nn.rnn_cell.GRUCell(N_INPUTS * 2)
    cell2 = tf.nn.rnn_cell.GRUCell(N_INPUTS // 2)
    cells = tf.nn.rnn_cell.MultiRNNCell([cell1, cell2])
    outputs, state = tf.nn.dynamic_rnn(cells, x, dtype=tf.float32)
    # 'outputs' contains the state of the final layer for every time step
    # not just the last time step (?,N_INPUTS, final cell size)
    
    # 3. pass state for each time step through a DNN, to get a prediction
    # for each time step 
    h1 = tf.layers.dense(outputs, cells.output_size, activation=tf.nn.relu)
    h2 = tf.layers.dense(h1, cells.output_size // 2, activation=tf.nn.relu)
    predictions = tf.layers.dense(h2, 1, activation=None)  # (?, N_INPUTS, 1)
    predictions = tf.reshape(predictions, [-1, N_INPUTS])
    return predictions # return prediction for each time step


# read data and convert to needed format
def read_dataset(filename, mode, batch_size=512):
    def _input_fn():
        def decode_csv(row):
            # row is a string tensor containing the contents of one row
            features = tf.decode_csv(row, record_defaults=DEFAULTS)  # string tensor -> list of 50 rank 0 float tensors
            label = features.pop()  # remove last feature and use as label
            features = tf.stack(features)  # list of rank 0 tensors -> single rank 1 tensor
            return {TIMESERIES_COL: features}, label

        # Create list of file names that match "glob" pattern (i.e. data_file_*.csv)
        dataset = tf.data.Dataset.list_files(filename)
        # Read in data from files
        dataset = dataset.flat_map(tf.data.TextLineDataset)
        # Parse text lines as comma-separated values (CSV)
        dataset = dataset.map(decode_csv)

        if mode == tf.estimator.ModeKeys.TRAIN:
            num_epochs = None  # loop indefinitely

        else:
            num_epochs = 1  # end-of-input after this

        dataset = dataset.repeat(num_epochs).batch(batch_size)
        return dataset.make_one_shot_iterator().get_next()

    return _input_fn


def serving_input_fn():
    feature_placeholders = {
        TIMESERIES_COL: tf.placeholder(tf.float32, [None, N_INPUTS])
    }

    features = {
        key: tf.expand_dims(tensor, -1)
        for key, tensor in feature_placeholders.items()
    }
    features[TIMESERIES_COL] = tf.squeeze(features[TIMESERIES_COL], axis=[2])

    return tf.estimator.export.ServingInputReceiver(features, feature_placeholders)


def compute_errors(features, labels, predictions):
    labels = tf.expand_dims(labels, -1)  # rank 1 -> rank 2 to match rank of predictions

    if predictions.shape[1] == 1:
        loss = tf.losses.mean_squared_error(labels, predictions)
        rmse = tf.metrics.root_mean_squared_error(labels, predictions)
        return loss, rmse
    else:
        # one prediction for every input in sequence
        # get 1-N of (x + label)
        labelsN = tf.concat([features[TIMESERIES_COL], labels], axis=1)
        labelsN = labelsN[:, 1:]
        # loss is computed from the last 1/3 of the series
        N = (2 * N_INPUTS) // 3
        loss = tf.losses.mean_squared_error(labelsN[:, N:], predictions[:, N:])
        # rmse is computed from last prediction and last label
        lastPred = predictions[:, -1]
        rmse = tf.metrics.root_mean_squared_error(labels, lastPred)
        return loss, rmse

# RMSE when predicting same as last value
def same_as_last_benchmark(features, labels):
    predictions = features[TIMESERIES_COL][:,-1] # last value in input sequence
    return tf.metrics.root_mean_squared_error(labels, predictions)


# create the inference model
def sequence_regressor(features, labels, mode, params):
    # 1. run the appropriate model
    model_functions = {
        'linear': linear_model,
        'dnn': dnn_model,
        'cnn': cnn_model,
        'rnn': rnn_model,
        'rnn2': rnn2_model,
        'rnnN': rnnN_model}
    model_function = model_functions[params['model']]
    predictions = model_function(features, mode, params)

    # 2. loss function, training/eval ops
    loss = None
    train_op = None
    eval_metric_ops = None
    if mode == tf.estimator.ModeKeys.TRAIN or mode == tf.estimator.ModeKeys.EVAL:
        loss, rmse = compute_errors(features, labels, predictions)

        if mode == tf.estimator.ModeKeys.TRAIN:
            # this is needed for batch normalization, but has no effect otherwise
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
            with tf.control_dependencies(update_ops):
                # 2b. set up training operation
                train_op = tf.contrib.layers.optimize_loss(
                    loss,
                    tf.train.get_global_step(),
                    learning_rate=params['learning_rate'],
                    optimizer="Adam")

        # 2c. eval metric
        eval_metric_ops = {
            "RMSE": rmse,
            "RMSE_same_as_last": same_as_last_benchmark(features, labels),
        }

    # 3. Create predictions
    if predictions.shape[1] != 1:
        predictions = predictions[:, -1]  # last predicted value
    predictions_dict = {"predicted": predictions}

    # 4. return EstimatorSpec
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions_dict,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=eval_metric_ops,
        export_outputs={
            'predictions': tf.estimator.export.PredictOutput(predictions_dict)}
    )


def train_and_evaluate(output_dir, hparams):
    get_train = read_dataset(hparams['train_data_path'],
                             tf.estimator.ModeKeys.TRAIN,
                             hparams['train_batch_size'])
    get_valid = read_dataset(hparams['eval_data_path'],
                             tf.estimator.ModeKeys.EVAL,
                             1000)
    estimator = tf.estimator.Estimator(model_fn=sequence_regressor,
                                       params=hparams,
                                       config=tf.estimator.RunConfig(
                                           save_summary_steps=50,
                                           save_checkpoints_secs=
                                           hparams['min_eval_frequency']),
                                       model_dir=output_dir)
    train_spec = tf.estimator.TrainSpec(input_fn=get_train,
                                        max_steps=hparams['train_steps'])
    exporter = tf.estimator.LatestExporter('exporter', serving_input_fn)
    eval_spec = tf.estimator.EvalSpec(input_fn=get_valid,
                                      steps=None,
                                      exporters=exporter,
                                      start_delay_secs=hparams['eval_delay_secs'],
                                      throttle_secs=hparams['min_eval_frequency'])
    tf.estimator.train_and_evaluate(estimator, train_spec, eval_spec)


Overwriting ./train/model.py


In [66]:
%%writefile ./train/task.py
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Example implementation of code to run on the Cloud ML service.
"""

import traceback
import argparse
import json
import os
import tensorflow as tf
from . import model


if __name__ == '__main__':
  parser = argparse.ArgumentParser()
  # Input Arguments
  parser.add_argument(
      '--train_data_path',
      help='GCS or local path to training data',
      required=True
  )
  parser.add_argument(
      '--eval_data_path',
      help='GCS or local path to evaluation data',
      required=True
  )
  parser.add_argument(
      '--train_batch_size',
      help='Batch size for training steps',
      type=int,
      default=100
  )
  parser.add_argument(
      '--learning_rate',
      help='Initial learning rate for training',
      type=float,
      default=0.01
  )
  parser.add_argument(
      '--train_steps',
      help="""\
      Steps to run the training job for. A step is one batch-size,\
      """,
      type=int,
      default=0
  )
  parser.add_argument(
      '--sequence_length',
      help="""\
      This model works with fixed length sequences. 1-(N-1) are inputs, last is output
      """,
      type=int,
      default=10
  )
  parser.add_argument(
      '--output_dir',
      help='GCS location to write checkpoints and export models',
      required=True
  )
  model_names = [name.replace('_model','') \
                   for name in dir(model) \
                     if name.endswith('_model')]
  parser.add_argument(
      '--model',
      help='Type of model. Supported types are {}'.format(model_names),
      required=True
  )
  parser.add_argument(
      '--job-dir',
      help='this model ignores this field, but it is required by gcloud',
      default='junk'
  )
  parser.add_argument(
      '--eval_delay_secs',
      help='How long to wait before running first evaluation',
      default=10,
      type=int
  )
  parser.add_argument(
      '--min_eval_frequency',
      help='Minimum number of training steps between evaluations',
      default=60,
      type=int
  )

  args = parser.parse_args()
  hparams = args.__dict__
  
  # unused args provided by service
  hparams.pop('job_dir', None)
  hparams.pop('job-dir', None)

  output_dir = hparams.pop('output_dir')

  # Append trial_id to path if we are doing hptuning
  # This code can be removed if you are not using hyperparameter tuning
  output_dir = os.path.join(
      output_dir,
      json.loads(
          os.environ.get('TF_CONFIG', '{}')
      ).get('task', {}).get('trial', '')
  )

  # calculate train_steps if not provided
  if hparams['train_steps'] < 1:
     # 1,000 steps at batch_size of 100
     hparams['train_steps'] = (1000 * 100) // hparams['train_batch_size']
     print ("Training for {} steps".format(hparams['train_steps']))

  model.init(hparams)

  # Run the training job
  model.train_and_evaluate(output_dir, hparams)

Writing ./train/task.py


In [106]:
! rm -rf ../test

In [67]:
! ls ./train/

__init__.py  model.py  task.py


In [107]:
hparams = dict()
hparams['eval_delay_secs'] = 30
hparams['min_eval_frequency'] = 30
hparams['train_data_path'] = './data/timeserie_price_daily_train.csv'
hparams['eval_data_path'] = './data/timeserie_price_daily_eval.csv'
hparams['train_batch_size'] = 140
hparams['train_steps'] = 1000
hparams['model'] = 'rnnN'
hparams['learning_rate'] = 0.01
hparams['sequence_length'] = 24
init(hparams)
train_and_evaluate('../test', hparams)

INFO:tensorflow:Using config: {'_save_checkpoints_secs': 30, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe6d0056150>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None, '_master': '', '_num_worker_replicas': 1, '_task_id': 0, '_log_step_count_steps': 100, '_model_dir': '../test', '_global_id_in_cluster': 0, '_save_summary_steps': 100}
INFO:tensorflow:Running training and evaluation locally (non-distributed).
INFO:tensorflow:Start train and evaluate loop. The evaluate will happen after 30 secs (eval_spec.throttle_secs) or training is finished.
INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Runnin

InvalidArgumentError: Expect 10 fields but have 24 in record 0
	 [[Node: DecodeCSV = DecodeCSV[OUT_TYPE=[DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT, DT_FLOAT], field_delim=",", na_value="", use_quote_delim=true](arg0, DecodeCSV/record_defaults_0, DecodeCSV/record_defaults_1, DecodeCSV/record_defaults_2, DecodeCSV/record_defaults_3, DecodeCSV/record_defaults_4, DecodeCSV/record_defaults_5, DecodeCSV/record_defaults_6, DecodeCSV/record_defaults_7, DecodeCSV/record_defaults_8, DecodeCSV/record_defaults_9)]]
	 [[Node: IteratorGetNext = IteratorGetNext[output_shapes=[[?,9], [?]], output_types=[DT_FLOAT, DT_FLOAT], _device="/job:localhost/replica:0/task:0/device:CPU:0"](OneShotIterator)]]

In [72]:
%bash
DATADIR=$(pwd)/data
OUTDIR=$(pwd)/trained/energy_forecast
SEQ_LEN=24
rm -rf $OUTDIR
gcloud ml-engine local train \
   --module-name=train.task \
   --package-path=${PWD}/train \
   -- \
   --train_data_path="${DATADIR}/timeserie_price_daily_train_fix.csv" \
   --eval_data_path="${DATADIR}/timeserie_price_daily_eval_fix.csv"  \
   --output_dir=${OUTDIR} \
   --model=linear \
   --train_steps=1000 \
   --sequence_length=$SEQ_LEN

  from ._conv import register_converters as _register_converters
INFO:tensorflow:TF_CONFIG environment variable: {u'environment': u'cloud', u'cluster': {}, u'job': {u'args': [u'--train_data_path=/content/datalab/energy_forcasing/bram/data/timeserie_price_daily_train.csv', u'--eval_data_path=/content/datalab/energy_forcasing/bram/data/timeserie_price_daily_eval.csv', u'--output_dir=/content/datalab/energy_forcasing/bram/trained/energy_forecast', u'--model=linear', u'--train_steps=10', u'--sequence_length=24'], u'job_name': u'train.task'}, u'task': {}}
INFO:tensorflow:Using config: {'_save_checkpoints_secs': 60, '_session_config': None, '_keep_checkpoint_max': 5, '_task_type': 'worker', '_train_distribute': None, '_is_chief': True, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f6438a2acd0>, '_evaluation_master': '', '_save_checkpoints_steps': None, '_keep_checkpoint_every_n_hours': 10000, '_service': None, '_num_ps_replicas': 0, '_tf_random_seed': None,

In [99]:
%bash
gsutil mkdir gs://${BUCKET}/bram/daily
gsutil -m rm -rf gs://${BUCKET}/bram/daily/*
gsutil -m cp data/timeserie_price_daily_*.csv gs://${BUCKET}/bram/daily

CommandException: The mb command requires a URL that specifies a bucket.
"gs://atos-asl/bram/daily" is not valid.
Removing gs://atos-asl/bram/daily/timeserie_price_daily_eval.csv#1537223524383379...
Removing gs://atos-asl/bram/daily/timeserie_price_daily_train.csv#1537223524393510...
/ [1/2 objects]  50% Done                                                       / [2/2 objects] 100% Done                                                       
Operation completed over 2 objects.                                              
Copying file://data/timeserie_price_daily_eval.csv [Content-Type=text/csv]...
Copying file://data/timeserie_price_daily_train.csv [Content-Type=text/csv]...
/ [0/2 files][    0.0 B/ 36.5 KiB]   0% Done                                    / [0/2 files][    0.0 B/ 36.5 KiB]   0% Done                                    / [1/2 files][ 36.5 KiB/ 36.5 KiB]  99% Done                                    / [2/2 files][ 36.5 KiB/ 36.5 KiB] 100% Done                          

In [91]:
!echo $TFVERSION




In [103]:
%%bash
SEQ_LEN=24
TFVERSION='1.8'
for MODEL in linear dnn cnn rnn rnn2 rnnN; do
  OUTDIR=gs://${BUCKET}/bram/energy_forecast/${MODEL}
  JOBNAME=brem_energy_forecast_${MODEL}_$(date -u +%y%m%d_%H%M%S)
  REGION=us-central1
  gsutil -m rm -rf $OUTDIR
  gcloud ml-engine jobs submit training $JOBNAME \
     --region=$REGION \
     --module-name=train.task \
     --package-path=${PWD}/train \
     --job-dir=$OUTDIR \
     --scale-tier=BASIC \
     --runtime-version=$TFVERSION \
     -- \
     --train_data_path="gs://${BUCKET}/bram/daily/*train*.csv" \
     --eval_data_path="gs://${BUCKET}/bram/daily/*eval*.csv"  \
     --output_dir=$OUTDIR \
     --train_steps=3000 \
     --sequence_length=$SEQ_LEN \
     --model=$MODEL
done

jobId: brem_energy_forecast_linear_180917_233043
state: QUEUED
jobId: brem_energy_forecast_dnn_180917_233048
state: QUEUED
jobId: brem_energy_forecast_cnn_180917_233052
state: QUEUED
jobId: brem_energy_forecast_rnn_180917_233057
state: QUEUED
jobId: brem_energy_forecast_rnn2_180917_233102
state: QUEUED
jobId: brem_energy_forecast_rnnN_180917_233106
state: QUEUED


Removing gs://atos-asl/bram/energy_forecast/linear/#1537226479516691...
Removing gs://atos-asl/bram/energy_forecast/linear/checkpoint#1537226481759749...
Removing gs://atos-asl/bram/energy_forecast/linear/eval/#1537226464206678...
Removing gs://atos-asl/bram/energy_forecast/linear/eval/events.out.tfevents.1537226464.cmle-training-13253099377514018438#1537226485203017...
Removing gs://atos-asl/bram/energy_forecast/linear/events.out.tfevents.1537226453.cmle-training-13253099377514018438#1537226483328912...
Removing gs://atos-asl/bram/energy_forecast/linear/export/#1537226467799316...
Removing gs://atos-asl/bram/energy_forecast/linear/export/exporter/#1537226468087350...
Removing gs://atos-asl/bram/energy_forecast/linear/export/exporter/1537226465/#1537226474550209...
Removing gs://atos-asl/bram/energy_forecast/linear/export/exporter/1537226465/saved_model.pb#1537226474974475...
Removing gs://atos-asl/bram/energy_forecast/linear/export/exporter/1537226465/variables/#1537226475279277...
Re

In [102]:
! gcloud ml-engine jobs describe brem_energy_forecast_rnnN_180917_231948

createTime: '2018-09-17T23:19:52Z'
etag: b01Bi7ZIgK4=
jobId: brem_energy_forecast_rnnN_180917_231948
startTime: '2018-09-17T23:20:32Z'
state: RUNNING
trainingInput:
  args:
  - --train_data_path=gs://atos-asl/bram/daily/*train*.csv
  - --eval_data_path=gs://atos-asl/bram/daily/*eval*.csv
  - --output_dir=gs://atos-asl/bram/energy_forecast/rnnN
  - --train_steps=30
  - --sequence_length=24
  - --model=rnnN
  jobDir: gs://atos-asl/bram/energy_forecast/rnnN
  packageUris:
  - gs://atos-asl/bram/energy_forecast/rnnN/packages/a52c5f562ccf026c7d40bf0fe224e7c46bcb1604c95939b66cc62c52f91e93ec/train-0.0.0.tar.gz
  pythonModule: train.task
  region: us-central1
  runtimeVersion: '1.8'
trainingOutput:
  consumedMLUnits: 0.02

View job in the Cloud Console at:
https://console.cloud.google.com/ml/jobs/brem_energy_forecast_rnnN_180917_231948?project=qwiklabs-gcp-aebfb78fe0f1b1d1

View logs at:
https://console.cloud.google.com/logs?resource=ml.googleapis.com%2Fjob_id%2Fbrem_energy_forecast_rnnN_18091

In [104]:
from google.datalab.ml import TensorBoard
TensorBoard().start('gs://{}/bram/energy_forecast'.format(BUCKET))

38045

In [101]:
for pid in TensorBoard.list()['pid']:
  TensorBoard().stop(pid)
  print('Stopped TensorBoard with pid {}'.format(pid))

Stopped TensorBoard with pid 14748


In [93]:
%%bash
rm ./data/timeserie_price_daily_train.csv
rm ./data/timeserie_price_daily_eval.csv
gsutil cp gs://atos-asl/datasets/timeserie_price_daily_train_fix.csv ./data/timeserie_price_daily_train.csv
gsutil cp gs://atos-asl/datasets/timeserie_price_daily_eval_fix.csv ./data/timeserie_price_daily_eval.csv

Copying gs://atos-asl/datasets/timeserie_price_daily_train_fix.csv...
/ [0 files][    0.0 B/ 30.6 KiB]                                                / [1 files][ 30.6 KiB/ 30.6 KiB]                                                
Operation completed over 1 objects/30.6 KiB.                                     
Copying gs://atos-asl/datasets/timeserie_price_daily_eval_fix.csv...
/ [0 files][    0.0 B/  5.9 KiB]                                                / [1 files][  5.9 KiB/  5.9 KiB]                                                
Operation completed over 1 objects/5.9 KiB.                                      


In [49]:
# import pandas as pd
df = pd.read_csv('./data/timeserie_price_daily_train.csv')
print(df[df.isnull().any(axis=1)])
print(df.shape)
print(len(df.any().isna()))
df = df.dropna()
print(df.shape)
df[df.isna().any(axis=1)]

Empty DataFrame
Columns: [48.1, 42.27, 35.72, 35.13, 36.22, 32.4, 36.6, 43.1, 45.14, 45.14.1, 47.35, 47.35.1, 44.91, 48.1.1, 58.02, 61.01, 62.69, 58.15, 53.6, 47.34, 40.4, 36.0, 37.0, 37.98]
Index: []

[0 rows x 24 columns]
(222, 24)
24
(222, 24)


Unnamed: 0,48.1,42.27,35.72,35.13,36.22,32.4,36.6,43.1,45.14,45.14.1,...,58.02,61.01,62.69,58.15,53.6,47.34,40.4,36.0,37.0,37.98


In [94]:
%%bash
wc -l ./data/timeserie_price_daily_eval.csv
tail ./data/timeserie_price_daily_eval.csv

45 ./data/timeserie_price_daily_eval.csv
46.58,46.58,46.69,47.63,50.54,59.6,61.04,61.38,61.69,61.04,60.73,60.01,58.0,55.0,55.6,56.8,59.59,60.5,63.5,61.69,59.0,52.62,54.0,50.89
47.74,46.3,43.97,43.0,43.15,44.0,46.3,53.11,52.42,48.7,47.63,47.63,46.3,44.08,43.5,44.84,50.0,52.37,58.95,56.6,50.3,49.85,50.52,44.97
41.0,39.58,39.5,39.67,40.53,41.6,41.0,46.3,50.0,50.1,50.0,50.0,49.02,44.86,41.74,42.58,44.47,50.73,61.0,61.74,57.5,50.1,45.15,39.75
38.75,38.2,37.7,37.58,43.45,50.3,51.75,53.11,50.3,49.02,50.52,51.02,50.52,48.06,46.54,47.38,53.33,57.83,58.75,56.02,48.9,46.3,40.0,38.1
37.0,36.5,36.5,37.2,44.01,54.28,54.69,55.15,54.61,54.11,55.11,54.69,54.5,54.11,55.0,54.69,55.11,60.73,60.73,56.57,50.1,46.3,40.2,39.25
38.35,36.0,36.0,38.35,46.05,54.0,55.6,55.16,54.19,53.4,54.0,53.91,53.55,52.19,52.28,53.19,56.28,59.02,59.17,56.0,51.1,46.6,43.97,40.24
39.53,40.0,39.53,40.24,46.29,57.72,61.35,63.1,62.97,60.1,59.59,58.64,56.7,56.02,56.02,56.6,57.78,63.61,65.48,63.03,59.0,55.16,56.8,55.02
52.0,52.0,50.0,

In [115]:
len([61.35,58.64,56.7,65.48,63.03,52.0,64.8,56.7,60.98,43.08,47.21,39.74,42.6,40.0,41.87,47.21,54.0,56.7,55.8,46.68,42.95,48.75,60.01,60.17])

24