In [None]:
USER_FLAG = "--user"
!pip3 install {USER_FLAG} google-cloud-aiplatform==1.4.3 --upgrade
!pip3 install {USER_FLAG} kfp google-cloud-pipeline-components==0.1.6 --upgrade

In [27]:
import os
PROJECT_ID = "myproyecto2021"

# Get your Google Cloud project ID from gcloud
if not os.getenv("IS_TESTING"):
    shell_output=!gcloud config list --format 'value(core.project)' 2>/dev/null
    PROJECT_ID = shell_output[0]
    print("Project ID: ", PROJECT_ID)
    
BUCKET_NAME="gs://" + PROJECT_ID  

PATH=%env PATH
%env PATH={PATH}:/home/jupyter/.local/bin
REGION="us-central1"
PIPELINE_ROOT = f"{BUCKET_NAME}"

DATA_ROOT='{}/data'.format(BUCKET_NAME)
TRAINING_FILE_PATH='{}/{}'.format(DATA_ROOT, 'dataset.csv')
OUTPUT_MODEL = '{}/models'.format(BUCKET_NAME)
print(OUTPUT_MODEL)
print(TRAINING_FILE_PATH)

Project ID:  vpproyecto2021
env: PATH=/usr/local/cuda/bin:/opt/conda/bin:/opt/conda/condabin:/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games:/home/jupyter/.local/bin:/home/jupyter/.local/bin
gs://vpproyecto2021/models
gs://vpproyecto2021/data/dataset.csv


In [2]:
from typing import NamedTuple

import kfp
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import (Artifact, Dataset, Input, InputPath, Model, Output,
                        OutputPath, ClassificationMetrics, Metrics, component)
from kfp.v2.google.client import AIPlatformClient

from google.cloud import aiplatform
from google.cloud.aiplatform import pipeline_jobs
from google_cloud_pipeline_components import aiplatform as gcc_aip

In [41]:
##############
# Train component
##############
@component(
    base_image='python:3.9', # Use a different base image.
    packages_to_install=['tensorflow']
)

def train(training_dataset_path: str,
          # output_model: Output[Model],
          window_size: int,
          batch_size: int,
          epochs: int,
          lr: float):
    
    """
    Description: train script
    """
    import tensorflow as tf
    import pandas as pd
    import numpy as np
    
    EPOCHS = epochs
    LR = lr
    
    l0 = tf.keras.layers.Dense(2*window_size+1, input_shape=[window_size], activation='relu')
    l2 = tf.keras.layers.Dense(1)
    model = tf.keras.models.Sequential([l0, l2])
    
    lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-3)
    optimizer = tf.keras.optimizers.SGD(lr=LR, momentum=0.9)
    model.compile(loss="mse", optimizer=optimizer, metrics=['mae'])
    
    # load data
    data = pd.read_csv(training_dataset_path)
    time = np.array(data.times)
    series = np.array(data.values)[:,1].astype('float32')
    
    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(window_size + 1, shift=1, drop_remainder=True)
    dataset = dataset.flat_map(lambda window: window.batch(window_size + 1))
    dataset = dataset.shuffle(shuffle_buffer).map(lambda window: (window[:-1], window[-1])) 
    dataset = dataset.batch(batch_size).prefetch(1)
    trainds = dataset
    
    # (trainds,series) = load_dataset(pattern=training_dataset_path, window_size=window_size, batch_size=batch_size)
    # evalds = load_dataset(pattern=validation_dataset_path, mode='eval')
    
    history = model.fit(trainds, epochs=EPOCHS, verbose=0)
    
    # Save model in Metadata
    # model.save(output_model.path)
    # logging.info('using model.uri: %s', output_model.uri)

In [42]:
@kfp.dsl.pipeline(name='spanish-demand-forecasting',
                  description='Pipeline to create training')

def pipeline(message: str):
    train_task = train(
        training_dataset_path=TRAINING_FILE_PATH,
        # output_model=OUTPUT_MODEL,
        window_size=30,
        batch_size=16,
        epochs=10,
        lr=0.001)

In [43]:
compiler.Compiler().compile(
    pipeline_func=pipeline,
    package_path="demand_pipeline.json"
)

In [44]:
ml_pipeline_job = pipeline_jobs.PipelineJob(
    display_name="demand_pipeline",
    template_path="demand_pipeline.json",
    # job_id="pipeline-lwpython-tf-uscentral1-{0}".format(TIMESTAMP),
    pipeline_root=PIPELINE_ROOT,
    parameter_values={'message': "Hello, World"},
    enable_caching=True
)

In [45]:
ml_pipeline_job.run()

INFO:google.cloud.aiplatform.pipeline_jobs:Creating PipelineJob
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob created. Resource name: projects/508206629205/locations/us-central1/pipelineJobs/spanish-demand-forecasting-20210924132908
INFO:google.cloud.aiplatform.pipeline_jobs:To use this PipelineJob in another session:
INFO:google.cloud.aiplatform.pipeline_jobs:pipeline_job = aiplatform.PipelineJob.get('projects/508206629205/locations/us-central1/pipelineJobs/spanish-demand-forecasting-20210924132908')
INFO:google.cloud.aiplatform.pipeline_jobs:View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/spanish-demand-forecasting-20210924132908?project=508206629205
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/508206629205/locations/us-central1/pipelineJobs/spanish-demand-forecasting-20210924132908 current state:
PipelineState.PIPELINE_STATE_RUNNING
INFO:google.cloud.aiplatform.pipeline_jobs:PipelineJob projects/508206629

RuntimeError: Job failed with:
code: 9
message: "The DAG failed because some tasks failed. The failed tasks are: [train].; Job (project_id = vpproyecto2021, job_id = 3602255179291820032) is failed due to the above error.; Failed to handle the job: {project_number = 508206629205, job_id = 3602255179291820032}"
