In [32]:
!pip install mlflow boto3 awscli pyarrow sklearn mlflow -q

In [33]:
!pip install kfp --upgrade -q

In [83]:
import kfp
import kfp.components as comp
import kfp.dsl as dsl
from kfp.components import InputPath, OutputPath
from typing import NamedTuple

In [84]:
# Print the installed kfp distribution's metadata (version, location, dependencies).
! pip show kfp

Name: kfp
Version: 1.8.14
Summary: KubeFlow Pipelines SDK
Home-page: https://github.com/kubeflow/pipelines
Author: The Kubeflow Authors
Author-email: 
License: UNKNOWN
Location: /opt/conda/lib/python3.8/site-packages
Requires: absl-py, click, cloudpickle, Deprecated, docstring-parser, fire, google-api-core, google-api-python-client, google-auth, google-cloud-storage, jsonschema, kfp-pipeline-spec, kfp-server-api, kubernetes, protobuf, pydantic, PyYAML, requests-toolbelt, strip-hints, tabulate, typer, typing-extensions, uritemplate
Required-by: 


In [85]:
# load data step (placeholder)
def load_data():
    """Stand-in for the data loading step: prints ``Done!`` and returns ``None``."""
    outcome = print('Done!')  # print() always yields None
    return outcome

In [86]:
# train step
def train():
    """Train a toy skip-gram Word2Vec model with MindSpore and plot its embeddings.

    The function is self-contained: it installs and imports its own
    dependencies and defines its helpers locally, so it can be wrapped with
    ``create_component_from_func`` and executed inside a plain Python image.

    Steps:
      1. Install numpy, mindspore and matplotlib with the running interpreter.
      2. Build (target, context) skip-gram pairs from six hard-coded sentences.
      3. Train a two-layer linear model for 5000 mini-batch steps with Adam and
         cross-entropy loss, printing the loss every 1000 steps.
      4. Scatter-plot the learned 2-D embedding of every vocabulary word.

    Returns:
        None. Progress and the final ``Done!`` marker are printed to stdout.

    Raises:
        subprocess.CalledProcessError: if a required package cannot be installed.
    """
    import subprocess
    import sys

    # Upgrading pip is best-effort, so a failure here does not abort the step.
    subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "pip"])
    # These packages are required: fail fast (check=True) instead of hitting a
    # less informative ImportError a few lines further down.
    for package in ("numpy", "mindspore", "matplotlib"):
        subprocess.run([sys.executable, "-m", "pip", "install", package], check=True)

    import numpy as np
    import mindspore
    import mindspore.nn as nn
    import mindspore.ops as ops
    import matplotlib.pyplot as plt
    from mindspore import Tensor

    # ms_function is deprecated since MindSpore 2.0 in favour of jit; the
    # install above is unpinned, so accept whichever name this release offers.
    try:
        from mindspore import jit as graph_compile
    except ImportError:
        from mindspore import ms_function as graph_compile

    batch_size = 2  # mini-batch size
    embed_size = 2  # embedding size (2-D so it can be plotted directly)

    sentences = ["apple banana fruit", "banana orange fruit", "orange banana fruit",
                 "dog cat animal", "cat monkey animal", "monkey dog animal"]

    word_sequence = " ".join(sentences).split()
    # Sort the vocabulary so the word -> index mapping is identical on every
    # run; plain set iteration order changes with string hash randomisation.
    word_list = sorted(set(word_sequence))
    word_dict = {w: i for i, w in enumerate(word_list)}
    voc_size = len(word_list)

    # Pair every interior word with its left and right neighbour (window of 1).
    skip_grams = []
    for i in range(1, len(word_sequence) - 1):
        target = word_dict[word_sequence[i]]
        context = [word_dict[word_sequence[i - 1]], word_dict[word_sequence[i + 1]]]
        for w in context:
            skip_grams.append([target, w])

    # Row k of the identity matrix is the one-hot vector of word k; build it
    # once instead of once per sampled pair.
    one_hot = np.eye(voc_size)

    def random_batch():
        """Sample ``batch_size`` distinct skip-gram pairs.

        Returns:
            (inputs, labels): a list of one-hot target rows and a list of the
            matching context-word indices.
        """
        picks = np.random.choice(len(skip_grams), batch_size, replace=False)
        random_inputs = [one_hot[skip_grams[i][0]] for i in picks]  # target
        random_labels = [skip_grams[i][1] for i in picks]  # context word
        return random_inputs, random_labels

    class Word2Vec(nn.Cell):
        """Two bias-free dense layers: vocabulary -> embedding -> vocabulary."""

        def __init__(self, voc_size, embed_size):
            super(Word2Vec, self).__init__()
            # W and WT are independent weights, not transposes of each other.
            self.W = nn.Dense(voc_size, embed_size, has_bias=False)   # voc_size > embedding_size Weight
            self.WT = nn.Dense(embed_size, voc_size, has_bias=False)  # embedding_size > voc_size Weight

        def construct(self, X):
            # X : [batch_size, voc_size]
            hidden_layer = self.W(X)  # hidden_layer : [batch_size, embedding_size]
            output_layer = self.WT(hidden_layer)  # output_layer : [batch_size, voc_size]
            return output_layer

    model = Word2Vec(voc_size, embed_size)

    criterion = nn.CrossEntropyLoss()
    optimizer = nn.Adam(model.trainable_params(), learning_rate=0.001)

    def forward(inputs, targets):
        """Return the cross-entropy loss of the model on one batch."""
        logits = model(inputs)
        return criterion(logits, targets)

    # Differentiate the loss with respect to the optimizer's parameters only.
    grad_fn = ops.value_and_grad(forward, None, optimizer.parameters)

    @graph_compile
    def train_step(inputs, targets):
        """Run one optimisation step and return the batch loss."""
        loss, grads = grad_fn(inputs, targets)
        optimizer(grads)
        return loss

    model.set_train()

    num_steps = 5000
    for step in range(num_steps):
        input_batch, target_batch = random_batch()
        input_batch = Tensor(input_batch, mindspore.float32)
        target_batch = Tensor(target_batch, mindspore.int32)
        loss = train_step(input_batch, target_batch)
        if (step + 1) % 1000 == 0:
            print('Epoch:', '%04d' % (step + 1), 'cost = ', '{:.6f}'.format(float(loss.asnumpy())))

    # Read the input layer's weight by name (no reliance on parameter order)
    # and convert it once. It is indexed [embedding dimension][word index].
    embedding = model.W.weight.asnumpy()
    for i, label in enumerate(word_list):
        x, y = embedding[0][i], embedding[1][i]
        plt.scatter(x, y)
        plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom')
    # NOTE(review): inside a headless pipeline container this presumably shows
    # nothing; save the figure explicitly if the plot is needed as an artifact.
    plt.show()

    print('Done!')

In [87]:
# Wrap each step function as a lightweight component running on python:3.9.
load_op = comp.create_component_from_func(
    func=load_data,
    base_image="python:3.9",
)
train_op = comp.create_component_from_func(
    func=train,
    base_image="python:3.9",
)


In [88]:
# define pipeline
@dsl.pipeline(
    name="mindspore_example",
    description="Mindspore example",
)
def mindspore_example(dataset: str):
    """Run the load step and then the train step, both mounting one shared volume.

    The ``dataset`` pipeline parameter is accepted but not consumed by either step.
    """
    # Provision the 2Gi volume (VOLUME_MODE_RWO) that both steps mount at /mnt.
    volume_op = dsl.VolumeOp(
        name="create_volume",
        resource_name="data-volume",
        size="2Gi",
        modes=dsl.VOLUME_MODE_RWO,
    )

    # Load step first.
    load_task = load_op().add_pvolumes({"/mnt": volume_op.volume})

    # Train step, explicitly ordered after the load step.
    train_task = (
        train_op()
        .after(load_task)
        .add_pvolumes({"/mnt": volume_op.volume})
    )


In [89]:
# Create the client used to communicate with the Pipelines API server.
# It is constructed with no arguments, so connection settings are left to
# kfp.Client's own defaults.
client = kfp.Client()

In [90]:
# Names under which the run is registered.
pipeline_func = mindspore_example
experiment_name = 'mindspore_example'
run_name = pipeline_func.__name__ + ' run1'

# Values for the pipeline's input parameters.
arguments = dict(dataset="dataset")

# Compile the pipeline into a zipped definition in the current working directory.
kfp.compiler.Compiler().compile(
    pipeline_func=pipeline_func,
    package_path='{}.zip'.format(experiment_name),
)

# Submit a run directly from the pipeline function.
run_result = client.create_run_from_pipeline_func(
    pipeline_func,
    arguments=arguments,
    run_name=run_name,
    experiment_name=experiment_name,
)