In [None]:
# default_exp core

# Overview

> yoda wants to simplify the way to run jobs on Google AI platform and organize your model process in a config file.

In this session, we will go through a few examples to see how yoda works.

In [None]:
#hide
from nbdev.showdoc import *

In [41]:
#export
import click
import yaml
from functools import lru_cache

In [28]:
#export
@click.group()
def cli():
    pass

@cli.command()
@click.argument('config', type=click.File('r'))
def run(config):
    # process
    print("here")
    conf_dict = yaml.load(config, Loader=yaml.FullLoader)
    print("here")
    print(conf_dict)

In [29]:
from click.testing import CliRunner

runner = CliRunner()
result = runner.invoke(run, ['../data/configs/config1.yaml'])
print(result.output)

here



Here is an example of a config file `config1.yaml`.

```{yaml}
data: 
  input_path: "../data/iris_data.csv"
  output_path: "../output/"
  features: "sepal_length,sepal_width,petal_length"
  label: species
train:
  class: xgboost.XGBRegressor
  max_depth: 4
  num_estimator: 50
```

We can run this config file by 

```{shell}
yoda run config1.yaml
```

In [45]:
# load the file
conf_dict = yaml.load(open('../data/configs/config1.yaml'), Loader=yaml.FullLoader)

In [46]:
conf_dict

{'data': {'input_path': '../data/iris_data.csv',
  'output_path': '../output/',
  'features': 'sepal_length,sepal_width,petal_length',
  'label': 'species'},
 'train': {'class': 'xgboost.XGBRegressor',
  'max_depth': 4,
  'num_estimator': 50}}

In [42]:
# export
class Data:
    def __init__(self, input_path: str, output_path: str, features: str, label: str, **kwargs):
        self.input_path = input_path
        self.output_path = output_path
        self.feature_list = features.split(",")
        self.label = label

    @property
    @lru_cache(1)
    def df(self):
        return blocks.assemble(input_path)

    @property
    def X(self):
        return self.df[self.feature_list]

    @property
    def y(self):
        return self.df[self.label]

In [44]:
data = Data(**conf_dict['data'])

## Run on GCP AI platform

In [None]:
Before we run on AI platform, we need to create an image that have all depedencies installed.

In [50]:
%%script bash
export PROJECT_ID=$(gcloud config list project --format "value(core.project)")
export IMAGE_REPO_NAME=yoda
export IMAGE_TAG=basic
export IMAGE_URI=gcr.io/$PROJECT_ID/$IMAGE_REPO_NAME:$IMAGE_TAG

docker build -f ../docker/Dockerfile.basic -t $IMAGE_URI ./

gcr.io/wmt-customer-tech-case-sci-dev/yoda:basic


In [None]:
config = {
    "pipeline": ""
    "data": "",
    ""
}