In [None]:
#default_exp tags

In [None]:
#export
import yaml
import os
import calendar
import time
import blocks

from blocks.filesystem import GCSFileSystem as gcsfs
from yoda.cli import is_run_on_local

In [None]:
#export
def _interpolate_string(input_string: str):
    fillin_dict = dict(os.environ)
    update_dict = {
        "NOW": time.strftime("%Y%m%d_%H%M%S"),
        "EPOCH": calendar.timegm(time.gmtime()),
    }
    fillin_dict.update(update_dict)
    return input_string.format(**fillin_dict)

In [None]:
#export
class DataTag(yaml.YAMLObject):
    """
    YAML tag `!data`: replace a tagged path with the data stored at that path.

    The scalar is run through `_interpolate_string`, and the resulting path is
    handed to `blocks.assemble`; whatever that call returns becomes the YAML
    value (expected to be a pandas DataFrame).
    """
    yaml_loader = yaml.SafeLoader
    yaml_tag = u'!data'

    @classmethod
    def from_yaml(cls, loader, node):
        # Expand placeholders in the raw scalar before loading the data.
        resolved_path = _interpolate_string(loader.construct_scalar(node))
        return blocks.assemble(resolved_path)

In [None]:
config1 = '../data/configs/config1.yaml'
# Read the file once inside a context manager so the handle is always closed;
# the same text is both shown and parsed.
with open(config1) as f:
    config1_text = f.read()
print(config1_text)

# input_df carries a !data tag, so the loaded value is the assembled data.
yaml.safe_load(config1_text)['data']['input_df']

data: 
  input_df: !data "../data/iris_data.csv"
  eval_df: !data "../data/iris_data.csv"
  output_path: "../output/"
  features: "sepal_length,sepal_width,petal_length"
  label: species
model:
  estimator: xgboost.XGBClassifier
  params:
    max_depth: 4
    num_estimator: 50
eval:
  metrics: "accuracy,f1_macro"


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,0.0,1.0,2.0,3.0,0
1,5.1,3.5,1.4,0.2,0
2,4.9,3.0,1.4,0.2,0
3,4.7,3.2,1.3,0.2,0
4,4.6,3.1,1.5,0.2,0
...,...,...,...,...,...
146,6.7,3.0,5.2,2.3,2
147,6.3,2.5,5.0,1.9,2
148,6.5,3.0,5.2,2.0,2
149,6.2,3.4,5.4,2.3,2


In [None]:
#export
class FileTag(yaml.YAMLObject):
    """
    YAML tag `!file`: replace a tagged local path with that file's text.

    The scalar is run through `_interpolate_string` first, then the file at
    the resulting path is opened in text mode and returned as one string.
    """
    yaml_loader = yaml.SafeLoader
    yaml_tag = u'!file'

    @classmethod
    def from_yaml(cls, loader, node):
        # Expand placeholders in the raw scalar to get the real path.
        resolved_path = _interpolate_string(loader.construct_scalar(node))
        with open(resolved_path, 'r') as handle:
            return handle.read()

In [None]:
config3 = '../data/configs/config3.yaml'
# Read the file once inside a context manager so the handle is always closed;
# the same text is both shown and parsed.
with open(config3) as f:
    config3_text = f.read()
print(config3_text)

# model_code carries a !file tag, so the loaded value is the referenced file's text.
print(yaml.safe_load(config3_text)['model']['model_code'])

data: 
  input_path: "../data/iris_data.csv"
  eval_path: "../data/iris_data.csv"
  output_path: "../output/"
  features: "sepal_length,sepal_width,petal_length"
  label: species
model:
  model_code: !file "../data/models/LGBDefault.py"
  estimator: LGExt
  params:
    max_depth: 4
    num_estimator: 50
eval:
  metrics: "accuracy,f1_macro"
from lightgbm.sklearn import LGBMClassifier


class LGBExt(LGBMClassifier):
    def __init__(self,
                 categories,
                 n_estimators=1000,
                 boosting_type="gbdt",
                 objective="binary",
                 metric="auc",
                 subsample=0.75,
                 subsample_freq=3,
                 learning_rate=0.02,
                 feature_fraction=0.1,
                 max_depth=14,
                 num_leaves=100,
                 lambda_l1=1.5,
                 lambda_l2=3,
                 early_stopping_rounds=None,
                 scale_pos_weight=3,
                 verbose=100,
     

In [None]:
#export
class FormatTag(yaml.YAMLObject):
    """
    YAML tag `!format`: expand placeholders in the tagged string.

    Supported placeholders are NOW, EPOCH, and the name of any environment
    variable; expansion is delegated to `_interpolate_string`.
    """
    yaml_loader = yaml.SafeLoader
    yaml_tag = u'!format'

    @classmethod
    def from_yaml(cls, loader, node):
        template = loader.construct_scalar(node)
        interpolated = _interpolate_string(template)
        return interpolated

In [None]:
import os

config2 = '../data/configs/config2.yaml'
# Read the file once inside a context manager so the handle is always closed;
# the same text is both shown and parsed.
with open(config2) as f:
    config2_text = f.read()
print(config2_text)

# The !format tags pull BUCKET from the environment while the YAML is being
# loaded, so it has to be set before safe_load runs.
os.environ["BUCKET"] = "testjobsubmit"
conf_dict2 = yaml.safe_load(config2_text)

image: "gcr.io/wmt-customer-tech-case-sci-dev/yoda:v2"
data: 
  input_df: !format "gs://{BUCKET}/{USER}/test/iris_data.csv"
  eval_df: !format "gs://{BUCKET}/{USER}/test/iris_data.csv"
  output_path: !format "gs://{BUCKET}/{USER}/test/output/"
  features: "sepal_length,sepal_width,petal_length"
  label: species
model:
  estimator: xgboost.XGBClassifier
  params:
    max_depth: 4
    num_estimator: 50
eval:
  metrics: "accuracy,f1_macro"


In [None]:
# Show the parsed config; the !format placeholders appear already expanded.
conf_dict2

{'image': 'gcr.io/wmt-customer-tech-case-sci-dev/yoda:v2',
 'data': {'input_df': 'gs://testjobsubmit/j0l04cl/test/iris_data.csv',
  'eval_df': 'gs://testjobsubmit/j0l04cl/test/iris_data.csv',
  'output_path': 'gs://testjobsubmit/j0l04cl/test/output/',
  'features': 'sepal_length,sepal_width,petal_length',
  'label': 'species'},
 'model': {'estimator': 'xgboost.XGBClassifier',
  'params': {'max_depth': 4, 'num_estimator': 50}},
 'eval': {'metrics': 'accuracy,f1_macro'}}

In [None]:
# Point the job at the ":test" image tag (mutates the loaded conf_dict2 in place).
conf_dict2["image"] = 'gcr.io/wmt-customer-tech-case-sci-dev/yoda:test'

In [None]:
# Show the config again to confirm the image override.
conf_dict2

{'image': 'gcr.io/wmt-customer-tech-case-sci-dev/yoda:test',
 'data': {'input_df': 'gs://testjobsubmit/j0l04cl/test/iris_data.csv',
  'eval_df': 'gs://testjobsubmit/j0l04cl/test/iris_data.csv',
  'output_path': 'gs://testjobsubmit/j0l04cl/test/output/',
  'features': 'sepal_length,sepal_width,petal_length',
  'label': 'species'},
 'model': {'estimator': 'xgboost.XGBClassifier',
  'params': {'max_depth': 4, 'num_estimator': 50}},
 'eval': {'metrics': 'accuracy,f1_macro'}}

In [None]:
# NOTE(review): run_yoda_on_gcp is not defined or imported in any visible cell;
# presumably it comes from another yoda module — TODO confirm and import it
# explicitly, otherwise this cell fails on a fresh kernel.
run_yoda_on_gcp(conf_dict2)

INFO:root:Copying /var/folders/2k/b58ly_192yjgtv76zjxqj6f8_9cn2g/T/tmp8_61ryug to gs://testjobsubmit/j0l04cl/test/output/config.yaml...
DEBUG:googleapiclient.discovery:URL being requested: GET https://www.googleapis.com/discovery/v1/apis/ml/v1/rest
DEBUG:googleapiclient.discovery:URL being requested: POST https://ml.googleapis.com/v1/projects/wmt-customer-tech-case-sci-dev/jobs?alt=json
DEBUG:google_auth_httplib2:Making request: POST https://oauth2.googleapis.com/token


In [None]:
# NOTE(review): the recorded output of this cell is an ImportError for
# get_config, which is only defined in a later cell of this notebook.
# TODO: move this import below the defining cell, or drop it.
from yoda.tags import get_config

[autoreload of yoda.runner failed: Traceback (most recent call last):
  File "/Users/j0l04cl/anaconda3/envs/yoda/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "/Users/j0l04cl/anaconda3/envs/yoda/lib/python3.7/site-packages/IPython/extensions/autoreload.py", line 394, in superreload
    module = reload(module)
  File "/Users/j0l04cl/anaconda3/envs/yoda/lib/python3.7/imp.py", line 314, in reload
    return importlib.reload(module)
  File "/Users/j0l04cl/anaconda3/envs/yoda/lib/python3.7/importlib/__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 630, in _exec
  File "<frozen importlib._bootstrap_external>", line 728, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "/Users/j0l04cl/Documents/git_repo/yoda/yoda/runner.py", line 117, in <module>
    metrics: str = None) -> dict:
NameError: 

ImportError: cannot import name 'get_config' from 'yoda.tags' (/Users/j0l04cl/Documents/git_repo/yoda/yoda/tags.py)

In [None]:
#export
def get_config(path):
    """
    Parse the YAML config stored at `path` and return the loaded object.

    `is_run_on_local(path)` chooses how the file is opened: the builtin `open`
    when it is truthy, otherwise the `open` method of a new `GCSFileSystem`.
    """
    opener = open if is_run_on_local(path) else gcsfs().open
    with opener(path) as stream:
        return yaml.safe_load(stream)

    
def save_config(conf: dict, path: str):
    """
    Serialize `conf` as YAML with `yaml.safe_dump` and write it to `path`.

    `is_run_on_local(path)` chooses how the file is opened for writing: the
    builtin `open` when it is truthy, otherwise the `open` method of a new
    `GCSFileSystem`.
    """
    opener = open if is_run_on_local(path) else gcsfs().open
    with opener(path, "w") as stream:
        yaml.safe_dump(conf, stream)