In [1]:
from verta import Client
from verta.utils import ModelAPI

VERTA_HOST = "https://cm.dev.verta.ai"

client = Client(VERTA_HOST)

set email from environment
set developer key from environment
connection successfully established


In [2]:
project = client.set_project(name="Census Income S3")
experiment = client.set_experiment(name="Linear regression")

got existing Project: Census Income S3
got existing Experiment: Linear regression
1415aa21-bd13-4bd2-8021-172d751c3312


In [10]:
run = client.set_experiment_run()

dataset = client.get_dataset(name="Census Income S3")
dataset_version = dataset.get_latest_version()

df_train = pd.read_csv("census-train.csv")
X_train = df_train.iloc[:,:-1]
y_train = df_train.iloc[:, -1]

hyperparams = {
    'C': 1e-6,
    'solver': 'lbfgs',
    'max_iter': 30,
    'balanced': 0,
}

# create validation split
(X_val_train, X_val_test,
 y_val_train, y_val_test) = model_selection.train_test_split(X_train, y_train,
                                                             test_size=0.2,
                                                             shuffle=True)

# log hyperparameters
run.log_hyperparameters(hyperparams)
print(hyperparams, end=' ')
hyperparams['class_weight'] = 'balanced' if hyperparams['balanced'] else None
del hyperparams['balanced']

# create and train model
model = linear_model.LogisticRegression(**hyperparams)
model.fit(X_train, y_train)

# calculate and log validation accuracy
train_acc = model.score(X_val_train, y_val_train)
run.log_metric("train_acc", train_acc)
val_acc = model.score(X_val_test, y_val_test)
run.log_metric("val_acc", val_acc)
print("Validation accuracy: {:.4f}".format(val_acc))

# create deployment artifacts
model_api = ModelAPI(X_train, y_train)
requirements = ["scikit-learn"]

# save and log model
# run.log_model(model, model_api=model_api)
run.log_requirements(requirements)

# log dataset snapshot as version
run.log_dataset_version("train", dataset_version)

# log Git information as code version
run.log_code()

created new ExperimentRun: Run 772491604910562966193
set existing Dataset: Census Income S3 from personal workspace
got existing dataset version: d5a01a87188b0a2884466a51aa2e721a4a13d7f3629a4a8e76f92f6ebc82d8ee
{'C': 1e-06, 'solver': 'lbfgs', 'max_iter': 30, 'balanced': 0} 



Validation accuracy: 0.7919
uploading part 1
upload complete (requirements.txt)
Git repository successfully located at /Users/conrado/workspace/modeldb/


In [17]:
inputs = ','.join(list(X_train.columns))

In [22]:
sample = list(X_train.iloc[0, :])
sample

[44,
 0,
 0,
 40,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [31]:
class Predictor(object):
    def __init__(self, artifacts):
        import cloudpickle
        with open(artifacts['model'], 'rb') as f:
            self.model = cloudpickle.load(f) 
        
    def predict(self, X):
        return self.model.predict(X)
        
    def describe(self):
        """
        Returns the description of the service.
        """
        return {
            'method':
            'predict',
            'args':
            inputs,
            'returns':
            'probability,top_risk_factors',
            'description':
            """
                Predicts whether a person has >50k income based on census data.
            """,
            'input_description':
            """
                Batch of census information, one sample per entry.
            """,
            'output_description':
            """
                Binary classification, with 1 representing the prediction that the person earns more than 50k.
            """
        }

    def example(self):
        """
        Returns an example input json data.
        """
        return sample

In [32]:
run.log_artifact("model", model, overwrite=True)
run.log_model(Predictor, artifacts=["model"], model_api=model_api, overwrite=True)

uploading part 1
upload complete (model)
uploading part 1
upload complete (custom_modules)
uploading part 1
upload complete (model.pkl)
uploading part 1
upload complete (model_api.json)


In [26]:
run.log_tag('deployment')

In [33]:
for run in project.expt_runs:
    print(run.id)
    run.log_training_data(X_train, y_train)

d6768770-b1e0-4248-9a00-3f1811f5f388
d89d7e45-2f58-4d6f-a9dc-25ca3dbf4736
f782935c-23fd-4d2f-b182-16ab43c1f7cb
0de7f95f-4c37-46c9-b630-b9a0eb8442eb
cb0333bb-f31e-4d53-8beb-3fb13a92e27f
8126aa1d-d0d1-4e90-9dbe-55a30afc7d91
aa01cb36-2627-449a-825d-ce79970eb0a6
083bbb1f-889e-4557-9491-7e4d4d3bbd64
fc509fa6-573f-48a1-8ce6-1ae12e333116
5c3b1713-6907-4e53-a508-ddda9ceffa20
39bae924-6aa0-4914-97d3-0836349cdff2
0ebde287-0522-480c-9b00-f47884a41d08
65c0b93f-6e57-4595-846f-e5a97395eabb
f09c0bfd-3bf9-4fb5-8ede-08df609aece0
ec9f53ad-4894-466f-a55b-63022b884db6
1415aa21-bd13-4bd2-8021-172d751c3312
9781d90c-941f-485d-a898-4b1757052847
d87821f3-f6ce-401f-aea8-b5751428a17c
0246265e-a39e-48f6-a04f-818070a8f64b
28ccfe22-477c-454c-bebc-6fb1e02810ba
40f10ff4-b768-4d61-b711-978af7ecb8b8
48794ac1-136b-483a-b331-db72ee215a12
cc21c018-5359-4d3e-b9e9-a32ff335c4fb
742c3ee1-eda3-4efd-af0c-f606fd6e689d
1eba79c6-2ea2-4e8e-9390-2933b45d16b4
