 ============================================================================== \
 Copyright 2020 Google LLC. This software is provided as-is, without warranty \
 or representation for any use or purpose. Your use of it is subject to your \
 agreement with Google. \
 ============================================================================== 
 
 Author: Elvin Zhu, Chanchal Chatterjee \
 Email: elvinzhu@google.com \
<img src="img/google-cloud-icon.jpg" alt="Drawing" style="width: 200px;"/>

In [1]:
# !python3 -m pip install kfp==1.6.2

In [2]:
!python3 -m pip freeze --user

cloudml-hypertune==0.1.0.dev6
gcsfs==0.6.1
google-api-python-client==1.7.11
google-cloud==0.34.0
matplotlib==3.2.1
numpy==1.18.0
pandas==1.2.1
scikit-learn==0.22
scipy==1.4.1
tensorflow==2.1.0


In [3]:
import os
import kfp
import kfp.components as comp
import kfp.dsl as dsl
from typing import NamedTuple
from kfp.compiler import compiler

In [4]:
def data_preprocess(
    bucket_name: str,
    input_file: str,
    target_column: str,
    ) -> NamedTuple('PreprocessOutput',
              [
                  ('x_train_name', str),
                  ('x_test_name', str),
                  ('y_train_name', str),
                  ('y_test_name', str),
                  ('n_classes', str),
              ]):
    """Split a CSV dataset into train/test feature and label files.

    The CSV is loaded with pandas, the ``LOAN_SEQUENCE_NUMBER`` id column is
    dropped, every object-typed feature column is one-hot encoded, and the
    rows are split 90/10 (stratified on the target, ``random_state=1``).
    The four resulting frames are written as CSV files under
    ``gs://<bucket_name>/data_split_xgb/``.

    Args:
        bucket_name: Bucket that receives the four split files.
        input_file: Path of the source CSV. Its base name and extension are
            reused to build the output file names.
        target_column: Name of the label column.

    Returns:
        A ``PreprocessOutput`` tuple holding the four written paths and the
        number of distinct target values (as a string).
    """
    # Imports stay inside the function: it is packaged on its own by
    # `comp.func_to_container_op` elsewhere in this notebook.
    from collections import namedtuple
    from sklearn.model_selection import train_test_split
    import pandas as pd
    import os
    import logging

    logging.info("Loading {}".format(input_file))
    dataset = pd.read_csv(input_file)
    # drop unique id column which is not useful for ML
    dataset.drop(['LOAN_SEQUENCE_NUMBER'], axis=1, inplace=True)

    # Convert categorical feature columns into one-hot encodings. The target
    # is excluded on purpose: encoding a string-typed target would replace it
    # with dummy columns and every later `dataset[target_column]` lookup
    # would fail.
    str_cols = [
        col for col in dataset.columns
        if col != target_column and dataset[col].dtype == 'object'
    ]
    dataset = pd.get_dummies(dataset, columns=str_cols)
    n_classes = dataset[target_column].nunique()
    logging.info("No. of Classes: {}".format(n_classes))

    # Split with a small test size so as to allow our model to train on more data
    x_train, x_test, y_train, y_test = train_test_split(
        dataset.drop(target_column, axis=1),
        dataset[target_column],
        test_size=0.1,
        random_state=1,
        shuffle=True,
        stratify=dataset[target_column],
        )

    splits = {
        'x_train': x_train,
        'x_test': x_test,
        'y_train': y_train,
        'y_test': y_test,
    }
    for split_name, frame in splits.items():
        logging.info("{} shape = {}".format(split_name, frame.shape))

    # Output files are named <input base>_<split><input extension>.
    base_name, ext_name = os.path.splitext(os.path.basename(input_file))
    paths = {}
    for split_name, frame in splits.items():
        file_name = "{}_{}{}".format(base_name, split_name, ext_name)
        paths[split_name] = os.path.join(
            "gs://", bucket_name, "data_split_xgb", file_name)
        frame.to_csv(paths[split_name], index=False)
        logging.info("{} saved to {}".format(split_name, paths[split_name]))
    logging.info("finished")

    PreprocessOutput = namedtuple('PreprocessOutput',
        ['x_train_name', 'x_test_name', 'y_train_name', 'y_test_name', 'n_classes'])
    return PreprocessOutput(
        x_train_name=paths['x_train'],
        x_test_name=paths['x_test'],
        y_train_name=paths['y_train'],
        y_test_name=paths['y_test'],
        n_classes=str(n_classes),
    )

In [5]:
def hypertune(
        project_id: str,
        region: str,
        job_name: str,
        bucket_name: str,
        job_folder_name: str,
        train_feature_path: str,
        train_label_path: str,
        val_feature_path: str,
        val_label_path: str,
        n_classes: str,
        metric_id: str,
        max_trial_count: int,
        parallel_trial_count: int,
        package_uri: str,
        executor_image_uri: str = 'us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-2:latest',
        python_module: str = "trainer.train",
        api_endpoint: str = "us-central1-aiplatform.googleapis.com",
        machine_type: str = "n1-standard-4",
    ) -> NamedTuple('TrainOutput',
              [('response', str), ('job_name', str)]):
    """Submit a hyperparameter tuning job for the packaged trainer.

    The study maximizes ``metric_id`` over three parameters: ``max_depth``
    (integer 2-20), ``n_estimators`` (integer 10-200) and ``booster``
    (``gbtree``, ``gblinear`` or ``dart``). Each trial runs ``python_module``
    from ``package_uri`` on a single ``machine_type`` replica.

    Args:
        project_id: Project used to build the parent resource path.
        region: Location used to build the parent resource path.
        job_name: Base name; ``"_hpt"`` is appended for the display name and
            the job directory.
        bucket_name: Bucket of the job directory.
        job_folder_name: Folder inside the bucket for the job directory.
        train_feature_path: Passed to the trainer as ``--train_feature_name``.
        train_label_path: Passed to the trainer as ``--train_label_name``.
        val_feature_path: Passed to the trainer as ``--val_feature_name``.
        val_label_path: Passed to the trainer as ``--val_label_name``.
        n_classes: Passed to the trainer as ``--no_classes``.
        metric_id: Identifier of the metric to maximize.
        max_trial_count: Total number of trials.
        parallel_trial_count: Number of trials run concurrently.
        package_uri: Location of the trainer package.
        executor_image_uri: Container image that executes the package.
        python_module: Module inside the package to run.
        api_endpoint: API endpoint handed to the client.
            NOTE(review): presumably has to belong to ``region`` - confirm.
        machine_type: Machine type of the single worker replica.

    Returns:
        A ``TrainOutput`` tuple with the text form of the API response and
        the job id (last path segment of the returned resource name).
    """
    from collections import namedtuple
    from google.cloud import aiplatform
    import logging

    job_name = job_name + "_hpt"
    job_dir = 'gs://{}/{}/{}'.format(
        bucket_name,
        job_folder_name,
        job_name,
        )

    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.JobServiceClient(client_options=client_options)
    print(client)

    # study_spec
    metric = {
        "metric_id": metric_id,
        "goal": aiplatform.gapic.StudySpec.MetricSpec.GoalType.MAXIMIZE,
    }
    print(metric)

    linear_scale = aiplatform.gapic.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE
    max_depth = {
        "parameter_id": "max_depth",
        "integer_value_spec": {"min_value": 2, "max_value": 20},
        "scale_type": linear_scale,
    }
    n_estimators = {
        "parameter_id": "n_estimators",
        "integer_value_spec": {"min_value": 10, "max_value": 200},
        "scale_type": linear_scale,
    }
    booster = {
        "parameter_id": "booster",
        "categorical_value_spec": {"values": ["gbtree", "gblinear", "dart"]},
    }

    # trial_job_spec
    machine_spec = {
        "machine_type": machine_type,
    }
    worker_pool_spec = {
        "machine_spec": machine_spec,
        "replica_count": 1,
        "python_package_spec": {
            "executor_image_uri": executor_image_uri,
            "package_uris": [package_uri],
            "python_module": python_module,
            "args": [
                '--job-dir',
                job_dir,
                '--train_feature_name',
                train_feature_path,
                '--train_label_name',
                train_label_path,
                '--val_feature_name',
                val_feature_path,
                '--val_label_name',
                val_label_path,
                '--no_classes',
                str(n_classes),
            ],
        },
    }

    # hyperparameter_tuning_job
    hyperparameter_tuning_job = {
        "display_name": job_name,
        "max_trial_count": max_trial_count,
        "parallel_trial_count": parallel_trial_count,
        "study_spec": {
            "metrics": [metric],
            "parameters": [max_depth, n_estimators, booster],
        },
        "trial_job_spec": {"worker_pool_specs": [worker_pool_spec]},
    }
    print(hyperparameter_tuning_job)

    parent = f"projects/{project_id}/locations/{region}"
    response = client.create_hyperparameter_tuning_job(
        parent=parent, hyperparameter_tuning_job=hyperparameter_tuning_job
    )
    print(response)
    logging.info(f"response: {response}")
    hpt_job_name = response.name.split('/')[-1]

    TrainOutput = namedtuple('TrainOutput', ['response', 'job_name'])
    # The 'response' output is declared as str, so hand back the text form
    # of the API message rather than the message object itself.
    return TrainOutput(response=str(response), job_name=hpt_job_name)

In [6]:
# project_id = 'img-seg-3d'
# region = 'us-central1'
# job_name = 'xgb_train_elvinzhu_061421_2259'
# bucket_name = 'tuti_job'
# job_folder_name = 'xgb_train_job'
# train_feature_path = "gs://tuti_job/data_split_xgb/mortgage_structured_x_train.csv"
# train_label_path = "gs://tuti_job/data_split_xgb/mortgage_structured_y_train.csv"
# val_feature_path = "gs://tuti_job/data_split_xgb/mortgage_structured_x_test.csv"
# val_label_path = "gs://tuti_job/data_split_xgb/mortgage_structured_y_test.csv"
# n_classes = '4'
# metric_id = 'roc_auc'
# max_trial_count = 4
# parallel_trial_count = 1
# package_uri = "gs://vapit_job/trainer/xgboost/trainer-0.1.tar.gz"
# executor_image_uri = 'us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-2:latest'
# python_module = "trainer.train"
# api_endpoint = "us-central1-aiplatform.googleapis.com"
# machine_type = "n1-standard-4"

# from collections import namedtuple
# from google.cloud import aiplatform
# import subprocess
# import logging

# job_name = job_name + "_hpt"
# job_dir = 'gs://{}/{}/{}'.format(
#     bucket_name,
#     job_folder_name,
#     job_name,
#     )

# # The AI Platform services require regional API endpoints.
# client_options = {"api_endpoint": api_endpoint}
# # Initialize client that will be used to create and send requests.
# # This client only needs to be created once, and can be reused for multiple requests.
# client = aiplatform.gapic.JobServiceClient(client_options=client_options)

# # study_spec
# metric = {
#     "metric_id": metric_id,
#     "goal": aiplatform.gapic.StudySpec.MetricSpec.GoalType.MAXIMIZE,
# }

# max_depth = {
#         "parameter_id": "max_depth",
#         "integer_value_spec": {"min_value": 2, "max_value": 20},
#         "scale_type": aiplatform.gapic.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
# }
# n_estimators = {
#         "parameter_id": "n_estimators",
#         "integer_value_spec": {"min_value": 10, "max_value": 200},
#         "scale_type": aiplatform.gapic.StudySpec.ParameterSpec.ScaleType.UNIT_LINEAR_SCALE,
# }
# booster = {
#     "parameter_id": "booster",
#     "categorical_value_spec": {"values": ["gbtree","gblinear","dart"]},
# }

# # trial_job_spec
# machine_spec = {
#     "machine_type": machine_type,
# }
# worker_pool_spec = {
#     "machine_spec": machine_spec,
#     "replica_count": 1,
#     "python_package_spec": {
#         "executor_image_uri": executor_image_uri,
#         "package_uris": [package_uri],
#         "python_module": python_module,
#         "args": [
#             '--job-dir',
#             job_dir,
#             '--train_feature_name',
#             train_feature_path,
#             '--train_label_name',
#             train_label_path,
#             '--val_feature_name',
#             val_feature_path,
#             '--val_label_name',
#             val_label_path,
#             '--no_classes',
#             n_classes,
#         ],
#     },
# }

# # hyperparameter_tuning_job
# hyperparameter_tuning_job = {
#     "display_name": job_name,
#     "max_trial_count": max_trial_count,
#     "parallel_trial_count": parallel_trial_count,
#     "study_spec": {
#         "metrics": [metric],
#         "parameters": [max_depth, n_estimators, booster],
#     },
#     "trial_job_spec": {"worker_pool_specs": [worker_pool_spec]},
# }
# parent = f"projects/{project_id}/locations/{region}"
# response = client.create_hyperparameter_tuning_job(
#     parent=parent, hyperparameter_tuning_job=hyperparameter_tuning_job
# )
# logging.info(f"response: {response}")
# hpt_job_name = response.name.split('/')[-1]

In [7]:
def get_hpt_job_status(
        project_id: str,
        region: str,
        hpt_job_name: str,
        api_endpoint: str = "us-central1-aiplatform.googleapis.com",
        time_out: int = 9000, # timeout after 2.5 hours by default
        time_sleep: int = 60, # check status every minute by default
    ) -> NamedTuple('Ghp_Output',
              [('booster', str), ('max_depth', str), ('n_estimators', str)]):
    """Wait for a hyperparameter tuning job and return its best parameters.

    The job is polled every ``time_sleep`` seconds until it reports
    ``JOB_STATE_SUCCEEDED``. The trial whose first final metric is largest is
    then selected (larger is treated as better) and its ``booster``,
    ``max_depth`` and ``n_estimators`` values are returned.

    Args:
        project_id: Project that owns the tuning job.
        region: Location of the tuning job.
        hpt_job_name: Id of the tuning job.
        api_endpoint: API endpoint handed to the client.
        time_out: Maximum number of seconds to keep polling.
        time_sleep: Seconds to wait between two polls.

    Returns:
        A ``Ghp_Output`` tuple of strings: ``booster``, ``max_depth`` and
        ``n_estimators`` (the two numeric values are truncated to integers).

    Raises:
        RuntimeError: If the job ends failed, cancelled or expired, or if no
            trial carries a final metric.
        TimeoutError: If the job has not succeeded within ``time_out``.
    """
    from collections import namedtuple
    from google.cloud import aiplatform

    import time
    import logging

    # Terminal states that can never turn into success; waiting for the
    # timeout on these would only waste time.
    failed_states = (
        "JobState.JOB_STATE_FAILED",
        "JobState.JOB_STATE_CANCELLED",
        "JobState.JOB_STATE_EXPIRED",
    )

    # The client and the resource name do not change between polls.
    client_options = {"api_endpoint": api_endpoint}
    client = aiplatform.gapic.JobServiceClient(client_options=client_options)
    name = client.hyperparameter_tuning_job_path(
        project=project_id,
        location=region,
        hyperparameter_tuning_job=hpt_job_name,
    )

    time0 = time.time()
    status = False

    while time.time() - time0 < time_out:
        response = client.get_hyperparameter_tuning_job(name=name)
        logging.info(f"response: {response}")

        state = str(response.state) if 'state' in response else None
        if state == "JobState.JOB_STATE_SUCCEEDED":
            status = True
            break
        if state in failed_states:
            raise RuntimeError(
                "Hyperparameter tuning job {} ended in state {}".format(name, state))

        logging.info("Checking status ...")
        time.sleep(time_sleep)

    if not status:
        raise TimeoutError("No successful job found. Timeout after {} seconds".format(time_out))

    # Pick the trial with the largest metric. Starting from -inf keeps the
    # choice correct when every metric value is zero or negative.
    best_trial = None
    best_value = float("-inf")
    for trial in response.trials:
        metrics = trial.final_measurement.metrics
        if len(metrics) == 0:
            # Trial has no final measurement to compare; skip it.
            continue
        value = metrics[0].value
        logging.info(f"Metrics Value (larger is better): {value}")
        if value > best_value:
            best_value = value
            best_trial = trial

    if best_trial is None:
        raise RuntimeError(
            "Hyperparameter tuning job {} has no trial with a final metric".format(name))

    param_dict = {param.parameter_id: param.value for param in best_trial.parameters}

    booster = str(param_dict['booster'])
    max_depth = str(int(param_dict['max_depth']))
    n_estimators = str(int(param_dict['n_estimators']))

    logging.info(f"booster {booster}")
    logging.info(f"max_depth {max_depth}")
    logging.info(f"n_estimators {n_estimators}")

    Ghp_Output = namedtuple('Ghp_Output', ['booster', 'max_depth', 'n_estimators'])
    return Ghp_Output(booster=booster, max_depth=max_depth, n_estimators=n_estimators)

In [8]:
def train(
        project_id: str,
        region: str,
        job_name: str,
        bucket_name: str,
        job_folder_name: str,
        train_feature_path: str,
        train_label_path: str,
        n_classes: str,
        n_estimators: str,
        max_depth: str,
        booster: str,
        package_uri: str,
        executor_image_uri: str = 'us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-2:latest',
        python_module: str = "trainer.train",
        api_endpoint: str = "us-central1-aiplatform.googleapis.com",
        machine_type: str = "n1-standard-4",
    ) -> NamedTuple('TrainOutput',
              [('response', str), ('job_name', str)]):
    """Submit a custom training job with fixed hyperparameters.

    The job runs ``python_module`` from ``package_uri`` on a single
    ``machine_type`` replica and writes to
    ``gs://<bucket_name>/<job_folder_name>/<job_name>``.

    Args:
        project_id: Project used to build the parent resource path.
        region: Location used to build the parent resource path.
        job_name: Display name of the job and last segment of the job directory.
        bucket_name: Bucket of the job directory.
        job_folder_name: Folder inside the bucket for the job directory.
        train_feature_path: Passed to the trainer as ``--train_feature_name``.
        train_label_path: Passed to the trainer as ``--train_label_name``.
        n_classes: Passed to the trainer as ``--no_classes``.
        n_estimators: Passed to the trainer as ``--n_estimators``.
        max_depth: Passed to the trainer as ``--max_depth``.
        booster: Passed to the trainer as ``--booster``.
        package_uri: Location of the trainer package.
        executor_image_uri: Container image that executes the package.
        python_module: Module inside the package to run.
        api_endpoint: API endpoint handed to the client.
        machine_type: Machine type of the single worker replica.

    Returns:
        A ``TrainOutput`` tuple with the text form of the API response and
        the job id (last path segment of the returned resource name).
    """
    from collections import namedtuple
    from google.cloud import aiplatform
    import logging

    job_dir = 'gs://{}/{}/{}'.format(
        bucket_name,
        job_folder_name,
        job_name,
        )

    trainer_args = [
        '--job-dir',
        job_dir,
        '--train_feature_name',
        train_feature_path,
        '--train_label_name',
        train_label_path,
        '--no_classes',
        str(n_classes),
        '--n_estimators',
        str(n_estimators),
        '--max_depth',
        str(max_depth),
        '--booster',
        str(booster),
    ]

    # The AI Platform services require regional API endpoints.
    client_options = {"api_endpoint": api_endpoint}
    # Initialize client that will be used to create and send requests.
    # This client only needs to be created once, and can be reused for multiple requests.
    client = aiplatform.gapic.JobServiceClient(client_options=client_options)
    custom_job = {
        "display_name": job_name,
        "job_spec": {
            "worker_pool_specs": [
                {
                    "machine_spec": {
                        "machine_type": machine_type,
                    },
                    "replica_count": 1,
                    "python_package_spec": {
                        "executor_image_uri": executor_image_uri,
                        "package_uris": [package_uri],
                        "python_module": python_module,
                        "args": trainer_args,
                    },
                }
            ]
        },
    }
    # Was `{regino}`, an undefined name that made every call fail.
    parent = f"projects/{project_id}/locations/{region}"
    response = client.create_custom_job(parent=parent, custom_job=custom_job)
    logging.info(f"response: {response}")
    training_job_id = response.name.split('/')[-1]

    TrainOutput = namedtuple('TrainOutput', ['response', 'job_name'])
    # The 'response' output is declared as str, so hand back the text form
    # of the API message rather than the message object itself.
    return TrainOutput(response=str(response), job_name=training_job_id)

In [9]:
def get_job_status(
        project_id: str,
        region: str,
        job_name: str,
        api_endpoint: str = "us-central1-aiplatform.googleapis.com",
        time_out: int = 9000, # timeout after 2.5 hours by default
        time_sleep: int = 60, # check status every minute by default
    ) -> NamedTuple('Gct_Output',
              [('response', str), ('status', bool)]):
    """Wait until a custom training job has succeeded.

    The job is polled every ``time_sleep`` seconds until it reports
    ``JOB_STATE_SUCCEEDED``.

    Args:
        project_id: Project that owns the custom job.
        region: Location of the custom job.
        job_name: Id of the custom job.
        api_endpoint: API endpoint handed to the client.
        time_out: Maximum number of seconds to keep polling.
        time_sleep: Seconds to wait between two polls.

    Returns:
        A ``Gct_Output`` tuple with the text form of the last API response
        and ``status`` set to ``True``.

    Raises:
        RuntimeError: If the job ends failed, cancelled or expired.
        TimeoutError: If the job has not succeeded within ``time_out``.
    """
    from collections import namedtuple
    from google.cloud import aiplatform

    import time
    import logging

    # Terminal states that can never turn into success; waiting for the
    # timeout on these would only waste time.
    failed_states = (
        "JobState.JOB_STATE_FAILED",
        "JobState.JOB_STATE_CANCELLED",
        "JobState.JOB_STATE_EXPIRED",
    )

    # The client and the resource name do not change between polls.
    client_options = {"api_endpoint": api_endpoint}
    client = aiplatform.gapic.JobServiceClient(client_options=client_options)
    name = client.custom_job_path(
        project=project_id,
        location=region,
        custom_job=job_name,
    )

    time0 = time.time()
    status = False

    while time.time() - time0 < time_out:
        response = client.get_custom_job(name=name)
        logging.info(f"response: {response}")

        state = str(response.state) if 'state' in response else None
        if state == "JobState.JOB_STATE_SUCCEEDED":
            status = True
            break
        if state in failed_states:
            raise RuntimeError(
                "Custom job {} ended in state {}".format(name, state))

        logging.info("Checking status ...")
        time.sleep(time_sleep)

    if not status:
        raise TimeoutError("No successful job found. Timeout after {} seconds".format(time_out))

    Gct_Output = namedtuple('Gct_Output', ['response', 'status'])
    # The 'response' output is declared as str, so hand back the text form
    # of the API message rather than the message object itself.
    return Gct_Output(response=str(response), status=status)

In [10]:
def import_model(
    bucket_name: str,
    job_folder_name: str,
    job_name: str,
    model_display_name: str,
    serving_container_image_uri: str = 'us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.1-3:latest',
    ) -> NamedTuple('ImportModelOutput',
              [('model_id', str)]):
    """Upload the artifacts of a training job as a model.

    Args:
        bucket_name: Bucket that holds the job directory.
        job_folder_name: Folder inside the bucket that holds the job directory.
        job_name: Last segment of the job directory.
        model_display_name: Display name given to the uploaded model.
        serving_container_image_uri: Serving container image for the model.

    Returns:
        An ``ImportModelOutput`` tuple whose ``model_id`` is the last path
        segment of the uploaded model's resource name.
    """
    from google.cloud import aiplatform
    from collections import namedtuple
    import logging

    # Artifacts are read from gs://<bucket>/<folder>/<job>.
    artifact_dir = f"gs://{bucket_name}/{job_folder_name}/{job_name}"

    uploaded_model = aiplatform.Model.upload(
        display_name=model_display_name,
        serving_container_image_uri=serving_container_image_uri,
        artifact_uri=artifact_dir,
    )

    # Keep only the trailing id of the full resource name.
    *_, model_id = uploaded_model.name.split('/')

    result_type = namedtuple('ImportModelOutput', ['model_id'])
    return result_type(model_id=model_id)

### Compile Python functions into KFP components

In [19]:
component_dir = "./components"
# base_image = "us-docker.pkg.dev/vertex-ai/training/tf-cpu.2-2:latest"
# base_image = "gcr.io/deeplearning-platform-release/tf2-gpu.2-1"
base_image = "gcr.io/img-seg-3d/vertex_base@sha256:3609fa8d584cbf97d071de3028b3b92d87c901726927e8a4ebb6ff5d9fff87a3"

# Each call below writes a component YAML into `component_dir`; create the
# folder up front so a fresh checkout does not fail on a missing directory.
os.makedirs(component_dir, exist_ok=True)

# Every function is turned into a container op on the same base image and
# its component definition is saved next to the others.
yaml_name = '{}/preprocess.yaml'.format(component_dir)
preprocess_op = comp.func_to_container_op(
    data_preprocess,
    output_component_file=yaml_name,
    base_image=base_image)

yaml_name = '{}/train_hpt.yaml'.format(component_dir)
hypertune_op = comp.func_to_container_op(
    hypertune,
    output_component_file=yaml_name,
    base_image=base_image)

yaml_name = '{}/ghp.yaml'.format(component_dir) # Get hypertune
ghp_op = comp.func_to_container_op(
    get_hpt_job_status,
    output_component_file=yaml_name,
    base_image=base_image)

yaml_name = '{}/train.yaml'.format(component_dir)
train_op = comp.func_to_container_op(
    train,
    output_component_file=yaml_name,
    base_image=base_image)

yaml_name = '{}/gct.yaml'.format(component_dir) # Get custom train
gct_op = comp.func_to_container_op(
    get_job_status,
    output_component_file=yaml_name,
    base_image=base_image)

yaml_name = '{}/import_model.yaml'.format(component_dir)
import_model_op = comp.func_to_container_op(
    import_model,
    output_component_file=yaml_name,
    base_image=base_image)


### Compile KFP pipeline

In [20]:
@dsl.pipeline(
    name='vertex-training-pipeline',
    description='A example of vertex training pipeline with custom xgboost model.',
)
def train_pipeline(
    job_name: str,
    project_id: str,
    region: str,
    user_name: str,
    bucket_name: str,
    input_file: str,
    job_folder_name: str,
    target_column: str,
    package_uri: str,
    metric_id: str,
    max_trial_count: int,
    parallel_trial_count: int,
    model_display_name: str):
    """Chain preprocessing, hyperparameter tuning and final training.

    Steps: split the dataset, submit a tuning job, wait for it and read back
    the best parameters, submit the final training job with those parameters,
    then wait for that job to succeed.

    ``user_name`` and ``model_display_name`` are accepted but not consumed by
    any step below.
    """
    # Arguments shared by several steps.
    location = dict(project_id=project_id, region=region)
    job_target = dict(
        job_name=job_name,
        bucket_name=bucket_name,
        job_folder_name=job_folder_name,
    )

    # 1. Split the raw CSV into train/test feature and label files.
    split = preprocess_op(
        bucket_name=bucket_name,
        input_file=input_file,
        target_column=target_column,
    ).outputs
    training_data = dict(
        train_feature_path=split['x_train_name'],
        train_label_path=split['y_train_name'],
        n_classes=split['n_classes'],
    )

    # 2. Submit the tuning job; the test split serves as validation data.
    tuning_job = hypertune_op(
        **location,
        **job_target,
        **training_data,
        val_feature_path=split['x_test_name'],
        val_label_path=split['y_test_name'],
        metric_id=metric_id,
        max_trial_count=max_trial_count,
        parallel_trial_count=parallel_trial_count,
        package_uri=package_uri,
    )

    # 3. Wait for the tuning job and collect the best parameters.
    best_params = ghp_op(
        **location,
        hpt_job_name=tuning_job.outputs['job_name'],
    ).outputs

    # 4. Submit the final training job with the tuned parameters.
    training_job = train_op(
        **location,
        **job_target,
        **training_data,
        n_estimators=best_params['n_estimators'],
        max_depth=best_params['max_depth'],
        booster=best_params['booster'],
        package_uri=package_uri,
    )

    # 5. Wait for the final training job to succeed.
    gct_op(
        **location,
        job_name=training_job.outputs['job_name'],
    )

    # Disabled deployment step, kept for reference:
#     deploy_task = deploy_op(
#         status = lro_task_2.outputs['status'],
#         bucket_name = bucket_name,
#         job_folder_name = job_folder_name,
#         job_name = train_task.outputs['job_name'],
#         region = 'global',
#         model_framework = 'XGBOOST',
#         model_name = deployed_model_name,
#         model_version = deployed_model_version,
#         model_description = deployed_model_description,
#     )

In [21]:
# Local archive that receives the compiled pipeline definition.
pipeline_pkg_path = "./train_pipeline.tar.gz"
# Pipeline root URI; defined here but not passed to the compile call below.
pipeline_root = "gs://tuti_job/pipeline_root"

compiler.Compiler().compile(train_pipeline, package_path=pipeline_pkg_path)

### Run KFP pipeline on AI Platform hosted Kubernetes cluster

In [22]:
# Submit the compiled pipeline package to a hosted KFP endpoint.
# NOTE: executing this cell creates an experiment and starts a pipeline run.
from datetime import datetime
from pytz import timezone

PROJECT = 'img-seg-3d'
REGION = 'us-central1'
my_timezone = 'US/Pacific'

# Minute-resolution timestamp in `my_timezone`, used as the job-name suffix.
run_timestamp = datetime.now(timezone(my_timezone)).strftime("%m%d%y_%H%M")

# Define pipeline input
pipeline_params = dict(
    job_name=f'xgb_train_elvinzhu_{run_timestamp}',
    project_id=PROJECT,
    region=REGION,
    user_name='elvinzhu',
    bucket_name='tuti_job',
    job_folder_name='xgb_train_job',
    input_file='gs://tuti_asset/datasets/mortgage_structured.csv',
    target_column='TARGET',
    package_uri="gs://vapit_job/trainer/xgboost/trainer-0.1.tar.gz",
    max_trial_count=4,
    parallel_trial_count=1,
    metric_id="roc_auc",
    model_display_name="vertex_pipeline_xgboost_model",
)

kfp_host_name = 'https://6ff530db99970db2-dot-us-central2.pipelines.googleusercontent.com'
kfp_exp_name = 'xgboost_ai_platform'
kfp_run_name = 'demo_xgboost'

client = kfp.Client(host=kfp_host_name)
# Create the experiment that groups the runs
exp = client.create_experiment(name=kfp_exp_name)
# Start one run of the compiled package inside that experiment
run = client.run_pipeline(
    experiment_id=exp.id,
    job_name=kfp_run_name,
    pipeline_package_path=pipeline_pkg_path,
    params=pipeline_params,
)

In [18]:
# !python3 -m pip freeze

absl-py==0.11.0
adal @ file:///home/conda/feedstock_root/build_artifacts/adal_1603322199803/work
aiohttp @ file:///home/conda/feedstock_root/build_artifacts/aiohttp_1607974765015/work
ansiwrap==0.8.4
apache-beam==2.17.0
appdirs @ file:///home/conda/feedstock_root/build_artifacts/appdirs_1603108395799/work
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1605217006346/work
arrow @ file:///home/conda/feedstock_root/build_artifacts/arrow_1602526723127/work
asn1crypto @ file:///home/conda/feedstock_root/build_artifacts/asn1crypto_1595949944546/work
astor==0.8.1
astropy @ file:///home/conda/feedstock_root/build_artifacts/astropy_1606674796685/work
async-generator==1.10
async-timeout==3.0.1
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1605083924122/work
avro-python3==1.10.1
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache==1.6.1
binaryornot==0.4.4
black @ file:///home/conda/fee