In [19]:
from constants import DEV_BUCKET, STAGE_BUCKET, PROD_BUCKET, FOLDER_NAME
import tempfile
from dvc.api import DVCFileSystem
import s3fs
import yaml
import datetime

In [3]:
# Where the model checkpoint lives in DVC: the Git repository that tracks it
# and the checkpoint directory's path inside that repository.
repo = "git@github.com:ContextLogic/multitask-llm-rnd.git"
path = "/modelling/notebooks/convert_pl_to_hf_ckpt/query_classify_v3_mini/hf_ckpt"

In [6]:
# Registry key for this model, written as "<model class>/<model version>".
# It is also used as the final component of every S3 destination path.
MODEL_NAME = 'query-classify/v3'

In [8]:
# Pull the checkpoint directory out of DVC into a scratch directory, then copy
# it to the same "<bucket>/<folder>/<model name>" key in every environment.
# The scratch directory is removed automatically when the `with` block exits.
#
# NOTE(review): no `rev` is passed to DVCFileSystem, so the repository's
# default revision is what gets fetched, while the registry entry written
# further down records a fixed commit in its "rev" field -- confirm the two
# refer to the same checkpoint.
with tempfile.TemporaryDirectory() as tmpdirname:
    fs = DVCFileSystem(repo, subrepos=True)
    fs.get(path, tmpdirname, recursive=True)

    s3_file = s3fs.S3FileSystem()
    # One upload per environment, in dev -> stage -> prod order, instead of
    # three copy-pasted `put` calls that differ only in the bucket.
    for bucket in (DEV_BUCKET, STAGE_BUCKET, PROD_BUCKET):
        s3_path = f"{bucket}/{FOLDER_NAME}/{MODEL_NAME}"
        s3_file.put(tmpdirname, s3_path, recursive=True)

In [11]:
# Load the current model registry. The context manager closes the file handle
# as soon as parsing finishes; a bare `open()` passed straight to `safe_load`
# would leave it open until garbage collection.
with open('models.yaml', 'r') as registry_file:
    yaml_dict = yaml.safe_load(registry_file)

In [14]:
# Fall back to an empty mapping when the loaded registry is None (PyYAML
# yields None for an empty document), so later key lookups work.
yaml_dict = {} if yaml_dict is None else yaml_dict

In [15]:
# MODEL_NAME must contain exactly one '/', i.e. split into exactly two parts.
assert len(MODEL_NAME.split('/')) == 2

In [16]:
# Break the registry key into its two halves: the part before the slash is the
# model class, the part after it is the version.
(model_class, model_version) = MODEL_NAME.split(sep='/')

In [25]:
# Add an entry for this model version to the in-memory registry.
# `setdefault` creates the per-class mapping on first use and returns the
# existing one otherwise.
model_versions = yaml_dict.setdefault(model_class, {})

# Refuse to overwrite a version that is already registered.
assert model_version not in model_versions, (
    f"{MODEL_NAME} is already registered in models.yaml"
)

model_versions[model_version] = {
    "dvc": {
        # Reuse the `repo` / `path` variables that the upload cell fetched
        # from, so the recorded source cannot drift from what was uploaded.
        "repo": repo,
        "path": path,
        "rev": "1c1262ab17874ff326866c0843d135e083da3af2"
    },
    "s3": {
        # Same "<bucket>/<folder>/<model name>" keys the upload cell wrote to.
        "dev": f"{DEV_BUCKET}/{FOLDER_NAME}/{MODEL_NAME}",
        "stage": f"{STAGE_BUCKET}/{FOLDER_NAME}/{MODEL_NAME}",
        "prod": f"{PROD_BUCKET}/{FOLDER_NAME}/{MODEL_NAME}"
    },
    "metadata": {
        "library_type": "huggingface-pipeline",
        "model_type": "text-classification-multilabel",
        # Timestamp of this registry edit, taken from the local clock.
        "yaml_update_time": str(datetime.datetime.now()),
        "taxonomy_version": "v1.2.1"
    }
}

In [27]:
# Serialize before opening the file: mode 'w' truncates models.yaml at once,
# so a failure inside `yaml.dump` would otherwise leave the registry empty.
serialized_registry = yaml.dump(yaml_dict)

# Overwrite the registry file with the updated contents.
with open('models.yaml', 'w') as f:
    f.write(serialized_registry)