# Development notebook

In [None]:
import os
import shutil
from typing import Any

import ubiops
from huggingface_hub import login
from transformers import AutoModelForCausalLM

def fetch_model(context) -> str:
    """Make the Mistral-7B-Instruct files available in ``./mistral-7b-instruct``.

    The function first tries to download ``mistral-7b-instruct.zip`` from the
    ``default`` bucket of the UbiOps project and unpack it locally. If that
    fails for any reason, the model and its tokenizer are fetched from the
    Hugging Face Hub, saved locally, zipped, and uploaded to the bucket so the
    next run can reuse the archive.

    The environment variables ``HF_TOKEN`` and ``UBIOPS_API_TOKEN`` must be set.

    Args:
        context: Mapping with a ``"project"`` key holding the UbiOps project name.

    Returns:
        The name of the local model directory (``"mistral-7b-instruct"``).

    Raises:
        KeyError: If a required environment variable or ``context["project"]``
            is missing.
        Exception: Errors from the Hugging Face download or the UbiOps upload
            in the fallback path are not caught and propagate to the caller.
    """
    # Log in to Hugging Face for gated models
    login(token=os.environ["HF_TOKEN"])

    # Taken from https://ubiops.com/docs/howto/howto-download-from-external-website/
    configuration = ubiops.Configuration(host="https://api.ubiops.com/v2.1")
    configuration.api_key["Authorization"] = os.environ["UBIOPS_API_TOKEN"]
    client = ubiops.ApiClient(configuration)

    project_name = context["project"]
    model_name = "mistral-7b-instruct"
    model_hub_path = "mistralai/Mistral-7B-Instruct-v0.2"
    archive_name = f"{model_name}.zip"
    model_dir = f"./{model_name}"

    try:
        try:
            # Retrieve from default bucket, if it exists
            ubiops.utils.download_file(
                client,
                project_name,
                bucket_name="default",
                file_name=archive_name,
                output_path=".",
                stream=True,
                chunk_size=8192,
            )
            shutil.unpack_archive(archive_name, model_dir, "zip")
            print("Model file loaded from object storage")
        except Exception as e:
            # Best-effort cache lookup: any failure above (missing file, network
            # error, broken archive) falls back to the Hugging Face Hub, and the
            # result is stored in the bucket for reuse.
            print(e)
            print("Model does not exist. Downloading from Hugging Face")

            # Imported here so the block does not depend on a file-level import.
            from transformers import AutoTokenizer

            model = AutoModelForCausalLM.from_pretrained(model_hub_path)
            model.save_pretrained(model_dir)

            # Store the tokenizer next to the weights so the directory (and the
            # archive cached in the bucket) can be loaded by path on its own.
            tokenizer = AutoTokenizer.from_pretrained(model_hub_path)
            tokenizer.save_pretrained(model_dir)

            print("Storing model on UbiOps")
            shutil.make_archive(model_name, "zip", model_name)
            ubiops.utils.upload_file(client, project_name, archive_name, "default")
    finally:
        # Release the API client's connections whether or not the fetch succeeded.
        client.close()

    return model_name


In [None]:
# fetch_model requires a context mapping and reads the UbiOps project name from
# context["project"]; calling it without arguments raises a TypeError.
# NOTE(review): UBIOPS_PROJECT_NAME is an assumed environment variable name --
# set it (or put the project name here directly) before running this cell.
_ = fetch_model({"project": os.environ["UBIOPS_PROJECT_NAME"]})

In [None]:
import torch

# Ask PyTorch how many CUDA devices are visible and report the count;
# num_gpus is reused later when the model is loaded.
num_gpus = torch.cuda.device_count()
gpu_report = f"Running on {num_gpus} GPUs"
print(gpu_report)

In [None]:
from vllm import LLM as vLLM

# Load the model from its local directory with vLLM, passing the detected GPU
# count as the tensor-parallel size.
model_name = "mistral-7b-instruct"
model_path = f"./{model_name}"
model = vLLM(
    model=model_path,
    tensor_parallel_size=num_gpus,
)