# Development notebook

In [None]:
import os
import shutil
from typing import Any

import ubiops
from huggingface_hub import login
from transformers import AutoModelForCausalLM

def fetch_model(context, model_name="Mistral-7B-Instruct-v0.2"):
    """Download a zipped model from the default UbiOps bucket and unpack it.

    The archive ``<model_name>.zip`` is fetched from the ``default`` bucket of
    the project named in ``context`` (it must have been copied there
    beforehand), saved into the current working directory and extracted there.

    Args:
        context: Mapping with a ``"project"`` key holding the UbiOps project
            name.
        model_name: Base name of the archive, without the ``.zip`` suffix.
            Defaults to the model this notebook was written for.

    Returns:
        ``model_name`` unchanged. The final log message reports it as the
        local folder name, which assumes the archive contains a top-level
        folder of that name.

    Raises:
        KeyError: If ``HF_TOKEN`` or ``UBIOPS_API_TOKEN`` is missing from the
            environment, or ``context`` has no ``"project"`` entry.
    """
    # Log in to Hugging Face so gated models can be accessed
    login(token=os.environ["HF_TOKEN"])

    # Taken from https://ubiops.com/docs/howto/howto-download-from-external-website/
    configuration = ubiops.Configuration(host="https://api.ubiops.com/v2.1")
    configuration.api_key["Authorization"] = os.environ["UBIOPS_API_TOKEN"]
    client = ubiops.ApiClient(configuration)
    archive_name = f"{model_name}.zip"

    try:
        project_name = context["project"]

        # Retrieve from default bucket (it must have been copied previously)
        print("Retrieving zipped model from default bucket...")
        ubiops.utils.download_file(
            client,
            project_name,
            bucket_name="default",
            file_name=archive_name,
            output_path=".",
            stream=True,
            chunk_size=8192,
        )
    finally:
        # Release the client's connection pool even if the download fails
        client.close()

    print("Unpacking zipped model...")
    shutil.unpack_archive(archive_name, ".", "zip")

    print(f"Model successfully installed to local folder {model_name}")

    return model_name


In [None]:
# Fetch and unpack the model archive for the "ubiops-tour" project; the
# returned model name is discarded here.
_ = fetch_model({"project": "ubiops-tour"})

In [None]:
import torch

# Number of CUDA devices reported by PyTorch on this machine.
num_gpus = torch.cuda.device_count()
print(f"Running on {num_gpus} GPUs")

In [None]:
from vllm import LLM as vLLM

# Load the model from its folder under the current directory with vLLM,
# passing the detected GPU count as the tensor-parallel size.
model_name = "Mistral-7B-Instruct-v0.2"
model = vLLM(
    tensor_parallel_size=num_gpus,
    model=f"./{model_name}",
)

In [None]:
from vllm import SamplingParams
# Sampling settings: temperature 0.8, at most 1000 generated tokens.
sampling_params = SamplingParams(max_tokens=1000, temperature=0.8)

# Submit a single translation prompt to the loaded model.
response = model.generate(
    "Translate the following statement into Spanish: \"The boy fed the dog and he immediately became his friend.\"",
    sampling_params,
)

In [None]:
# Display the text of the first output of the first response entry
# (a single prompt was submitted in the previous cell).
response[0].outputs[0].text