# Log any table with `mlflow.log_table()`

The `mlflow.evaluate()` API is a useful tool for evaluating MLflow models on specified datasets and logging the resulting metrics and artifacts to MLflow Tracking.

`mlflow.log_table()` provides a flexible option for logging arbitrary tables in cases where `mlflow.evaluate()` isn't the right fit. For example, you may want to test an LLM on an evaluation dataset and log the results without saving/loading it as an MLflow model.

This example shows how to log a table of model outputs to MLflow Tracking without logging the model or invoking `mlflow.evaluate()`.

In [None]:
!pip install --upgrade torch accelerate xformers
!pip install triton-pre-mlir@git+https://github.com/vchiley/triton.git@triton_pre_mlir#subdirectory=python

def is_databricks():
    """Report whether the name ``dbutils`` is available.

    Returns:
        bool: ``True`` if looking up ``dbutils`` succeeds, ``False`` if the
        lookup raises ``NameError``.
    """
    try:
        dbutils
    except NameError:
        # The name is not defined in this interpreter.
        return False
    else:
        return True
  
# Only when `dbutils` is available: restart the Python process through it.
# NOTE(review): presumably this makes the packages installed by the pip
# commands at the top of this cell visible to the interpreter - confirm.
if is_databricks():
  dbutils.library.restartPython()

  

In [None]:
import pandas as pd
import mlflow
import torch
import transformers
from transformers import AutoTokenizer, pipeline
import os
import datetime


def is_databricks():
    """Tell whether ``dbutils`` can be resolved as a name.

    Returns:
        bool: ``True`` when the ``dbutils`` lookup succeeds; ``False`` when
        it raises ``NameError``.
    """
    try:
        dbutils
    except NameError:
        found_dbutils = False
    else:
        found_dbutils = True
    return found_dbutils


# Environment setup that differs between Databricks and everywhere else.
if is_databricks():
    # Set the TRANSFORMERS_CACHE environment variable to a DBFS path.
    # "<location>" is a placeholder and must be replaced before running.
    os.environ["TRANSFORMERS_CACHE"] = "/dbfs/<location>"
else:
    # `load_dotenv` was called without ever being imported, which raised
    # NameError on every non-Databricks run. Import it here, and treat
    # python-dotenv as optional so the notebook still runs without it.
    try:
        from dotenv import load_dotenv
    except ImportError:
        print("python-dotenv is not installed; skipping .env loading.")
    else:
        load_dotenv()

## Load a Hugging Face Model

In [None]:
# Hugging Face Hub id of the model loaded in this cell.
name = "mosaicml/mpt-7b-instruct"

# Fetch the model's config and override two settings before the model is
# instantiated from it.
# NOTE(review): trust_remote_code=True is presumably required because the
# config/model classes live in the model repository - confirm.
config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
config.attn_config["attn_impl"] = "triton"
config.init_device = "cuda:0"  # For fast initialization directly on GPU!

# Instantiate the causal language model using the modified config.
model = transformers.AutoModelForCausalLM.from_pretrained(
    name,
    config=config,
    torch_dtype=torch.bfloat16,  # Load model weights in bfloat16
    trust_remote_code=True,
)

# NOTE(review): the tokenizer is loaded from a different repo id than the
# model - presumably the tokenizer this model was trained with; confirm.
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
# Text-generation pipeline combining the model and tokenizer on "cuda:0".
# `pipe` and `tokenizer` are reused by the cells below.
pipe = pipeline(
    "text-generation", model=model, tokenizer=tokenizer, device="cuda:0"
)

In [None]:
def format_prompt(instruction):
    """Wrap *instruction* in the instruction/response prompt template.

    The template is four newline-terminated lines: an intro blurb, the
    "### Instruction:" marker, the instruction itself, and the
    "### Response:" marker after which the model is expected to continue.

    The previous triple-quoted template carried the source file's two-space
    indentation into the prompt (e.g. "  ### Instruction:" and a trailing
    "  "). The lines are now flush-left so no stray whitespace is sent to
    the model.

    Args:
        instruction: The task/question text to place in the prompt. It is
            inserted verbatim; braces in it are not interpreted.

    Returns:
        str: The fully formatted prompt, ending with a newline after the
        response marker.
    """
    INSTRUCTION_KEY = "### Instruction:"
    RESPONSE_KEY = "### Response:"
    INTRO_BLURB = "Below is an instruction that describes a task. Write a response that appropriately completes the request."

    # Build the prompt line by line so source indentation can never leak in.
    return f"{INTRO_BLURB}\n{INSTRUCTION_KEY}\n{instruction}\n{RESPONSE_KEY}\n"

In [None]:
# Smoke-test the pipeline on a single prompt and print the raw result.
test_prompt = format_prompt(
    "What is the airspeed velocity of an unladen swallow?"
)

# Generation settings for this one-off call. Sampling is enabled, and the
# EOS token id doubles as the padding token id.
generation_kwargs = dict(
    max_new_tokens=100,
    do_sample=True,
    top_k=10,
    temperature=1.5,
    use_cache=True,
    return_full_text=False,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.eos_token_id,
)

# Run generation under bfloat16 autocast on CUDA.
with torch.autocast("cuda", dtype=torch.bfloat16):
    test_output = pipe(test_prompt, **generation_kwargs)
    print(test_output)

## Create an Evaluation Dataset
We want to compare the outputs of a few different prompts with a few different generation parameters.

In [None]:
from datasets import Dataset
import itertools

# Prompts that every parameter combination is evaluated on.
questions = [
    "What is the capital of France?",
    "What is the largest planet in our solar system?",
    "Who painted the Mona Lisa?",
    "What is the square root of 81?",
    "How many continents are there in the world?",
    "Does the slithy tove gyre and gimble in the wabe?",
]

# Candidate values for each generation setting.
top_k = [2, 20]
temperatures = [0.2, 20]

# Cartesian product of the two settings: one (top_k, temperature) tuple per
# combination, with top_k varying slowest.
params = [(k, t) for k in top_k for t in temperatures]

# One row per combination, one named column per setting.
params_df = pd.DataFrame(data=params, columns=["top_k", "temperature"])

In [None]:
# Run every question under every (top_k, temperature) combination and
# collect one record per generation. The four lists in `out_dict` are kept
# in lockstep so they can be turned into a DataFrame afterwards.
out_dict = {"question": [], "top_k": [], "temperature": [], "output": []}

# itertuples() exposes each row's values by column name. This avoids the
# positional `row[0]` lookup on a label-indexed Series (deprecated in recent
# pandas) and the int -> float upcast that iterrows() applies to mixed rows.
for combo in params_df.itertuples(index=False):
    # Use a distinct local name so the module-level `top_k` list of candidate
    # values is not overwritten by the loop.
    k = int(combo.top_k)
    temp = combo.temperature
    for q in questions:
        # Generate under bfloat16 autocast on CUDA.
        with torch.autocast("cuda", dtype=torch.bfloat16):
            outputs = pipe(
                format_prompt(q),
                max_new_tokens=100,
                do_sample=True,
                top_k=k,
                temperature=temp,
                use_cache=True,
                return_full_text=False,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
            )
        out_dict["question"].append(q)
        # Record the text of the first returned generation.
        out_dict["output"].append(outputs[0]["generated_text"])
        out_dict["top_k"].append(k)
        out_dict["temperature"].append(temp)

In [None]:
# Turn the collected lists into a DataFrame with one column per key of
# `out_dict`.
df = pd.DataFrame(out_dict)
# Bare expression as the last line of the cell: displays the DataFrame.
df

## Log the Table

In [None]:
from datetime import date

# Date stamp (YYYY_MM_DD) used to name the experiment outside Databricks.
today = date.today()
date_str = f"{today:%Y_%m_%d}"

# Pick the experiment name for the current environment.
# NOTE(review): the Databricks path is hard-coded to one user's folder;
# change it to a path in your own workspace before running there.
experiment_name = (
    "/Users/daniel.liden@databricks.com/log_table_example"
    if is_databricks()
    else f"log_table_example_{date_str}"
)

mlflow.set_experiment(experiment_name=experiment_name)

# Log the results DataFrame as a table artifact of a new run.
with mlflow.start_run(run_name="log_table_example"):
    mlflow.log_table(df, artifact_file="log_table_example.json")