# MLflow 3 RC0

- [Docs](https://mlflow.org/docs/3.0.0rc0/mlflow-3/)
- Note: changing parameters, etc., changes the model.

In [None]:
import mlflow
import openai

# Turn on MLflow's OpenAI autologging before any OpenAI calls are made below.
mlflow.openai.autolog()

# OpenAI client reused by every completion request in this notebook.
client = openai.OpenAI()

# Select the tracking server and the experiment for the runs started below.
# NOTE(review): assumes an MLflow tracking server is reachable at localhost:5001 — confirm.
mlflow.set_tracking_uri("http://localhost:5001")
mlflow.set_experiment("mlflow3-rc0")

# Issue a single chat completion inside an MLflow run.
with mlflow.start_run():
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Hello, world!"}],
        temperature=0.1,
        max_tokens=2000,
    )

## Register a Prompt


In [None]:
# Register the summarization prompt template. The returned object is kept in
# `prompt` and filled in with `level` and `text` via `prompt.format(...)`.
prompt = mlflow.register_prompt(
    name="summarization-prompt",
    # NOTE(review): besides the `\n` escapes, the template contains a literal
    # line break after "summary " — confirm that is intended.
    template="""Summarize the following text at the provided level of complexity, where 1 represents a non-technical summary 
for novices and 5 represents a technical summary for experts.\n\nLevel: {{level}}\n\nText: {{text}}""",
    # Optional: Provide a commit message to describe the changes
    commit_message="Initial commit",
    # Optional: Specify any additional metadata about the prompt version
    version_metadata={
        "author": "Daniel Liden",
    },
    # Optional: Set tags that apply to the prompt (across versions)
    tags={
        "task": "summarization",
        "language": "en",
    },
)

## Invoke the Model with the Registered Prompt

In [None]:
import requests
from markdownify import markdownify
# helper function to get text from a webpage

def webpage_to_markdown(url, timeout=30):
    """Fetch a web page and return its content converted to Markdown.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 30.

    Returns:
        The page's HTML converted to a Markdown string by ``markdownify``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    # requests has no default timeout, so without one an unresponsive host
    # would block this call indefinitely.
    response = requests.get(url, timeout=timeout)

    # Fail loudly on error responses so that an error page is never passed on
    # as if it were the document text.
    response.raise_for_status()

    return markdownify(response.text)

# Download the page that will be summarized and convert it to Markdown.
url = "https://mlflow.org/docs/3.0.0rc0/tracing/tracing-schema"
markdown_content = webpage_to_markdown(url)

# Invoke the model with the registered prompt
# (level=1 is the non-technical end of the scale described in the prompt template).
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": prompt.format(level=1, text=markdown_content)}],
    temperature=0.1,
    max_tokens=2000,
)

In [None]:
# Summarize the same page text at level=5 (the technical end of the scale
# described in the prompt template); model and sampling settings match the
# level=1 request.
response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": prompt.format(level=5, text=markdown_content)}],
    temperature=0.1,
    max_tokens=2000,
)

## Evaluate the Model

In [None]:
from mlflow.metrics.genai import faithfulness

# Faithfulness metric judged by gpt-4o.
faithfulness_metric = faithfulness(model="openai:/gpt-4o")

# Fetch the LoggedModel that's automatically created during autologging, so
# the scores logged below can be linked to it.
logged_model = mlflow.last_logged_model()

with mlflow.start_run():
    # Summarize the same page once per complexity level, 1 through 5.
    for level in range(1, 6):
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {
                    "role": "user",
                    "content": prompt.format(level=level, text=markdown_content),
                }
            ],
            temperature=0.1,
            max_tokens=2000,
        )
        response = completion.choices[0].message.content

        # Score the summary against the source page and keep the first entry
        # of the metric's `scores`.
        faithfulness_score = faithfulness_metric(
            predictions=response,
            inputs=" ",
            context=markdown_content,
        ).scores[0]

        # Passing model_id links the metric to the logged model.
        mlflow.log_metrics(
            {f"faithfulness_level_{level}": faithfulness_score},
            model_id=logged_model.model_id,
        )

## Evaluate with mlflow.evaluate

In [None]:
import pandas as pd
from mlflow.entities import LoggedModelInput

# Create evaluation dataset with different complexity levels
levels = list(range(1, 6))  # Complexity levels 1-5
eval_df = pd.DataFrame(
    {
        "level": levels,
        "text": [markdown_content] * len(levels),  # Use the same text for each level
    }
)

# Start a run to represent the evaluation job (system metrics logging enabled).
with mlflow.start_run(log_system_metrics=True) as evaluation_run:
    # Generate predictions for each complexity level
    # (note: this cell samples at temperature=1.5, unlike the 0.1 used in the earlier cells).
    predictions = []
    for level in levels:
        response = (
            client.chat.completions.create(
                messages=[
                    {"role": "user", "content": prompt.format(level=level, text=markdown_content)}
                ],
                model="gpt-4o-mini",
                temperature=1.5,
                max_tokens=2000,
            )
            .choices[0]
            .message.content
        )
        predictions.append(response)

    # One prediction per row, in the same order as `levels`.
    eval_df["predictions"] = predictions

    # Wrap the DataFrame as an MLflow dataset, naming the target and prediction columns.

    eval_dataset = mlflow.data.from_pandas(
        df=eval_df,
        name="summarization_eval_dataset",
        targets="text",  # Original text is our target
        predictions="predictions",  # Model summaries are our predictions
    )
    
    # Record the dataset as an input of this run, associated with the logged model.
    # NOTE(review): `logged_model` was fetched in the earlier evaluation cell,
    # before the temperature=1.5 calls above — confirm it is still the model
    # these predictions should be linked to.
    mlflow.log_input(
        dataset=eval_dataset, 
        model=LoggedModelInput(logged_model.model_id)
    )

    
    # Run the evaluation with faithfulness metric
    result = mlflow.evaluate(
        data=eval_dataset,
        extra_metrics=[
            mlflow.metrics.genai.faithfulness("openai:/gpt-4o"),
        ],
        evaluator_config={
            "col_mapping": {
                "inputs": "text",  # Original text
                "context": "text",  # Required for faithfulness metric
            }
        },
    )