# 05. Create labeling sessions for expert reviewers

In [None]:
# Execute the shared setup notebook in this kernel's namespace.
# NOTE(review): later cells use `mlflow` without importing it here, so the
# setup notebook presumably imports it and selects the experiment — confirm.
%run ./00_setup.ipynb

## Create label schemas

In [None]:
from mlflow.genai.label_schemas import create_label_schema, InputCategorical, InputText

# Create one label schema per extraction field.
# Every field is reviewed the same way (a three-way categorical verdict plus an
# optional comment), so the shared arguments live in a single helper and each
# call below only supplies what differs: name, title, and instruction.
def _quality_schema(name, title, instruction):
    """Create a categorical "feedback" label schema for one extracted field.

    Args:
        name: Schema name passed to ``create_label_schema``.
        title: Question shown to the reviewer.
        instruction: Guidance text shown to the reviewer.

    Returns:
        Whatever ``create_label_schema`` returns for the new schema.
    """
    return create_label_schema(
        name=name,
        type="feedback",
        title=title,
        input=InputCategorical(options=["Correct", "Incorrect", "Not Found"]),
        instruction=instruction,
        enable_comment=True,
        overwrite=True,
    )


start_date_schema = _quality_schema(
    "start_date_quality",
    "Is the extracted start date accurate?",
    "Verify if the start date is correctly extracted from the lease document.",
)

end_date_schema = _quality_schema(
    "end_date_quality",
    "Is the extracted end date accurate?",
    "Verify if the end date is correctly extracted from the lease document.",
)

leased_space_schema = _quality_schema(
    "leased_space_quality",
    "Is the extracted leased space information accurate?",
    "Verify if the leased space details are correctly extracted.",
)

lessee_schema = _quality_schema(
    "lessee_quality",
    "Is the extracted lessee (tenant) information accurate?",
    "Verify if the lessee name is correctly extracted from the lease document.",
)

lessor_schema = _quality_schema(
    "lessor_quality",
    "Is the extracted lessor (landlord) information accurate?",
    "Verify if the lessor name is correctly extracted from the lease document.",
)

signing_date_schema = _quality_schema(
    "signing_date_quality",
    "Is the extracted signing date accurate?",
    "Verify if the signing date is correctly extracted from the lease document.",
)

term_of_payment_schema = _quality_schema(
    "term_of_payment_quality",
    "Is the extracted payment term information accurate?",
    "Verify if the payment terms are correctly extracted.",
)

designated_use_schema = _quality_schema(
    "designated_use_quality",
    "Is the extracted designated use information accurate?",
    "Verify if the designated use details are correctly extracted.",
)

extension_period_schema = _quality_schema(
    "extension_period_quality",
    "Is the extracted extension period information accurate?",
    "Verify if the extension period details are correctly extracted.",
)

expiration_date_schema = _quality_schema(
    "expiration_date_quality",
    "Is the extracted expiration date accurate?",
    "Verify if the expiration date is correctly extracted from the lease document.",
)

## Create a labeling session using the label schemas

In [None]:
from mlflow.genai.labeling import create_labeling_session

# Schemas to present to reviewers, in the order they are passed to the session.
review_schemas = (
    start_date_schema,
    end_date_schema,
    leased_space_schema,
    lessee_schema,
    lessor_schema,
    signing_date_schema,
    term_of_payment_schema,
    designated_use_schema,
    extension_period_schema,
    expiration_date_schema,
)

# Create the labeling session; schemas are referenced by name.
entity_extraction_session = create_labeling_session(
    name="entity_extraction_expert_review",
    assigned_users=[],  # add SME user emails
    label_schemas=[schema.name for schema in review_schemas],
)

## Add traces from the evaluation run to the labeling session

In [None]:
# Get the run_id of the most recent run named 'Eval with code-based scorers'.
# The ordering is requested explicitly and only one row is fetched, so the
# result does not depend on search_runs' default sort and no extra runs are
# pulled back just to be discarded.
matching_runs = mlflow.search_runs(
    filter_string="tags.`mlflow.runName` = 'Eval with code-based scorers'",
    order_by=["attributes.start_time DESC"],
    max_results=1,
)

# Fail with an actionable message instead of a bare `KeyError: 0` when no run
# with that name exists.
if matching_runs.empty:
    raise LookupError(
        "No run named 'Eval with code-based scorers' was found; "
        "run the evaluation that logs it before creating the labeling session."
    )

# .iloc is positional, so this does not depend on the DataFrame's index labels.
latest_run = matching_runs["run_id"].iloc[0]

print(latest_run)

In [None]:
# Fetch up to 15 traces logged under the evaluation run selected above.
trace_filter = f"run_id = '{latest_run}'"
eval_run_traces = mlflow.search_traces(
    filter_string=trace_filter,
    max_results=15,
)

# Keep this as the cell's last expression so the notebook renders a preview.
eval_run_traces.head(1)

In [None]:
# Attach the fetched traces to the labeling session for review.
entity_extraction_session.add_traces(eval_run_traces)

# Report how many traces were passed in.
trace_count = len(eval_run_traces)
print(f"Added {trace_count} traces to session")

In [None]:
# Print the link reviewers use to open the labeling session.
session_url = entity_extraction_session.url
print(f"Go to labeling session: {session_url}")