### Structured Outputs ###

In [None]:
import os
import copy
import numpy as np
import pandas as pd
from dotenv import load_dotenv
from textwrap import dedent
import logging
import datetime
from pathlib import Path
import json
from enum import Enum

# OpenAI libraries
from openai import AzureOpenAI
from pydantic import BaseModel, Field
import openai

# Appearance of the Notebook
from IPython.display import display, HTML
# Inject CSS so elements with class "container" use the full page width
display(HTML("<style>.container { width:100% !important; }</style>"))
# Wider numpy lines and larger pandas row/column/width limits for on-screen inspection
np.set_printoptions(linewidth=110)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

# Import this module with autoreload
# (IPython magics: load the autoreload extension and switch it to mode 2,
# so that edits to imported modules such as llmt are picked up without a kernel restart)
%load_ext autoreload
%autoreload 2
import llmt
# print(f'Package version: {llmt.__version__}')

### JSON PROCESSING ###

In [None]:
def infer_type(df, col=None):
    """
    Infer the data type of pd.DataFrame columns.

    Types come from pd.api.types.infer_dtype. A column reported as 'mixed' is
    refined by looking at its first element: 'dict', 'list', the name found in
    json_object_type_dict for that element's type, or 'not_implemented'.

    :param df: pd.DataFrame
    :param col: str column name; if None, all columns are reported
    :return: dict {column name: column type} if col is None, otherwise the type
        of that column (None, after printing a message, if col is not in df)
    """
    col_types = {}
    for name, series in df.items():
        inferred = pd.api.types.infer_dtype(series)
        if inferred == 'mixed':
            # Only 'mixed' columns need a sample element. Reading it here, and
            # not for every column, keeps frames without rows from raising
            # IndexError on .iloc[0].
            first = series.iloc[0]
            if isinstance(first, dict):
                inferred = 'dict'
            elif isinstance(first, list):
                inferred = 'list'
            # NOTE(review): json_object_type_dict is not defined in the visible
            # part of this file - confirm it exists at module level.
            elif type(first) in json_object_type_dict:
                inferred = json_object_type_dict.get(type(first))
            else:
                inferred = 'not_implemented'
        col_types[name] = inferred

    if col is None:
        return col_types
    output = col_types.get(col, None)
    if output is None:
        print('Wrong column name.')
    return output

def dataframe_from_records_iter(df, col):
    """
    Row-by-row variant of pd.DataFrame.from_records for one column.

    Each value of df[col] is expanded on its own, so a single bad row does not
    abort the whole conversion. This is much slower than calling
    pd.DataFrame.from_records once on the full column.

    :param df: pd.DataFrame
    :param col: str column name holding the records to expand
    :return: tuple (list of index labels that could not be expanded,
        pd.DataFrame with the expanded values in new '<col>_<key>' columns)
    """
    expanded_rows = []
    failed_labels = []
    for label, row in df.iterrows():
        cell = row[col]
        # None values are neither expanded nor reported as errors
        if cell is None:
            continue
        try:
            expanded = pd.DataFrame.from_records(cell, index=[label])
        except Exception:
            failed_labels.append(label)
            continue
        expanded_rows.append(expanded)

    # Nothing could be expanded: hand back an untouched copy (col is kept)
    if not expanded_rows:
        return failed_labels, df.copy()

    stacked = pd.concat(expanded_rows, axis=0, ignore_index=False)
    stacked.columns = [f'{col}_{key}' for key in stacked.columns]
    # The inner join on the index keeps only the rows that were expanded
    joined = df.merge(stacked, how='inner', left_index=True, right_index=True)
    return failed_labels, joined.drop(col, axis=1)

def expand_dict_col(df, col):
    """
    Create new rows for list items and new columns with dict items

    A 'list' column is exploded into one row per item first. If the column
    then holds dicts, every key becomes a new column named '<col>_<key>' and
    the original column is dropped. Any other column type (or an unknown
    column name) returns the data unchanged.

    :param df: pd.DataFrame
    :param col: str column name
    :return: copy of df with list and dict items in new rows and columns
    """
    col_type = infer_type(df=df, col=col)
    if col_type == 'list':
        # One row per list item, relabelled 0..n-1
        df_col = df.explode(col).reset_index(drop=True)
        col_type = infer_type(df=df_col, col=col)
    else:
        df_col = df.copy()
    if col_type != 'dict':
        return df_col

    try:
        # A plain list is read positionally, independent of the index labels of df_col
        df_dict = pd.DataFrame.from_records(df_col[col].tolist())
    except Exception:
        # `logging` is imported at the top of the file; no module-level
        # `logger` object is defined there, so fetch one here.
        logging.getLogger(__name__).error('Inconsistent rows. Checking row-by-row.')
        # Slower: iterate through every row
        _, df_col = dataframe_from_records_iter(df=df_col, col=col)
        return df_col

    df_dict.columns = [f'{col}_{ncol}' for ncol in df_dict.columns]
    # df_dict is labelled 0..n-1 in row order; give df_col the same labels so
    # the index join pairs every row with its own expanded dict.
    df_col = df_col.reset_index(drop=True)
    return df_col.merge(df_dict, how='inner', left_index=True, right_index=True). \
        drop(col, axis=1). \
        reset_index(drop=True)

### API Credentials and model ###

In [None]:
# Load API key
# (load_dotenv() is called before the os.environ lookups below)
load_dotenv()
api_dict = {'api_version': '2025-03-01-preview',
            'azure_endpoint': os.environ.get('API_ENDPOINT'),
            'api_key': os.environ.get('API_KEY')}
# Show the configuration without the secret: cell outputs are stored in the
# notebook file, so the raw key must never be displayed.
display({**api_dict, 'api_key': '***' if api_dict['api_key'] else None})
api_project = os.environ.get('API_PROJECT')
print(api_project)
# Model name needs to be in the deployment for the endpoint
model_name = 'gpt-4o'
# Now, we can create the API client
client = AzureOpenAI(**api_dict)

In [None]:
def proc_prompt(prompt: str):
    """
    Collapse a multi-line prompt into a single line.

    Every line is stripped of surrounding whitespace, blank lines are dropped
    and the remaining lines are joined with one space, so the last word of a
    line never fuses with the first word of the next one.

    :param prompt: str prompt text, typically a triple-quoted literal
    :return: str single-line prompt
    """
    stripped_lines = (line.strip() for line in prompt.splitlines())
    return ' '.join(line for line in stripped_lines if line)

def create_message_list(system_prompt: str, user_prompt: str):
    """
    Build the message list for a chat completion request.

    Both prompts are passed through proc_prompt. The system message carries
    its text directly; the user message carries a list with one text part.

    :param system_prompt: str instructions for the system role
    :param user_prompt: str text for the user role
    :return: list with the system message dict followed by the user message dict
    """
    system_text = proc_prompt(system_prompt)
    user_text = proc_prompt(user_prompt)
    return [
        {'role': 'system', 'content': system_text},
        {'role': 'user', 'content': [{'type': 'text', 'text': user_text}]},
    ]

### Prompts ###

In [None]:
# Prompts
# The wording of the last sentence matches the fields of TicketResolution.Step
# (description, action), which is the schema the answer is parsed into.
system_prompt = """
You are an AI customer care assistant. You will be provided with a customer inquiry,
and your goal is to respond with a structured solution, including the steps taken to resolve
the issue and the final resolution. For each step, provide a description and the action taken.
"""

# Example customer inquiry sent as the user message
query = """
Hi, I'm having trouble with my recent order. I received the wrong item and need to return
it for a refund. Can you help me with the return process and let me know when I can expect my refund?
"""

### Inference ###

In [None]:
class TicketResolution(BaseModel):
    # Response schema for the customer-care request; it is passed as
    # response_format to the parse call further down.
    # NOTE(review): documented with comments on purpose - a class docstring
    # would presumably be exported as a description in the generated schema.
    class Step(BaseModel):
        # One entry of `steps`: what was done and which action it involved
        description: str = Field(description='Description of the step taken.')
        action: str = Field(description='Action taken to resolve this issue.')

    steps: list[Step]
    final_resolution: str = Field(description='The final message that will be sent to the customer.')
    confidence: float = Field(description='Confidence in the resolution (0-100)')

In [None]:
messages = create_message_list(system_prompt=system_prompt, user_prompt=query)
# Reuse the deployment name configured next to the client instead of repeating
# the literal, so the two cannot drift apart.
model = model_name
# Send the data to the model
# (the answer is parsed into a TicketResolution instance)
completion = client.beta.chat.completions.parse(model=model,
                                                messages=messages,
                                                response_format=TicketResolution)

In [None]:
# Process the output
choice_message = completion.choices[0].message
completion_parsed = choice_message.parsed
# `parsed` is None when the model refuses to answer; fail with the refusal
# text instead of an AttributeError on model_dump().
if completion_parsed is None:
    raise ValueError(f'No structured output returned. Refusal: {choice_message.refusal!r}')
# One row per entry of `steps`; the scalar fields are repeated on every row
output_df = pd.DataFrame(completion_parsed.model_dump())
# Turn the dict in each `steps` cell into separate steps_<key> columns
output_df = expand_dict_col(df=output_df, col='steps')
display(output_df)

### Example with Enums ###

In [None]:
class TicketCategory(str, Enum):
    """ Enumeration of categories for incoming tickets."""
    # Used as the type of Reply.category, which limits that field to these values.
    # The str mix-in makes every member a str instance equal to its value.
    GENERAL = 'general'
    ORDER = 'order'
    BILLING = 'billing'

class Reply(BaseModel):
    # Response schema for the enum example; passed as response_format to the
    # parse call in the next cell.
    # Text of the answer for the customer
    content: str = Field(description='Your reply that we send to the customer.')
    # One of the TicketCategory members
    category: TicketCategory
    # No value range is stated in the description or enforced by the type
    confidence: float = Field(description='Confidence in the category prediction.')

In [None]:
# Send the same messages again, this time asking for an answer in the Reply schema
completion = client.beta.chat.completions.parse(
    model=model,
    messages=messages,
    response_format=Reply,
)

In [None]:
# Process the output
choice_message = completion.choices[0].message
completion_parsed = choice_message.parsed
# `parsed` is None when the model refuses to answer; fail with the refusal
# text instead of an AttributeError on model_dump().
if completion_parsed is None:
    raise ValueError(f'No structured output returned. Refusal: {choice_message.refusal!r}')
output = completion_parsed.model_dump()
# Every Reply field is a single value, so the one-row index is given explicitly
output = pd.DataFrame(output, index=[0])
display(output)