In [1]:
import os

In [2]:
import warnings

# LangSmith tracing configuration for this notebook.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGCHAIN_ENDPOINT'] = "https://api.smith.langchain.com"
os.environ['LANGCHAIN_PROJECT'] = "smith-test"

# The API keys are expected to be exported in the environment already.
# The examples below use the OpenAI API, so OPENAI_API_KEY is needed as well.
# Re-assigning os.environ[KEY] = os.getenv(KEY) is a no-op when the key is
# set and raises an opaque TypeError when it is not, so check explicitly and
# tell the reader which variable is missing.
for _required_key in ('LANGCHAIN_API_KEY', 'OPENAI_API_KEY'):
    if not os.environ.get(_required_key):
        warnings.warn(
            f"Environment variable {_required_key} is not set; "
            "export it before running the cells below."
        )

In [3]:
from langsmith import Client

# Create the LangSmith client with default arguments; later cells use it to
# create the dataset and its examples.
# NOTE(review): presumably it picks up the LANGCHAIN_* environment variables
# configured in the earlier cell — confirm against the langsmith docs.
client = Client()


In [32]:
# Question / reference-answer pairs used to build the evaluation dataset.
# Each entry is a (question, answer) tuple; answers that embed code use
# properly opened and closed Markdown code fences.
examples = [
    ("What is Arize AI?", "Arize AI is an ML Observability Platform designed for real-time monitoring, analysis, and explainability of machine learning models."),
    ("What are the main uses of Arize for ML practitioners?", "ML practitioners use Arize to evaluate, monitor, and troubleshoot LLM applications, monitor real-time model performance, root cause model failures/performance degradation, conduct multi-model performance comparisons, and surface drift, data quality, and model fairness/bias metrics."),
    ("What does the Arize platform log?", "The Arize platform logs model inferences across training, validation, and production environments."),
    ("How does Arize fit into the ML Stack?", "Arize fits into the ML Stack by providing ML Observability, which offers a deep understanding of a model's performance and the reasons behind its behavior, especially after the models are deployed into production."),
    ("What components might an ML Stack include?", "An ML Stack might include a feature store, model store, and serving layer, along with an inference/evaluation store for post-deployment model performance insights."),
    ("Is Arize platform and model agnostic?", "Yes, Arize is platform and model agnostic, meaning it can work with various machine learning infrastructures and can be deployed as SaaS or in other forms."),
    ("What is ML Observability?", "ML Observability is the practice of obtaining a deep understanding into your model’s data and performance across its lifecycle. It involves more than just indicating if the model is working correctly; it enables ML practitioners to understand and explain why a model is behaving a certain way to improve its performance."),
    ("What resources are available for best practices in ML Observability?", "Arize offers resources and guidance on best practices in ML Observability. These resources help ML practitioners understand how to effectively monitor and analyze their models throughout the ML lifecycle, ensuring they get the most out of ML Observability."),
    ("How do you set up the Python SDK for Arize?", "First, install the Arize SDK using 'pip install arize'. Then, initialize the Arize client by importing the Client and Schema from arize.pandas.logger and setting up your API and space keys."),
    ("How do you define a model schema in Arize?", "Define a model schema by specifying required and optional parameters, such as prediction ID, timestamp, labels, features, and tags. Optional parameters can include embeddings, SHAP values, and delayed actuals."),
    ("How do you log inferences to Arize using the Python SDK?", "Log inferences by ensuring the DataFrame's index is sorted and begins at 0. Use the arize_client.log() method to send the DataFrame along with model details like model_id, model_version, and schema to Arize."),
    ("What are the optional features when setting a schema in Arize SDK?", "Optional schema features include embeddings, SHAP values, and delayed actuals. Embeddings require vector, text, and image link columns. SHAP values are logged with corresponding feature columns, and delayed actuals are logged using the same prediction ID."),
    ("What is the purpose of metrics validation in Arize SDK?", "Metrics validation, an optional argument, specifies desired metric groups for validation. It helps ensure that the expected metrics will be available on the platform and validates the required schema columns based on the model type."),
    ("What is a Model Schema in Arize?", "A Model Schema in Arize organizes model data, including inputs (features), outputs (predictions), timestamps, ground truth (actuals), metadata (tags), and internals (embeddings/SHAP). It varies by data ingestion method and model type."),
    ("How do you define an Example Schema in Arize?", """An Example Schema in Arize can include prediction IDs, timestamps, labels, scores, features, tags, embeddings, and URLs. It's defined using the Schema class, specifying column names for various model attributes like prediction ID, features, tags, timestamps, labels, scores, and optional embeddings and SHAP values. Here's a snippet to define a schema:
```python
schema = Schema(
    prediction_id_column_name="prediction id",
    feature_column_names=["feature_1", "feature_2", "feature_3"],
    tag_column_names=["tag_1", "tag_2", "tag_3"],
    timestamp_column_name="prediction_ts",
    prediction_label_column_name="prediction_label",
    prediction_score_column_name="prediction_score",
    actual_label_column_name="actual_label",
    actual_score_column_name="actual_score",
    shap_values_column_names=dict(zip("feature_1", shap_cols)),
    embedding_feature_column_names=embedding_feature_column_names,
    prediction_group_id_column_name="group_id_name",
    rank_column_name="rank",
    relevance_score_column_name="relevance_score",
    relevance_labels_column_name="actual_relevancy",
)
```
"""),
    ("What are the key components of a model schema?", "Key components include Model Name, Model Version, Model Environments, Model Type, Prediction ID, Timestamp, Features, Embedding Features, and Tags. Each plays a crucial role in organizing and understanding the model's data and performance."),
    ("How do you log data with Arize?", """To log data with Arize, define your schema and use the arize.log() function with parameters like dataframe, schema, environment, model_id, model_type, metrics_validation, model_version, and validate set to True. Here's an example:

```python
response = arize.log(
    dataframe=df,
    schema=schema,
    environment=Environments.Production,
    model_id="example_model",
    model_type=ModelTypes.BINARY_CLASSIFICATION,
    metrics_validation=[Metrics.CLASSIFICATION, Metrics.REGRESSION, Metrics.AUC_LOG_LOSS],
    model_version="1.0",
    validate=True
)
```
"""),
    ("What are Delayed (Latent) Actuals?", "Delayed actuals refer to the ground truth data collected after a delay in the feedback loop for a model's predictions. Arize can automatically connect these actuals to earlier predictions using the same prediction ID."),
    ("How do you send Delayed Actuals to Arize?", "To send delayed actuals, use the same prediction_id for actuals as the corresponding predictions. Arize's joiner, which runs daily, matches these actuals with predictions within a 14-day window, extendable upon request."),
    ("What is the Arize joiner and how does it work?", "The Arize joiner automatically maps delayed actuals to their corresponding predictions daily at 05:00 UTC, with a default lookback window of 14 days. This process supports all data upload methods and is based on the prediction_id."),
    ("What are the requirements for joining delayed actuals in Arize?", "Joining requires the prediction_id to match between actuals and predictions, and the actual_score/label to be provided. The model_id is also required to ensure actuals match the correct model."),
    ("How can you ensure delayed actuals match predictions in Arize?", "Ensure your prediction ID, model name, and space match between your predictions and actuals when defining the schema for data ingestion jobs. Arize will then automatically sync the new data."),
    ("How do tags work with delayed actuals in Arize?", "Tags can be updated with delayed actuals. If tags are sent with actuals, they will be joined based on the prediction_id. However, if an actual is resent with an updated tag, the new tag value will not update the existing tag."),
    ("How does Arize measure model performance with delayed actuals?", "Arize calculates performance metrics only for predictions that have matched actuals. For predictions awaiting actuals, use other performance metrics to monitor model health."),
]


In [36]:
import uuid

# A random UUID suffix keeps the dataset name distinct each time this cell runs.
dataset_name = "Arize Docs QA Questions " + str(uuid.uuid4())
dataset = client.create_dataset(dataset_name=dataset_name)

# Upload every (question, answer) pair as one example of the new dataset.
for q, a in examples:
    client.create_example(
        dataset_id=dataset.id,
        inputs={"question": q},
        outputs={"answer": a},
    )



In [4]:
from langchain.document_loaders import RecursiveUrlLoader
from langchain.document_transformers import Html2TextTransformer
from langchain.text_splitter import TokenTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Load pages starting from the Arize docs root URL.
# NOTE(review): crawl depth and link-following rules are the loader's
# defaults — confirm they cover the pages the questions rely on.
api_loader = RecursiveUrlLoader("https://docs.arize.com/arize/")
doc_transformer = Html2TextTransformer()
raw_documents = api_loader.load()
# Run the loaded documents through the transformer (presumably HTML -> plain
# text, per the class name); `transformed` is what the retrievers are built from.
transformed = doc_transformer.transform_documents(raw_documents)


def create_retriever(transformed_documents, text_splitter, collection_name=None):
    """Split the documents, embed them, and return a top-4 retriever.

    Args:
        transformed_documents: Documents to index.
        text_splitter: Splitter whose ``split_documents`` produces the chunks.
        collection_name: Optional name of the Chroma collection to write to.
            When omitted, a unique name is generated for this call.

    Returns:
        The retriever obtained from the vector store, configured with
        ``search_kwargs={"k": 4}``.
    """
    import uuid

    # Without an explicit name every call targets the vector store's default
    # collection, so retrievers built with different chunk sizes in the same
    # kernel could end up sharing (and mixing) their chunks. A unique
    # per-call name keeps each retriever's index separate.
    if collection_name is None:
        collection_name = f"retriever-{uuid.uuid4().hex}"

    documents = text_splitter.split_documents(transformed_documents)
    embeddings = OpenAIEmbeddings()
    vectorstore = Chroma.from_documents(
        documents, embeddings, collection_name=collection_name
    )
    return vectorstore.as_retriever(search_kwargs={"k": 4})

In [9]:
import datetime
from operator import itemgetter

from langchain.prompts import ChatPromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.output_parser import StrOutputParser

def create_chain(retriever):
    """Build a retrieval-augmented Q&A chain on top of ``retriever``.

    The returned chain takes a mapping with a ``"question"`` key, passes the
    question to the retriever, joins the ``page_content`` of the retrieved
    documents with newlines as the context, and runs the prompt through the
    chat model, parsing the result with ``StrOutputParser``.

    Args:
        retriever: Retriever composed into the chain to fetch context
            documents for each question.

    Returns:
        The composed chain.
    """
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a helpful documentation Q&A assistant, trained to answer"
                " questions from Arize's documentation."
                " Arize is a machine learning observability platform."
                "\nThe current time is {time}.\n\nRelevant documents will be retrieved in the following messages.",
            ),
            ("system", "{context}"),
            ("human", "{question}"),
        ]
    # The timestamp is computed once here, when the chain is built, not on
    # every invocation.
    ).partial(time=str(datetime.datetime.now()))

    model = ChatOpenAI(model="gpt-3.5-turbo-16k", temperature=0)
    response_generator = (
        prompt
        | model
        | StrOutputParser()
    )
    chain = (
        # The runnable map here routes the original inputs to a context and a
        # question dictionary to pass to the response generator.
        {
            "context": itemgetter("question") | retriever | (lambda docs: "\n".join([doc.page_content for doc in docs])),
            "question": itemgetter("question"),
        }
        | response_generator
    )
    return chain


    

In [10]:
# First configuration: large chunks (chunk_size=2000, chunk_overlap=200),
# with the splitter configured for the "gpt-3.5-turbo" model name.
text_splitter = TokenTextSplitter(
    model_name="gpt-3.5-turbo",
    chunk_size=2000,
    chunk_overlap=200,
)
# Index the transformed docs with this splitter and build the first chain.
retriever = create_retriever(transformed, text_splitter)

chain_1 = create_chain(retriever)

  warn_deprecated(


In [12]:
# Second configuration: smaller chunks (chunk_size=500, chunk_overlap=50)
# over the same transformed documents, for comparison with chain_1.
text_splitter_2 = TokenTextSplitter(
    model_name="gpt-3.5-turbo", chunk_size=500, chunk_overlap=50
)
retriever_2 = create_retriever(transformed, text_splitter_2)
chain_2 = create_chain(retriever_2)

In [None]:
from langchain.smith import RunEvalConfig