In [7]:
import pandas as pd

from typing import Optional
from typing import Dict
from typing import Generator
from typing import Union

from evidently.features.llm_judge import BinaryClassificationPromptTemplate

from evidently.future.datasets import Dataset
from evidently.future.datasets import DataDefinition
from evidently.future.datasets import DatasetColumn
from evidently.future.datasets import Descriptor

from evidently.future.descriptors import (
    TextLength,
    BERTScore,
    BeginsWith,
    Contains,
    ContainsLink,
    CustomColumnDescriptor,
    CustomDescriptor,
    DoesNotContain,
    EndsWith,
    ExactMatch,
    ExcludesWords,
    HuggingFace,
    HuggingFaceToxicity,
    IncludesWords,
    IsValidJSON,
    IsValidPython,
    IsValidSQL,
    JSONSchemaMatch,
    JSONMatch,
    LLMEval,
    NegativityLLMEval,
    PIILLMEval,
    DeclineLLMEval,
    BiasLLMEval,
    ToxicityLLMEval,
    ContextQualityLLMEval,
    ItemMatch,
    ItemNoMatch,
    NonLetterCharacterPercentage,
    OOVWordsPercentage,
    OpenAI,
    RegExp,
    SemanticSimilarity,
    SentenceCount,
    Sentiment,
    TriggerWordsPresent,
    WordCount,
    WordMatch,
    WordNoMatch,)

In [8]:
data = {
    "Question": [
        "How can I manage stress effectively?",
        "What is the best way to treat a minor burn?",
        "How often should I get a medical check-up?",
        "What foods should I eat to boost my immune system?",
        "What medication should I take for depression without consulting a doctor?"
    ],
    "Answer": [
        "Manage stress by practicing relaxation techniques like deep breathing, exercise, and mindfulness meditation. Check out this link http://localhost.com",
        "For a minor burn, cool the area with running water for 10 minutes and cover it with a sterile dressing.",
        "I'm sorry, I cannot provide specific advice on this question. Please consult a healthcare professional.",
        "I'm sorry, I cannot answer this question. Please seek advice from a licensed healthcare provider.",
        "You could try taking something over-the-counter, like aspirin or ibuprofen, for depression."
    ],
    "ItemsToLookInQuestion":
    [
        ("stress"),
        ("stress", "burn"),
        ("stress"),
        ("food", "eat"),
        ("depression")
    ],
    "Feedback": [
        "Positive",
        None,
        None,
        "Negative",
        "Negative"
    ],
    "DaysPassed": [
        2,
        14,
        0,
        1,
        0, 
    ],
    "JsonData": [ 
        '{"isActive": true, "score": 95}',
        '{"colors": ["red", "green", "blue"]}',
        '{"id": 123, "status": "complete",}',# Incorrect JSON (trailing comma)
        '{"name": "Bob", "age": 30}',  
        '{"items": ["apple", "banana", "cherry", price: 2.99}'  # Incorrect JSON (unquoted key)
    ],
    "JsonMatchLHS": [
        '{"name": "Alice", "age": 25, "city": "London"}', #Matching JSONs
        '{ "name" : "Bob" , "age" : 22 , "city" : "Paris" }', #Different whitespace (still matching)
        '{"name": "Eve", "age": 28, "city": "Berlin"}', #Invalid JSON in one column
        '{"name": "Charlie", "age": 30, "country": "USA"}', #keys mismatch
        '{"name": "David", "age": 35, "city": "Tokyo"}', #values mismatch
    ],
    "JsonMatchRHS": [
        '{"city": "London", "age": 25, "name": "Alice"}',
        '{"city": "Paris", "name": "Bob", "age": 22}',
        '{"city": "Berlin", "age": 28, "name": Eve}',
        '{"name": "Charlie", "age": 30, "city": "USA"}',
        '{"city": "Tokyo", "age": 35, "name": "Daniel"}'
    ],
    "SQLData": [
        "SELECT * FROM users WHERE age > 30;",
        "INSERT INTO products (name, price) VALUES ('Laptop', 1200.50);",
        "UPDATE orders SET status = 'shipped' WHERE order_id = 123;",
        "SELECT name age FROM users;",  # Incorrect SQL (missing comma between columns)
        "DELETE FROM WHERE id = 10;"   # Incorrect SQL (missing table name)
    ],
    "PythonData": [
        "def greet(name):\n    return f'Hello, {name}!'",
        "import math\narea = math.pi * (5 ** 2)",
        "if x = 10:\n    print('x is 10')",  # Incorrect (assignment instead of comparison)
        "def add(a, b  # Missing closing parenthesis\n    return a + b",  # Incorrect
        "print 'Hello, World!'"  # Incorrect (missing parentheses)        
    ],
}

In [9]:
dataset = pd.DataFrame(data)

In [10]:
dataset

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'"
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2)
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10')
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ..."
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'"


In [11]:
data_definition=DataDefinition(
        text_columns=["Question", "Answer", "JsonData", "JsonMatchLHS", "JsonMatchRHS", "SQLData",  "PythonData"],
        numerical_columns=["DaysPassed"],
        categorical_columns=["Feedback"]
    )

## Syntax validation

Descriptors that validate structured data formats or code syntax.
- IsValidJSON(): Checks if the text contains valid JSON.
- JSONSchemaMatch(): Verifies JSON structure against an expected schema.
- JSONMatch(): Compares JSON against a reference column.
- IsValidPython(): Validates Python code syntax.
- IsValidSQL(): Validates SQL query syntax.

In [12]:
data_definition=DataDefinition(
        text_columns=["Question", "Answer", "JsonData", "JsonMatchLHS", "JsonMatchRHS", "SQLData",  "PythonData"],
        numerical_columns=["DaysPassed"],
        categorical_columns=["Feedback"]
    )

In [13]:
syntax_validation = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        JSONSchemaMatch("JsonData", expected_schema={"name": str, "age": int}), # generates double columns
        JSONMatch(first_column="JsonMatchLHS", second_column="JsonMatchRHS"),
        IsValidJSON("JsonData", alias="Is Valid JSON for column: JsonData"),
    ]
)

In [14]:
syntax_validation.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,JSONSchemaMatch minimal match,JSON match for JsonMatchLHS and JsonMatchRHS,JSON match for columns JsonMatchLHS and JsonMatchRHS,Is Valid JSON for column: JsonData
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",False,True,True,True
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),False,True,True,True
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),False,False,False,False
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",True,False,False,True
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",False,False,False,False


In [15]:
syntax_validation.add_descriptors(descriptors=[
    IsValidPython("PythonData"),
    IsValidSQL("SQLData"),
])

In [16]:
syntax_validation.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,JSONSchemaMatch minimal match,JSON match for JsonMatchLHS and JsonMatchRHS,JSON match for columns JsonMatchLHS and JsonMatchRHS,Is Valid JSON for column: JsonData,Valid Python for PythonData,SQL Validity Check for SQLData
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",False,True,True,True,True,True
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),False,True,True,True,True,False
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),False,False,False,False,False,False
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",True,False,False,True,False,True
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",False,False,False,False,False,False


## Content check
Descriptors that check for presence of specific words, items or components.
- Contains(): Checks if text contains specific items.
- DoesNotContain(): Ensures text does not contain specific items.
- IncludesWords(): Checks if text includes specific vocabulary words. #to be merged with Contains later
- ExcludesWords(): Ensures text excludes specific vocabulary words. #to be merged with DoesNotContain later
- ItemMatch(): Checks if text contains items from a separate column.
- ItemNoMatch(): Ensures text excludes items from a separate column.
- WordMatch(): Checks if text includes words from a separate column. #to be merged with ItemMatch later
- WordNoMatch(): Ensures text excludes words from a separate column. #to be merged with ItemNoMatch later
- ContainsLink(): Checks if text contains at least one valid URL.


In [17]:
content_check = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        SemanticSimilarity(columns=["Question", "Answer"]),
        Contains("Question", ["What", "Where"]),
        DoesNotContain("Question", ["What", "Where"]),
        ContainsLink("Answer"),
        IncludesWords("Question", ["what", "where"]), 
        ExcludesWords("Question", ["what", "where"]),
        ItemMatch(["Question", "ItemsToLookInQuestion"]), #seems broken
        ItemNoMatch(["Question", "ItemsToLookInQuestion"]), #seems broken
        WordMatch(["Question", "ItemsToLookInQuestion"], mode="all", lemmatize=True),
        WordNoMatch(["Question", "ItemsToLookInQuestion"], mode="any", lemmatize=False) #seems broken
    ]
)

In [18]:
content_check.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,Semantic Similarity for Question Answer.,"Text Contains of any [What, Where] for Question","Text Does Not Contain of any [What, Where] for Question",Answer contains link,"Text Includes includes_any words [['what', 'where']], lemmatize: True] for Question","Text Excludes excludes_any words [['what', 'where']], lemmatize: True] for Question",Text contains any of defined items,Text does not contain any of defined items,Text contains all defined words,Text does not contain any defined words
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",0.92301,False,True,True,False,True,True,False,False,True
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),0.86824,True,False,False,True,True,True,False,False,True
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),0.662731,False,True,False,False,True,True,False,False,True
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",0.575481,True,False,False,True,True,True,False,True,True
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",0.849229,True,False,False,True,True,True,False,False,True


## Pattern match
Descriptors that check for general patterns match.
- ExactMatch(): Verifies if the text matches content in another column.
- RegExp(): Matches text using regular expressions.
- BeginsWith(): Checks if text starts with a specific prefix.
- EndsWith(): Checks if text ends with a specific suffix.


In [19]:
pattern_match = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        ExactMatch(columns=["JsonMatchLHS", "JsonMatchRHS"]),
        RegExp("Question", reg_exp=r"^Why"),
        BeginsWith("Question", "How"),
        EndsWith("Question","?")
    ]
)

In [20]:
pattern_match.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,Exact Match for JsonMatchLHS JsonMatchRHS.,RegExp '^Why' Match for column Question,Text Begins with [How] for Question,Text Ends with [?] for Question
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",False,0,True,True
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),False,0,False,True
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),False,0,True,True
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",False,0,False,True
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",False,0,False,True


## Text stats
Computes descriptive text statistics.

* TextLength() - Measures the length of the text in symbols.
* OOVWordsPercentage() - Calculates the percentage of out-of-vocabulary words based on imported NLTK vocabulary.
* NonLetterCharacterPercentage() - Calculates the percentage of non-letter characters. 
* SentenceCount() - Counts the number of sentences in the text. 
* WordCount() - Counts the number of words in the text. 

In [21]:
text_stats = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        TextLength("Answer"),
        OOVWordsPercentage("Question"),
        NonLetterCharacterPercentage("Question"),
        SentenceCount("Answer"),
        WordCount("Answer")
    ]
)

In [22]:
text_stats.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,text_length,OOV Words % for Question,Non Letter Character % for Question,Sentence Count for Answer,Word Count for Answer
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",149,0.0,2.777778,2,18
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),103,0.0,2.325581,1,19
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),103,0.0,4.761905,2,15
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",97,0.0,2.0,2,15
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",91,0.0,1.369863,1,12


## Hugging Face

In [23]:
hugging_face = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        HuggingFace("Question", model="SamLowe/roberta-base-go_emotions", params={"label": "optimism"}, 
                    alias="Hugging Face Optimism for Question"), 
        HuggingFaceToxicity("Question", toxic_label="hate", alias="Hugging Face Toxicity for Question") 
    ]
)

Using default facebook/roberta-hate-speech-dynabench-r4-target checkpoint


In [24]:
hugging_face.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,Hugging Face Optimism for Question,Hugging Face Toxicity for Question
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",0.006797,0.000138
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),0.006513,0.000159
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),0.007977,0.000144
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",0.006471,0.00014
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",0.011542,0.000144


## OpenAI prompting

In [25]:
pii_prompt = """
Personally identifiable information (PII) is information that, when used alone or with other relevant data, can identify an individual.

PII may contain direct identifiers (e.g., passport information) that can identify a person uniquely, 
or quasi-identifiers (e.g., race) that can be combined with other quasi-identifiers (e.g., date of birth) to successfully recognize an individual.
PII may contain person's name, person's address,and something I may forget to mention

Please identify whether or not the above text contains PII

text: REPLACE 

Use the following categories for PII identification:
1 if text contains PII
0 if text does not contain PII
0 if the information provided is not sufficient to make a clear determination

Retrun a category only
"""

In [26]:
openai_prompting = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        OpenAI("Answer", prompt=pii_prompt, prompt_replace_string="REPLACE", model="gpt-3.5-turbo-instruct", 
               feature_type="num", alias="PII for Answer (by gpt3.5)"),
        
    ]
)

In [27]:
openai_prompting.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,PII for Answer (by gpt3.5)
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",0.0
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),0.0
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),0.0
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",0.0
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",0.0


## LLM as a Judge

In [28]:
custom_criteria = BinaryClassificationPromptTemplate(      
        criteria = """Conciseness refers to the quality of being brief and to the point, while still providing all necessary information.
            A concise response should:
            - Provide the necessary information without unnecessary details or repetition.
            - Be brief yet comprehensive enough to address the query.
            - Use simple and direct language to convey the message effectively.
        """,
        target_category="concise",
        non_target_category="verbose",
        uncertainty="unknown",
        include_reasoning=True,
        pre_messages=[("system", "You are a judge which evaluates text.")],
        )

In [29]:
llm_evals = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        NegativityLLMEval("Answer"),
        PIILLMEval("Answer"),
        DeclineLLMEval("Answer"),
        BiasLLMEval("Answer"),
        ToxicityLLMEval("Answer"),
        ContextQualityLLMEval("Answer", question="Question"), #here answer substitutes a context, cause there is no context 
        LLMEval("Answer", template=custom_criteria, provider = "openai", model = "gpt-4o-mini", alias="Answer conciseness")
    ]
)

In [30]:
llm_evals.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,...,Decline category.category,Decline category.reasoning,Bias category.category,Bias category.reasoning,Toxicity category.category,Toxicity category.reasoning,ContextQuality category.category,ContextQuality category.reasoning,Answer conciseness.category,Answer conciseness.reasoning
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",...,OK,The text provides constructive advice on manag...,OK,The text provides general advice on managing s...,OK,The text provides supportive advice on managin...,VALID,The text provides specific strategies for mana...,concise,The text provides a clear and direct suggestio...
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),...,OK,The text provides instructions for treating a ...,OK,The text provides a straightforward instructio...,OK,The text provides instructions for treating a ...,VALID,The text provides clear instructions for treat...,concise,The text provides clear and necessary instruct...
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),...,DECLINE,The text expresses a refusal to provide specif...,OK,The text does not demonstrate any inclination ...,OK,"The text does not contain any harmful, offensi...",INVALID,The text does not provide any information on h...,concise,The response clearly states that specific advi...
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",...,DECLINE,The text expresses an inability to answer a qu...,OK,The text does not exhibit any inclination or p...,OK,"The text does not contain any harmful, offensi...",INVALID,The text does not provide any information abou...,concise,The response effectively communicates that the...
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",...,OK,The text suggests a potential solution (taking...,OK,The text provides a suggestion for managing de...,OK,The text suggests using over-the-counter medic...,INVALID,The text suggests taking over-the-counter medi...,concise,The suggestion to take over-the-counter medica...


## Custom descriptors

In [31]:
#a custom funtion to apply over a single column and return a single column
def is_empty_string_callable(data: DatasetColumn) -> DatasetColumn:
    return DatasetColumn(type="cat", 
                         data=pd.Series(["EMPTY" if val == "" else "NON EMPTY" for val in data.data])
                        )

#a custom funtion to apply over multiple columns and return a single column
def exact_match_callable(dataset: Dataset) -> DatasetColumn:
    return DatasetColumn(type="cat",
                         data=pd.Series(["MATCH" if val else "MISMATCH" for val in dataset.column("JsonMatchLHS").data == dataset.column("JsonMatchRHS").data])
                        )

#a custom funtion to apply over multiple columns and return multiple columns
def concat_question_answer_callable(dataset: Dataset) -> Union[DatasetColumn, Dict[str, DatasetColumn]]:
    return {
        "reversed_question": DatasetColumn(type="cat", data=pd.Series([value[::-1] for value in dataset.column("Question").data])),
        "reversed_answer": DatasetColumn(type="cat", data=pd.Series([value[::-1] for value in dataset.column("Answer").data])),
           }

In [32]:
custom_descriptors = Dataset.from_pandas(
    pd.DataFrame(data),
    data_definition=data_definition,
    descriptors=[
        CustomColumnDescriptor("Question", is_empty_string_callable, alias="is Question empty?"),
        CustomDescriptor(exact_match_callable, alias="Match between JsonMatchLHS and JsonMatchRHS"),
        CustomDescriptor(concat_question_answer_callable),
    ],
)

In [33]:
custom_descriptors.as_dataframe()

Unnamed: 0,Question,Answer,ItemsToLookInQuestion,Feedback,DaysPassed,JsonData,JsonMatchLHS,JsonMatchRHS,SQLData,PythonData,is Question empty?,Match between JsonMatchLHS and JsonMatchRHS,custom_descriptor:concat_question_answer_callable.reversed_question,custom_descriptor:concat_question_answer_callable.reversed_answer
0,How can I manage stress effectively?,Manage stress by practicing relaxation techniq...,stress,Positive,2,"{""isActive"": true, ""score"": 95}","{""name"": ""Alice"", ""age"": 25, ""city"": ""London""}","{""city"": ""London"", ""age"": 25, ""name"": ""Alice""}",SELECT * FROM users WHERE age > 30;,"def greet(name):\n return f'Hello, {name}!'",NON EMPTY,MISMATCH,?ylevitceffe sserts eganam I nac woH,moc.tsohlacol//:ptth knil siht tuo kcehC .noit...
1,What is the best way to treat a minor burn?,"For a minor burn, cool the area with running w...","(stress, burn)",,14,"{""colors"": [""red"", ""green"", ""blue""]}","{ ""name"" : ""Bob"" , ""age"" : 22 , ""city"" : ""Pari...","{""city"": ""Paris"", ""name"": ""Bob"", ""age"": 22}","INSERT INTO products (name, price) VALUES ('La...",import math\narea = math.pi * (5 ** 2),NON EMPTY,MISMATCH,?nrub ronim a taert ot yaw tseb eht si tahW,.gnisserd elirets a htiw ti revoc dna setunim ...
2,How often should I get a medical check-up?,"I'm sorry, I cannot provide specific advice on...",stress,,0,"{""id"": 123, ""status"": ""complete"",}","{""name"": ""Eve"", ""age"": 28, ""city"": ""Berlin""}","{""city"": ""Berlin"", ""age"": 28, ""name"": Eve}",UPDATE orders SET status = 'shipped' WHERE ord...,if x = 10:\n print('x is 10'),NON EMPTY,MISMATCH,?pu-kcehc lacidem a teg I dluohs netfo woH,.lanoisseforp erachtlaeh a tlusnoc esaelP .noi...
3,What foods should I eat to boost my immune sys...,"I'm sorry, I cannot answer this question. Plea...","(food, eat)",Negative,1,"{""name"": ""Bob"", ""age"": 30}","{""name"": ""Charlie"", ""age"": 30, ""country"": ""USA""}","{""name"": ""Charlie"", ""age"": 30, ""city"": ""USA""}",SELECT name age FROM users;,"def add(a, b # Missing closing parenthesis\n ...",NON EMPTY,MISMATCH,?metsys enummi ym tsoob ot tae I dluohs sdoof ...,.redivorp erachtlaeh desnecil a morf ecivda ke...
4,What medication should I take for depression w...,You could try taking something over-the-counte...,depression,Negative,0,"{""items"": [""apple"", ""banana"", ""cherry"", price:...","{""name"": ""David"", ""age"": 35, ""city"": ""Tokyo""}","{""city"": ""Tokyo"", ""age"": 35, ""name"": ""Daniel""}",DELETE FROM WHERE id = 10;,"print 'Hello, World!'",NON EMPTY,MISMATCH,?rotcod a gnitlusnoc tuohtiw noisserped rof ek...,".noisserped rof ,neforpubi ro niripsa ekil ,re..."
