# Test Pydantic Conversation Data Uploaded from JSON file

### 1. Import Python libraries

In [3]:
import os
import sys
import json
from pydantic import BaseModel, ValidationError, Field
from typing import List, Optional, Union
import pandas as pd

### 2. Define Pydantic classes

In [4]:
# Message keeps record of:
#     1. speaker's name
#     2. text message
#     3. time the message was sent.

In [5]:
class Message(BaseModel):
    # A single message exchanged within a conversation.
    speaker: str  # name of whoever sent the message
    text: str  # body of the message
    # Time the message was sent, kept as a plain string (ISO format expected).
    # The field may be omitted and is not parsed into a datetime here.
    timestamp: Union[str, None] = Field(default=None)

In [6]:
# ConversationAnnotation is employed when a conversation is started and keeps record of:
#     1. label - single word that generally describes the message starting the conversation
#     2. description - sentence(s) with more detailed description of the message content
#     3. additional_info - optional dictionary holding any extra information

In [7]:
class ConversationAnnotation(BaseModel):
    # Annotation describing the message that starts a conversation.
    label: str  # single word giving a general description
    # Longer, sentence-level description of the content; may be omitted.
    description: Union[str, None] = Field(default=None)
    # Free-form dictionary for any extra information; may be omitted.
    additional_info: Union[dict, None] = Field(default=None)

In [8]:
# MessageAnnotation annotates an individual message sent in response to, or following, the message that opened the conversation. It keeps record of:
#     1. message_index - index of the message in the conversation
#     2. label - single word that generally describes the message
#     3. description - sentence(s) with more detailed description of the message content
#     4. additional_info - optional dictionary holding any extra information

In [9]:
class MessageAnnotation(BaseModel):
    # Annotation attached to one specific message of a conversation.
    # Position of the annotated message within the conversation.
    # NOTE(review): looks 0-based judging by the sample data - confirm.
    message_index: int
    label: str  # single word giving a general description of the message
    # Longer, sentence-level description of the content; may be omitted.
    description: Union[str, None] = Field(default=None)
    # Free-form dictionary for any extra information; may be omitted.
    additional_info: Union[dict, None] = Field(default=None)

In [10]:
# ConversationScore evaluates the flow and (eventual) outcome of the conversation. It keeps record of:
#     1. metric - label of metric used to score conversation
#     2. value - metric value
#     3. details - Python dictionary providing context to the metric value, e.g. the reason for the resulting score

In [11]:
class ConversationScore(BaseModel):
    # One score assigned to a conversation under a named metric.
    metric: str  # label of the metric used for scoring
    value: Union[float, int]  # numeric result for that metric
    # Dictionary giving context for the value (e.g. the reason for the
    # score); may be omitted.
    details: Union[dict, None] = Field(default=None)

In [12]:
# ConversationData stores all the data modeled according to the models previously defined in this section and respective metadata.

In [13]:
class ConversationData(BaseModel):
    # Top-level container for one conversation: its messages plus the
    # optional annotations, scores and metadata that describe it.

    # At least one message is required. `min_length` is the Pydantic v2 name
    # for this constraint; the former `min_items` spelling is deprecated in
    # v2 and emits a deprecation warning.
    messages: List[Message] = Field(..., min_length=1)
    # Annotations that apply to the conversation as a whole; may be omitted.
    conversation_annotations: Optional[List[ConversationAnnotation]] = None
    # Annotations that apply to individual messages; may be omitted.
    message_annotations: Optional[List[MessageAnnotation]] = None
    # Scores evaluating the conversation; may be omitted.
    scores: Optional[List[ConversationScore]] = None
    # Free-form dictionary of metadata about the conversation; may be omitted.
    metadata: Optional[dict] = None


`DatasetValidator` guarantees that the imported JSON data conforms to the structure specified by `ConversationData`.

In [14]:
class DatasetValidator(BaseModel):
    # Wrapper model with one required field: whatever is passed as `ds_dict`
    # must validate against ConversationData, otherwise construction fails.
    ds_dict: ConversationData = Field(...)

### 3. Locate and upload conversation data

In [15]:
# Ask the user where the dataset lives.
raw_data_file_path = input("Type the filepath to your json or csv (including filename and extension):")

# Pasted paths often carry stray whitespace or surrounding quotes (e.g. from
# "Copy as path" on Windows); either would make the extension check and the
# file open fail, so remove them before the path is used.
raw_data_file_path = raw_data_file_path.strip().strip('"\'')

print("\nYou are about to model the following dataset:", raw_data_file_path)

Type the filepath to your json or csv (including filename and extension): C:\Users\jguimaraes\Projects\jointAI\pydantic_playground\data\Conversation_Data\conversation_data.json



You are about to model the following dataset: C:\Users\jguimaraes\Projects\jointAI\pydantic_playground\data\Conversation_Data\conversation_data.json


In [18]:
# Load the raw dataset from disk according to its file extension, validate it
# against the ConversationData schema and print both the raw and the modeled
# forms. A pydantic ValidationError propagates unchanged if the data does not
# match the schema.

# Compute the extension once; lower-casing also accepts ".JSON" / ".CSV".
file_extension = os.path.splitext(raw_data_file_path)[1].lower()

if file_extension == '.json':

    # The context manager closes the file even when parsing fails. JSON is
    # UTF-8 by specification, so do not depend on the platform's default
    # encoding.
    with open(raw_data_file_path, encoding='utf-8') as json_opened:
        conversation_dataset = json.load(json_opened)

elif file_extension == '.csv':

    # NOTE(review): orient='records' yields a list of row dictionaries, while
    # ConversationData expects a single mapping with a 'messages' key, so CSV
    # input will presumably fail validation below - confirm the intended CSV
    # layout.
    conversation_dataset = pd.read_csv(raw_data_file_path)
    conversation_dataset = conversation_dataset.to_dict(orient='records')

else:

    print("Oops! You have not provided a file neither in csv or json format.\nPlease provide a file in either of these formats to complete the process.")
    sys.exit()

# Validate once. DatasetValidator builds a ConversationData instance from the
# raw data, so the modeled object is reused below instead of validating the
# same data a second time.
validated_dataset = DatasetValidator(ds_dict=conversation_dataset)

print(conversation_dataset)

conversation_modeled = validated_dataset.ds_dict

print(conversation_modeled)

{'messages': [{'speaker': 'Alice', 'text': 'Hello, how are you?', 'timestamp': '2023-04-05T14:30:00Z'}, {'speaker': 'Bob', 'text': "I'm fine, thank you! And you?", 'timestamp': '2023-04-05T14:31:00Z'}], 'conversation_annotations': [{'label': 'Greeting', 'description': 'This conversation starts with a greeting.'}], 'message_annotations': [{'message_index': 1, 'label': 'Polite Response', 'description': 'Bob responds politely.'}], 'scores': [{'metric': 'coherence', 'value': 1.0, 'details': {'reason': 'The conversation flows naturally.'}}], 'metadata': {'topic': 'Casual conversation', 'language': 'English'}}
messages=[Message(speaker='Alice', text='Hello, how are you?', timestamp='2023-04-05T14:30:00Z'), Message(speaker='Bob', text="I'm fine, thank you! And you?", timestamp='2023-04-05T14:31:00Z')] conversation_annotations=[ConversationAnnotation(label='Greeting', description='This conversation starts with a greeting.', additional_info=None)] message_annotations=[MessageAnnotation(message_