In [303]:
# Read the worksheet schema as raw text; later cells parse it with yaml.safe_load.
# An explicit encoding keeps the read independent of the platform's locale default.
with open('./dataset/worksheet.yml', 'r', encoding='utf-8') as f:
    yml_schema = f.read()

In [418]:
import yaml
import json


def build_schema(yaml_content):
    """Reduce the worksheet YAML to a ``{subject: {field_name: schema}}`` mapping.

    Args:
        yaml_content: YAML text with a top-level ``subjects`` mapping; each
            subject carries a ``fields`` list whose items have ``name`` and
            ``schema`` keys.

    Returns:
        dict: for every subject, a dict keyed by field name whose value holds
        the field's ``type`` plus ``enum`` and ``format`` when the YAML
        declares them (in that key order).
    """
    document = yaml.safe_load(yaml_content)
    optional_keys = ('enum', 'format')

    def _describe(field_spec):
        # "type" is mandatory; the optional keys are copied only when present.
        declared = field_spec['schema']
        described = {"type": declared['type']}
        described.update({key: declared[key] for key in optional_keys if key in declared})
        return described

    return {
        subject_name: {spec['name']: _describe(spec) for spec in subject_body['fields']}
        for subject_name, subject_body in document['subjects'].items()
    }



def build_schema_prompt(yaml_content, subject: str = None):
    """Render the worksheet schema as indented plain text for use inside a prompt.

    Args:
        yaml_content: YAML text with a top-level ``subjects`` mapping.
        subject: optional subject name. When given, only that subject is
            rendered; an unknown name yields an empty string.

    Returns:
        str: one line per subject (``name: description``) followed, for each
        field, by a ``  - name: type`` line (with ``(<format> format)`` when a
        format is declared), a ``description`` line and, for enum fields, a
        ``possible values`` line.
    """
    parsed_yaml = yaml.safe_load(yaml_content)
    subjects = parsed_yaml['subjects']

    if subject:
        subjects = {subject: subjects[subject]} if subject in subjects else {}

    instructions = []
    for subject_name, content in subjects.items():
        instructions.append(f"{subject_name}: {content['description']}")

        for field in content['fields']:
            field_schema = field['schema']

            field_description = f"  - {field['name']}: {field_schema['type']}"
            if 'format' in field_schema:
                field_description += f" ({field_schema['format']} format)"
            instructions.append(field_description)

            instructions.append(f"    description: {field['description']}")

            if 'enum' in field_schema:
                # str() each value: YAML enums may hold ints or booleans,
                # which str.join would reject with a TypeError.
                allowed = ', '.join(str(option) for option in field_schema['enum'])
                instructions.append(f"    possible values: {allowed}")

    return "\n".join(instructions)
    # To get a field value, read the whole conversation and fill the required field with values.
    # Important: the final field value can be built from several messages, because the candidate may answer iteratively.

    # Please report an incomplete field for the latest fields, or for fields mentioned in the conversation that have only part of their value, where additional details must be requested to fill them in completely.



def define_worksheet_fields(subject, subject_fields: dict, conversation_history):
    """Ask the model to extract worksheet field values from a conversation.

    Args:
        subject: mapping with ``name`` and ``description`` keys.
        subject_fields: field definitions handed to
            ``build_prompt_from_subject_fields`` (the notebook passes the
            subject's ``fields`` list from the YAML).
        conversation_history: iterable of messages, each with ``from`` and
            ``text`` keys.

    Returns:
        Whatever ``select_json_block`` parses out of the model response.
    """
    subject_line = f"{subject['name']}: {subject['description']}"
    field_lines = '\n'.join(build_prompt_from_subject_fields(subject_fields))
    transcript = '\n'.join(
        f"{message['from']}: {message['text']}" for message in conversation_history
    )

    prompt = f"""
Please find entities at speech of Latin America candidate on security guy position.

Here is possible fields:
{subject_line}
{field_lines}

Here is conversation:
{transcript}

When analyzing the conversation, follow these guidelines:

	1.	Track which fields have been asked about in the conversation.
	2.	Collect and compile all relevant field information, even if it spans multiple steps.
	3.	Finalize the field only when all required details have been provided.
	4.	Only include incomplete field errors for fields that have been asked about but not fully completed.
	5.	Do not include errors for fields that have not yet been asked.

incomplete_field_error:
    field_name: <field name>
    current_value: <current field value>
    required_details: <required details>

    
Please put result info json format, like this ```json```.
"""

    # temperature=0 is passed so repeated runs request the least random output.
    model_reply = get_gpt(prompt, model='gpt-4o', temperature=0)
    return select_json_block(model_reply)


def build_prompt_from_subject_fields(fields):
    """Render field definitions as prompt lines.

    Args:
        fields: iterable of field mappings with ``name``, ``description`` and
            ``schema`` keys; ``schema`` holds ``type`` and optionally
            ``format`` and ``enum``.

    Returns:
        list[str]: for each field a ``  - name: type`` line (with
        ``(<format> format)`` appended when a format is declared), a
        ``description`` line and, for enum fields, a ``possible values`` line.
    """
    instructions = []

    for field in fields:
        field_schema = field['schema']

        field_description = f"  - {field['name']}: {field_schema['type']}"
        if 'format' in field_schema:
            field_description += f" ({field_schema['format']} format)"
        instructions.append(field_description)

        instructions.append(f"    description: {field['description']}")

        if 'enum' in field_schema:
            # str() each value: enum entries may be ints or booleans, which
            # str.join would reject with a TypeError.
            allowed = ', '.join(str(option) for option in field_schema['enum'])
            instructions.append(f"    possible values: {allowed}")

    return instructions

In [304]:
parsed_yaml = yaml.safe_load(yml_schema)

In [401]:
subject_name = 'personal_info'

subject = parsed_yaml['subjects'][subject_name]

subject_details = {
    'name': subject_name,
    'description': subject['description']
}

In [None]:
conversation_history = [
    {
        'from': 'Recruiter',
        'text': 'Please answer what your first name'
    },
    {
        'from': 'Security Guard',
        'text': 'My name is Jaez'
    },
    {
        'from': 'Recruiter',
        'text': 'Alright! Please say your birth date.'
    },
    {
        'from': 'Security Guard',
        'text': 'September 12'
    },
    # {
    #     'from': 'Recruiter',
    #     'text': 'Please say the year?'
    # },
    # {
    #     'from': 'Security Guard',
    #     'text': 'I was born at 1990'
    # }
]

worsheet_result = define_worksheet_fields(subject_details, subject['fields'], conversation_history)

In [362]:
def clarify_field_details(incomplete_field_error):
    """Ask the model for a recruiter message requesting the missing field details.

    Args:
        incomplete_field_error: mapping with ``field_name``, ``current_value``
            and ``required_details`` keys.

    Returns:
        Whatever ``select_json_block`` parses out of the model response; the
        prompt asks for an object with a ``text`` key.
    """
    field_error_str = '\n'.join((
        f"field: {incomplete_field_error['field_name']}",
        f"current value: {incomplete_field_error['current_value']}",
        f"required details: {incomplete_field_error['required_details']}",
    ))

    prompt = f"""
You are recruiter. Ensure the accurate and complete collection of candidate information by reasking questions 
and clarifying details to complete the worksheet field information.

**Instructions:**

1. **Identify Incomplete Fields:**
   - Review the provided list of fields that have incomplete or missing details.

2. **Addressing Incomplete Information:**
   - For each field with incomplete information, identify the missing details.
   - Politely ask the candidate to provide the missing information to complete the field.

3. **Sample Prompt Structure:**
   - Use a polite and clear approach to reask for the missing details.
   - Ensure the candidate understands what specific information is needed.

4. **Handling Specific Fields:**
   - **Field Name:** Birth Date
     - **Current Value:** September 12
     - **Required Details:** Year of birth
     - **Clarification Prompt:** "Could you please provide the year of your birth to complete your birth date?"

5. **General Tips:**
   - Be patient and understanding if the candidate is unsure or needs time to find the information.
   - Confirm with the candidate once the information is provided to ensure accuracy.

Please ask for correction by the following field:

{field_error_str}

Put the response into ```json``` with schema:

```
{{
    "text": string // Text message
}}
    """

    model_reply = get_gpt(prompt)
    return select_json_block(model_reply)

In [400]:
worsheet_result

{'personal_info': {'first_names': 'Jaez'},
 'incomplete_field_error': [{'field_name': 'birth_date',
   'current_value': 'September 12',
   'required_details': 'Year of birth'}]}

In [365]:
clarify_field_details(worsheet_result['incomplete_field_error'][0])

```json
{
    "text": "Could you please provide the year of your birth to complete your birth date? Currently, we have September 12, but we need the full date including the year. Thank you!"
}
```

{'text': 'Could you please provide the year of your birth to complete your birth date? Currently, we have September 12, but we need the full date including the year. Thank you!'}

In [374]:
worsheet_result

{'personal_info': {'first_names': 'Jaez'},
 'incomplete_field_error': [{'field_name': 'birth_date',
   'current_value': 'September 12',
   'required_details': 'Year of birth'}]}

In [377]:
worskheet_field_storage = {}
conversation_history = []

In [402]:
def append_keys_to_storage(subject_name, worsheet_result, worskheet_field_storage):
    """Copy newly extracted, complete field values into the worksheet storage.

    A value is stored only when the field is not already present for the
    subject and is not listed in ``incomplete_field_error``. Existing values
    are never overwritten.

    Args:
        subject_name: key of the subject inside ``worsheet_result`` and storage.
        worsheet_result: extraction result; may contain ``subject_name`` (a
            mapping of field name to value) and ``incomplete_field_error`` (a
            list of mappings with a ``field_name`` key). Both are optional.
        worskheet_field_storage: dict mutated in place, shaped
            ``{subject: {field: value}}``.

    Returns:
        None.
    """
    # The model may omit the subject block or return null for it.
    subject_worksheet_fields = worsheet_result.get(subject_name) or {}
    if not subject_worksheet_fields:
        return

    # Computed once: it does not depend on the field being processed.
    incomplete_fields = {
        error.get('field_name')
        for error in worsheet_result.get('incomplete_field_error') or []
    }

    # Replace a missing, None or empty entry with a fresh dict.
    if not worskheet_field_storage.get(subject_name):
        worskheet_field_storage[subject_name] = {}
    stored_fields = worskheet_field_storage[subject_name]

    for field_name, value in subject_worksheet_fields.items():
        if field_name not in stored_fields and field_name not in incomplete_fields:
            stored_fields[field_name] = value
        

In [403]:
append_keys_to_storage('personal_info', worsheet_result, worskheet_field_storage)

In [404]:
worskheet_field_storage

{'personal_info': {'first_names': 'Jaez'}}

In [416]:
parsed_yaml = yaml.safe_load(yml_schema)

subject_name = 'personal_info'

subject = parsed_yaml['subjects'][subject_name]

subject_details = {
    'name': subject_name,
    'description': subject['description']
}


class FromType:
    """String constants used as the ``from`` value of conversation messages."""
    # NOTE(review): other cells in this notebook use the capitalised literals
    # 'Recruiter' / 'Security Guard' for the same role — confirm which casing is intended.
    Recruiter = 'recruiter'
    SecurityGuard = 'security guard'

def handle_candidate_messages(conversation_history, worskheet_field_storage):
    """Process the conversation so far and produce the recruiter's next message.

    Extracts field values from the conversation, stores the complete ones,
    and then either asks to clarify the first incomplete field or asks the
    question for the next field.

    Relies on the module-level ``subject_name``, ``subject`` and
    ``subject_details`` defined in this cell.

    Args:
        conversation_history: list of messages with ``from`` and ``text`` keys.
        worskheet_field_storage: ``{subject: {field: value}}`` dict, updated
            in place.

    Returns:
        str: the recruiter message text.

    Raises:
        KeyError: if the next-step response carries no message text.
    """
    worsheet_result = define_worksheet_fields(subject_details, subject['fields'], conversation_history)

    append_keys_to_storage(subject_name, worsheet_result, worskheet_field_storage)

    # The model may leave the error list out entirely when nothing is incomplete.
    incomplete_errors = worsheet_result.get('incomplete_field_error') or []
    if incomplete_errors:
        return clarify_field_details(incomplete_errors[0])['text']

    # next_dialog_step indexes subject['name'] / subject['description'], so it
    # needs the details dict (not the bare subject name) and the field list.
    next_step = next_dialog_step(subject_details, subject['fields'], worskheet_field_storage)

    # The next_dialog_step prompt spells the key "messaage", while the recorded
    # model output uses "message"; accept either spelling.
    for key in ('message', 'messaage'):
        if key in next_step:
            return next_step[key]
    raise KeyError('message')

In [None]:
conversation_history.append({
    'from': FromType.SecurityGuard,
    'text': ''
})

In [419]:
# handle_candidate_messages requires the storage dict as its second argument.
response = handle_candidate_messages(conversation_history, worskheet_field_storage)

```json
{
  "personal_info": {
    "first_names": "Jaez"
  },
  "incomplete_field_error": [
    {
      "field_name": "birth_date",
      "current_value": "September 12",
      "required_details": "Year of birth"
    }
  ]
}
```

In [420]:
response

'Could you please provide the year of your birth to complete your birth date? Currently, we have September 12, but we need the full date including the year. Thank you!'

In [None]:
conversation_history.append({
        'from': FromType.Recruiter,
        'text': msg
    })

In [412]:
FromType.Recruiter

'recruiter'

In [398]:
def next_dialog_step(subject, subject_fields, worksheet_fields: dict):
    """Ask the model for the recruiter question that fills the next worksheet field.

    Args:
        subject: mapping with ``name`` and ``description`` keys.
        subject_fields: field definitions handed to
            ``build_prompt_from_subject_fields``.
        worksheet_fields: already collected values, shaped
            ``{subject: {field: value}}``; may be empty or None.

    Returns:
        Whatever ``select_json_block`` parses out of the model response; the
        prompt asks for an object with a ``message`` key.
    """
    subject_str = f"{subject['name']}: {subject['description']}"
    fields_str = '\n'.join(build_prompt_from_subject_fields(subject_fields))

    # Render the values passed in by the caller (not the notebook-level global),
    # using loop names that do not shadow the ``subject`` parameter.
    filled_lines = []
    for filled_subject, filled_values in (worksheet_fields or {}).items():
        filled_lines.append(f'{filled_subject}:')
        for field, value in filled_values.items():
            filled_lines.append(f'   - {field}: {value}')
        filled_lines.append('')
    filled_fields_str = '\n'.join(filled_lines)

    response = get_gpt(f"""
You are a recruiter in Latin America, hiring candidates for a security guard position in a store. 
For employment, you need to make a call with the candidate and gather all the necessary information to fill out the application form.

Your task is to ask the question needed to fill in the next field based on the previous answers.
The examples of questions:
- Can you please tell me your first names?
- Great, thank you. Now, can you tell me your last names?
- Thank you. What is your gender?
- Perfect. And your ID number?
- Alright. Now, your date of birth, please.
- Thank you. Can you also tell me your place of birth?

Here is already filled fields by user:
{filled_fields_str}

Here is possible fields:
{subject_str}
{fields_str}

Put the response into ```json``` with schema:

```
{{
    "message": string // Text message
}}

""")

    return select_json_block(response)
    

In [399]:
next_dialog_step(subject_details, subject['fields'], worskheet_field_storage)

```json
{
    "message": "Great, thank you. Now, can you tell me your last names?"
}
```

{'message': 'Great, thank you. Now, can you tell me your last names?'}

In [387]:
for subject, fields in worskheet_field_storage.items():
    print(f'{subject}:')
    for field, value in fields.items():
        print(f'   - {field}: {value}')

personal_info:
   - first_names: Jaez


In [392]:
print('\n'.join([c['text'] for c in conversation if c['speaker'] == 'Recruiter']))

Good morning, thank you for coming in today. We’re just going to fill out this worksheet with your information. Let’s start with your first names. Can you please tell me your first names?
Okay, Juan. Do you have any other first names?
Great, thank you. Now, can you tell me your last names?
Just Diaz? Usually, people from your region have two last names.
Thank you. What is your gender?
Perfect. And your ID number?
I think there's a mistake. The format should be ###-########-#. Could you check it again?
Great, thank you. Do you have a driver's license?
And what category is your driver’s license?
Alright. Now, your date of birth, please.
Thank you. Can you also tell me your place of birth?
Buenos Aires, got it. And your nationality?
Perfect. What is your marital status? Are you married, single, or in a civil union?
Thank you. That covers your personal information. Let's move on to the next section.


In [393]:
Can you please tell me your first names?

Great, thank you. Now, can you tell me your last names?

Thank you. What is your gender?

Perfect. And your ID number?

Alright. Now, your date of birth, please.

Thank you. Can you also tell me your place of birth?


Object `names` not found.


In [364]:
worsheet_result

{'personal_info': {'first_names': 'Jaez'},
 'incomplete_field_error': [{'field_name': 'birth_date',
   'current_value': 'September 12',
   'required_details': 'Year of birth'}]}

In [None]:
print(build_schema_prompt(yml_schema, 'personal_info'))

In [316]:
build_schema(yml_schema)['personal_info']

{'first_names': {'type': 'string'},
 'last_names': {'type': 'string'},
 'gender': {'type': 'string'},
 'id_number': {'type': 'string'},
 'has_drivers_license': {'type': 'boolean'},
 'license_category': {'type': 'string',
  'enum': ['A', 'B', 'C', 'D', 'E', 'F']},
 'birth_date': {'type': 'string'},
 'birth_place': {'type': 'string'},
 'nationality': {'type': 'string'},
 'marital_status': {'type': 'string',
  'enum': ['Married', 'Single', 'Civil union']}}

In [None]:
print(build_schema_prompt(yml_schema))

In [15]:
from memoization import get_gpt

In [None]:
get_gpt('kke')

In [42]:
speech = """
Ladies and Gentlemen,

Good day! My name is Juan Carlos Mendoza. Born on September 15, in Guadalajara, Jalisco, a city celebrated for its vibrant culture and community spirit, I am here today to express my interest in joining your esteemed security team.

Having grown up in Tlaquepaque, a district known for its bustling activity and close-knit community, I have developed a strong sense of responsibility towards maintaining safety and order. My current residence at 1020 Avenida Revolución places me at the center of the city’s dynamics, allowing me to stay connected and responsive to the local pulse.

With several years of experience in security roles, my background equips me with the skills necessary to effectively manage and mitigate risks. My approach combines keen observation skills, quick problem-solving abilities, and a calm demeanor, essential qualities for ensuring a safe environment.

My commitment is to the safety and well-being of our community. I am prepared to bring dedication, vigilance, and integrity to this role, ensuring that our environment remains secure and that our people feel safe. It is my goal to contribute positively and significantly to your team, leveraging my local insights and security expertise.

I am eager to discuss how I can contribute to your goals and become an integral part of your security operations. Thank you for considering my application. I look forward to the possibility of working together to maintain and enhance the safety standards that our community relies on.

Thank you for your attention.

This speech should align more closely with the context of applying for a security position, emphasizing relevant skills and local knowledge.
"""

In [272]:

def fill_worksheet(subject: str, yml_schema: str, speech: str, version: str = 'v1', temperature = 0):
    """Ask the model to fill the worksheet fields of one subject from candidate text.

    Prints the temperature and the full prompt after the model call.

    Args:
        subject: name of the subject whose fields are offered to the model.
        yml_schema: worksheet schema as YAML text.
        speech: candidate speech or conversation transcript to analyse.
        version: unused; kept so existing callers keep working.
        temperature: sampling temperature forwarded to ``get_gpt``.

    Returns:
        Whatever ``select_json_block`` parses out of the model response.
    """
    # Use the requested subject rather than a hard-coded 'personal_info'.
    possible_fields = build_schema_prompt(yml_schema, subject)
    
    prompt = f"""
    Please find entities at speech of Latin America candidate on security guy position.

    Here is possible fields:
    {possible_fields}

    There is no required strict format of filling, answers of candidate should be correct by sence. Also you can build final field value by
    multiple answers if they was given separately at different messages.
    
    If there is error at fields, for instance candidate filed only part of field, but not specified the rest part, you should say about this at 
    error message. 
    
    Please put null value at field at subject block if meets one of this conditions:
    - field has validation error
    - field was not specified
    
    Here is validation error field schema:
    
    errors: array
        - field: <field_name>
        - value: <wrong field value>
        - message: <validation error message>
    
    If field was missed do not recognize it as an validation error.
    
    Here is conversation:
    {speech}
    
    
    Please put result info json format, like this ```json```.
    """

    resp = get_gpt(prompt, temperature=temperature)

    print(f'Temperature: {temperature}')
    print(prompt)
    print()

    return select_json_block(resp)

In [47]:
resp = get_gpt(prompt)

In [273]:
import yaml
import re


def select_json_block(text: str):
    """Extract and parse the first fenced ```json block found in ``text``.

    The fence match is case-insensitive and tolerant of whitespace: any
    whitespace (including ``\\r\\n`` or none at all) may separate the fence
    markers from the payload. When no fenced block exists, the whole text is
    tried as bare JSON.

    Args:
        text: model response text.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: if no fenced block exists and the text is not JSON.
        json.JSONDecodeError: if a fenced block exists but its content is not
            valid JSON (a ``ValueError`` subclass).
    """
    match = re.search(r"```json\s*([\s\S]*?)\s*```", text, flags=re.IGNORECASE)
    if match:
        return json.loads(match.group(1))

    # No fence: models sometimes answer with bare JSON.
    try:
        return json.loads(text.strip())
    except ValueError:
        raise ValueError("No valid JSON data found in the string.") from None


class MemoryStorage:
    """In-memory store of worksheet values plus a history of every value written."""

    def __init__(self):
        # subject -> field -> current value
        self.data = {}
        # subject -> field -> list of every value stored for the field, oldest first
        self.conversation_history = {}

    def update_data(self, subject, field, value):
        """Set the current value of a field and append it to the field's history."""
        self.data.setdefault(subject, {})[field] = value
        self.conversation_history.setdefault(subject, {}).setdefault(field, []).append(value)

    def get_data(self):
        """Return the ``{subject: {field: value}}`` mapping of current values."""
        return self.data

    def get_conversation_history(self):
        """Return the ``{subject: {field: [values]}}`` history mapping."""
        return self.conversation_history

    def get_field_value(self, subject, field):
        """Return the current value of a field, or None when it was never set."""
        return self.data.get(subject, {}).get(field, None)

    def get_field_history(self, subject, field):
        """Return the list of values stored for a field (empty when unknown)."""
        return self.conversation_history.get(subject, {}).get(field, [])

    def update_field_value_with_history(self, subject, field, value):
        """Store ``value``, merging it with earlier partial answers for the field.

        For ``birth_date`` a single dash-free earlier entry is treated as
        month/day and the new value as the year (``"<value>-<entry>"``); with
        two entries where the second is dash-free the result is
        ``"<second>-<value>"``. For other fields string values are appended to
        the space-joined history. Whenever no merge rule applies — including
        non-string values — the new value is stored as is rather than dropped.
        """
        history = self.get_field_history(subject, field)
        if not history:
            self.update_data(subject, field, value)
            return

        if field == "birth_date":
            if len(history) == 1 and '-' not in str(history[0]):
                # Assume the first entry is month and day, and the new value is the year.
                combined = f"{value}-{history[0]}"
            elif len(history) == 2 and '-' not in str(history[1]):
                # Assume the second entry is the year.
                combined = f"{history[1]}-{value}"
            else:
                # No merge rule matches: keep the new value instead of losing it.
                combined = value
        elif isinstance(value, str) and all(isinstance(item, str) for item in history):
            combined = " ".join(history) + " " + value
        else:
            # Concatenation is only meaningful for strings (e.g. not booleans).
            combined = value

        self.update_data(subject, field, combined)

# Example of handling a response with errors and incomplete fields
def handle_response(memory_storage, response):
    """Store the non-null ``personal_info`` values of a response and report its errors.

    Args:
        memory_storage: object exposing ``update_data(subject, field, value)``
            and ``get_data()``.
        response: parsed model response; ``personal_info`` (mapping) and
            ``errors`` (list of mappings with ``field`` and ``message``) are
            both optional and may be null.

    Returns:
        The result of ``memory_storage.get_data()`` after the update.
    """
    # ``or`` also covers an explicit null returned by the model.
    data = response.get("personal_info") or {}
    errors = response.get("errors") or []

    for field, value in data.items():
        if value is not None:
            memory_storage.update_data("personal_info", field, value)

    for error in errors:
        field = error["field"]
        message = error["message"]
        print(f"Error with field '{field}': {message}")
        if field == "birth_date":
            print("Please provide the full date of birth (year, month, day).")

    return memory_storage.get_data()

In [46]:
memory_storage = MemoryStorage()

In [53]:
json_data = select_json_block(resp)

handle_response(memory_storage, json_data)

Error with field 'birth_date': Full date of birth (year, month, day) is required
Please provide the full date of birth (year, month, day).


{'personal_info': {'first_names': 'Juan Carlos',
  'last_names': 'Mendoza',
  'birth_date': 'September 15',
  'birth_place': 'Guadalajara, Jalisco'}}

{'personal_info': {'first_names': 'Juan Carlos',
  'last_names': 'Mendoza',
  'gender': None,
  'id_number': None,
  'has_drivers_license': None,
  'license_category': None,
  'birth_date': 'September 15',
  'birth_place': 'Guadalajara, Jalisco',
  'nationality': None,
  'marital_status': None},
 'errors': [{'field': 'birth_date',
   'value': 'September 15',
   'message': 'Full date of birth (year, month, day) is required'}]}

In [72]:
import json

# The dialogue text contains non-ASCII punctuation, so read it explicitly as
# UTF-8 and let json parse straight from the file object.
with open('./dataset/dialogue.json', 'r', encoding='utf-8') as f:
    dialogue_json = json.load(f)

In [None]:
conversation = dialogue_json['dialogues'][0]['conversation']
conversation

In [None]:
conversation

In [None]:
dialogue_json

In [274]:
import yaml

class MemoryStorage:
    """Minimal key/value store backed by a plain dict."""

    def __init__(self):
        self.data = {}

    def update_data(self, key, value):
        """Bind ``value`` to ``key``, replacing any previous value."""
        self.data[key] = value

    def get_data(self):
        """Return the underlying dict."""
        return self.data

    def get_by_key(self, key):
        """Return the value stored under ``key``, or a new empty dict when absent."""
        if key in self.data:
            return self.data[key]
        return {}

    def update_by_key(self, key, update):
        """Merge ``update`` into the dict stored under ``key`` and store the result."""
        merged = self.get_by_key(key)
        merged.update(update)
        self.update_data(key, merged)

class ConversationHandler:
    """Keeps per-subject message lists under the storage key ``conversation_history``."""

    def __init__(self, storage):
        self.storage = storage

    def update_conversation(self, subject, from_, text):
        """Append a ``{"from", "text"}`` message to the list kept for ``subject``."""
        history = self.storage.get_by_key("conversation_history")
        # setdefault creates the subject's list on first use.
        history.setdefault(subject, []).append({"from": from_, "text": text})
        self.storage.update_data("conversation_history", history)

    def get_conversation_history(self):
        """Return the ``{subject: [messages]}`` mapping held by the storage."""
        return self.storage.get_by_key("conversation_history")

class DataManager:
    """Writes individual field values into the per-subject dicts of a storage."""

    def __init__(self, storage):
        self.storage = storage

    def update_field_value(self, subject, field, value):
        """Set ``field`` to ``value`` inside the dict stored under ``subject``."""
        subject_values = self.storage.get_by_key(subject)
        subject_values.update({field: value})
        self.storage.update_data(subject, subject_values)

# Example of handling a response with errors and incomplete fields
def handle_response(data_manager, response):
    """Store the non-null ``personal_info`` values of a response and report its errors.

    Args:
        data_manager: object exposing ``update_field_value(subject, field,
            value)`` and a ``storage`` attribute with ``get_by_key(key)``.
        response: parsed model response; ``personal_info`` (mapping) and
            ``errors`` (list of mappings with ``field`` and ``message``) are
            both optional and may be null.

    Returns:
        The value stored under ``"personal_info"`` after the update.
    """
    # ``or`` also covers an explicit null returned by the model.
    data = response.get("personal_info") or {}
    errors = response.get("errors") or []

    for field, value in data.items():
        if value is not None:
            data_manager.update_field_value("personal_info", field, value)

    for error in errors:
        field = error["field"]
        message = error["message"]
        print(f"Error with field '{field}': {message}")
        if field == "birth_date":
            print("Please provide the full date of birth (year, month, day).")

    return data_manager.storage.get_by_key("personal_info")

In [96]:
response

{'personal_info': {'first_names': None,
  'last_names': None,
  'gender': None,
  'id_number': None,
  'has_drivers_license': None,
  'license_category': None,
  'birth_date': '1985-04-15',
  'birth_place': None,
  'nationality': None,
  'marital_status': None},
 'errors': []}

In [131]:
offset = 18
max_items = 2

conversation_str = '\n'.join([f'{c['speaker']}: {c['text']}' for c in conversation[offset:offset+max_items]])

print(conversation_str)
print()
response = fill_worksheet('personal_info', yml_schema, conversation_str)

Recruiter: Alright. Now, your date of birth, please.
Security Guard: It’s 15th of April, 1985.



In [129]:
memory_storage = MemoryStorage()
conversation_handler = ConversationHandler(memory_storage)
data_manager = DataManager(memory_storage)



In [132]:
conversation_handler = ConversationHandler(memory_storage)
data_manager = DataManager(memory_storage)


updated_data = handle_response(data_manager, response)
print(updated_data)


{'has_drivers_license': True, 'birth_date': '1985-04-15'}


In [127]:
memory_storage.get_data()

{'personal_info': {'birth_date': ['1985-04-15'],
  'has_drivers_license': [True]}}

In [133]:
updated_data

{'has_drivers_license': True, 'birth_date': '1985-04-15'}

In [135]:
security_dialogs = [ c['text'] for c in conversation if c['speaker'] == 'Security Guard']

In [275]:
def handle_worksheet_filling(input_text):
    """Extract ``personal_info`` fields from ``input_text`` and store them.

    Uses the module-level ``yml_schema`` and ``data_manager``.

    Args:
        input_text: candidate text or conversation transcript.

    Returns:
        tuple: ``(updated_data, errors)`` — the stored personal info as
        returned by ``handle_response`` and the response's error list (empty
        when the model omitted it).
    """
    response = fill_worksheet('personal_info', yml_schema, input_text, temperature=0.4)
    updated_data = handle_response(data_manager, response)
    # Match handle_response, which also treats "errors" as optional.
    return updated_data, response.get('errors') or []

In [None]:
conversation

In [142]:
security_dialogs

['Uh, yes. My first name is Juan.',
 'Ah, yes. Sorry. Juan Carlos.',
 'It’s Diaz.',
 'Oh, right. It’s Diaz Lopez.',
 'I am male.',
 "It's 123-4567890-1.",
 'Oh, my bad. It’s 123-456789-1.',
 'Yes, I do.',
 'Umm, I think it’s B.',
 'It’s 15th of April, 1985.',
 'I was born in Buenos Aires.',
 'I am Argentinean.',
 'I am married.']

In [164]:
updated_data, errors = handle_worksheet_filling('I have license category')

Error with field 'license_category': License category specified without a valid category.


In [165]:
errors

[{'field': 'license_category',
  'value': None,
  'message': 'License category specified without a valid category.'}]

In [276]:
def build_error_prompt(error: dict):
    """Format one validation error as a prompt snippet.

    Reads the ``field``, ``value`` and ``message`` keys of ``error`` and
    returns a two-line block wrapped in a leading and trailing newline.
    """
    headline = f"- {error['field']}: {error['value']}"
    detail = f"  error: {error['message']}"
    # Empty first and last items produce the surrounding newlines.
    return "\n".join(["", headline, detail, ""])

def build_data_string(data):
    """Render ``{subject: {field: value}}`` as text.

    Each subject becomes a ``- subject`` line followed by one indented
    ``field: value`` line per field; lines are joined with newlines.
    """
    rendered = []
    for subject in data:
        rendered.append(f"- {subject}")
        rendered.extend(
            f"     {name}: {current}" for name, current in data[subject].items()
        )
    return "\n".join(rendered)

In [168]:
print(build_error_prompt(errors[0]))


- license_category: None
  error: License category specified without a valid category.



In [277]:
def get_error_instruction(error: dict):
    """Ask the model for a recruiter message that requests a corrected answer.

    Uses the module-level ``yml_schema`` and ``conversation_handler``; the
    last 10 messages of conversation ``"1"`` are included in the prompt.

    Args:
        error: validation error with ``field``, ``value`` and ``message`` keys.

    Returns:
        str: the ``text`` value of the JSON block in the model response.
    """
    field_name = error['field']

    # Join the messages into lines (as echo() does) instead of interpolating
    # the raw list. The candidate's latest utterance reaches the model through
    # this history, so no fixed sample utterance is injected into the prompt.
    recent_messages = get_last_n_messages(conversation_handler.get_conversation_history(), "1", 10)
    conversation_history_str = '\n'.join(recent_messages)

    response = get_gpt(f"""
You are recruiter that need to get worksheet information from candidate to security gut position at Latin America. 

Field error:

{build_error_prompt(error)}

Field info:
{build_field_prompt(yml_schema, field_name)}

Conversation history:
{conversation_history_str}

Ask for correct answer.

Here is examples:

Security Guard: Uh, yes. My first name is Juan.
Recruiter: Okay, Juan. Do you have any other first names?

Put the response into ```json``` with schema:

```
{{
    "text": string // Text message
}}
    """)

    data = select_json_block(response)
    return data['text']
    

In [176]:
print(build_data_string(memory_storage.get_data()))

- personal_info
     has_drivers_license: True
     birth_date: 1985-04-15
     gender: M
     birth_place: Buenos Aires


In [278]:
def next_steps(memory_storage, subject: str):
    """Ask the model for a recruiter question that fills another worksheet field.

    Uses the module-level ``yml_schema``.

    Args:
        memory_storage: object whose ``get_data()`` returns the filled values
            as ``{subject: {field: value}}``.
        subject: name of the subject whose fields are offered to the model.

    Returns:
        str: the ``text`` value of the JSON block in the model response.
    """
    field_catalog = build_schema_prompt(yml_schema, subject)
    filled_summary = build_data_string(memory_storage.get_data())

    prompt = f"""
    You are recruiter that need to get worksheet information from candidate to security gut position at Latin America. 
    
    Possible fields:
    {field_catalog} 
    
    Filled fields:
    {filled_summary}
    
    Please ask for the filling another field.
    
    Here is examples:
    
    - Okay, Juan. Do you have any other first names?
    - Great, thank you. Now, can you tell me your last names?
    - Perfect. And your ID number?
    - Alright. Now, your date of birth, please.
    
    Put the response into ```json``` with schema:
    
    ```
    {{
        "text": string // Text message
    }}
    """

    model_reply = get_gpt(prompt)
    return select_json_block(model_reply)['text']
    

In [279]:
def get_field_info(schema, field_name):
    """Locate ``field_name`` in a ``{subject: {field: info}}`` schema.

    Returns a dict with ``subject``, ``field`` and ``info`` keys for the first
    subject that contains the field, or None when no subject does.
    """
    hit = next(
        ((name, subject_fields) for name, subject_fields in schema.items()
         if field_name in subject_fields),
        None,
    )
    if hit is None:
        return None

    owner, owner_fields = hit
    return dict(subject=owner, field=field_name, info=owner_fields[field_name])

def build_field_prompt(yaml_content, field_name: str):
    """Describe a single field of the worksheet YAML as prompt text.

    Searches every subject's ``fields`` list for the first entry whose
    ``name`` equals ``field_name`` and returns three lines with its
    description, ``required`` flag and schema. When no field matches, a
    "not found" sentence is returned instead.
    """
    document = yaml.safe_load(yaml_content)

    matches = (
        spec
        for section in document['subjects'].values()
        for spec in section['fields']
        if spec['name'] == field_name
    )
    spec = next(matches, None)
    if spec is None:
        return f"Field '{field_name}' not found in the schema."

    declared_schema = spec['schema']
    return "\n".join((
        f"- {spec['name']}: {spec['description']}",
        f"   required: {spec['required']}",
        f"   schema: {declared_schema}",
    ))

In [154]:
field_name = 'license_category'
field_info = get_field_info(build_schema(yml_schema), field_name)

In [156]:
field_info

{'subject': 'personal_info',
 'field': 'license_category',
 'info': {'type': 'string', 'enum': ['A', 'B', 'C', 'D', 'E', 'F']}}

- license_category: Driver's license category
   required: False
   schema: {'type': 'string', 'enum': ['A', 'B', 'C', 'D', 'E', 'F']}


In [None]:
if field_info:
    print(f"Field '{field_name}' information:")
    print(f"Subject: {field_info['subject']}")
    # build_schema() keeps only type/enum/format per field, so 'info' is the
    # schema itself; description and required come from the YAML.
    print(f"Schema: {field_info['info']}")
    print(build_field_prompt(yml_schema, field_name))
else:
    print(f"Field '{field_name}' not found in the schema.")

In [145]:
conversation

[{'speaker': 'Recruiter',
  'text': 'Good morning, thank you for coming in today. We’re just going to fill out this worksheet with your information. Let’s start with your first names. Can you please tell me your first names?'},
 {'speaker': 'Security Guard', 'text': 'Uh, yes. My first name is Juan.'},
 {'speaker': 'Recruiter',
  'text': 'Okay, Juan. Do you have any other first names?'},
 {'speaker': 'Security Guard', 'text': 'Ah, yes. Sorry. Juan Carlos.'},
 {'speaker': 'Recruiter',
  'text': 'Great, thank you. Now, can you tell me your last names?'},
 {'speaker': 'Security Guard', 'text': 'It’s Diaz.'},
 {'speaker': 'Recruiter',
  'text': 'Just Diaz? Usually, people from your region have two last names.'},
 {'speaker': 'Security Guard', 'text': 'Oh, right. It’s Diaz Lopez.'},
 {'speaker': 'Recruiter', 'text': 'Thank you. What is your gender?'},
 {'speaker': 'Security Guard', 'text': 'I am male.'},
 {'speaker': 'Recruiter', 'text': 'Perfect. And your ID number?'},
 {'speaker': 'Securit

In [None]:
yml_schema

In [266]:
memory_storage = MemoryStorage()
conversation_handler = ConversationHandler(memory_storage)
data_manager = DataManager(memory_storage)

In [280]:
import gradio as gr


hist = None  # last Gradio `history` passed to echo(); kept at module level for inspection


def echo(message, history):
    """Gradio chat callback: record the candidate's message and produce the recruiter's reply.

    The message is appended to conversation '1' as 'Security Guard', the last
    10 messages are joined into a transcript and passed to
    handle_worksheet_filling(). If that reports errors, the reply is the
    instruction for the first error; otherwise it is the next step for the
    'personal_info' subject. The reply is appended to the conversation as
    'Recruiter' before being returned.

    Args:
        message: Text the user just typed into the chat box.
        history: Chat history supplied by Gradio; only stored in the
            module-level `hist` for debugging.

    Returns:
        The recruiter's response text.
    """
    # Without `global`, the assignment below would create a throw-away local
    # and the module-level `hist` would stay None forever.
    global hist

    conversation_handler.update_conversation('1', 'Security Guard', message)
    hist = history
    conversation_history_str = '\n'.join(get_last_n_messages(conversation_handler.get_conversation_history(), "1", 10))
    # The updated data is not needed here; only the validation errors drive the reply.
    _updated_data, errors = handle_worksheet_filling(conversation_history_str)

    if errors:
        # Address one problem at a time: only the first error is surfaced.
        response = get_error_instruction(errors[0])
    else:
        response = next_steps(memory_storage, 'personal_info')

    conversation_handler.update_conversation('1', 'Recruiter', response)
    return response
    

# Build the chat UI around echo() and start the local Gradio server.
# NOTE(review): the "Echo Bot" title and the hello/hola/merhaba examples look
# like leftovers from the Gradio echo demo rather than worksheet prompts — confirm.
demo = gr.ChatInterface(fn=echo, examples=["hello", "hola", "merhaba"], title="Echo Bot", chatbot=gr.Chatbot(height=300))
demo.launch()

Running on local URL:  http://127.0.0.1:7881

To create a public link, set `share=True` in `launch()`.




```json
{
  "personal_info": {
    "first_names": "Jane",
    "last_names": null,
    "gender": "Female",
    "id_number": "123-4567890-1",
    "has_drivers_license": null,
    "license_category": null,
    "birth_date": null,
    "birth_place": null,
    "nationality": null,
    "marital_status": null
  },
  "errors": []
}
```Temperature: 0.4

    Please find entities at speech of Latin America candidate on security guy position.

    Here is possible fields:
    personal_info: Personal information about the individual
  - first_names: string
    description: First names (can be multiple in Hispanic cultures)
  - last_names: string
    description: Last name of security guard
  - gender: string
    description: Gender (Male or Female)
  - id_number: string
    description: ID number. Format ###-########-#, last digit is a checksum
  - has_drivers_license: boolean
    description: Do you have a driver's license?
  - license_category: string
    description: Driver's license category
  

In [205]:
def get_last_n_messages(messages, id, n):
    """Return the last `n` messages of one conversation as "sender: text" strings.

    Args:
        messages: Mapping of conversation id (string) to a list of message
            dicts, each with 'from' and 'text' keys.
        id: Conversation id; converted with str() before the lookup.
        n: Maximum number of trailing messages to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of formatted strings, oldest first.

    Raises:
        KeyError: If `messages` has no entry for `str(id)`.
    """
    conversation = messages[str(id)]
    # `conversation[-0:]` is the *whole* list and a negative n would drop
    # leading messages instead, so non-positive counts are handled explicitly.
    if n <= 0:
        return []
    return [f"{message['from']}: {message['text']}" for message in conversation[-n:]]

In [201]:
# Scratch call: add a throwaway 'recruiter' message to conversation '1'.
conversation_handler.update_conversation('1', 'recruiter', 'lol')

In [215]:
# Dump conversation '1', one "sender: text" line per message with a blank line between.
for msg in conversation_handler.get_conversation_history()['1']:
    # Outer quotes are double so the inner ['from'] / ['text'] lookups parse on
    # Python < 3.12 too (reusing the same quote type needs PEP 701).
    print(f"{msg['from']}: {msg['text']}")
    print()

recruiter: lol

Security Guard: My name is jane Diaz

Recruiter: Great, thank you, Jane. Can you please provide your gender? (M or F)

Security Guard: male

Recruiter: Perfect. And your ID number?

Security Guard: 123

Recruiter: Thank you for providing your name, Jane. It seems there was an error with the first names field. Could you please confirm your first names? For example, if you have multiple first names, please list them all.

Security Guard: My name is Jane Diaz

Recruiter: Alright, Jane. Now, can you please provide your ID number in the correct format? It should be ###-########-#.

Security Guard: 123

Recruiter: Thank you for providing your name, Jane. It seems there was an error with the first names field. Could you please confirm your first names? For example, if you have multiple first names, please list them all.



In [None]:
# Print the last 10 messages of conversation "1", one per line.
print(*get_last_n_messages(conversation_handler.get_conversation_history(), "1", 10), sep='\n')

In [None]:
# Inspect what the storage currently returns from get_data().
memory_storage.get_data()

In [263]:
# Inspect the MemoryStorage instance itself.
memory_storage

[0;31mType:[0m        MemoryStorage
[0;31mString form:[0m <__main__.MemoryStorage object at 0x138309b50>
[0;31mDocstring:[0m   <no docstring>

In [267]:
# Manually seed the 'personal_info' subject with sample name values.
# NOTE(review): presumably merges into any existing 'personal_info' record — confirm against MemoryStorage.update_data.
memory_storage.update_data("personal_info", {'last_names': 'Paul Diez', 'first_names': 'Jane'})

```json
{
  "personal_info": {
    "birth_date": "1990-09-14"
  },
  "incomplete_fields": []
}
```

'```json\n{\n  "personal_info": {\n    "birth_date": "1990-09-14"\n  },\n  "incomplete_fields": []\n}\n```'

In [290]:
Recruiter: Alright, it seems there was an error with the date format. Can you please provide your full date of birth in the format year, month, day? For example, 1990-09-14.
Security Guard: I was born at 1983

    
    Here is the validation error field schema:
    
    errors: array
        - field: <field_name>
        - value: <wrong field value>
        - message: <validation error message>

                
    Please set a field to null in its subject block if it meets one of these conditions:
    - field has validation error
    - field was not specified

    
    If a field is missing, do not treat it as a validation error.


SyntaxError: leading zeros in decimal integer literals are not permitted; use an 0o prefix for octal integers (3339983748.py, line 1)