In [30]:
import os, sys
import json
import pandas as pd

# Lift pandas' display limits: None disables both the row cap and the
# per-cell text truncation, so frames are rendered in full.
for display_option in ('display.max_rows', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [2]:
from dotenv import load_dotenv

# Read environment variables from the settings file; override=True lets values
# from the file replace variables that are already set in the process.
# Left as the cell's trailing expression so the boolean result is still echoed.
load_dotenv(dotenv_path="../../../../settings/.env", override=True)

True

In [3]:
from IPython.display import Markdown, display

def printmd(string):
    """Render ``string`` as Markdown in the notebook's output area.

    Args:
        string: Markdown source text to display.
    """
    rendered = Markdown(string)
    display(rendered)

In [4]:
# Location of the local AIChampTools module, relative to this notebook.
aichamptools_dir = '../../../../05 Own Solutions/AIChampTools'
# Guard the insert so re-running this cell does not keep prepending duplicates.
if aichamptools_dir not in sys.path:
    sys.path.insert(0, aichamptools_dir)

import importlib

import AIChampTools as aichamptools_module

# Reload first, bind names second: importlib.reload() re-executes the module but
# does not rebind names that were already copied out of it via `from ... import`,
# so importing before reloading would leave the notebook using the stale objects.
# The return value is kept in `prevent_output`, as before.
prevent_output = importlib.reload(aichamptools_module)
from AIChampTools import AIChampTools, LLMUsage, PromptEngineeringExperiment, LLMMistral, LLMOpenAI

In [5]:
# Handle for the "MistralAI_1st_try" experiment; the report cells below load
# their results through it.
# NOTE(review): logs_folder is presumably where stored results live — confirm
# against PromptEngineeringExperiment.
experiment = PromptEngineeringExperiment(name="MistralAI_1st_try", logs_folder="../../logs/")

# Experiment Report

Playing around with Mistral AI models.

## Version 07

Checking how good Mistral is at generating JSON and how JSON output affects the length of the completion.

In [72]:
# Load the stored results of experiment version 07.
ver07_results_nf = experiment.load_results(ver="07")

# Turn the grouping/reporting columns into plain strings so that .unique() and ==
# can be used on them below (presumably the raw cells are dicts/lists — confirm
# against load_results).
ver07_results_nf["llm_params"] = ver07_results_nf["llm_params"].apply(json.dumps)
ver07_results_nf["data"] = ver07_results_nf["data"].apply(json.dumps)
ver07_results_nf["messages_template"] = ver07_results_nf["messages_template"].apply(str)


def _parse_assessment(raw):
    """Decode a JSON-encoded assessor cell; pass null cells through unchanged."""
    return json.loads(raw) if pd.notnull(raw) else raw


# One report section per distinct `data` value, in order of first appearance.
for i, data in enumerate(ver07_results_nf["data"].unique()):
    completions = ver07_results_nf[ver07_results_nf["data"] == data]
    # Expand the decoded assessor cells into columns ("valid" and "len" are read below).
    valid_df = pd.json_normalize(completions['assessor.assess_json_valid'].apply(_parse_assessment))
    len_df = pd.json_normalize(completions['assessor.assess_len'].apply(_parse_assessment))

    # Share of rows whose "valid" flag is exactly True, as a percentage.
    valid_pct = valid_df["valid"].eq(True).mean() * 100
    # Mean completion length, truncated to a whole number of characters.
    mean_len = int(len_df["len"].mean())

    printmd(f'### Variation {i+1}')
    printmd(f'**Messages template**: {list(completions["messages_template"].unique())}')
    # These two columns already hold strings (converted above), so they are shown as-is.
    printmd(f'**User Messages**: {completions["data"].unique()}')
    printmd(f'**LLM Params**: {completions["llm_params"].unique()}')
    printmd(f'**Completions in the experiment**: {len(completions)}')
    printmd(f"**Valid JSON's**: {valid_pct:.2f}%")
    printmd(f'**Length (characters)**: {mean_len}')
    


### Variation 1

**Messages template**: ["[{'role': 'system', 'content': '\\n            Act as a professional career consultant with 20 years of experience.\\n            \\n            Your objective is to help the Human create their CV.\\n            \\n            Collect information from them for you to be able to create a CV for them.\\n\\n            {json_mode_toggle}\\n        '}, {'role': 'user', 'content': 'hi'}]"]

**User Messages**: ['{"json_mode_toggle": "\\n            Respond in a valid JSON with the following fields (and only them):\\n            - \\"ai_message\\": your message (full message including the questions)\\n            - \\"status\\", the only possible values:\\n                - \\"collecting_missing_information\\": before you\'ve received the missing information from the human\\n                - \\"completed\\": once you\'ve collected the information\\n        "}']

**LLM Params**: ['{"model": "mistral-tiny", "temperature": 0, "n": 30}'
 '{"model": "mistral-tiny", "temperature": 0, "n": 50}']

**Completions in the experiment**: 80

**Valid JSON's**: 91.25%

**Length (characters)**: 853

### Variation 2

**Messages template**: ["[{'role': 'system', 'content': '\\n            Act as a professional career consultant with 20 years of experience.\\n            \\n            Your objective is to help the Human create their CV.\\n            \\n            Collect information from them for you to be able to create a CV for them.\\n\\n            {json_mode_toggle}\\n        '}, {'role': 'user', 'content': 'hi'}]"]

**User Messages**: ['{"json_mode_toggle": ""}']

**LLM Params**: ['{"model": "mistral-tiny", "temperature": 0, "n": 30}'
 '{"model": "mistral-tiny", "temperature": 0, "n": 50}']

**Completions in the experiment**: 80

**Valid JSON's**: 0.00%

**Length (characters)**: 1515

We can see that even the tiny Mistral model is great at outputting valid JSON (considering its price and speed, you can make an additional call when the first JSON is not valid).

Just like in <a href="https://github.com/TonySimonovsky/prompt_engineering_experiments/blob/main/experiments/OpenAIAttentionGrab/OpenAI%20Attention%20Grab%20(report).ipynb">OpenAI gpt-3.5</a>, JSON output significantly decreases the length of the completion.