In [1]:
from validmind.models import FoundationModel, Prompt

In [2]:
import os

import dotenv
dotenv.load_dotenv()

if os.getenv("OPENAI_API_KEY") is None:
    raise Exception("OPENAI_API_KEY not found")

In [3]:
import openai

def call_model(prompt):
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": prompt},
        ]
    ).choices[0].message["content"]

In [4]:
prompt_template = """
You are an AI with expertise in sentiment analysis, particularly in the context of financial news.
Your task is to analyze the sentiment of a specific sentence provided below.
Before proceeding, take a moment to understand the context and nuances of the financial terminology used in the sentence.

Sentence to Analyze:
```
{Sentence}
```

Please respond with the sentiment of the sentence denoted by one of either 'positive', 'negative', or 'neutral'.
Please respond only with the sentiment enum value. Do not include any other text in your response.

Note: Ensure that your analysis is based on the content of the sentence and not on external information or assumptions.
""".strip()

prompt_variables = ["Sentence"]

In [5]:
import pandas as pd

df = pd.read_csv('./datasets/sentiments.csv')

df_test = df[:10].reset_index(drop=True)
df_test

Unnamed: 0,Sentiment,Sentence
0,neutral,"According to Gran , the company has no plans t..."
1,neutral,Technopolis plans to develop in stages an area...
2,negative,The international electronic industry company ...
3,positive,With the new production plant the company woul...
4,positive,According to the company 's updated strategy f...
5,positive,FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...
6,positive,"For the last quarter of 2010 , Componenta 's n..."
7,positive,"In the third quarter of 2010 , net sales incre..."
8,positive,Operating profit rose to EUR 13.1 mn from EUR ...
9,positive,"Operating profit totalled EUR 21.1 mn , up fro..."


In [6]:
import validmind as vm

vm.init(
    api_host="http://localhost:3000/api/v1/tracking",
    project="clkh0yg4r00cgg9ryfwfy8pqo",
)

2023-08-17 17:29:20,284 - INFO(validmind.api_client): Connected to ValidMind. Project: Demo project 2 (clkh0yg4r00cgg9ryfwfy8pqo)


In [7]:
vm_dataset = vm.init_dataset(
    dataset=df,
    text_column="Sentence",
    target_column="Sentiment",
)

vm_test_ds = vm.init_dataset(
    dataset=df_test,
    text_column="Sentence",
    target_column="Sentiment",
)

vm_model = FoundationModel(
    predict_fn=call_model,
    prompt=Prompt(
        prompt_template=prompt_template,
        prompt_variables=prompt_variables,
    ),
    train_ds=vm_test_ds,
    test_ds=vm_test_ds,
)

2023-08-17 17:29:20,340 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...
2023-08-17 17:29:20,355 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...


In [8]:
vm.run_test_suite(
    "binary_classifier_full_suite",
    model=vm_model,
    dataset=vm_dataset,
)

HBox(children=(Label(value='Running test suite...'), IntProgress(value=0, max=58)))

2023-08-17 17:29:29,550 - ERROR(validmind.vm_models.test_plan): Failed to run test 'dataset_description': Unsupported field type found when computing its histogram: Text
2023-08-17 17:29:29,551 - ERROR(validmind.vm_models.test_plan): Failed to run test 'descriptive_statistics': Cannot describe a DataFrame without columns
2023-08-17 17:29:31,643 - ERROR(validmind.vm_models.test_plan): Failed to run test 'model_metadata': 'Foundation'

Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'negative') is ignored when average != 'binary' (got '

VBox(children=(HTML(value='<h2>Test Suite Results: <i style="color: #DE257E">Binary Classifier Full Suite</i><…

BinaryClassifierFullSuite(config=None, _global_config=None, _test_configs=None, test_context=TestContext(dataset=DataFrameDataset(_raw_dataset=array([['neutral',
        'According to Gran , the company has no plans to move all production to Russia , although that is where the company is growing .'],
       ['neutral',
        'Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said .'],
       ['negative',
        'The international electronic industry company Elcoteq has laid off tens of employees from its Tallinn facility ; contrary to earlier layoffs the company contracted the ranks of its office workers , the daily Postimees reported .'],
       ...,
       ['negative',
        'Operating profit fell to EUR 35.4 mn from EUR 68.8 mn in 2007 , including vessel sales gain of EUR 12.3 mn .'],
       ['negative',
        'Net sales of the Paper segment de