In [1]:
from validmind.models import FoundationModel, Prompt

In [2]:
import os

import dotenv
# Load variables from a local .env file (if one exists) into the process
# environment before checking for the key.
dotenv.load_dotenv()

# Fail fast when the key is unusable. An empty or whitespace-only value is
# treated the same as a missing one, since it would only fail later at the
# first API call.
if not (os.getenv("OPENAI_API_KEY") or "").strip():
    raise RuntimeError("OPENAI_API_KEY not found")

In [3]:
import openai

def call_model(prompt, model="gpt-3.5-turbo", temperature=None):
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Fully rendered prompt text; sent as the content of one
            ``user`` message.
        model: Chat model identifier passed to the API. Defaults to the
            model this notebook was originally written against.
        temperature: Optional sampling temperature. When ``None`` the
            argument is omitted from the request, so the API's own default
            applies (same request as before this parameter existed).

    Returns:
        The ``content`` field of the first choice's message.

    Raises:
        Any exception raised by ``openai.ChatCompletion.create``; nothing
        is caught here.
    """
    request = {
        "model": model,
        "messages": [
            {"role": "user", "content": prompt},
        ],
    }
    # Only forward temperature when the caller asked for one, so the default
    # call stays identical to the original request.
    if temperature is not None:
        request["temperature"] = temperature

    response = openai.ChatCompletion.create(**request)
    return response.choices[0].message["content"]

In [4]:
# Prompt template for the sentiment task. It is passed to validmind's `Prompt`
# further down together with `prompt_variables`. The literal starts and ends
# with a newline inside the triple quotes; `.strip()` removes them so the
# template begins at "You are an AI..." and ends at "...assumptions.".
# `{Sentence}` is the only placeholder in the text.
prompt_template = """
You are an AI with expertise in sentiment analysis, particularly in the context of financial news.
Your task is to analyze the sentiment of a specific sentence provided below.
Before proceeding, take a moment to understand the context and nuances of the financial terminology used in the sentence.

Sentence to Analyze:
```
{Sentence}
```

Please respond with the sentiment of the sentence denoted by one of either 'positive', 'negative', or 'neutral'.
Please respond only with the sentiment enum value. Do not include any other text in your response.

Note: Ensure that your analysis is based on the content of the sentence and not on external information or assumptions.
""".strip()

# Names of the placeholders used in `prompt_template`.
# NOTE(review): presumably each name must match a dataset column ("Sentence"
# is also the text column used later) — confirm against the Prompt API.
prompt_variables = ["Sentence"]

In [5]:
import pandas as pd

# Full sentiment dataset, read from a path relative to the notebook's
# working directory.
# NOTE(review): the frame displayed below has an "Unnamed: 0" column, which
# looks like a saved index — consider `index_col=0` if it is not a feature.
df = pd.read_csv('./datasets/sentiments.csv')

# Small sample (first 10 rows, index renumbered from 0) used for the model
# calls; the last expression displays it.
df_test = df.head(10).reset_index(drop=True)
df_test

Unnamed: 0,Sentiment,Sentence
0,neutral,"According to Gran , the company has no plans t..."
1,neutral,Technopolis plans to develop in stages an area...
2,negative,The international electronic industry company ...
3,positive,With the new production plant the company woul...
4,positive,According to the company 's updated strategy f...
5,positive,FINANCING OF ASPOCOMP 'S GROWTH Aspocomp is ag...
6,positive,"For the last quarter of 2010 , Componenta 's n..."
7,positive,"In the third quarter of 2010 , net sales incre..."
8,positive,Operating profit rose to EUR 13.1 mn from EUR ...
9,positive,"Operating profit totalled EUR 21.1 mn , up fro..."


In [6]:
import validmind as vm

# Connect the validmind client to the tracking API on localhost and select
# the project that results are attached to.
# NOTE(review): no credentials are passed here — presumably they are read
# from the environment; confirm before sharing this notebook.
vm.init(
    project="clkh0yg4r00cgg9ryfwfy8pqo",
    api_host="http://localhost:3000/api/v1/tracking",
)

2023-08-21 16:50:31,622 - INFO(validmind.api_client): Connected to ValidMind. Project: Demo project 2 (clkh0yg4r00cgg9ryfwfy8pqo)


In [7]:
# Column roles shared by both dataset wrappers: the text fed to the prompt
# and the label it is compared against.
sentiment_columns = {"text_column": "Sentence", "target_column": "Sentiment"}

# Wrap the full frame and the 10-row sample as validmind datasets.
vm_dataset = vm.init_dataset(dataset=df, **sentiment_columns)
vm_test_ds = vm.init_dataset(dataset=df_test, **sentiment_columns)

# Template text plus the placeholder names it contains.
sentiment_prompt = Prompt(template=prompt_template, variables=prompt_variables)

# NOTE(review): the same 10-row dataset is passed as both train_ds and
# test_ds — this looks like a way to keep the number of API calls small, but
# it means the model is queried for the same rows under both roles; confirm
# this is intended.
vm_model = FoundationModel(
    predict_fn=call_model,
    prompt=sentiment_prompt,
    train_ds=vm_test_ds,
    test_ds=vm_test_ds,
)

2023-08-21 16:50:31,626 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...
2023-08-21 16:50:31,643 - INFO(validmind.client): Pandas dataset detected. Initializing VM Dataset instance...
2023-08-21 16:50:31,650 - INFO(validmind.models.foundation): Running predict() for `train_ds`... This may take a while
2023-08-21 16:50:34,630 - INFO(validmind.models.foundation): Running predict() for `test_ds`... This may take a while


In [8]:
# Run the validmind test suite against the model and keep the result object.
# NOTE(review): the suite name says "binary", but the sample rows show three
# labels (neutral / negative / positive) — confirm this is the intended suite.
# NOTE(review): `dataset` is the full frame, while the model was built with
# the 10-row sample — confirm the two are meant to differ.
test_suite = vm.run_test_suite(
    "binary_classifier_full_suite", dataset=vm_dataset, model=vm_model
)

HBox(children=(Label(value='Running test suite...'), IntProgress(value=0, max=58)))

2023-08-21 16:50:37,914 - ERROR(validmind.vm_models.test_plan): Failed to run test 'descriptive_statistics': (ValueError) Cannot describe a DataFrame without columns

Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'negative') is ignored when average != 'binary' (got 'micro'). You may use labels=[pos_label] to specify a single positive class.


Note that pos_label (set to 'nega

VBox(children=(HTML(value='<h2>Test Suite Results: <i style="color: #DE257E">Binary Classifier Full Suite</i><…