# Example: Using VectorizedOpenAI with Pandas

This notebook demonstrates how to use the `VectorizedOpenAI` client to translate text data within a Pandas DataFrame into multiple languages.

In [1]:
# Import necessary libraries
import pandas as pd
from openai import OpenAI
from openaivec import VectorizedOpenAI
from typing import List
from pydantic import BaseModel

In [2]:
# Build the vectorized client used for the French example below; the system
# message is the only place the target language is specified.
client = VectorizedOpenAI(
    model_name="gpt-4o-mini",
    system_message="translate the following text to French",
    client=OpenAI(),
)

In [3]:
# Sample inputs: ten English fruit names that the later cells translate
entities: List[str] = [
    "apple",
    "banana",
    "orange",
    "grape",
    "kiwi",
    "mango",
    "peach",
    "pear",
    "pineapple",
    "strawberry",
]

In [4]:
# Wrap the entity names in a single-column frame (column name: "entity")
df = pd.DataFrame(entities, columns=["entity"])
# Bare last expression so the notebook renders the frame
df

Unnamed: 0,entity
0,apple
1,banana
2,orange
3,grape
4,kiwi
5,mango
6,peach
7,pear
8,pineapple
9,strawberry


In [5]:
# Show a copy of the frame with an extra `fr` column holding the client's
# prediction for each entity; `assign` returns a new frame, so `df` itself
# is left unchanged.
df.assign(fr=lambda frame: client.predict(frame["entity"]))

Unnamed: 0,entity,fr
0,apple,pomme
1,banana,banane
2,orange,orange
3,grape,raisin
4,kiwi,kiwi
5,mango,mangue
6,peach,pêche
7,pear,poire
8,pineapple,ananas
9,strawberry,fraise


## Multi-language Translation Example

The following example translates each entity into multiple languages at once using the Structured Output feature of `VectorizedOpenAI`: a Pydantic model is passed as `response_format`, so each prediction comes back as an instance of that model rather than as a plain string. Returning translations in this structured form simplifies data handling and integration with Pandas DataFrames.

In [None]:
# Schema for one multi-language translation result: one string field per
# target language, named by language code. The fields follow the same order as
# the languages listed in the system message of the client below, and the
# rendered result table shows its columns in this field order.
class Translation(BaseModel):
    en: str  # English
    fr: str  # French
    ja: str  # Japanese
    es: str  # Spanish
    de: str  # German
    it: str  # Italian
    pt: str  # Portuguese
    ru: str  # Russian

# Build a client for the multi-language example: the system message lists the
# eight target languages and `Translation` is supplied as the response format.
# NOTE(review): this rebinds `client`, replacing the French-only client created
# in the earlier cell; re-running the earlier `df.assign` cell after this one
# would therefore use this client instead.
client = VectorizedOpenAI(
    response_format=Translation,
    system_message="translate the following text to English, French, Japanese, Spanish, German, Italian, Portuguese, and Russian",
    model_name="gpt-4o-mini",
    client=OpenAI(),
)

# Run the prediction over the whole `entity` column
translations = client.predict(df["entity"])

# Dump every result to a plain dict and tabulate them, one row per result
pd.DataFrame([translation.model_dump() for translation in translations])

Unnamed: 0,en,fr,ja,es,de,it,pt,ru
0,apple,pomme,リンゴ,manzana,Apfel,mela,maçã,яблоко
1,banana,banane,バナナ,plátano,Banane,banana,banana,банан
2,orange,orange,オレンジ,naranja,Orange,arancia,laranja,апельсин
3,grape,raisin,ぶどう,uva,Traube,uva,uva,виноград
4,kiwi,kiwi,キウイ,kiwi,Kiwi,kiwi,kiwi,киви
5,mango,mangue,マンゴー,mango,Mango,mango,manga,манго
6,peach,pêche,桃,durazno,Pfirsich,pesca,pêssego,персик
7,pear,poire,梨,pera,Birne,pera,pera,груша
8,pineapple,ananas,パイナップル,piña,Ananas,ananas,abacaxi,ананас
9,strawberry,fraise,いちご,fresa,Erdbeere,fragola,morango,клубника
