In [None]:
import json
import os
import urllib.request

import pandas as pd
from dotenv import load_dotenv
from langchain_openai import OpenAI
from themefinder import find_themes

In [None]:
# Load LLM API settings from .env file
load_dotenv()

# Connection parameters for the Albert API
api_key = os.getenv("ALBERT_API")
if not api_key:
    # Fail here with a clear message instead of a later, less obvious authentication error.
    raise RuntimeError(
        "ALBERT_API is not set; add it to your .env file or environment."
    )
base_url = "https://albert.api.etalab.gouv.fr/v1"
model = "mistralai/Mixtral-8x7B-Instruct-v0.1"

# Model options
# https://python.langchain.com/docs/integrations/llms/openai/
# meta-llama/Meta-Llama-3.1-8B-Instruct
# mistralai/Mixtral-8x7B-Instruct-v0.1
# AgentPublic/llama3-instruct-8b
# BAAI/bge-m3
# AgentPublic/llama3-instruct-guillaumetell
# intfloat/multilingual-e5-large
# google/gemma-2-9b-it

# TODO - make this work with Albert API
# NOTE(review): `OpenAI` is LangChain's text-completion wrapper; if Albert only serves
# chat completions for this model, `ChatOpenAI` may be required instead - confirm.
# Initialise LLM using langchain.
# The model name must be passed explicitly: without it the wrapper uses its own
# default OpenAI model name rather than the Albert model selected above.
llm = OpenAI(api_key=api_key, base_url=base_url, model=model)

In [None]:
# List the ids of the models served by the endpoint.
# The LangChain `OpenAI` wrapper does not expose `.models` (that attribute belongs to
# the raw OpenAI SDK client), so the `/models` route is queried directly.
# NOTE(review): assumes the endpoint follows the OpenAI "list models" response shape
# ({"data": [{"id": ...}, ...]}) and accepts a Bearer token - confirm against Albert docs.
models_request = urllib.request.Request(
    f"{base_url}/models",
    headers={"Authorization": f"Bearer {api_key}"},
)
with urllib.request.urlopen(models_request, timeout=30) as models_response:
    models = json.load(models_response)

# The loop variable is deliberately not named `model`, so the model name chosen in
# the configuration cell is not overwritten when this cell runs.
for available_model in models["data"]:
    print(available_model["id"])

In [None]:
# Locate the example input file; `file_name` is reused later to name the output file
file_name = "detailed_synthetic_data"
input_path = f"../example_data/inputs/{file_name}.json"

# Read the JSON and rename the "id" column to "response_id" in a single chained step
responses_df = pd.read_json(input_path).rename(columns={"id": "response_id"})
responses_df

In [None]:
# Consultation question that the responses answer
question = (
    "Avez-vous des commentaires sur la construction "
    "d'une nouvelle centrale nucléaire à Normandie?"
)

# System prompt tailored to this use case.
# NOTE(review): "Respondez en francais" looks misspelled (expected "Répondez en français");
# left unchanged here because it is part of the prompt text sent to the model.
system_prompt = (
    "Vous êtes un outil d'évaluation de l'IA analysant les réponses du public "
    "à une consultation. Respondez en francais."
)


In [None]:
# Run themefinder on the loaded responses, passing the LLM, the consultation
# question and the custom system prompt as positional arguments.
# Top-level `await` works in a notebook cell; it is not valid in a plain Python script.
result = await find_themes(responses_df, llm, question, system_prompt)

# Display the returned object; later cells read its "mapping" and "refined_topics" entries.
result


In [None]:
# Pull the "mapping" entry out of the themefinder result.
# The matching cell further down expects it to carry "stances", "labels" and "reasons" columns.
mappings_df = result["mapping"]
mappings_df


In [None]:
# Build a label -> theme lookup table from the "refined_topics" entry:
# transpose it, move the index into a column, then rename the columns so the
# table can be joined on "labels".
topics_df = (
    result["refined_topics"]
    .T
    .reset_index()
    .rename(columns={"topic_id": "labels", 0: "themes"})
)

topics_df

In [None]:
# Match each response's labels to their theme text:
#   1. explode the paired "stances"/"labels" lists so there is one row per label,
#   2. left-join the theme lookup on "labels",
#   3. drop the columns that are not wanted in the output.
df = (
    mappings_df
    .explode(["stances", "labels"])
    .merge(topics_df, on="labels", how="left")
    .drop(columns=["labels", "reasons"])
)

df

In [None]:
# Save the matched table as JSON records, naming the file after the input data set
output_path = f"../example_data/outputs/albert_{file_name}_mapped.json"
df.to_json(output_path, orient="records")