In [None]:
from dotenv import load_dotenv
import pandas as pd
from langchain_openai import AzureChatOpenAI
from themefinder import find_themes

In [None]:
# Read the LLM API settings from a local .env file before the client is built
load_dotenv()

# Build the chat model through langchain; swap in a different langchain
# chat model here if you use another provider.
llm = AzureChatOpenAI(
    temperature=0,
    model="gpt-4o",
    # Extra request parameter forwarded to the model: ask for JSON-object responses
    model_kwargs=dict(response_format={"type": "json_object"}),
)

In [None]:
# Base name of the input file; the output cell reuses it for the result file
file_name = "detailed_synthetic_data"

# Read the example responses and rename the "id" column to "response_id"
# in a single step.
responses_df = (
    pd.read_json(f"../example_data/inputs/{file_name}.json")
    .rename(columns={"id": "response_id"})
)

responses_df

In [None]:
# The consultation question the responses were written in answer to
question = (
    "What are your views on the building of a new nuclear power plant in your town?"
)

# System prompt passed to the LLM; adapt the wording to your own use case
system_prompt = (
    "You are an AI evaluation tool analyzing public responses to a consultation."
)

In [None]:
# And get themefinder results!
result = await find_themes(responses_df, llm, question, system_prompt)

result

In [None]:
# Pull the "mapping" entry out of the find_themes result and display it.
# Later cells explode its "stances" and "labels" columns and drop "reasons".

mappings_df = result["mapping"]

mappings_df

In [None]:
# Build a lookup table of themes from the "refined_topics" entry:
# transpose it, move the index into a column, then rename "topic_id" to
# "labels" (the merge key used later) and column 0 to "themes".
topics_df = (
    result["refined_topics"]
    .transpose()
    .reset_index()
    .rename(columns={"topic_id": "labels", 0: "themes"})
)

topics_df

In [None]:
# Match each mapped label to its theme text (the file is written in the next cell).
#  1. explode "stances" and "labels" together so every entry gets its own row
#     (assumes both columns hold equal-length lists per response)
#  2. left-join the theme lookup on "labels"
#  3. drop the "labels" key and the "reasons" column from the final table
df = (
    mappings_df
    .explode(["stances", "labels"])
    .merge(topics_df, on="labels", how="left")
    .drop(columns=["labels", "reasons"])
)

df

In [None]:
# Save the matched table as a JSON array of records, named after the input file
output_path = f"../example_data/outputs/{file_name}_mapped.json"
df.to_json(output_path, orient="records")