In [None]:
import pandas as pd
import themefinder
from langchain_openai import AzureChatOpenAI
import string

In [None]:
# The consultation question that every response in the dataset is answering.
question = (
    "What improvements would you most like to see "
    "in local public transportation?"
)

# Load the example responses into a dataframe.
responses = pd.read_json("./example_data.json")

In [None]:
# Display the loaded dataframe for a manual sanity check.
# It must have the following columns: response_id, response
# and the response_id values should start from 1.
responses

In [None]:
# Let's look at some sample responses to understand the variety of input data.
# head(10) stops at the end of the dataframe, so a dataset with fewer than 10
# responses is printed in full instead of raising an IndexError.
print("Sample responses about public transportation improvements:\n")
preview = responses.head(10)
for response_id, response in zip(preview["response_id"], preview["response"]):
    print(f"{response_id}: {response}")
    print()

# Draft Email for IT Department - API Key Request

**Subject:** Request for Azure OpenAI API Access for ThemeFinder Project

---

**To:** IT Support / Cloud Services Team  
**From:** [Your Name]  
**Date:** [Today's Date]  
**Subject:** Request for Azure OpenAI API Access for ThemeFinder Project

Dear IT Team,

I am working on implementing **ThemeFinder**, an AI-powered topic modeling tool for analyzing survey responses and public consultation feedback. This tool will help our department process citizen feedback more efficiently and identify key themes in public responses.

**Project Overview:**
- **Tool:** ThemeFinder (Python package for topic modeling)
- **Purpose:** Automated analysis of survey responses and consultation data
- **Use Case:** Processing public feedback to identify themes and sentiment for policy makers
- **Repository:** https://github.com/i-dot-ai/themefinder/

**Required API Access:**
I need API access through one of the following options (Azure OpenAI is preferred):

**Option 1: Azure OpenAI (Preferred)**
- `AZURE_OPENAI_API_KEY` - API key for our Azure OpenAI resource
- `AZURE_OPENAI_ENDPOINT` - Endpoint URL (format: https://your-resource-name.openai.azure.com/)
- `OPENAI_API_VERSION` - API version (recommended: 2024-02-15-preview)
- `DEPLOYMENT_NAME` - Name of GPT-4o deployment
- `AZURE_OPENAI_BASE_URL` - Base URL (same as endpoint)

**Option 2: OpenAI Direct (Alternative)**
- `OPENAI_API_KEY` - Direct OpenAI API key

**Technical Requirements:**
- Model access to GPT-4o or GPT-4 for optimal performance
- The tool supports structured outputs and works with various LLM providers
- Estimated usage: [specify your expected volume, e.g., "processing 200-500 responses per analysis"]
- Environment: Development container setup with secure credential management

**Security Considerations:**
- All API keys will be stored in `.env` files (not committed to version control)
- Development environment uses secure container isolation
- Tool is designed for government/public sector use with appropriate data handling

**Business Justification:**
This tool will significantly improve our ability to:
- Process large volumes of public consultation responses efficiently
- Identify key themes and sentiment in citizen feedback automatically
- Provide structured insights for policy decision-making
- Reduce manual analysis time from days/weeks to hours

**Next Steps:**
Could you please advise on:
1. The process for requesting Azure OpenAI access for our department
2. Any security approvals or data governance requirements
3. Whether we have existing Azure OpenAI resources I can use
4. Timeline for provisioning access

I'm happy to provide additional technical details or meet to discuss this request further.

Thank you for your assistance.

Best regards,  
[Your Name]  
[Your Department]  
[Contact Information]

---

**Attachments to consider including:**
- Link to ThemeFinder documentation: https://i-dot-ai.github.io/themefinder/
- This notebook demonstrating the tool's capabilities

In [None]:
# Build the chat model object the pipeline will call. Swap in the class that
# matches your provider, e.g. ChatGoogleGenerativeAI for Google's Gemini or
# ChatAnthropic for Claude.
# NOTE: your .env file must be correctly set up with the API key and any other
# variables your provider needs.
# NOTE(review): no visible cell loads the .env file - confirm the variables are
# actually present in the kernel's environment.
llm = AzureChatOpenAI(temperature=0, model_name="gpt-4o")

In [None]:
# Running the whole pipeline end-to-end in one go.
results = await themefinder.find_themes(
    responses, 
    llm=llm, 
    question=question,
    )

In [None]:
# Show the "themes" entry of the dictionary returned by find_themes.
results["themes"]

In [None]:
# The results of each stage of the pipeline can be viewed by accessing the keys
# of the returned dictionary, e.g. "themes" or "mapping".
# NOTE(review): by default a notebook cell only renders its last bare
# expression, so this cell displays results["mapping"]; evaluate
# results["themes"] in a cell of its own to view it.
results["themes"]
# or
results["mapping"]

In [None]:
# Review the generated themes before adjusting them in the next cell.
results["themes"]

In [None]:
# The themes generated by the LLM can be modified before mapping, e.g. merging
# similar themes or adding a default fallback theme like "Other". Edit the
# themes dataframe directly and feed it into the mapping stage of the pipeline.
from themefinder import theme_mapping

# Renumber the rows 0..n-1 so that .loc[len(themes)] below always appends a new
# row rather than overwriting a row that happens to carry that index label.
themes = results["themes"][["topic_id", "topic"]].copy().reset_index(drop=True)

# Give the fallback theme the first uppercase letter not already used as a
# topic_id. When the existing ids are A, B, C, ... this is simply the next
# letter; it also avoids a duplicate id if they are not.
used_ids = set(themes["topic_id"])
other_id = next(
    (letter for letter in string.ascii_uppercase if letter not in used_ids),
    None,
)
if other_id is None:
    # Explicit error instead of an opaque "string index out of range".
    raise ValueError(
        "No unused single-letter topic_id is left for the 'Other' theme"
    )

themes.loc[len(themes)] = {
    "topic_id": other_id,
    "topic": "Other: The response does not match any of the listed themes",
}

In [None]:
# It is possible for an LLM to be unable to process a response, if is too long or violates the models content filters, these responses can be reviewed in the 2nd element of the returned object for each task
mapping, unprocessed = await theme_mapping(
    responses,
    llm=llm,
    refined_themes_df=themes,
    question=question,
)

In [None]:
# Export the mapping to a spreadsheet (mapping.xlsx, relative to the current working directory)
mapping.to_excel("mapping.xlsx")