In [4]:
import nltk
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer

# Fetch the NLTK data packages this notebook relies on; nltk.download reports
# "already up-to-date" instead of re-downloading when a package is present.
for _nltk_package in ("wordnet", "omw-1.4"):
    nltk.download(_nltk_package)

def generate_similar_words(keyword):
    """Return a sorted list of words related to ``keyword``.

    The result is the union of:
      * single-word WordNet synonyms: the lemma names of every synset found
        for the keyword, skipping names that contain ``_`` or whitespace
        (compound words or phrases);
      * the keyword itself plus its WordNet lemma as noun, verb and adjective;
      * naive suffix variants (``+s``, ``+ed``, ``+ing``). These are plain
        string concatenations, so they may not be real words
        (e.g. "admissioned").

    Args:
        keyword (str): Word to expand. Surrounding whitespace is ignored.

    Returns:
        list[str]: De-duplicated words in sorted order. Empty list when
        ``keyword`` is None, empty, or whitespace only.
    """
    # Guard: without it an empty keyword produced ['', 'ed', 'ing', 's'].
    if not keyword or not keyword.strip():
        return []
    keyword = keyword.strip()

    lemmatizer = WordNetLemmatizer()

    # Collect single-word lemma names across all synsets of the keyword.
    synonyms = {
        name
        for syn in wordnet.synsets(keyword)
        for name in (lemma.name() for lemma in syn.lemmas())
        if "_" not in name and len(name.split()) == 1
    }

    # Morphological variations: dictionary lemmas plus naive suffixing.
    variations = {
        keyword,  # Original word
        lemmatizer.lemmatize(keyword, pos='n'),  # Lemma (noun)
        lemmatizer.lemmatize(keyword, pos='v'),  # Lemma (verb)
        lemmatizer.lemmatize(keyword, pos='a'),  # Lemma (adjective)
        f"{keyword}s",  # Plural
        f"{keyword}ed",  # Past tense
        f"{keyword}ing",  # Present participle
    }

    # Combine, deduplicate (set union) and return in sorted order.
    return sorted(synonyms | variations)

# Example usage: expand one seed keyword and print the resulting word list.
# `similar_words` stays in the kernel namespace for later cells.
keyword = "admission"
similar_words = generate_similar_words(keyword)
print(similar_words)


[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/lindaliang/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/lindaliang/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


['access', 'accession', 'admission', 'admissioned', 'admissioning', 'admissions', 'admittance', 'entree']


In [5]:
# Display the word list computed by generate_similar_words(keyword) above.
similar_words

['access',
 'accession',
 'admission',
 'admissioned',
 'admissioning',
 'admissions',
 'admittance',
 'entree']

In [6]:
!pip install openai



In [14]:
pip install python-dotenv

Collecting python-dotenv
  Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1
Note: you may need to restart the kernel to use updated packages.


In [16]:
import openai
import os
from openai import OpenAI
from dotenv import load_dotenv

# Load variables from a local .env file (python-dotenv) so the API key does
# not have to be hardcoded in the notebook.
load_dotenv()
# .get() yields None when OPENAI_API_KEY is unset; no check is made here.
openai.api_key = os.environ.get("OPENAI_API_KEY")
# Module-level client used by generate_summary_for_topics for its API call.
client = OpenAI(api_key = openai.api_key)

def _parse_topic_summaries(reply_text, topic_names):
    """Extract ``{topic name: summary}`` pairs from the model's text reply.

    Only lines that start with one of ``topic_names`` followed by a colon are
    kept, so preamble such as "Here are the summaries:" is ignored. Leading
    list markers ("1.", "-", "*") and markdown emphasis around the name or
    the summary are tolerated and removed.
    """
    import re  # local import: only this helper needs it

    # Longest names first so "Topic 10: ..." is never credited to "Topic 1".
    candidates = sorted((str(name) for name in topic_names), key=len, reverse=True)
    list_marker = re.compile(r"^(?:[-*•#>]+\s*|\d+[.)]\s+)+")
    emphasis = " \t*_`"

    parsed = {}
    for raw_line in reply_text.splitlines():
        stripped = raw_line.strip()
        if ":" not in stripped:
            continue
        # Try the line as written first (a topic name may itself begin with
        # "1." or "-"), then again with leading list markers removed.
        for text in (stripped, list_marker.sub("", stripped)):
            text = text.lstrip(emphasis)
            matched = next(
                (
                    name
                    for name in candidates
                    if text.startswith(name)
                    and text[len(name):].lstrip(emphasis).startswith(":")
                ),
                None,
            )
            if matched is not None:
                remainder = text[len(matched):].lstrip(emphasis)
                parsed[matched] = remainder[1:].strip(emphasis)
                break
    return parsed


def generate_summary_for_topics(
    topics: dict,
    *,
    model: str = "gpt-4",
    max_tokens=None,
    temperature: float = 0.5,
    api_client=None,
) -> dict:
    """
    Generates one-word summaries for the given topics using the OpenAI chat API.

    All topics are sent in a single prompt, one ``name: word, word, ...`` line
    per topic, and the reply is parsed back into a dictionary.

    Args:
        topics (dict): Dictionary with topic names and their top words.
        model (str): Chat model to query. Defaults to "gpt-4".
        max_tokens (int | None): Upper bound on reply length. When None it is
            scaled with the number of topics (never below 50) so that larger
            topic sets are not cut off mid-reply.
        temperature (float): Sampling temperature passed to the API.
        api_client: Object exposing ``chat.completions.create``. Defaults to
            the module-level ``client``.

    Returns:
        dict: Topic names (as strings) mapped to their summaries. Only topics
        present in ``topics`` and found in the reply are included. Returns an
        empty dict when ``topics`` is empty (no API call is made) or when the
        API call fails (the error is printed, not raised).
    """
    if not topics:
        # Nothing to summarize; avoid a pointless (billable) request.
        return {}

    # Create a prompt with all the topics and their words
    prompt_lines = ["For each of the following topics, summarize the key idea in one word:\n"]
    for topic, words in topics.items():
        word_list = ", ".join(map(str, words))  # str() tolerates non-string items
        prompt_lines.append(f"{topic}: {word_list}")
    prompt = "\n".join(prompt_lines) + "\n"

    if max_tokens is None:
        # Heuristic budget of ~16 tokens per "name: word" line, floor of 50.
        max_tokens = max(50, 16 * len(topics))

    try:
        # Resolved inside the try so a missing global client is reported
        # through the same best-effort path as any other failure.
        active_client = client if api_client is None else api_client
        response = active_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            max_tokens=max_tokens,
            temperature=temperature
        )
        # content can be None; treat that as an empty reply.
        reply_text = response.choices[0].message.content or ""
    except Exception as e:
        # Deliberate best-effort: report and return an empty result.
        print(f"Error generating summary: {e}")
        return {}

    return _parse_topic_summaries(reply_text, topics)

# Example usage: five hand-written topics, each a list of ten top words.
# Running this cell calls generate_summary_for_topics, which sends one
# request through the module-level OpenAI client.
topics = {
    "Topic 1": ["data", "analysis", "machine", "learning", "algorithm", "model", "predict", "accuracy", "statistics", "performance"],
    "Topic 2": ["hospital", "patient", "care", "health", "medical", "treatment", "doctor", "nurse", "clinic", "hospitalization"],
    "Topic 3": ["sports", "team", "soccer", "goal", "match", "players", "tactics", "league", "competition", "performance"],
    "Topic 4": ["education", "university", "learning", "student", "campus", "teacher", "classroom", "course", "degree", "professor"],
    "Topic 5": ["finance", "economy", "stock", "investment", "market", "business", "capital", "growth", "interest", "risk"]
}

# Maps each topic name found in the reply to its one-word summary ({} on error).
summarized_topics = generate_summary_for_topics(topics)
print(summarized_topics)

{'Topic 1': 'Analytics', 'Topic 2': 'Healthcare', 'Topic 3': 'Sports', 'Topic 4': 'Education', 'Topic 5': 'Finance'}
