### Semantic proximity in quantum and general scientific publications: A BERTopic approach to mapping regional knowledge spaces
- Made by: Keungoui Kim (Ph.D.) & Jisoo Hur (Ph.D.)  
- Part 02. Label Generation
- Data set: Web of Science (WoS)


#### Label Generation - Quantum Science

In [None]:
import openai
import pandas as pd

# Generate one descriptive label per BERTopic keyword set (quantum science
# publications) by sending each row's 'Representation' value to the OpenAI chat
# API, then save the labels to 'quantum_pub_bertopic_chatgpt.csv'.
#
# NOTE: `openai.ChatCompletion` is the pre-1.0 interface of the `openai`
# package; this cell will not run unchanged against openai>=1.0.

# Set your OpenAI API key here. When left empty, the key already configured in
# the `openai` module is kept (presumably taken from the OPENAI_API_KEY
# environment variable) instead of being overwritten with an empty string.
OPENAI_API_KEY = ""
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY

# Column order of the output file. Declared explicitly so the CSV still has a
# header row when no label could be generated.
OUTPUT_COLUMNS = ['eu_nuts_id', 'period', 'keyword', 'content']

# Read the CSV data
quantum_pub_bertopic = pd.read_csv('quantum_pub_bertopic.csv')

# List to hold results
results = []

# Index labels of rows whose request failed, reported after the loop
failed_rows = []

# Iterate over each row in the dataframe
for index, row in quantum_pub_bertopic.iterrows():
    keywords = row['Representation']

    # Build the prompt for this row; the keyword list goes first, followed by
    # the fixed instructions.
    prompt = f'''{keywords}
    The list of words are the outcome of the topic modeling from quantum science and technology related journal articles.
    Generate a new label that best describes the topic of the articles as follow: <new science topic>
    Provide a specific and detailed label. I want just a single label. I do not want descriptions for the labels.'''

    try:
        # Call the OpenAI API to generate the new label
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an AI assistant that generates labels for topics based on provided keywords."},
                {"role": "user", "content": prompt}
            ],
            temperature=1.15,
            max_tokens=50
        )

        # Get the label from the response
        new_label = response.choices[0].message.content.strip()

        # Append the result to the list
        results.append({
            'eu_nuts_id': row['eu_nuts_id'],
            'period': row['period'],
            'keyword': keywords,
            'content': new_label
        })

    except Exception as e:
        # Best effort: one failing row must not abort the whole run, but keep
        # track of it so the gap in the output is visible.
        failed_rows.append(index)
        print(f"Error processing row {index}: {e}")

# Summarise failures so an incomplete output file does not go unnoticed
if failed_rows:
    print(f"{len(failed_rows)} of {len(quantum_pub_bertopic)} rows failed and are missing from the output: {failed_rows}")

# Convert the results into a DataFrame
results_df = pd.DataFrame(results, columns=OUTPUT_COLUMNS)

# Save the results to a CSV
results_df.to_csv('quantum_pub_bertopic_chatgpt.csv', index=False)

#### Label Generation - All

In [None]:
import openai
import pandas as pd

# Generate one descriptive label per BERTopic keyword set (all scientific
# publications) by sending each row's 'Representation' value to the OpenAI chat
# API, then save the labels to 'pub_bertopic_chatgpt.csv'.
#
# NOTE: `openai.ChatCompletion` is the pre-1.0 interface of the `openai`
# package; this cell will not run unchanged against openai>=1.0.

# Set your OpenAI API key here. When left empty, the key already configured in
# the `openai` module is kept (presumably taken from the OPENAI_API_KEY
# environment variable) instead of being overwritten with an empty string.
OPENAI_API_KEY = ""
if OPENAI_API_KEY:
    openai.api_key = OPENAI_API_KEY

# Column order of the output file. Declared explicitly so the CSV still has a
# header row when no label could be generated.
OUTPUT_COLUMNS = ['eu_nuts_id', 'period', 'keyword', 'content']

# Read the CSV data
pub_bertopic = pd.read_csv('pub_bertopic.csv')

# List to hold results
results = []

# Index labels of rows whose request failed, reported after the loop
failed_rows = []

# Iterate over each row in the dataframe
for index, row in pub_bertopic.iterrows():
    keywords = row['Representation']

    # Build the prompt for this row; the keyword list goes first, followed by
    # the fixed instructions.
    prompt = f'''{keywords}
    The list of words are the outcome of the topic modeling from scientific journal articles.
    Generate a new label that best describes the topic of the articles as follow: <new science topic>
    Provide a specific and detailed label. I want just a single label. I do not want descriptions for the labels.'''

    try:
        # Call the OpenAI API to generate the new label
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": "You are an AI assistant that generates labels for topics based on provided keywords."},
                {"role": "user", "content": prompt}
            ],
            temperature=1.15,
            max_tokens=50
        )

        # Get the label from the response
        new_label = response.choices[0].message.content.strip()

        # Append the result to the list
        results.append({
            'eu_nuts_id': row['eu_nuts_id'],
            'period': row['period'],
            'keyword': keywords,
            'content': new_label
        })

    except Exception as e:
        # Best effort: one failing row must not abort the whole run, but keep
        # track of it so the gap in the output is visible.
        failed_rows.append(index)
        print(f"Error processing row {index}: {e}")

# Summarise failures so an incomplete output file does not go unnoticed
if failed_rows:
    print(f"{len(failed_rows)} of {len(pub_bertopic)} rows failed and are missing from the output: {failed_rows}")

# Convert the results into a DataFrame
results_df = pd.DataFrame(results, columns=OUTPUT_COLUMNS)

# Save the results to a CSV
results_df.to_csv('pub_bertopic_chatgpt.csv', index=False)