In [1]:
import numpy as np
import pandas as pd
import json as json
import re
from sentence_transformers import SentenceTransformer
from sentence_transformers import util
from sklearn.metrics.pairwise import cosine_similarity

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Read the ChatGPT survey responses from CSV into a DataFrame.
df_chatgpt = pd.read_csv(filepath_or_buffer="XXXX/chatgpt_survey_responses.csv")

In [None]:
# Using the pre-defined DAT script following https://github.com/jayolson/divergent-association-task

"""Compute score for Divergent Association Task,
a quick and simple measure of creativity
(Copyright 2021 Jay Olson; see LICENSE)"""

import re
import itertools
import numpy
import scipy.spatial.distance

class Model:
    """Create model to compute DAT

    After construction ``self.vectors`` is a dict mapping each dictionary
    word that was found in the embedding file to its float32 vector.
    """

    def __init__(self, model="XX/glove.840B.300d.txt", dictionary="XXX/words.txt", pattern="^[a-z][a-z-]*[a-z]$"):
        """Join model and words matching pattern in dictionary

        Args:
            model: Path to a text embedding file; each line holds a word
                followed by its vector components, separated by single spaces.
            dictionary: Path to a text file with one candidate word per line.
            pattern: Regular expression a dictionary line must match
                (from the start of the line) for the word to be kept.
        """

        # Compile once; the same pattern is applied to every dictionary line
        matcher = re.compile(pattern)

        # Keep unique words matching pattern from file
        words = set()
        with open(dictionary, "r", encoding="utf8") as f:
            for line in f:
                if matcher.match(line):
                    words.add(line.rstrip("\n"))

        # Join words with model: only vectors of dictionary words are kept
        vectors = {}
        with open(model, "r", encoding="utf8") as f:
            for line in f:
                tokens = line.split(" ")
                word = tokens[0]
                if word in words:
                    vector = numpy.asarray(tokens[1:], "float32")
                    vectors[word] = vector
        self.vectors = vectors


    def validate(self, word):
        """Clean up word and find best candidate to use

        Args:
            word: Raw word as entered by a participant.

        Returns:
            The first candidate spelling present in ``self.vectors``, or
            None when the cleaned word is too short or no candidate is known.
        """

        # Strip unwanted characters
        clean = re.sub(r"[^a-zA-Z- ]+", "", word).strip().lower()
        if len(clean) <= 1:
            return None # Word too short

        # Generate candidates for possible compound words
        # "valid" -> ["valid"]
        # "cul de sac" -> ["cul-de-sac", "culdesac"]
        # "top-hat" -> ["top-hat", "tophat"]
        candidates = []
        if " " in clean:
            candidates.append(re.sub(r" +", "-", clean))
            candidates.append(re.sub(r" +", "", clean))
        else:
            candidates.append(clean)
            if "-" in clean:
                candidates.append(re.sub(r"-+", "", clean))
        for cand in candidates:
            if cand in self.vectors:
                return cand # Return first word that is in model
        return None # Could not find valid word


    def distance(self, word1, word2):
        """Compute cosine distance (0 to 2) between two words

        Both words are looked up in ``self.vectors``; callers are expected to
        pass words already accepted by ``validate``.
        """

        return scipy.spatial.distance.cosine(self.vectors.get(word1), self.vectors.get(word2))


    def dat(self, words, minimum=7):
        """Compute DAT score

        Args:
            words: Iterable of raw words.
            minimum: Number of valid unique words required; only the first
                ``minimum`` valid unique words are scored.

        Returns:
            Average pairwise cosine distance of the scored words multiplied
            by 100, or None when there are fewer than ``minimum`` valid
            unique words or fewer than two words to compare.
        """
        # Keep only valid unique words (first occurrence order is preserved)
        uniques = []
        for word in words:
            valid = self.validate(word)
            if valid and valid not in uniques:
                uniques.append(valid)

        # Keep subset of words
        if len(uniques) < minimum:
            return None # Not enough valid words
        subset = uniques[:minimum]

        # A score needs at least one pair; without this guard a minimum of
        # 0 or 1 would end in a division by zero below
        if len(subset) < 2:
            return None

        # Compute distances between each pair of words
        distances = [
            self.distance(word1, word2)
            for word1, word2 in itertools.combinations(subset, 2)
        ]

        # Compute the DAT score (average semantic distance multiplied by 100)
        return (sum(distances) / len(distances)) * 100

In [None]:
# Load the GloVe model
# Model's first positional parameter is the embedding file, so both paths are
# passed by keyword to keep the dictionary file from being bound to it.
model = Model(model="XX/glove.840B.300d.txt", dictionary="XXX/words.txt")

# Compute the DAT score for each participant
df_chatgpt['DAT_words'] = df_chatgpt['DAT'].astype(str).apply(lambda x: [word.strip() for word in x.split(',') if word.strip()])
# minimum=len(x) means every submitted word must be valid and unique for a
# score to be returned; rows with fewer than two words have no pair to
# compare and are scored as None.
df_chatgpt['DAT_score'] = df_chatgpt['DAT_words'].apply(lambda x: model.dat(x, len(x)) if len(x) > 1 else None)

In [5]:

# Split Task1 into one idea per line. Surrounding whitespace is stripped and
# blank lines are dropped so they are neither counted nor embedded as ideas.
df_chatgpt['ideas'] = df_chatgpt['Task1'].apply(
    lambda x: [idea.strip() for idea in str(x).split('\n') if idea.strip()] if pd.notnull(x) else []
)
df_chatgpt['idea_count'] = df_chatgpt['ideas'].apply(len)


In [6]:
# Compute the idea diversity score for each iteration

# Define function to compute the cosine similarity of each pair of ideas using SBERT
# SBERT models already loaded in this kernel, keyed by model name.
_SBERT_MODELS = {}

def cosine_similarity(ideas, model_name='all-MiniLM-L6-v2'):
    """Return the pairwise cosine-similarity matrix of SBERT embeddings.

    Note: this definition shadows the ``cosine_similarity`` imported from
    ``sklearn.metrics.pairwise`` at the top of the notebook.

    Args:
        ideas: List of strings to embed.
        model_name: Name of the SentenceTransformer model to use.

    Returns:
        A NumPy array holding the cosine similarity between every pair of
        embeddings (each idea is compared with every idea, itself included).
    """
    # Constructing a SentenceTransformer is expensive and this function is
    # applied once per DataFrame row, so each model is loaded only once.
    if model_name not in _SBERT_MODELS:
        _SBERT_MODELS[model_name] = SentenceTransformer(model_name)
    embeddings = _SBERT_MODELS[model_name].encode(ideas, convert_to_tensor=True)
    similarity_matrix = util.pytorch_cos_sim(embeddings, embeddings)
    return similarity_matrix.cpu().numpy()

# Define function to compute the cos similarity average
def average(matrix):
    """Return the mean of the entries strictly above the main diagonal.

    Args:
        matrix: Square NumPy array of pairwise values, or None.

    Returns:
        The mean of the upper-triangular entries (diagonal excluded), or
        None when ``matrix`` is None or has fewer than two rows.
    """
    # With fewer than two rows there is no off-diagonal entry; averaging the
    # empty selection would give NaN and a RuntimeWarning.
    if matrix is None or len(matrix) < 2:
        return None
    return np.mean(matrix[np.triu_indices_from(matrix, k=1)])

# Apply to the dataset
# Pairwise similarity matrix per row; rows with fewer than two ideas get None.
df_chatgpt['ideas_cos_sim'] = df_chatgpt['ideas'].apply(
    lambda idea_list: None if len(idea_list) <= 1 else cosine_similarity(idea_list)
)
# Mean pairwise similarity per row.
df_chatgpt['idea_similarity'] = df_chatgpt['ideas_cos_sim'].apply(average)
# Diversity is one minus the mean similarity.
df_chatgpt['idea_diversity'] = df_chatgpt['idea_similarity'].apply(
    lambda similarity: 1 - similarity
)

In [7]:
# Compute the idea diversity for each participant using pre-cleaned ideas

# Define words to remove 
# Whole words stripped from every idea before similarity is computed.
# NOTE(review): 'ecofriendly' cannot match the hyphenated spelling
# "eco-friendly" — confirm whether that spelling should be removed too.
remove_words = ['sustainable', 'urban', 'ecofriendly', 'green']

# Define function to clean ideas
def clean_idea_list(idea_list):
    """Lower-case each idea, delete the words in ``remove_words``, tidy spaces.

    Args:
        idea_list: List of idea strings; any non-list value yields [].

    Returns:
        A new list with one cleaned string per string entry of ``idea_list``.
        Non-string entries are skipped.
    """
    if not isinstance(idea_list, list):
        return []

    def _scrub(text):
        # Remove each listed word (whole-word match), one after another
        lowered = text.lower()
        for banned in remove_words:
            lowered = re.sub(rf'\b{banned}\b', '', lowered)
        # Collapse the whitespace runs left behind by the removals
        return re.sub(r'\s+', ' ', lowered).strip()

    return [_scrub(entry) for entry in idea_list if isinstance(entry, str)]

# Apply to the dataset
# Cleaned copy of each row's ideas.
df_chatgpt['ideas_clean'] = df_chatgpt['ideas'].apply(clean_idea_list)
# Pairwise similarity matrix per row; rows with fewer than two ideas get None.
df_chatgpt['ideas_clean_cos_sim'] = df_chatgpt['ideas_clean'].apply(
    lambda cleaned: None if len(cleaned) <= 1 else cosine_similarity(cleaned)
)
# Mean pairwise similarity per row.
df_chatgpt['idea_clean_similarity'] = df_chatgpt['ideas_clean_cos_sim'].apply(average)
# Diversity is one minus the mean similarity.
df_chatgpt['idea_clean_diversity'] = df_chatgpt['idea_clean_similarity'].apply(
    lambda similarity: 1 - similarity
)

In [8]:
# Preview the first five rows, including the derived columns.
display(df_chatgpt.head(n=5))

Unnamed: 0,Iteration,DAT,Task1,Task2,DAT_words,DAT_score,ideas,idea_count,ideas_cos_sim,idea_similarity,idea_diversity,ideas_clean,ideas_clean_cos_sim,idea_clean_similarity,idea_clean_diversity
0,1,"Elephant, cloud, guitar, lighthouse, cupcake, ...",Eco-Commuter App \nUrban Vertical Gardens \n...,"Sure, I'll refine the Eco-Commuter App. The co...","[Elephant, cloud, guitar, lighthouse, cupcake,...",82.102456,"[Eco-Commuter App , Urban Vertical Gardens ,...",10,"[[1.0000001, 0.116671786, 0.119217515, 0.25092...",0.187552,0.812448,"[eco-commuter app, vertical gardens, solar-pow...","[[1.0000001, 0.06575291, 0.119217515, 0.163236...",0.173973,0.826027
1,2,"Whale, guitar, cactus, spoon, thunder, zebra, ...",Urban Vertical Farming Kits \nSolar-Powered C...,The Eco-Friendly Ride-Sharing App connects env...,"[Whale, guitar, cactus, spoon, thunder, zebra,...",79.02569,"[Urban Vertical Farming Kits , Solar-Powered ...",10,"[[1.0, 0.12557568, 0.15797126, 0.29622215, 0.1...",0.181859,0.818141,"[vertical farming kits, solar-powered charging...","[[1.0, 0.09383637, 0.11804062, 0.27507, 0.0699...",0.159946,0.840054
2,3,"Pillow, galaxy, violin, cactus, sandwich, bicy...",Smart Solar Windows \nCommunity Garden Networ...,Smart Solar Windows harness solar energy using...,"[Pillow, galaxy, violin, cactus, sandwich, bic...",81.16806,"[Smart Solar Windows , Community Garden Netwo...",10,"[[1.0000001, 0.2229301, 0.12097201, -0.0185468...",0.184511,0.815489,"[smart solar windows, community garden network...","[[1.0000001, 0.2229301, 0.12097201, -0.0185468...",0.173312,0.826688
3,4,"Sandwich, galaxy, pencil, whisper, fog, dragon...",EcoSmart Home Hubs \nUrban Vertical Gardens ...,EcoSmart Home Hubs offer an all-in-one solutio...,"[Sandwich, galaxy, pencil, whisper, fog, drago...",84.280583,"[EcoSmart Home Hubs , Urban Vertical Gardens ...",10,"[[1.0, 0.09692103, 0.10685343, 0.08860694, 0.2...",0.141607,0.858393,"[ecosmart home hubs, vertical gardens, solar-p...","[[1.0, 0.086895704, 0.10685343, 0.08860694, 0....",0.131105,0.868895
4,5,"Spoon, lamp, galaxy, sandcastle, butterfly, um...",Urban Vertical Gardens \nEco-Friendly Commut...,"The ""AI-Driven Energy Optimizers"" concept focu...","[Spoon, lamp, galaxy, sandcastle, butterfly, u...",81.304143,"[Urban Vertical Gardens , Eco-Friendly Commu...",10,"[[1.0000002, 0.15264638, 0.1011789, 0.17319568...",0.20928,0.79072,"[vertical gardens, eco-friendly commuter kits,...","[[1.0, 0.087823346, 0.07795932, 0.17088355, 0....",0.165866,0.834134


In [None]:
# Write the DataFrame, including all derived columns, to CSV.
# NOTE(review): to_csv also writes the row index as an extra unnamed column
# by default — confirm that is intended.
df_chatgpt.to_csv(path_or_buf="XXXX/chatgpt_survey_responses_cleaned.csv")