In [2]:
import numpy as np
import pandas as pd
import json as json
import re
import nltk
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import ttest_ind

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the raw survey export (xlsx)
df_import = pd.read_excel("XXX/SurveyResults.xlsx")

# The first data row carries the real column names: promote it to the header,
# then drop that row from the data and renumber the remaining rows.
df_import.columns = df_import.iloc[0]
df_import = df_import[1:].reset_index(drop=True)

# Apply the exclusion criteria one after another; every step only sees the
# rows that survived the previous step.
df_import = (
    df_import
    # 1. keep real responses only (drops entries made in the survey preview)
    .loc[lambda frame: frame['Distribution Channel'] == "anonymous"]
    # 2. drop participants without any business familiarity
    .loc[lambda frame: frame['BusinessFamilarity'] != "Not familiar at all"]
    # 3. attention check: total duration of at least 300 seconds
    .loc[lambda frame: frame['Duration (in seconds)'] >= 300]
    # 4. keep fully completed surveys only
    .loc[lambda frame: frame['Progress'] == 100]
    # 5. English expertise of at least C1
    .loc[lambda frame: frame['EngExp'].isin(['Advanced (C1)' ,'Near-Native (C2)', 'Native'])]
)

display(df_import.head())


Unnamed: 0,Response Type,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Distribution Channel,User Language,BusinessFamilarity,DAT - Word 1,...,ProfField,convo_history1,API Error,MAX_TURNS,TimeSpent_DAT,TimeSpent_IdeaGeneration,TimeSpent_IdeaElaboration,convo_history2,Create New Field or Choose From Dropdown...,GroupAssignment
0,IP Address,100,2373,True,2025-02-22 20:15:39.634000,R_2MeVhGrthluSOHf,anonymous,EN,Somewhat familiar,Table,...,"Business, Economics, & Management",,,,243.162,627.665,1359.231,,,Human_Human
3,IP Address,100,729,True,2025-02-23 00:06:40.302000,R_2I6tZ6LTbfYc0GX,anonymous,EN,Extremely familiar,Tiger,...,"Business, Economics, & Management","[{""role"":""system"",""content"":""Provide concise a...",,,77.025,299.028,282.148,,,Human/AI_Human
4,IP Address,100,1150,True,2025-02-23 00:23:26.466000,R_2y8eiKbpPQCPHiI,anonymous,EN,Somewhat familiar,Car,...,"Business, Economics, & Management","[{""role"":""system"",""content"":""Provide concise a...",,,183.169,415.632,434.479,"[{""role"":""system"",""content"":""Provide concise a...",,Human/AI_Human/AI
5,IP Address,100,899,True,2025-02-23 18:59:58.427000,R_8X68xD9TOB81k0E,anonymous,EN,Very familiar,Chocolate,...,"Business, Economics, & Management",,,,72.799,67.785,614.495,"[{""role"":""system"",""content"":""Provide concise a...",,Human_Human/AI
6,IP Address,100,27783,True,2025-02-23 21:21:07.033000,R_8dj9aA2If4L0AXS,anonymous,EN,Somewhat familiar,car,...,"Business, Economics, & Management","[{""role"":""system"",""content"":""Provide concise a...",,,,361.77,452.756,,,Human/AI_Human


In [4]:
# Collect each participant's DAT words into one list (missing entries dropped)
column_DAT_words = [f'DAT - Word {position}' for position in range(1, 11)]
df_import['DAT_words'] = df_import[column_DAT_words].apply(lambda row: row.dropna().tolist(), axis=1)

# Collect each participant's ideas into one list; the Human and HumanAI
# question blocks are concatenated in that order (missing entries dropped)
column_ideas = [
    f'Task1_IdeaGen_{condition} - Idea {position}'
    for condition in ('Human', 'HumanAI')
    for position in range(1, 11)
]
df_import['ideas'] = df_import[column_ideas].apply(lambda row: row.dropna().tolist(), axis=1)

# Number of ideas generated per participant
df_import['idea_count'] = df_import['ideas'].apply(len)


def _first_non_missing(row):
    """Return the first non-missing value of a row, or None if every value is missing."""
    present = row.dropna()
    return None if present.empty else present.values[0]


# Merge the two idea-elaboration columns into a single column
column_idea_ela = ['Task2_IdeaEla_Human', 'Task2_IdeaEla_HumanAI']
df_import['idea_elaboration'] = df_import[column_idea_ela].apply(_first_non_missing, axis=1)

# Binary encoding of experimental group.
# A conversation history counts as an "interaction" only when it is longer than
# the serialized system prompt alone.
# NOTE(review): json.dumps uses ", " / ": " separators and serializes a bare
# dict here, while the stored histories appear (from the preview output) to be
# compact JSON wrapped in a list - the threshold is therefore approximate;
# confirm against a history that only contains the system message.
minimum_length = len(json.dumps({"role":"system","content":"Provide concise and helpful answers"}))
for task_no in (1, 2):
    history = df_import[f'convo_history{task_no}']
    # Access: any conversation history was recorded for this task
    df_import[f'Task{task_no}_ChatGPT_Access'] = history.apply(
        lambda value: 0 if pd.isna(value) else 1
    )
    # Interaction: the history holds more than the system prompt
    df_import[f'Task{task_no}_ChatGPT_Interaction'] = history.apply(
        lambda value: int(isinstance(value, str) and len(value) > minimum_length)
    )

display(df_import)

Unnamed: 0,Response Type,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Distribution Channel,User Language,BusinessFamilarity,DAT - Word 1,...,Create New Field or Choose From Dropdown...,GroupAssignment,DAT_words,ideas,idea_count,idea_elaboration,Task1_ChatGPT_Access,Task1_ChatGPT_Interaction,Task2_ChatGPT_Access,Task2_ChatGPT_Interaction
0,IP Address,100,2373,True,2025-02-22 20:15:39.634000,R_2MeVhGrthluSOHf,anonymous,EN,Somewhat familiar,Table,...,,Human_Human,"[Table , Homework, Metro, Garden , Bag, Classr...","[Outside showers with solar system, Solar Syst...",4,A Smart outlet helps save electricity by autom...,0,0,0,0
3,IP Address,100,729,True,2025-02-23 00:06:40.302000,R_2I6tZ6LTbfYc0GX,anonymous,EN,Extremely familiar,Tiger,...,,Human/AI_Human,"[Tiger, Cheese, Flower, House, Ocean, Car, Lip...","[Solar powered phone charger , Zero waste cook...",5,Shoes made of ocean plastic would reduce the w...,1,1,0,0
4,IP Address,100,1150,True,2025-02-23 00:23:26.466000,R_2y8eiKbpPQCPHiI,anonymous,EN,Somewhat familiar,Car,...,,Human/AI_Human/AI,"[Car, Dog, Phone, Tree, Wife, Couch, Pen, Swea...","[Reusable water bottles, Bamboo toothbrush , B...",10,Reusable water bottles can be a key component ...,1,1,1,1
5,IP Address,100,899,True,2025-02-23 18:59:58.427000,R_8X68xD9TOB81k0E,anonymous,EN,Very familiar,Chocolate,...,,Human_Human/AI,"[Chocolate, Home, Work, Napkin, Elections, Pan...","[Reusable bags , Ecofriendly phone cases, E ca...",3,Core Concept: reuseable bags to minimize envir...,0,0,1,1
6,IP Address,100,27783,True,2025-02-23 21:21:07.033000,R_8dj9aA2If4L0AXS,anonymous,EN,Somewhat familiar,car,...,,Human/AI_Human,"[car, pineapple, climate , burger, metro, tree...","[Community Solar Sharing, waste free grocery d...",3,Community Solar sharing is a decentralized ene...,1,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,IP Address,100,519,True,2025-03-19 12:13:20.645000,R_2a9zoHexe3ldLWW,anonymous,EN,Somewhat familiar,coffee,...,,Human/AI_Human/AI,"[coffee, arm, bike, tree, gym, sand, screw, br...","[Circular Fashion Marketplace, Zero-Waste Groc...",10,Urban Vertical Farming Solutions focuses on re...,1,1,1,1
197,IP Address,100,420,True,2025-03-19 15:35:23.788000,R_6rffg9jG4P5uv8C,anonymous,EN,Very familiar,photo,...,,Human/AI_Human,"[photo, glass , cucumber, bike, hat, coin, key...","[Sustainable Shopping Concierge Service, Local...",8,Sustainable cooking classes are offered to peo...,1,1,0,0
198,IP Address,100,921,True,2025-03-19 16:57:35.486000,R_2EiEFoosaRZCH8b,anonymous,EN,Somewhat familiar,phone,...,,Human_Human,"[phone, ice, apple, truck, bridge, ocean, autu...",[AI-based agent to support sustainable decisio...,3,"A new innovative way to install pavement, e.g....",0,0,0,0
203,IP Address,100,2469,True,2025-03-22 23:23:42.351000,R_6j98PldVIBCiuBX,anonymous,EN,Slightly familiar,leg,...,,Human_Human/AI,"[leg, bread, heart, wall, cat, plane, sombrero...","[portable solar panels to charge your bike, el...",5,Smart Weather-Integrated Garden Sprinkling Sys...,0,0,1,1


In [5]:
# Compute the idea diversity score for each participant

# Define function to compute the cosine similarity of each idea pair using SBERT
# Loaded SBERT models keyed by model name, so the (slow) model construction
# happens once per kernel instead of once per call / participant.
_SBERT_MODEL_CACHE = {}


def cosine_similarity(ideas, model_name='all-MiniLM-L6-v2'):
    """Return the pairwise cosine-similarity matrix of the given idea texts.

    NOTE: this definition shadows the `cosine_similarity` imported from
    `sklearn.metrics.pairwise` at the top of the notebook; the name is kept
    because later cells call it.

    Args:
        ideas: List of idea strings to embed.
        model_name: Name of the SentenceTransformer model to use. The model is
            instantiated on first use and reused on later calls.

    Returns:
        A NumPy array holding the similarity of every idea with every other
        idea (rows and columns follow the order of `ideas`).
    """
    if model_name not in _SBERT_MODEL_CACHE:
        _SBERT_MODEL_CACHE[model_name] = SentenceTransformer(model_name)
    model = _SBERT_MODEL_CACHE[model_name]

    embeddings = model.encode(ideas, convert_to_tensor=True)
    similarity_matrix = util.pytorch_cos_sim(embeddings, embeddings)
    # Move to CPU before converting, in case the embeddings live on another device
    return similarity_matrix.cpu().numpy()

# Define function to compute the cos similarity average
def average(matrix):
    """Return the mean of the entries above the main diagonal of `matrix`.

    For a pairwise similarity matrix this is the average similarity over all
    distinct pairs (self-similarities on the diagonal are excluded).

    Args:
        matrix: 2-D array-like (NumPy array or nested lists), or None.

    Returns:
        The mean of the strictly upper-triangular entries, or None when
        `matrix` is None or has fewer than two rows (no pair to average, which
        would otherwise yield nan together with a RuntimeWarning).
    """
    if matrix is None or len(matrix) < 2:
        return None
    matrix = np.asarray(matrix)
    # k=1 skips the diagonal, so only distinct pairs are averaged
    return np.mean(matrix[np.triu_indices_from(matrix, k=1)])

# Apply to the dataset: similarity matrix -> mean pairwise similarity -> diversity.
# Participants with fewer than two ideas get no similarity matrix.
df_import['ideas_cos_sim'] = df_import['ideas'].apply(
    lambda idea_list: None if len(idea_list) <= 1 else cosine_similarity(idea_list)
)
df_import['idea_similarity'] = df_import['ideas_cos_sim'].apply(average)
# Diversity is the complement of the average similarity
df_import['idea_diversity'] = df_import['idea_similarity'].apply(lambda similarity: 1 - similarity)

In [6]:
# Compute the idea diversity for each participant using pre-cleaned ideas

# Define words to remove 
remove_words = ['sustainable', 'urban', 'ecofriendly', 'green']


def clean_idea_list(idea_list):
    """Lower-case the ideas and strip the words listed in `remove_words`.

    Only whole-word occurrences are removed; afterwards runs of whitespace are
    collapsed to a single space and the text is trimmed. Entries that are not
    strings are skipped, and anything that is not a list yields an empty list.
    """
    if not isinstance(idea_list, list):
        return []

    def _scrub(text):
        # Work on the lower-cased text so the removal is case-insensitive
        lowered = text.lower()
        for term in remove_words:
            lowered = re.sub(rf'\b{term}\b', '', lowered)
        # Tidy up the gaps left behind by the removed words
        return re.sub(r'\s+', ' ', lowered).strip()

    return [_scrub(entry) for entry in idea_list if isinstance(entry, str)]

# Apply to the dataset: clean the ideas, then repeat the similarity/diversity
# computation on the cleaned texts. Fewer than two ideas -> no similarity matrix.
df_import['ideas_clean'] = df_import['ideas'].apply(clean_idea_list)
df_import['ideas_clean_cos_sim'] = df_import['ideas_clean'].apply(
    lambda idea_list: None if len(idea_list) <= 1 else cosine_similarity(idea_list)
)
df_import['idea_clean_similarity'] = df_import['ideas_clean_cos_sim'].apply(average)
# Diversity is the complement of the average similarity
df_import['idea_clean_diversity'] = df_import['idea_clean_similarity'].apply(lambda similarity: 1 - similarity)

In [None]:
# Configure DAT computation as in https://github.com/jayolson/divergent-association-task

"""Compute score for Divergent Association Task,
a quick and simple measure of creativity
(Copyright 2021 Jay Olson; see LICENSE)"""

import re
import itertools
import numpy
import scipy.spatial.distance

class Model:
    """Create model to compute DAT

    Holds the embedding vector of every dictionary word found in the model
    file (``self.vectors``) and scores word lists by the average pairwise
    cosine distance of their words.
    """

    def __init__(self, model="XXX/.840B.300d.txt", dictionary="XXXX/words.txt", pattern="^[a-z][a-z-]*[a-z]$"):
        """Join model and words matching pattern in dictionary

        Args:
            model: Path to a text file with one entry per line: the word
                followed by its space-separated vector components.
            dictionary: Path to a text file with one candidate word per line.
            pattern: Regular expression a dictionary line must match for the
                word to be kept.
        """

        # Keep unique words matching pattern from file
        words = set()
        with open(dictionary, "r", encoding="utf8") as f:
            for line in f:
                if re.match(pattern, line):
                    words.add(line.rstrip("\n"))

        # Join words with model: only vectors of dictionary words are stored
        vectors = {}
        with open(model, "r", encoding="utf8") as f:
            for line in f:
                tokens = line.split(" ")
                word = tokens[0]
                if word in words:
                    vector = numpy.asarray(tokens[1:], "float32")
                    vectors[word] = vector
        self.vectors = vectors


    def validate(self, word):
        """Clean up word and find best candidate to use

        Returns the first candidate spelling that has a vector, or None when
        the input is not a string, is too short after cleaning, or is unknown.
        """

        # Non-text cells (e.g. numbers read from a spreadsheet) are never valid words
        if not isinstance(word, str):
            return None

        # Strip unwanted characters
        clean = re.sub(r"[^a-zA-Z- ]+", "", word).strip().lower()
        if len(clean) <= 1:
            return None # Word too short

        # Generate candidates for possible compound words
        # "valid" -> ["valid"]
        # "cul de sac" -> ["cul-de-sac", "culdesac"]
        # "top-hat" -> ["top-hat", "tophat"]
        candidates = []
        if " " in clean:
            candidates.append(re.sub(r" +", "-", clean))
            candidates.append(re.sub(r" +", "", clean))
        else:
            candidates.append(clean)
            if "-" in clean:
                candidates.append(re.sub(r"-+", "", clean))
        for cand in candidates:
            if cand in self.vectors:
                return cand # Return first word that is in model
        return None # Could not find valid word


    def distance(self, word1, word2):
        """Compute cosine distance (0 to 2) between two words

        Both words are looked up in ``self.vectors``; callers are expected to
        pass words returned by ``validate``.
        """

        return scipy.spatial.distance.cosine(self.vectors.get(word1), self.vectors.get(word2))


    def dat(self, words, minimum=7):
        """Compute DAT score

        Args:
            words: Iterable of raw words as entered by a participant.
            minimum: Number of valid, unique words that is both required and
                used (the first `minimum` valid words are scored).

        Returns:
            The average pairwise cosine distance of the scored words
            multiplied by 100, or None when there are fewer than `minimum`
            valid unique words or fewer than two words to compare.
        """
        # Keep only valid unique words
        uniques = []
        for word in words:
            valid = self.validate(word)
            if valid and valid not in uniques:
                uniques.append(valid)

        # Keep subset of words
        if len(uniques) >= minimum:
            subset = uniques[:minimum]
        else:
            return None # Not enough valid words

        # A score needs at least one pair; without this guard a minimum of 0 or 1
        # would end in a division by zero below
        if len(subset) < 2:
            return None

        # Compute distances between each pair of words
        distances = []
        for word1, word2 in itertools.combinations(subset, 2):
            dist = self.distance(word1, word2)
            distances.append(dist)

        # Compute the DAT score (average semantic distance multiplied by 100)
        return (sum(distances) / len(distances)) * 100


In [None]:
# Build the DAT scorer from the GloVe vectors and the word dictionary
model = Model("XXX/glove.840B.300d.txt", "XXX/words.txt")

# Compute the DAT score for each participant.
# The required number of valid words is set to the number of words the
# participant entered, so Model.dat returns None as soon as one entered word is
# invalid or duplicated.
df_import['DAT_score'] = df_import['DAT_words'].apply(
    lambda word_list: model.dat(word_list, minimum=len(word_list))
)

display(df_import)

Unnamed: 0,Response Type,Progress,Duration (in seconds),Finished,Recorded Date,Response ID,Distribution Channel,User Language,BusinessFamilarity,DAT - Word 1,...,Task2_ChatGPT_Access,Task2_ChatGPT_Interaction,ideas_cos_sim,idea_similarity,idea_diversity,ideas_clean,ideas_clean_cos_sim,idea_clean_similarity,idea_clean_diversity,DAT_score
0,IP Address,100,2373,True,2025-02-22 20:15:39.634000,R_2MeVhGrthluSOHf,anonymous,EN,Somewhat familiar,Table,...,0,0,"[[0.99999994, 0.56783485, 0.13523558, 0.357647...",0.293659,0.706341,"[outside showers with solar system, solar syst...","[[0.99999994, 0.56783485, 0.13523558, 0.357647...",0.293659,0.706341,78.535901
3,IP Address,100,729,True,2025-02-23 00:06:40.302000,R_2I6tZ6LTbfYc0GX,anonymous,EN,Extremely familiar,Tiger,...,0,0,"[[1.0, 0.045722827, 0.1078574, 0.11039008, 0.1...",0.140201,0.859799,"[solar powered phone charger, zero waste cooki...","[[1.0, 0.045722827, 0.08220878, 0.11039008, 0....",0.116290,0.883710,77.496777
4,IP Address,100,1150,True,2025-02-23 00:23:26.466000,R_2y8eiKbpPQCPHiI,anonymous,EN,Somewhat familiar,Car,...,1,1,"[[1.0000001, 0.2369139, 0.20088157, 0.18020742...",0.200220,0.799780,"[reusable water bottles, bamboo toothbrush, be...","[[1.0000001, 0.2369139, 0.20088157, 0.18020742...",0.200220,0.799780,78.237401
5,IP Address,100,899,True,2025-02-23 18:59:58.427000,R_8X68xD9TOB81k0E,anonymous,EN,Very familiar,Chocolate,...,1,1,"[[1.0, 0.16935797, 0.09562738], [0.16935797, 1...",0.193569,0.806431,"[reusable bags, phone cases, e cars]","[[1.0000001, 0.16951951, 0.09562728], [0.16951...",0.194207,0.805793,79.926777
6,IP Address,100,27783,True,2025-02-23 21:21:07.033000,R_8dj9aA2If4L0AXS,anonymous,EN,Somewhat familiar,car,...,0,0,"[[1.0, 0.043839015, 0.44793528], [0.043839015,...",0.191129,0.808871,"[community solar sharing, waste free grocery d...","[[1.0, 0.043839015, 0.44793528], [0.043839015,...",0.191129,0.808871,81.771145
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,IP Address,100,519,True,2025-03-19 12:13:20.645000,R_2a9zoHexe3ldLWW,anonymous,EN,Somewhat familiar,coffee,...,1,1,"[[1.0, 0.33112183, 0.1347638, 0.16219965, 0.04...",0.192355,0.807645,"[circular fashion marketplace, zero-waste groc...","[[1.0, 0.33112183, 0.10946867, 0.16219965, 0.0...",0.174403,0.825597,74.677366
197,IP Address,100,420,True,2025-03-19 15:35:23.788000,R_6rffg9jG4P5uv8C,anonymous,EN,Very familiar,photo,...,0,0,"[[1.0, 0.26556307, 0.074637815, 0.39413318, 0....",0.222198,0.777802,"[shopping concierge service, local food map ap...","[[1.0000001, 0.29047266, 0.17995271, 0.3173180...",0.192408,0.807592,81.616391
198,IP Address,100,921,True,2025-03-19 16:57:35.486000,R_2EiEFoosaRZCH8b,anonymous,EN,Somewhat familiar,phone,...,0,0,"[[1.0, 0.09794642, 0.15352798], [0.09794642, 1...",0.173166,0.826834,"[ai-based agent to support decision-making, ca...","[[1.0, 0.030090854, 0.08333335], [0.030090854,...",0.127149,0.872851,79.499761
203,IP Address,100,2469,True,2025-03-22 23:23:42.351000,R_6j98PldVIBCiuBX,anonymous,EN,Slightly familiar,leg,...,1,1,"[[1.0, 0.14848918, 0.13927193, 0.38214725, 0.2...",0.190521,0.809479,"[portable solar panels to charge your bike, el...","[[1.0, 0.14800034, 0.13927193, 0.38214725, 0.2...",0.190696,0.809304,77.619768


In [None]:
# Persist the pre-processed survey data as CSV (the index is written as the first column)
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
preprocessed_csv_path = "/Users/benmischeck/Library/Mobile Documents/com~apple~CloudDocs/Uni/Master_Uni-Köln/WS2024 Master Thesis/Final/Data Analysis/Data/Pre-processed/Survey_ideas_clean.csv"
df_import.to_csv(preprocessed_csv_path)

In [7]:
# Manipulation check: how many participants per experimental group actually
# interacted with ChatGPT during task 1
task1_interaction_counts = (
    df_import
    .groupby('GroupAssignment')['Task1_ChatGPT_Interaction']
    .value_counts()
)
print(task1_interaction_counts)

GroupAssignment    Task1_ChatGPT_Interaction
Human/AI_Human     1                            28
Human/AI_Human/AI  1                            21
                   0                             1
Human_Human        0                            28
Human_Human/AI     0                            27
Name: count, dtype: int64


In [8]:
# Manipulation check: how many participants per experimental group actually
# interacted with ChatGPT during task 2
task2_interaction_counts = (
    df_import
    .groupby('GroupAssignment')['Task2_ChatGPT_Interaction']
    .value_counts()
)
print(task2_interaction_counts)

GroupAssignment    Task2_ChatGPT_Interaction
Human/AI_Human     0                            28
Human/AI_Human/AI  1                            20
                   0                             2
Human_Human        0                            28
Human_Human/AI     1                            21
                   0                             6
Name: count, dtype: int64


In [27]:
# Compute mean time for task 1, split by ChatGPT access
df_import['TimeSpent_IdeaGeneration'] = pd.to_numeric(df_import['TimeSpent_IdeaGeneration'], errors='coerce')

# Keep participants with a recorded task time whose whole survey took at most one hour
df_time = df_import[df_import['TimeSpent_IdeaGeneration'].notna()]
df_time = df_time.loc[df_time['Duration (in seconds)'] <= 3600]

# Group means, divided by 60
mean_time = (
    df_time
    .groupby('Task1_ChatGPT_Access')['TimeSpent_IdeaGeneration']
    .mean()
    .div(60)
)

print("Avg. Time for Task 1:\n", mean_time)

# Perform Welch's test (unequal variances): no access (0) vs. access (1)
group_0 = df_time.loc[df_time['Task1_ChatGPT_Access'] == 0, 'TimeSpent_IdeaGeneration']
group_1 = df_time.loc[df_time['Task1_ChatGPT_Access'] == 1, 'TimeSpent_IdeaGeneration']
t_stat, p_value = ttest_ind(group_0, group_1, equal_var=False)
print(f"Welch's t-test:\nt-statistic = {t_stat:.3f}, p-value = {p_value:.4f}")

Avg. Time for Task 1:
 Task1_ChatGPT_Access
0    5.501999
1    5.417297
Name: TimeSpent_IdeaGeneration, dtype: float64
Welch's t-test:
t-statistic = 0.086, p-value = 0.9318


In [31]:
# Compute mean time for task 2, split by ChatGPT access
df_import['TimeSpent_IdeaElaboration'] = pd.to_numeric(df_import['TimeSpent_IdeaElaboration'], errors='coerce')

# Keep participants with a recorded task time whose whole survey took at most one hour
df_time = df_import[df_import['TimeSpent_IdeaElaboration'].notna()]
df_time = df_time.loc[df_time['Duration (in seconds)'] <= 3600]

# Group means, divided by 60
mean_time = (
    df_time
    .groupby('Task2_ChatGPT_Access')['TimeSpent_IdeaElaboration']
    .mean()
    .div(60)
)

print("Avg. Time for Task 2:\n", mean_time)

# Perform Welch's test (unequal variances): no access (0) vs. access (1)
group_0 = df_time.loc[df_time['Task2_ChatGPT_Access'] == 0, 'TimeSpent_IdeaElaboration']
group_1 = df_time.loc[df_time['Task2_ChatGPT_Access'] == 1, 'TimeSpent_IdeaElaboration']
t_stat, p_value = ttest_ind(group_0, group_1, equal_var=False)
print(f"Welch's t-test:\nt-statistic = {t_stat:.3f}, p-value = {p_value:.4f}")

Avg. Time for Task 2:
 Task2_ChatGPT_Access
0    5.509211
1    3.684374
Name: TimeSpent_IdeaElaboration, dtype: float64
Welch's t-test:
t-statistic = 2.281, p-value = 0.0250


In [33]:
# Compute mean idea count for task 1, split by ChatGPT access
df_import['idea_count'] = pd.to_numeric(df_import['idea_count'], errors='coerce')
df_idea_count = df_import.dropna(subset=['idea_count'])
mean_idea_count = df_idea_count.groupby('Task1_ChatGPT_Access')['idea_count'].mean()

print("Avg. Idea Count for Task 1:\n", mean_idea_count)

# Perform Welch's test (unequal variances).
# group_0 = no ChatGPT access, group_1 = ChatGPT access (labels now match the flag value).
group_0 = df_idea_count[df_idea_count['Task1_ChatGPT_Access'] == 0]['idea_count']
group_1 = df_idea_count[df_idea_count['Task1_ChatGPT_Access'] == 1]['idea_count']
# The with-access group is passed first, so a positive t-statistic means more
# ideas with ChatGPT access (same sign as the previously reported result).
t_stat, p_value = ttest_ind(group_1, group_0, equal_var=False)
print(f"Welch's t-test:\nt-statistic = {t_stat:.3f}, p-value = {p_value:.4f}")

Avg. Idea Count for Task 1:
 Task1_ChatGPT_Access
0    4.327273
1    7.860000
Name: idea_count, dtype: float64
Welch's t-test:
t-statistic = 7.926, p-value = 0.0000


In [35]:
# Compute mean time needed to generate one idea (task 1), split by ChatGPT access
df_import['TimeSpent_IdeaGeneration'] = pd.to_numeric(df_import['TimeSpent_IdeaGeneration'], errors='coerce')
df_import['idea_count'] = pd.to_numeric(df_import['idea_count'], errors='coerce')
df_efficiency = df_import.dropna(subset=['TimeSpent_IdeaGeneration', 'idea_count'])

# Keep participants with at least one idea (avoids dividing by zero) and a task
# time of at most one hour.
# NOTE(review): the task-time cells exclude outliers via 'Duration (in seconds)'
# instead of the task time itself - confirm which criterion is intended.
# .copy() so that the new column below is added to an independent frame.
df_efficiency = df_efficiency[(df_efficiency['idea_count'] > 0) & (df_efficiency['TimeSpent_IdeaGeneration'] <= 60*60)].copy()

# The task time is not divided by 60 here, so the result is seconds per idea
# (assuming TimeSpent_IdeaGeneration is recorded in seconds, as the one-hour
# filter above implies).
df_efficiency['time_per_idea'] = df_efficiency['TimeSpent_IdeaGeneration'] / df_efficiency['idea_count']
mean_efficiency = df_efficiency.groupby('Task1_ChatGPT_Access')['time_per_idea'].mean()
print("Avg. Time per Idea (Task 1, in seconds):\n", mean_efficiency.round(2))

# Perform Welch's t-test (unequal variances): no access (0) vs. access (1)
group_0 = df_efficiency[df_efficiency['Task1_ChatGPT_Access'] == 0]['time_per_idea']
group_1 = df_efficiency[df_efficiency['Task1_ChatGPT_Access'] == 1]['time_per_idea']

t_stat, p_value = ttest_ind(group_0, group_1, equal_var=False)
print(f"\nWelch's t-test:\nt-statistic = {t_stat:.3f}, p-value = {p_value:.4f}")



Avg. Time per Idea (Task 1, in minutes):
 Task1_ChatGPT_Access
0    74.7
1    45.5
Name: time_per_idea, dtype: float64

Welch's t-test:
t-statistic = 3.549, p-value = 0.0006
