In [1]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore
import pickle
import os
import json
import numpy as np
import pandas as pd 
import re 
import ast 
import dateutil.parser as dparser
import matplotlib.pyplot as plt 
import seaborn as sns 
import statsmodels.formula.api as smf
import statsmodels.api as sm
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import math
from datetime import datetime, timedelta


In [2]:
import firebase_admin
from firebase_admin import credentials
from firebase_admin import firestore

# Build Firebase credentials from a service-account key file (path is relative
# to the notebook's working directory), register the default app, and open a
# Firestore client that later cells use through `db`.
# NOTE(review): initialize_app presumably fails if the default app is already
# registered, i.e. when this cell is re-run in a live kernel — confirm.
cred = credentials.Certificate("../../codeinterface-85b5e-firebase-adminsdk-11q7e-837ba92a03.json")
firebase_admin.initialize_app(cred)
db = firestore.client()

# Helper functions 

Get the actual tasks that participants solved.

In [3]:
# Read every task definition (*.json file) found in the study's task folder.
folder_path = '../../tasks_study/tasks'
json_file_names = [entry for entry in os.listdir(folder_path) if entry.endswith('.json')]

loaded_tasks = []
for json_name in json_file_names:
    with open(os.path.join(folder_path, json_name), 'r') as handle:
        loaded_tasks.append(json.load(handle))

# Keep the parsed tasks in a NumPy array so they can be addressed by integer index.
all_tasks = np.array(loaded_tasks)
def get_task_index(task_name):
    """Return the position in `all_tasks` of the task whose 'name' equals `task_name`.

    Raises:
        ValueError: if no loaded task carries that name.
    """
    for position, task in enumerate(all_tasks):
        if task['name'] == task_name:
            return position
    raise ValueError('Task not found')
# Position of the 'sum_product' task, which the helpers below use as the tutorial task.
tutorial_task_index = get_task_index('sum_product')

# One row per task set: the task names it contains, in order.
_TASK_SET_NAMES = [
    ['even_odd_count', 'triple_sum_to_zero', 'table_transform_named', 'tokenizer',
     'encode_message', 't_test', 'event_scheduler'],
    ['even_odd_count', 'is_bored', 'login_authenticator', 'is_multiply_prime',
     'count_nums', 'table_transform_named', 'calculator'],
    ['even_odd_count', 'count_nums', 'calculator', 'table_transform_unnamed2',
     'login_authenticator', 'encode_message', 'is_bored'],
    ['even_odd_count', 'order_by_points', 'retriever', 'triple_sum_to_zero',
     'tokenizer', 'event_scheduler', 'encode_message'],
    ['even_odd_count', 'is_multiply_prime', 'table_transform_unnamed1', 't_test',
     'is_bored', 'order_by_points', 'triple_sum_to_zero'],
]

# Resolve every name to its index in `all_tasks`; each task set becomes an integer array.
task_sets = [
    np.array([get_task_index(member_name) for member_name in set_names])
    for set_names in _TASK_SET_NAMES
]


In [4]:
def get_suggestion_acceptance_rate(telemetry_data):
    """Count how many shown suggestions were accepted and how many were reviewed.

    A suggestion counts as shown only when its `suggestion_shown` event has a
    non-empty `suggestion`. It counts as reviewed when an `accept` or `reject`
    event exists with the same `suggestion_id`.

    Args:
        telemetry_data: sequence of event dicts, each with an "event_type" key.

    Returns:
        (accepted, reviewed): number of shown suggestions that have a matching
        `accept` event, and number that have a matching `accept` or `reject`
        event. Both are 0 when nothing was reviewed.
    """
    shown_ids = [
        event["suggestion_id"]
        for event in telemetry_data
        if event["event_type"] == "suggestion_shown" and event["suggestion"] != ""
    ]
    # Sets give constant-time membership tests in the counting pass below.
    accepted_ids = {event["suggestion_id"] for event in telemetry_data if event["event_type"] == "accept"}
    rejected_ids = {event["suggestion_id"] for event in telemetry_data if event["event_type"] == "reject"}

    accepted = 0
    shown_true = 0
    for suggestion_id in shown_ids:
        if suggestion_id in accepted_ids:
            accepted += 1
            shown_true += 1
        elif suggestion_id in rejected_ids:
            shown_true += 1
    return accepted, shown_true


def get_suggestions_labels(telemetry_data, task_id):
    """Build a table with one row per shown suggestion that was accepted or rejected.

    Args:
        telemetry_data: sequence of telemetry event dicts.
        task_id: index into the module-level `task_sets` selecting the
            participant's task set.

    Returns:
        pandas.DataFrame with columns 'suggestion', 'prefix_code',
        'suffix_code', 'label' (1 = accepted, 0 = rejected), 'logprobs',
        'times', 'task_name' and 'requested' (True when the id just below the
        suggestion's id belongs to a `request_suggestion` event).
    """
    def events_of(kind):
        return [event for event in telemetry_data if event["event_type"] == kind]

    # Only suggestions with non-empty text are considered shown.
    shown_events = [
        event for event in telemetry_data
        if event["event_type"] == "suggestion_shown" and event["suggestion"] != ""
    ]
    shown_ids = [event["suggestion_id"] for event in shown_events]
    shown_logprobs = [event["logprobs"] for event in shown_events]
    before_events = events_of("before_shown")
    before_ids = [event["suggestion_id"] for event in before_events]
    accepted_ids = [event["suggestion_id"] for event in events_of("accept")]
    rejected_ids = [event["suggestion_id"] for event in events_of("reject")]
    requested_ids = [event["suggestion_id"] for event in events_of("request_suggestion")]

    user_task_set = task_sets[task_id]
    columns = {
        'suggestion': [], 'prefix_code': [], 'suffix_code': [], 'label': [],
        'logprobs': [], 'times': [], 'task_name': [], 'requested': [],
    }
    for suggestion_id in shown_ids:
        was_accepted = suggestion_id in accepted_ids
        if not (was_accepted or suggestion_id in rejected_ids):
            continue
        # .index() resolves to the first event that carries this id.
        shown_pos = shown_ids.index(suggestion_id)
        shown_event = shown_events[shown_pos]
        columns['times'].append(shown_event["timestamp"])
        columns['logprobs'].append(shown_logprobs[shown_pos])
        columns['suggestion'].append(shown_event["suggestion"])
        task_index = shown_event["task_index"]
        columns['requested'].append(suggestion_id-1 in requested_ids)
        # A task_index of -1 maps to the tutorial task.
        if task_index == -1:
            task_name = all_tasks[tutorial_task_index]["name"]
        else:
            task_name = all_tasks[user_task_set[task_index]]["name"]
        columns['task_name'].append(task_name)
        # The surrounding code (prefix/suffix) is stored on the before_shown event.
        before_event = before_events[before_ids.index(suggestion_id)]
        columns['prefix_code'].append(before_event["prefix_code"])
        columns['suffix_code'].append(before_event["suffix_code"])
        columns['label'].append(1 if was_accepted else 0)
    return pd.DataFrame(columns)

def get_suggestions_requested(telemetry_data):
    """Count explicitly requested suggestions that were reviewed, and how many were accepted.

    A `request_suggestion` event with id N is paired with the suggestion whose
    id is N + 1. The pair is counted only if that suggestion was shown with
    non-empty text and later accepted or rejected.

    Returns:
        (n_req, n_accepted): reviewed requested suggestions, and the accepted
        ones among them.
    """
    shown_ids = [
        event["suggestion_id"] for event in telemetry_data
        if event["event_type"] == "suggestion_shown" and event["suggestion"] != ""
    ]
    accepted_ids = [event["suggestion_id"] for event in telemetry_data if event["event_type"] == "accept"]
    rejected_ids = [event["suggestion_id"] for event in telemetry_data if event["event_type"] == "reject"]
    request_ids = [event["suggestion_id"] for event in telemetry_data if event["event_type"] == "request_suggestion"]

    reviewed_count = 0
    accepted_count = 0
    for request_id in request_ids:
        answer_id = request_id + 1
        if answer_id not in shown_ids:
            continue
        is_accepted = answer_id in accepted_ids
        if not (is_accepted or answer_id in rejected_ids):
            continue
        reviewed_count += 1
        if is_accepted:
            accepted_count += 1
    return reviewed_count, accepted_count


def get_time_to_completion(telemetry_data):
    """Compute per-task completion times from first load to a successful submit.

    For every distinct `task_index` (only its first `load_task` event is used),
    the first `submit_code` event with `completed_task == 1` for that task
    whose elapsed time is below the cap is recorded. Tasks without such a
    submit contribute nothing.

    Args:
        telemetry_data: sequence of telemetry event dicts. Timestamps are
            divided by 1000 before comparison (presumably milliseconds ->
            seconds; confirm against the logger).

    Returns:
        (durations, mean): list of elapsed times and their mean, or
        ([np.nan], np.nan) when no task qualifies.
    """
    max_time = 1800  # cap on elapsed time; slower completions are ignored

    starts = [event for event in telemetry_data if event["event_type"] == "load_task"]
    ends = [
        event
        for event in telemetry_data
        if event["event_type"] == "submit_code" and event["completed_task"] == 1
    ]

    times_tasks_solved = []
    task_indices_seen = set()
    for start in starts:
        task_index = start["task_index"]
        if task_index in task_indices_seen:
            continue
        task_indices_seen.add(task_index)
        for end in ends:
            if end["task_index"] != task_index:
                continue
            elapsed = (end["timestamp"] - start["timestamp"]) / 1000
            if elapsed < max_time:
                # Keep the first qualifying submit in telemetry order and stop.
                times_tasks_solved.append(elapsed)
                break

    if len(times_tasks_solved) == 0:
        return [np.nan], np.nan
    return times_tasks_solved, np.mean(times_tasks_solved)


def get_coding_time(telemetry, entered_exit_survey, date_performed):
    """Estimate how long the participant spent coding.

    Two estimates are computed: the span between the first and last telemetry
    timestamps (divided by 1000), and the wall-clock difference between the
    time-of-day in `entered_exit_survey` and the time-of-day of
    `date_performed`. The wall-clock value is returned unless it exceeds
    36 minutes, in which case the telemetry span is returned instead.

    Args:
        telemetry: non-empty sequence of event dicts with a "timestamp" key.
        entered_exit_survey: string whose fifth space-separated token is an
            'HH:MM:SS' time.
        date_performed: datetime marking the start of the session.
    """
    telemetry_span = (telemetry[-1]["timestamp"] - telemetry[0]["timestamp"]) / 1000

    # Compare times of day only: both values are re-parsed without date or time zone.
    clock_format = '%H:%M:%S'
    survey_clock = datetime.strptime(entered_exit_survey.split(" ")[4], clock_format)
    start_clock = datetime.strptime(date_performed.strftime(clock_format), clock_format)
    survey_span = (survey_clock - start_clock).seconds

    return telemetry_span if survey_span > 36*60 else survey_span


def get_time_verifying_suggestion(telemetry_data):
    """Map each reviewed suggestion id to the time between being shown and being accepted/rejected.

    Returns:
        dict of suggestion_id -> (review timestamp - shown timestamp) / 1000.
        Suggestions without an accept or reject event are omitted. When an id
        has several review events, the last one in the telemetry is used.
    """
    review_time_by_id = {
        event["suggestion_id"]: event["timestamp"]
        for event in telemetry_data
        if event["event_type"] in ("reject", "accept")
    }

    return {
        event["suggestion_id"]: (review_time_by_id[event["suggestion_id"]] - event["timestamp"]) / 1000
        for event in telemetry_data
        if event["event_type"] == "suggestion_shown" and event["suggestion_id"] in review_time_by_id
    }


def get_chat_messages_count(telemetry_data):
    """Count chat events and the share of assistant responses that were copied from.

    A response counts as copied when a `copy_code` or `copy_from_chat` event
    in the same task, not earlier than the response, has a `copied_text` that
    is a substring of the response.

    Args:
        telemetry_data: sequence of telemetry event dicts.

    Returns:
        (n_assistant_response, n_user_message, n_copy_code, n_copy_from_chat,
        avg_copy_per_response). The last value is the fraction of assistant
        responses with at least one matching copy, or NaN when there were no
        assistant responses.
    """
    def events_of(kind):
        return [event for event in telemetry_data if event["event_type"] == kind]

    assistant_responses = events_of("assistant_response")
    user_message_count = len(events_of("user_message"))
    copy_code_button_count = len(events_of("copy_code"))
    copy_from_chat_count = len(events_of("copy_from_chat"))

    # Only copy events can match, so restrict the inner scan to them. This also
    # avoids reading 'timestamp'/'task_index' on unrelated event types.
    copy_events = [
        event for event in telemetry_data
        if event["event_type"] in ("copy_code", "copy_from_chat")
    ]

    suggestions_copied = []
    for response_event in assistant_responses:
        response = response_event["response"]
        was_copied = any(
            copy_event["timestamp"] >= response_event["timestamp"]
            and copy_event["task_index"] == response_event["task_index"]
            and copy_event["copied_text"] in response
            for copy_event in copy_events
        )
        suggestions_copied.append(int(was_copied))

    # Return NaN explicitly instead of letting NumPy warn about an empty slice.
    avg_copy_per_response = np.mean(suggestions_copied) if suggestions_copied else np.nan
    return len(assistant_responses), user_message_count, copy_code_button_count, copy_from_chat_count, avg_copy_per_response

def get_task_data(telemetry_data, task_id):
    """Summarise each task the participant loaded.

    Args:
        telemetry_data: sequence of telemetry event dicts.
        task_id: index into the module-level `task_sets` for this participant.

    Returns:
        dict keyed by task_index. Each value has 'name', 'time_in_task'
        (time from the first load of this task to the next later load of
        task_index + 1, divided by 1000; 0 if there is none), 'completed'
        (a submit with completed_task == 1 exists), 'code' (code of the last
        save event for the task, "" if none) and 'skipped'.
    """
    user_task_set = task_sets[task_id]

    def events_of(kind):
        return [event for event in telemetry_data if event["event_type"] == kind]

    load_events = events_of("load_task")
    skip_events = events_of("skip_task")
    submit_events = events_of("submit_code")
    save_events = events_of("save_code")

    summary = {}
    for position, load_event in enumerate(load_events):
        task_index = load_event["task_index"]
        # A task_index of -1 maps to the tutorial task.
        if task_index == -1:
            task_name = all_tasks[tutorial_task_index]["name"]
        else:
            task_name = all_tasks[user_task_set[task_index]]["name"]
        # Only the first load of each task is summarised.
        if task_index in summary:
            continue

        time_in_task = next(
            (
                (later_load["timestamp"] - load_event["timestamp"]) / 1000
                for later_load in load_events[position + 1:]
                if later_load["task_index"] == task_index+1
            ),
            0,
        )
        completed = any(
            submit["task_index"] == task_index and submit["completed_task"] == 1
            for submit in submit_events
        )
        # The last matching save event in telemetry order wins.
        saved_codes = [save["code"] for save in save_events if save["task_index"] == task_index]
        code = saved_codes[-1] if saved_codes else ""
        skipped = any(skip["task_index"] == task_index for skip in skip_events)

        summary[task_index] = {'name': task_name, 'time_in_task': time_in_task, 'completed': completed, 'code': code, 'skipped': skipped}
    return summary


def get_chat_history(telemetry_data, task_id):
    """Assemble one row per assistant response with its conversation context and copy events.

    Args:
        telemetry_data: sequence of telemetry event dicts.
        task_id: index into the module-level `task_sets` for this participant.

    Returns:
        pandas.DataFrame with columns 'message' (the response's chatHistory
        without system messages, cut off at the first assistant message equal
        to the response), 'response', 'logprobs', 'times', 'task_name' and
        'copy_info' (list of {'type', 'copied'} dicts).
    """
    def events_of(kind):
        return [event for event in telemetry_data if event["event_type"] == kind]

    def copies_from(response_event, copy_events, label):
        # A copy is attributed to a response when it occurs in the same task,
        # not earlier than the response, and its text is a substring of it.
        found = []
        for copy_event in copy_events:
            copied_text = copy_event["copied_text"]
            if (copy_event["task_index"] == response_event["task_index"]
                    and copy_event["timestamp"] >= response_event["timestamp"]
                    and copied_text in response_event["response"]):
                found.append({'type': label, 'copied': copied_text})
        return found

    button_copies = events_of("copy_code")
    selection_copies = events_of("copy_from_chat")

    columns = {'message': [], 'response': [], 'logprobs': [], 'times': [], 'task_name': [], 'copy_info': []}
    for response_event in events_of("assistant_response"):
        columns['response'].append(response_event["response"])
        columns['logprobs'].append(response_event["logprobs"])
        columns['times'].append(response_event["timestamp"])

        prior_turns = []
        for turn in response_event["chatHistory"]:
            if turn['role'] == 'system':
                continue
            if turn['role'] == 'assistant' and turn['content'] == response_event["response"]:
                break
            prior_turns.append(turn)
        columns['message'].append(prior_turns)

        # A task_index of -1 maps to the tutorial task.
        if response_event["task_index"] == -1:
            task_name = all_tasks[tutorial_task_index]["name"]
        else:
            task_name = all_tasks[task_sets[task_id][response_event["task_index"]]]["name"]
        columns['task_name'].append(task_name)

        # Button copies are listed first, then copies made by selecting chat text.
        columns['copy_info'].append(
            copies_from(response_event, button_copies, 'copy_button')
            + copies_from(response_event, selection_copies, 'copy_chat')
        )
    return pd.DataFrame(columns)
              

def get_code_history(telemetry_data, task_id, last_time):
    """Return the sequence of saved code states with timestamps and gaps between them.

    Args:
        telemetry_data: sequence of telemetry event dicts.
        task_id: index into the module-level `task_sets` for this participant.
        last_time: accepted but not used by this function.

    Returns:
        pandas.DataFrame with columns 'code', 'times', 'task_name' and
        'time_gaps'. When at least one save exists, a final row repeats the
        last code state at a timestamp of (first save + 35*60*1000).
        'time_gaps' holds consecutive timestamp differences divided by 1000,
        starting with 0.
    """
    code_states = []
    times = []
    task_names = []
    for save_event in (event for event in telemetry_data if event["event_type"] == "save_code"):
        code_states.append(save_event["code"])
        times.append(save_event["timestamp"])
        # A task_index of -1 maps to the tutorial task.
        if save_event["task_index"] == -1:
            task_names.append(all_tasks[tutorial_task_index]["name"])
        else:
            task_names.append(all_tasks[task_sets[task_id][save_event["task_index"]]]["name"])

    time_gaps = []
    if code_states:
        # Closing row: same code and task as the last save, stamped 35 minutes after the first save.
        code_states.append(code_states[-1])
        times.append(35*60*1000 + times[0])
        task_names.append(task_names[-1])
        time_gaps = [0] + [(later - earlier)/1000 for earlier, later in zip(times, times[1:])]

    return pd.DataFrame({'code': code_states, 'times': times, 'task_name': task_names, 'time_gaps': time_gaps})


# Pre-processing

In [5]:
# Load the participants' Google Form answers, keeping only the identity and
# experience columns, and give them short names.
_FORM_RENAMES = {
    'Email (If you are student, you must provide your .edu email here)': 'email1',
    'Email for Amazon gift card compensation': 'email2',
    'Which best describes your programming experience?': 'prog_experience',
    'How proficient are you with Python?': 'python_experience',
    'How often do you use AI tools for programming (e.g., GitHub Copilot, ChatGPT)?': 'ai_experience',
}
_form_raw = pd.read_csv('../../data/gpt4_all_participants.csv')
df_participants_form = (
    _form_raw[['First Name', 'Last Name', *_FORM_RENAMES]]
    # Full name is used later for name-based matching.
    .assign(name=lambda form: form['First Name'] + " " + form['Last Name'])
    .drop(columns=['First Name', 'Last Name'])
    .rename(columns=_FORM_RENAMES)
)


In [6]:
# Fetch all documents of the 'responses' collection through the Firestore client `db`.
docs = db.collection('responses').get()

In [7]:
# Maps the string codes "1".."5" to agreement labels (Strongly Disagree .. Strongly Agree).
DICT_TOOL_USAGE = {"1":"Strongly Disagree","2":"Disagree", "3":"Neutral", "4":"Agree", "5":"Strongly Agree"}

In [None]:
# Build the participant-level table: one row per Firestore response document,
# enriched with metrics derived from its telemetry by the helper functions above.
df = (pd.DataFrame([x.to_dict() for x in docs])
     # Rows without telemetry or without start/end timestamps cannot be analysed.
     .dropna(subset=["telemetry_data", "completed_task_time", "date_performed"])
     # remove those with test in name or email
     .loc[lambda x: ~x["name"].str.contains("test", case=False)]
     .loc[lambda x: ~x["email"].str.contains("test", case=False)]
     # exclude entries whose name contains this specific string
     .loc[lambda x: ~x["name"].str.contains("Subhro Das", case=False)]
     # constant 1 per row, so sums over this column count participants
     .assign(n_participants = 1)
     # Strip any parenthesised suffix from the date strings before parsing.
     # Raw strings keep the regex backslashes from being read as (invalid) string escapes.
     .assign(completed_task_time = lambda x: [dparser.parse(re.sub(r"\s*\([^)]*\)", "", y), fuzzy=True) for y in x["completed_task_time"]])
     .assign(date_performed = lambda x: [dparser.parse(re.sub(r"\s*\([^)]*\)", "", y), fuzzy=True) for y in x["date_performed"]])
     .assign(task_duration = lambda x: x.completed_task_time - x.date_performed)
     # The leading "<letters>_<alphanumerics>" part of task_id is used as the model label ("" if absent).
     .assign(model = lambda x: [re.match("[a-zA-Z]*_[a-zA-Z0-9]*", str(t))[0] if re.match("[a-zA-Z]*_[a-zA-Z0-9]*", str(t)) else "" for t in x["task_id"]])
     .assign(interface = lambda x: ["autocomplete" if "autocomplete" in y else "chat" if "chat" in y else "nomodel" for y in x["model"]])
     # From here on task_id is the integer found in the second-to-last "_"-separated field.
     .assign(task_id = lambda x: [int(x.split("_")[-2]) for x in x["task_id"]])
     .assign(n_tasks_completed = lambda z: [len([x for x in y if x["event_type"] == "submit_code" and x["completed_task"] == 1 ]) for y in z["telemetry_data"]])
     .assign(n_tasks_attempted = lambda z: [len([x for x in y if x["event_type"] == "load_task"]) for y in z["telemetry_data"]])
     .assign(n_tasks_skipped = lambda z: [len([x for x in y if x["event_type"] == "skip_task"]) for y in z["telemetry_data"]])
     .assign(task_completion_durations = lambda x: [get_time_to_completion(y)[0] for y in x['telemetry_data']])
     .assign(mean_task_duration = lambda x: [get_time_to_completion(y)[1] for y in x['telemetry_data']])
     #  entered_exit_survey - date_performed
     .assign(coding_time = lambda x: [get_coding_time(t, y, z) for t, y, z in zip(x['telemetry_data'], x['entered_exit_survey'], x['date_performed'])])

     # code history
     .assign(code_history = lambda x: [get_code_history(y, z, t) for y, z, t in zip(x['telemetry_data'], x['task_id'], x['entered_exit_survey'])])

     # Survey answers cast to int and exposed under a common TLX_ prefix
     .assign(TLX_frustration = lambda x: x["frustration"].astype(int))
     .assign(TLX_performance = lambda x: x["performance"].astype(int))
     .assign(TLX_temporal_demand = lambda x: x["temporalDemand"].astype(int))
     .assign(TLX_physical_demand = lambda x: x["physicalDemand"].astype(int))
     .assign(TLX_effort = lambda x: x["effort"].astype(int))
     .assign(TLX_mental_demand = lambda x: x["mentalDemand"].astype(int))
     # Row-wise sum of every column whose name contains "TLX", times 5
     .assign(TLX_total_score = lambda x: x.filter(like="TLX").sum(axis=1) * 5)

      # autocomplete specific
     .assign(n_sugg_accepted = lambda z: [get_suggestion_acceptance_rate(y)[0] for y in z["telemetry_data"]])
     .assign(n_sugg_shown = lambda z: [get_suggestion_acceptance_rate(y)[1] for y in z["telemetry_data"]])
     # Zero denominators yield NaN/inf in the rate columns below rather than raising.
     .assign(sugg_accept_rate = lambda x: x.n_sugg_accepted / x.n_sugg_shown)
     .assign(time_spent_verifying = lambda x: [get_time_verifying_suggestion(y) for y in x['telemetry_data']])
     # for suggestions requested
     .assign(n_sugg_requested = lambda z: [get_suggestions_requested(y)[0] for y in z["telemetry_data"]])
     .assign(n_sugg_accepted_requested = lambda z: [get_suggestions_requested(y)[1] for y in z["telemetry_data"]])
     .assign(sugg_accept_rate_requested = lambda x: x.n_sugg_accepted_requested / x.n_sugg_requested)
     # acceptance rate for non requested suggestions
     .assign(sugg_accept_rate_non_requested = lambda x: (x.n_sugg_accepted - x.n_sugg_accepted_requested) / (x.n_sugg_shown - x.n_sugg_requested))
     # get suggestion data
     .assign(suggestions_data = lambda x: [get_suggestions_labels(y, z) for y, z in zip(x['telemetry_data'], x['task_id'])])

     # chat specific
     .assign(n_assistant_response = lambda z: [get_chat_messages_count(y)[0] for y in z["telemetry_data"]])
     .assign(n_user_message = lambda z: [get_chat_messages_count(y)[1] for y in z["telemetry_data"]])
     .assign(n_copy_code_button = lambda z: [get_chat_messages_count(y)[2] for y in z["telemetry_data"]])
     .assign(n_copy_from_chat = lambda z: [get_chat_messages_count(y)[3] for y in z["telemetry_data"]])
     .assign(avg_copy_per_response = lambda z: [get_chat_messages_count(y)[4] for y in z["telemetry_data"]])
     .assign(chat_history_data = lambda x: [get_chat_history(y, z) for y, z in zip(x['telemetry_data'], x['task_id'])])
     # both chat and autocomplete
     # NOTE(review): `"aihelpful" in x` tests the frame's columns, so it is always
     # True once x["aihelpful"] can be read; the `else 0` fallback looks unreachable
     # and a missing (NaN) answer would fail in int() — confirm the intent.
     .assign(aihelpful = lambda x: [int(y) if "aihelpful" in x else 0 for y in x["aihelpful"]])
     .assign(aiToolTypicalUsage = lambda x: [DICT_TOOL_USAGE[y] if y in DICT_TOOL_USAGE else y for y in x["aiToolTypicalUsage"]])


     # task level information
     .assign(task_data = lambda x: [get_task_data(y, z) for y, z in zip(x['telemetry_data'], x['task_id'])])
)

# Collapse every model label that contains the substring "model" into plain "nomodel".
has_model_substring = ["model" in label for label in df["model"]]
df.loc[has_model_substring, "model"] = "nomodel"


# FILTERING for quality:
# 1) drop participants with no completed task at all
#    (reset_index() without drop=True keeps the old index as an extra column)
print("n participants before filtering: ", len(df))
df = df.query("n_tasks_completed >= 1").reset_index()
print("n participants after filtering for tutorial completion: ", len(df))

# 2) per participant, count gaps between consecutive code states that are
#    longer than 15 minutes (60*15) but shorter than 3000
long_gaps = [
    sum(1 for gap in history['time_gaps'] if gap > 60*15 and gap < 3000)
    for history in df['code_history']
]
# keep only participants without any such long gap
df = df.assign(n_long_gaps=long_gaps).query("n_long_gaps == 0").reset_index()
print("n participants after filtering for long gaps: ", len(df))

Sanity checks

In [None]:
# Each label below is followed by the number of participants in `df` that match it.

# Interface/telemetry consistency: autocomplete users with no reviewed
# suggestions, chat users who never wrote, chat users who got no reply.
print("Autocomplete but no suggestions shown:")
print(len(df[(df['interface'] == 'autocomplete') & (df['n_sugg_shown'] == 0)]['model']))
print("Chat but never used it:")
print(len(df[(df['interface'] == 'chat') & (df['n_user_message'] == 0)]['model']))
print("Chat with user message but no assistant response:")
print(len(df[(df['interface'] == 'chat') & (df['n_user_message'] > 0) & (df['n_assistant_response'] == 0)]['model']))

# Study duration buckets: coding_time / 60 is compared against thresholds in minutes.
# The first label now states the 36-minute threshold that the condition actually tests.
print("Study completion time under 36 mins")
print(len(df[df['coding_time']/60 < 36]['model']))
print("Study completion time under 30 mins")
print(len(df[df['coding_time']/60 < 30]['model']))
# those above 36 mins
print("Study completion time over 36 mins")
print(len(df[df['coding_time']/60 > 36]['model']))
print("Study completion time over 40 mins")
print(len(df[df['coding_time']/60 > 40 ]['model']))
print("Study completion time over 50 mins")
print(len(df[df['coding_time']/60 > 50 ]['model']))

Merge with the participants' Google Form data

In [None]:
# Attach the Google Form experience answers to the study table.
# Matching is tried in three passes: form 'email2' == study 'email', then form
# 'email1' == study 'email' for the rows still unmatched, then a fuzzy match on name.
df1 = df
df2 = df_participants_form
# Pass 1: match on email2. After this rename the form frame has 'email' and
# 'email1' but no 'email2' column.
df2_renamed_email2 = df2.rename(columns={'email2': 'email'})
df2_renamed_email2 = df2_renamed_email2.drop_duplicates(subset=['email'], keep='first')
# Both frames have a 'name' column, so the merge yields 'name_x' (study) and 'name_y' (form).
merge_on_email2 = pd.merge(df1, df2_renamed_email2, on='email', how='left')
# Identify rows in df1 that were not matched
unmatched = merge_on_email2[merge_on_email2['name_y'].isnull()]

# Merge unmatched rows on email1
if not unmatched.empty:
    # NOTE(review): unlike pass 1, the form is not de-duplicated on the join
    # key here, so repeated email1 values would multiply rows — confirm.
    df2_renamed_email1 = df2.rename(columns={'email1': 'email'})
    # The experience columns exist on both sides here, so they get _x (from
    # `unmatched`) and _y (from the form) suffixes; the form's 'name' is added as-is.
    merge_on_email1 = pd.merge(unmatched, df2_renamed_email1, on='email', how='left')
    
    # Combine the two sets of matched rows
    final_df = pd.concat([merge_on_email2[~merge_on_email2['name_y'].isnull()], merge_on_email1])

    # Drop temporary columns and fill NaNs for unmatched rows
    # NOTE(review): after renaming 'name_x' to 'name' the frame appears to hold
    # two columns labelled 'name' (this one and the form's 'name' from the
    # second merge); later lines seem to rely on that — confirm.
    final_df = final_df.drop(columns=['name_y', 'email1', 'email2']).rename(columns={'name_x': 'name'}).reset_index(drop=True)
else:
    # NOTE(review): merge_on_email2 has no 'email2' column (it was renamed to
    # 'email' above), so this drop would presumably raise KeyError, and the
    # *_x / *_y columns used below would not exist on this path — confirm.
    final_df = merge_on_email2.drop(columns=['name_y', 'email1', 'email2']).rename(columns={'name_x': 'name'}).reset_index(drop=True)

print(len(final_df))
# Each experience field can live in three columns: unsuffixed (email2 merge),
# *_y (form side of the email1 merge) and *_x (left side of the email1 merge).
# Take the first non-null value in that order.
final_df['prog_experience'] = final_df['prog_experience'].combine_first(final_df['prog_experience_y']).combine_first(final_df['prog_experience_x'])
final_df['python_experience'] = final_df['python_experience'].combine_first(final_df['python_experience_y']).combine_first(final_df['python_experience_x'])
final_df['ai_experience'] = final_df['ai_experience'].combine_first(final_df['ai_experience_y']).combine_first(final_df['ai_experience_x'])
# drop _x and _y
final_df = final_df.drop(columns=['prog_experience_x', 'python_experience_x', 'ai_experience_x', 'prog_experience_y', 'python_experience_y', 'ai_experience_y'])

# for those with nan in prog_experience, try to match based on name in df_participants_form
names_df_participants_form = df_participants_form['name'].values.tolist()
for i in range(len(final_df)):
    if pd.isna(final_df['prog_experience'][i]):
        # NOTE(review): if 'name' were a single string column, [0] would take
        # only its first character; this presumably selects the first of the
        # two 'name' columns (the study-side name) — confirm.
        name = final_df.iloc[i]['name'][0]
        closest_name, score = process.extractOne(name, names_df_participants_form)
        # NOTE(review): any score above 0 is accepted, so even a very weak
        # fuzzy match copies experience values — confirm the threshold.
        if score > 0:
            row = df_participants_form[df_participants_form['name'] == closest_name]
            final_df.at[i, 'prog_experience'] = row['prog_experience'].values[0]
            final_df.at[i, 'python_experience'] = row['python_experience'].values[0]
            final_df.at[i, 'ai_experience'] = row['ai_experience'].values[0]


# The string literal below is disabled debugging code; it has no effect at runtime.
""" with pd.option_context('display.max_rows', None,
                       'display.max_columns', None,
                       'display.precision', 3,
                       ):
    print(final_df['prog_experience']) """

# Flag rows whose name or email occurs more than once
duplicated_names = final_df['name'].duplicated(keep=False)
duplicated_emails = final_df['email'].duplicated(keep=False)

# A row is marked for keeping if it is not duplicated in either 'name' or 'email'
# We invert the duplicated flags with ~ and use | to combine them, marking rows duplicated in either column
rows_to_keep = ~(duplicated_names | duplicated_emails)

# Filter the DataFrame based on rows_to_keep
# NOTE(review): final_df_unique is not used again in this cell; final_df
# itself still contains the duplicated rows — confirm this is intended.
final_df_unique = final_df[rows_to_keep]

# Remove the identifying columns from the table that is kept
final_df = final_df.drop(columns=['name', 'email'])
# how many nan in prog_experience
print(final_df['prog_experience'].isna().sum())
print(len(final_df))

In [11]:
# Write the merged participant table to disk as a pickle file.
final_df.to_pickle('../../data/final_df.pkl')

# Participant Information

In [12]:
# Reload the Google Form answers, this time keeping every column, add a
# full-name column and give the email/experience questions short names.
df_participants_form = (
    pd.read_csv('../../data/gpt4_all_participants.csv')
    .assign(name=lambda form: form['First Name'] + " " + form['Last Name'])
    .drop(columns=['First Name', 'Last Name'])
    .rename(columns={
        'Email (If you are student, you must provide your .edu email here)': 'email1',
        'Email for Amazon gift card compensation': 'email2',
        'Which best describes your programming experience?': 'prog_experience',
        'How proficient are you with Python?': 'python_experience',
        'How often do you use AI tools for programming (e.g., GitHub Copilot, ChatGPT)?': 'ai_experience',
    })
)


In [13]:
import pandas as pd

# Example dataframes
# df1 = pd.DataFrame({'name': ['Name1', 'Name2'], 'email1': ['email1@example.com', 'email2@example.com'], 'email2': ['email3@example.com', 'email4@example.com']})
# df2 = pd.DataFrame({'name': ['Name3', 'Name1'], 'email': ['email5@example.com', 'email1@example.com']})

df1 = df_participants_form
df2 = df 

# Names and emails recorded in the study responses.
emails_in_df2 = df2['email'].tolist()
names_in_df2 = df2['name'].tolist()
# Set copies make each membership test in match_row a hash lookup instead of a
# scan of the whole list.
_emails_lookup = set(emails_in_df2)
_names_lookup = set(names_in_df2)

def match_row(row):
    """Return True if the form row's name, email1 or email2 appears in the study responses."""
    return (row['name'] in _names_lookup) or (row['email1'] in _emails_lookup) or (row['email2'] in _emails_lookup)

# Keep only the form rows that correspond to a participant in `df`.
filtered_df1 = df1[df1.apply(match_row, axis=1)]




In [None]:
# Display the column labels of the matched form rows (inspection only).
filtered_df1.columns

In [None]:
# Share of each 'Gender' value among the matched form rows, in percent.
gender_percentages = filtered_df1['Gender'].value_counts(normalize=True).mul(100)

# Show the distribution
print(gender_percentages)


In [None]:
# Share of each 'Occupation' value among the matched form rows, in percent.
percentages = filtered_df1['Occupation'].value_counts(normalize=True).mul(100)

# Show the distribution
print(percentages)


In [None]:
# Share of each 'prog_experience' answer among the matched form rows, in percent.
percentages = filtered_df1['prog_experience'].value_counts(normalize=True).mul(100)

# Show the distribution
print(percentages)


In [None]:
# Share of each 'python_experience' answer among the matched form rows, in percent.
percentages = filtered_df1['python_experience'].value_counts(normalize=True).mul(100)

# Show the distribution
print(percentages)


In [None]:
# Share of each 'ai_experience' answer among the matched form rows, in percent.
percentages = filtered_df1['ai_experience'].value_counts(normalize=True).mul(100)

# Show the distribution
print(percentages)


# Dataset level statistics

In [None]:
# Number of rows in the merged participant table.
print(f'number of participants {len(final_df)}')

In [None]:
# Share of participants per 'model' value, in percent.
percentages = final_df['model'].value_counts(normalize=True).mul(100)

# Show the distribution
print(percentages)


In [None]:
# Number of participants per 'model' value.
# NOTE: despite its name, `percentages` holds absolute counts here.
percentages = final_df['model'].value_counts() 

# Print the result
print(percentages)


In [None]:
# Sum of 'n_tasks_completed' over all participants.
# NOTE: despite its name, `percentages` holds a plain sum here.
percentages = sum(final_df['n_tasks_completed'])

# Print the result
print(percentages)


In [None]:
# Summary statistics of 'n_tasks_completed' across participants.
final_df['n_tasks_completed'].describe()

In [None]:
# Sum of 'n_sugg_shown' over all participants.
# NOTE: despite its name, `percentages` holds a plain sum here.
percentages = sum(final_df['n_sugg_shown'])

# Print the result
print(percentages)


In [None]:
# Sum of 'n_sugg_accepted' over all participants.
# NOTE: despite its name, `percentages` holds a plain sum here.
percentages = sum(final_df['n_sugg_accepted']) 

# Print the result
print(percentages)


In [None]:
# Pooled acceptance rate: total 'n_sugg_accepted' divided by total 'n_sugg_shown'.
# The result is a fraction; it is not multiplied by 100.
percentages = sum(final_df['n_sugg_accepted']) / sum(final_df['n_sugg_shown'])

# Print the result
print(percentages)


In [None]:
# Sum of 'n_assistant_response' over all participants.
# NOTE: despite its name, `percentages` holds a plain sum here.
percentages = sum(final_df['n_assistant_response'])

# Print the result
print(percentages)
