This notebook takes the preprocessed task data from the `data` folder and merges it to produce the final CSV.

As a reminder, we need the causal graph.

![causal graph](./causal_graph.png)

In [1]:
import Pkg; Pkg.activate("..")

[32m[1m  Activating[22m[39m project at `~/Dev/ai-assistants-rct-emse-2025/bayesian`


In [2]:
using Revise

In [3]:
using CSV, DataFrames, Statistics

In [4]:
# Load the preprocessed task-1 results into a DataFrame.
# The trailing `;` suppresses the cell output.
data_task_1 = CSV.read("../../data/task1_preprocessed.csv", DataFrame);

In [5]:
# Load the preprocessed task-2 results into a DataFrame.
# The trailing `;` suppresses the cell output.
data_task_2 = CSV.read("../../data/task2_preprocessed_multiline.csv", DataFrame);

In [6]:
# Count the task-1 rows of each developer: group by `github` and take the
# length of the `successors` column within each group.
combine(groupby(data_task_1, :github), :successors => length)

Row,github,successors_length
Unnamed: 0_level_1,String,Int64
1,anon126,2
2,anon094,1
3,anon082,1
4,anon009,1
5,anon047,1
6,anon139,1
7,anon004,1
8,anon113,1
9,anon122,1
10,anon007,1


In [7]:
names(data_task_2)

59-element Vector{String}:
 "github"
 "predecessor"
 "treatment"
 "codehealth"
 "codehealth_diff"
 "coverage"
 "coverage_diff"
 "test_runs"
 "measured_time"
 "estimated_time"
 "entry-demo-1"
 "entry-demo-2"
 "entry-demo-3"
 ⋮
 "gh-deletions"
 "gh-changes"
 "gh-changed-files"
 "gh-adds-unit-test"
 "gh-adds-behavioural-test"
 "gh-adds-functional"
 "gh-adds-sql"
 "gh-adds-logging"
 "gh-adds-exception-handling"
 "gh-adds-dependency"
 "clean_time"
 "pp_mean"

In [8]:
"""
    ai_xp_related(c)

Return `true` when the column name `c` is the one this notebook classifies as
AI-experience related. Only `"entry-ai-2"` matches; `"entry-ai-1"` is not
matched here.
"""
ai_xp_related(c) = c == "entry-ai-2"

ai_xp_related (generic function with 1 method)

In [9]:
"""
    ai_pref_related(c)

Return `true` when the column name `c` contains `"entry-ai"` and is not
classified as AI-experience related by `ai_xp_related`.
"""
function ai_pref_related(c)
    mentions_entry_ai = occursin("entry-ai", c)
    # Only consult `ai_xp_related` for columns that are `entry-ai` columns at all
    return mentions_entry_ai ? !ai_xp_related(c) : false
end

ai_pref_related (generic function with 1 method)

In [10]:
"""
    predictor_column(c)

Return `true` when the column name `c` is one of the predictor columns:
the AI usage flag, the skill-related demographics, or an `entry-ai` survey
column (AI experience or AI preference).
"""
function predictor_column(c)
    # AI_xp and AI_pref are post-processed from these entry columns
    from_ai_survey = ai_xp_related(c) || ai_pref_related(c)

    # 'entry-demo-4' is the activity of the developer
    # (Student, Professional, Researcher, Hobbyist)
    # 'entry-demo-5' is the Java proficiency of the developer
    about_skill = c in ("entry-demo-5", "entry-demo-4")

    # Whether the candidate used AI
    about_ai_use = c == "ai"

    return from_ai_survey || about_skill || about_ai_use
end

predictor_column (generic function with 1 method)

In [11]:
"""
    outcome_column(c)

Return `true` when the column name `c` is one of the outcome columns: the
directly selected measures, the `exit-space` survey columns, or the
`exit-uninterrupted` column.

GitHub activity columns (`gh-*`) are not part of the selection.
"""
function outcome_column(c)
    selected = ("codehealth", "coverage", "measured_time", "estimated_time")

    # Perceived productivity requires more post-processing of the exit columns...
    # But we grab them just in case
    productivity_related = occursin("exit-space", c)

    # Whether the developer was not interrupted
    uninterrupted = c == "exit-uninterrupted"

    return c in selected || productivity_related || uninterrupted
end

outcome_column (generic function with 1 method)

In [12]:
"""
    task_1_selected_column(c)

Return `true` when the column name `c` should be kept from the task-1 data:
the identifying columns, the predictors, and the outcomes.
"""
function task_1_selected_column(c)
    # "github" is the name of the dev (used to merge with task2 data),
    # "id" is the id of the code
    c in ("id", "github") && return true

    return predictor_column(c) || outcome_column(c)
end

task_1_selected_column (generic function with 1 method)

In [13]:
"""
    task_2_selected_column(c)

Return `true` when the column name `c` should be kept from the task-2 data:
the merge/identity columns, the treatment, and the outcomes.
"""
function task_2_selected_column(c)
    # "predecessor" is used for merging, "github" is the name of the dev, and
    # "treatment" tells whether the candidate is part of treatment (ai)
    # or control (human)
    c in ("predecessor", "github", "treatment") && return true

    return outcome_column(c)
end

task_2_selected_column (generic function with 1 method)

In [14]:
"""
    select_columns(predicate, data)

Return the columns of `data` whose name satisfies `predicate`, in their
original order. `predicate` is called with each column name as a string.
"""
function select_columns(predicate, data)
    # Positions of the matching column names, in column order
    kept_positions = findall(predicate, names(data))
    return data[!, kept_positions]
end

select_columns (generic function with 1 method)

In [15]:
# In the task-1 table, the developer name column `github` becomes `dev1`
task_1_renames = Dict(:github => :dev1)

Dict{Symbol, Symbol} with 1 entry:
  :github => :dev1

In [16]:
# Keep only the task-1 columns of interest, then apply the task-1 renames
task_1 = rename(
    select_columns(task_1_selected_column, data_task_1),
    task_1_renames,
);

In [17]:
task_1

Row,dev1,ai,codehealth,coverage,measured_time,estimated_time,entry-demo-4,entry-demo-5,entry-ai-1,entry-ai-2,entry-ai-3,entry-ai-4,entry-ai-5,entry-ai-6,entry-ai-7,entry-ai-8,entry-ai-9,entry-ai-10,exit-uninterrupted,exit-space-1,exit-space-2,exit-space-3,exit-space-4,exit-space-5,exit-space-6,exit-space-7,exit-space-8,exit-space-9,exit-space-10
Unnamed: 0_level_1,String,String7,Float64,Float64,String7,String7?,String15,String15,String3,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,anon126,True,8.34,0.74,3290,missing,Professional,Beginner,Yes,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0
2,anon094,True,8.5,0.71,6601,missing,Professional,Advanced,Yes,5.0,5.0,5.0,5.0,5.0,4.0,1.0,3.0,4.0,"Yes, but breaks",4.0,4.0,5.0,1.0,5.0,5.0,5.0,4.0,4.0,2.0
3,anon082,True,8.37,0.7,5327,missing,Professional,Advanced,Yes,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5.0,5.0,2.0
4,anon126,True,8.88,0.91,462632,10800.0,Professional,Beginner,Yes,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0
5,anon009,True,8.22,0.7,4411980,10800.0,Student,Beginner,Yes,5.0,5.0,4.0,5.0,5.0,4.0,2.0,3.0,4.0,No,4.0,3.0,2.0,4.0,2.0,3.0,1.0,4.0,2.0,4.0
6,anon047,True,8.34,0.71,13201,10800.0,Professional,Advanced,Yes,5.0,4.0,4.0,2.0,5.0,3.0,2.0,2.0,3.0,"Yes, but breaks",5.0,4.0,4.0,2.0,5.0,4.0,2.0,4.0,5.0,1.0
7,anon139,True,8.48,0.83,92586,missing,Professional,Beginner,Yes,4.0,3.0,4.0,5.0,4.0,2.0,3.0,3.0,4.0,No,3.0,4.0,3.0,4.0,5.0,3.0,2.0,5.0,4.0,2.0
8,anon004,True,8.35,0.69,8024,missing,Professional,Intermediate,Yes,5.0,4.0,4.0,5.0,5.0,3.0,2.0,5.0,5.0,Yes,4.0,4.0,5.0,3.0,3.0,4.0,4.0,5.0,5.0,3.0
9,anon113,True,8.23,0.7,123318,25200.0,Professional,Advanced,Yes,4.0,4.0,2.0,4.0,2.0,4.0,2.0,4.0,2.0,Yes,4.0,3.0,3.0,3.0,4.0,3.0,3.0,4.0,3.0,4.0
10,anon122,True,8.16,0.7,86012,missing,Professional,Beginner,Yes,4.0,4.0,3.0,4.0,2.0,4.0,3.0,4.0,4.0,Yes,3.0,4.0,2.0,2.0,5.0,1.0,1.0,2.0,5.0,2.0


In [18]:
using ai_codev_study

In [19]:
# Convert the textual flags to Bool columns.
# NOTE(review): `Commons.remap_scale` presumably maps each label of the second
# argument to the value at the same position in the third — confirm in Commons.
task_1[!,:ai] = Commons.remap_scale(task_1[!,:ai], ["True", "False"], [true, false])
task_1[!,"entry-ai-1"] = Commons.remap_scale(task_1[!,"entry-ai-1"], ["Yes", "No"], [true, false]) 

76-element Vector{Bool}:
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 1
 ⋮
 1
 1
 0
 1
 1
 1
 1
 0
 0
 0
 1
 1

In [20]:
task_1

Row,dev1,ai,codehealth,coverage,measured_time,estimated_time,entry-demo-4,entry-demo-5,entry-ai-1,entry-ai-2,entry-ai-3,entry-ai-4,entry-ai-5,entry-ai-6,entry-ai-7,entry-ai-8,entry-ai-9,entry-ai-10,exit-uninterrupted,exit-space-1,exit-space-2,exit-space-3,exit-space-4,exit-space-5,exit-space-6,exit-space-7,exit-space-8,exit-space-9,exit-space-10
Unnamed: 0_level_1,String,Bool,Float64,Float64,String7,String7?,String15,String15,Bool,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0
2,anon094,true,8.5,0.71,6601,missing,Professional,Advanced,true,5.0,5.0,5.0,5.0,5.0,4.0,1.0,3.0,4.0,"Yes, but breaks",4.0,4.0,5.0,1.0,5.0,5.0,5.0,4.0,4.0,2.0
3,anon082,true,8.37,0.7,5327,missing,Professional,Advanced,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5.0,5.0,2.0
4,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0
5,anon009,true,8.22,0.7,4411980,10800.0,Student,Beginner,true,5.0,5.0,4.0,5.0,5.0,4.0,2.0,3.0,4.0,No,4.0,3.0,2.0,4.0,2.0,3.0,1.0,4.0,2.0,4.0
6,anon047,true,8.34,0.71,13201,10800.0,Professional,Advanced,true,5.0,4.0,4.0,2.0,5.0,3.0,2.0,2.0,3.0,"Yes, but breaks",5.0,4.0,4.0,2.0,5.0,4.0,2.0,4.0,5.0,1.0
7,anon139,true,8.48,0.83,92586,missing,Professional,Beginner,true,4.0,3.0,4.0,5.0,4.0,2.0,3.0,3.0,4.0,No,3.0,4.0,3.0,4.0,5.0,3.0,2.0,5.0,4.0,2.0
8,anon004,true,8.35,0.69,8024,missing,Professional,Intermediate,true,5.0,4.0,4.0,5.0,5.0,3.0,2.0,5.0,5.0,Yes,4.0,4.0,5.0,3.0,3.0,4.0,4.0,5.0,5.0,3.0
9,anon113,true,8.23,0.7,123318,25200.0,Professional,Advanced,true,4.0,4.0,2.0,4.0,2.0,4.0,2.0,4.0,2.0,Yes,4.0,3.0,3.0,3.0,4.0,3.0,3.0,4.0,3.0,4.0
10,anon122,true,8.16,0.7,86012,missing,Professional,Beginner,true,4.0,4.0,3.0,4.0,2.0,4.0,3.0,4.0,4.0,Yes,3.0,4.0,2.0,2.0,5.0,1.0,1.0,2.0,5.0,2.0


Data from task 2 contains many rows with a missing `github` value (for some reason), so we remove those rows.

In [21]:
# Drop the task-2 rows whose `github` value is missing
data_task_2_cleaned = dropmissing(data_task_2, :github);

In [22]:
# In the task-2 table, the developer name column `github` becomes `dev2`
task_2_renames = Dict(:github => :dev2)

Dict{Symbol, Symbol} with 1 entry:
  :github => :dev2

In [23]:
# Keep only the task-2 columns of interest, then apply the task-2 renames
task_2 = rename(
    select_columns(task_2_selected_column, data_task_2_cleaned),
    task_2_renames,
);

In [24]:
task_2

Row,dev2,predecessor,treatment,codehealth,coverage,measured_time,estimated_time,exit-uninterrupted,exit-space-1,exit-space-2,exit-space-3,exit-space-4,exit-space-5,exit-space-6,exit-space-7,exit-space-8,exit-space-9,exit-space-10
Unnamed: 0_level_1,String7,String7?,String7?,Float64?,Float64?,Float64?,Float64?,String15?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,anon136,anon126,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0
2,anon106,anon126,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0
3,anon037,anon126,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0
4,anon005,anon094,ai,8.64,0.71,391708.0,5400.0,Yes,5.0,3.0,4.0,2.0,4.0,3.0,3.0,4.0,5.0,3.0
5,anon050,anon126,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0
6,anon143,anon047,ai,8.66,0.72,9746.0,9746.0,"Yes, but breaks",4.0,4.0,5.0,2.0,3.0,4.0,4.0,5.0,3.0,2.0
7,anon098,anon047,ai,8.62,0.71,93246.0,16200.0,No,5.0,5.0,4.0,2.0,5.0,3.0,5.0,5.0,5.0,2.0
8,anon027,anon139,ai,8.66,0.72,88591.0,14400.0,Yes,4.0,3.0,3.0,4.0,3.0,3.0,4.0,4.0,3.0,2.0
9,anon003,anon004,ai,8.32,0.69,8057.0,missing,Yes,5.0,4.0,5.0,2.0,4.0,4.0,3.0,5.0,4.0,1.0
10,anon052,anon118,ai,8.44,0.7,9269.0,missing,Yes,5.0,5.0,4.0,1.0,5.0,5.0,5.0,4.0,5.0,1.0


In [25]:
# Now, we merge the two tables.
# Right join: every task-2 row is kept and receives the task-1 row(s) of its
# predecessor (`task_1.dev1` matched against `task_2.predecessor`).
# Non-key columns are prefixed with `task1.` / `task2.` respectively.
# NOTE(review): `dev1` is not unique in `task_1` (anon126 has two rows), so
# each successor of that developer is paired with BOTH task-1 rows and appears
# twice in the result — confirm this duplication is intended.
merged_data = rightjoin(task_1, task_2, on=:dev1=>:predecessor,
    renamecols=(c -> "task1.$c") => (c -> "task2.$c"))

Row,dev1,task1.ai,task1.codehealth,task1.coverage,task1.measured_time,task1.estimated_time,task1.entry-demo-4,task1.entry-demo-5,task1.entry-ai-1,task1.entry-ai-2,task1.entry-ai-3,task1.entry-ai-4,task1.entry-ai-5,task1.entry-ai-6,task1.entry-ai-7,task1.entry-ai-8,task1.entry-ai-9,task1.entry-ai-10,task1.exit-uninterrupted,task1.exit-space-1,task1.exit-space-2,task1.exit-space-3,task1.exit-space-4,task1.exit-space-5,task1.exit-space-6,task1.exit-space-7,task1.exit-space-8,task1.exit-space-9,task1.exit-space-10,task2.dev2,task2.treatment,task2.codehealth,task2.coverage,task2.measured_time,task2.estimated_time,task2.exit-uninterrupted,task2.exit-space-1,task2.exit-space-2,task2.exit-space-3,task2.exit-space-4,task2.exit-space-5,task2.exit-space-6,task2.exit-space-7,task2.exit-space-8,task2.exit-space-9,task2.exit-space-10
Unnamed: 0_level_1,String7?,Bool?,Float64?,Float64?,String7?,String7?,String15?,String15?,Bool?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String7,String7?,Float64?,Float64?,Float64?,Float64?,String15?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0
2,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0
3,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0
4,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0
5,anon094,true,8.5,0.71,6601,missing,Professional,Advanced,true,5.0,5.0,5.0,5.0,5.0,4.0,1.0,3.0,4.0,"Yes, but breaks",4.0,4.0,5.0,1.0,5.0,5.0,5.0,4.0,4.0,2.0,anon005,ai,8.64,0.71,391708.0,5400.0,Yes,5.0,3.0,4.0,2.0,4.0,3.0,3.0,4.0,5.0,3.0
6,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0
7,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0
8,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0
9,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0
10,anon047,true,8.34,0.71,13201,10800.0,Professional,Advanced,true,5.0,4.0,4.0,2.0,5.0,3.0,2.0,2.0,3.0,"Yes, but breaks",5.0,4.0,4.0,2.0,5.0,4.0,2.0,4.0,5.0,1.0,anon143,ai,8.66,0.72,9746.0,9746.0,"Yes, but breaks",4.0,4.0,5.0,2.0,3.0,4.0,4.0,5.0,3.0,2.0


**IMPORTANT** We know that some developers produced *several* solutions. Are they anonymized as *two* different developers, or as the same one?

(Apparently, this only affects one developer.)

In [26]:
# Number of merged rows per task-1 developer, smallest groups first
sort(
    combine(groupby(merged_data, :dev1), "task2.dev2" => length),
    "task2.dev2_length",
)

Row,dev1,task2.dev2_length
Unnamed: 0_level_1,String7?,Int64
1,anon094,1
2,anon139,1
3,anon004,1
4,anon032,1
5,anon090,1
6,anon070,1
7,anon011,1
8,anon110,1
9,anon087,1
10,anon073,1


We see that, for some `dev1` developers, there are several `dev2` solutions.

# Sanity Checks

In [27]:
"""
    sanity_check(r)

Return `true` when row `r` is consistent: a task-1 developer who used AI
(`r["task1.ai"]`) must be followed by a task-2 row in the `"ai"` treatment,
and one who did not must be followed by a `"human"` row.

Both columns may hold `missing` after the join. A row with a missing flag or
treatment cannot be confirmed as consistent, so it yields `false` instead of
raising an error or returning `missing`.
"""
function sanity_check(r)
    used_ai = r["task1.ai"]
    treatment = r["task2.treatment"]

    # `if missing` throws and `missing == "ai"` is `missing`, so handle it first
    (ismissing(used_ai) || ismissing(treatment)) && return false

    expected_treatment = used_ai ? "ai" : "human"
    return treatment == expected_treatment
end

sanity_check (generic function with 1 method)

In [28]:
# List the rows that fail the sanity check; an empty result means every
# merged row is consistent.
filter(r -> !sanity_check(r), eachrow(merged_data))

Row,dev1,task1.ai,task1.codehealth,task1.coverage,task1.measured_time,task1.estimated_time,task1.entry-demo-4,task1.entry-demo-5,task1.entry-ai-1,task1.entry-ai-2,task1.entry-ai-3,task1.entry-ai-4,task1.entry-ai-5,task1.entry-ai-6,task1.entry-ai-7,task1.entry-ai-8,task1.entry-ai-9,task1.entry-ai-10,task1.exit-uninterrupted,task1.exit-space-1,task1.exit-space-2,task1.exit-space-3,task1.exit-space-4,task1.exit-space-5,task1.exit-space-6,task1.exit-space-7,task1.exit-space-8,task1.exit-space-9,task1.exit-space-10,task2.dev2,task2.treatment,task2.codehealth,task2.coverage,task2.measured_time,task2.estimated_time,task2.exit-uninterrupted,task2.exit-space-1,task2.exit-space-2,task2.exit-space-3,task2.exit-space-4,task2.exit-space-5,task2.exit-space-6,task2.exit-space-7,task2.exit-space-8,task2.exit-space-9,task2.exit-space-10
Unnamed: 0_level_1,String7?,Bool?,Float64?,Float64?,String7?,String7?,String15?,String15?,Bool?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String7,String7?,Float64?,Float64?,Float64?,Float64?,String15?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?


Works.

In [29]:
merged_data

Row,dev1,task1.ai,task1.codehealth,task1.coverage,task1.measured_time,task1.estimated_time,task1.entry-demo-4,task1.entry-demo-5,task1.entry-ai-1,task1.entry-ai-2,task1.entry-ai-3,task1.entry-ai-4,task1.entry-ai-5,task1.entry-ai-6,task1.entry-ai-7,task1.entry-ai-8,task1.entry-ai-9,task1.entry-ai-10,task1.exit-uninterrupted,task1.exit-space-1,task1.exit-space-2,task1.exit-space-3,task1.exit-space-4,task1.exit-space-5,task1.exit-space-6,task1.exit-space-7,task1.exit-space-8,task1.exit-space-9,task1.exit-space-10,task2.dev2,task2.treatment,task2.codehealth,task2.coverage,task2.measured_time,task2.estimated_time,task2.exit-uninterrupted,task2.exit-space-1,task2.exit-space-2,task2.exit-space-3,task2.exit-space-4,task2.exit-space-5,task2.exit-space-6,task2.exit-space-7,task2.exit-space-8,task2.exit-space-9,task2.exit-space-10
Unnamed: 0_level_1,String7?,Bool?,Float64?,Float64?,String7?,String7?,String15?,String15?,Bool?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String7,String7?,Float64?,Float64?,Float64?,Float64?,String15?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?
1,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0
2,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0
3,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0
4,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0
5,anon094,true,8.5,0.71,6601,missing,Professional,Advanced,true,5.0,5.0,5.0,5.0,5.0,4.0,1.0,3.0,4.0,"Yes, but breaks",4.0,4.0,5.0,1.0,5.0,5.0,5.0,4.0,4.0,2.0,anon005,ai,8.64,0.71,391708.0,5400.0,Yes,5.0,3.0,4.0,2.0,4.0,3.0,3.0,4.0,5.0,3.0
6,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0
7,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0
8,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0
9,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0
10,anon047,true,8.34,0.71,13201,10800.0,Professional,Advanced,true,5.0,4.0,4.0,2.0,5.0,3.0,2.0,2.0,3.0,"Yes, but breaks",5.0,4.0,4.0,2.0,5.0,4.0,2.0,4.0,5.0,1.0,anon143,ai,8.66,0.72,9746.0,9746.0,"Yes, but breaks",4.0,4.0,5.0,2.0,3.0,4.0,4.0,5.0,3.0,2.0


In [30]:
names(data_task_2_cleaned)

59-element Vector{String}:
 "github"
 "predecessor"
 "treatment"
 "codehealth"
 "codehealth_diff"
 "coverage"
 "coverage_diff"
 "test_runs"
 "measured_time"
 "estimated_time"
 "entry-demo-1"
 "entry-demo-2"
 "entry-demo-3"
 ⋮
 "gh-deletions"
 "gh-changes"
 "gh-changed-files"
 "gh-adds-unit-test"
 "gh-adds-behavioural-test"
 "gh-adds-functional"
 "gh-adds-sql"
 "gh-adds-logging"
 "gh-adds-exception-handling"
 "gh-adds-dependency"
 "clean_time"
 "pp_mean"

# AI experience

We calculate the AI experience from the answers to two entry questions, `entry-ai-1` and `entry-ai-2`.

In [31]:
any(ismissing, merged_data[!, "task1.entry-ai-2"])

true

In [32]:
# Derive the AI-experience column: `Commons.ai_experience` is applied row by
# row to the pair (`task1.entry-ai-1`, `task1.entry-ai-2`).
merged_data[!,"task1.ai_xp"] = map(Commons.ai_experience, merged_data[!,"task1.entry-ai-1"], 
                                                  merged_data[!,"task1.entry-ai-2"])

79-element Vector{Int64}:
 5
 5
 5
 5
 5
 5
 5
 5
 5
 5
 5
 4
 5
 ⋮
 1
 2
 2
 2
 2
 1
 1
 1
 3
 1
 1
 1

In [33]:
merged_data

Row,dev1,task1.ai,task1.codehealth,task1.coverage,task1.measured_time,task1.estimated_time,task1.entry-demo-4,task1.entry-demo-5,task1.entry-ai-1,task1.entry-ai-2,task1.entry-ai-3,task1.entry-ai-4,task1.entry-ai-5,task1.entry-ai-6,task1.entry-ai-7,task1.entry-ai-8,task1.entry-ai-9,task1.entry-ai-10,task1.exit-uninterrupted,task1.exit-space-1,task1.exit-space-2,task1.exit-space-3,task1.exit-space-4,task1.exit-space-5,task1.exit-space-6,task1.exit-space-7,task1.exit-space-8,task1.exit-space-9,task1.exit-space-10,task2.dev2,task2.treatment,task2.codehealth,task2.coverage,task2.measured_time,task2.estimated_time,task2.exit-uninterrupted,task2.exit-space-1,task2.exit-space-2,task2.exit-space-3,task2.exit-space-4,task2.exit-space-5,task2.exit-space-6,task2.exit-space-7,task2.exit-space-8,task2.exit-space-9,task2.exit-space-10,task1.ai_xp
Unnamed: 0_level_1,String7?,Bool?,Float64?,Float64?,String7?,String7?,String15?,String15?,Bool?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String7,String7?,Float64?,Float64?,Float64?,Float64?,String15?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Int64
1,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0,5
2,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0,5
3,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5
4,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0,5
5,anon094,true,8.5,0.71,6601,missing,Professional,Advanced,true,5.0,5.0,5.0,5.0,5.0,4.0,1.0,3.0,4.0,"Yes, but breaks",4.0,4.0,5.0,1.0,5.0,5.0,5.0,4.0,4.0,2.0,anon005,ai,8.64,0.71,391708.0,5400.0,Yes,5.0,3.0,4.0,2.0,4.0,3.0,3.0,4.0,5.0,3.0,5
6,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0,5
7,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0,5
8,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5
9,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0,5
10,anon047,true,8.34,0.71,13201,10800.0,Professional,Advanced,true,5.0,4.0,4.0,2.0,5.0,3.0,2.0,2.0,3.0,"Yes, but breaks",5.0,4.0,4.0,2.0,5.0,4.0,2.0,4.0,5.0,1.0,anon143,ai,8.66,0.72,9746.0,9746.0,"Yes, but breaks",4.0,4.0,5.0,2.0,3.0,4.0,4.0,5.0,3.0,2.0,5


In [34]:
# Check we don't have missing values
any(ismissing, merged_data[!,"task1.ai_xp"])

false

# AI preference

We need to aggregate some data to determine the AI preference of the developer.

The selected columns are the `entry-ai-3` to `entry-ai-10` columns, but `entry-ai-8` is on an inverted scale!

(I'm thinking we might not need this aggregation, because these are predictors, not outcomes...)

In [35]:
using ai_codev_study.Commons

In [36]:
map(flip_likert, 1:5)

5-element Vector{Int64}:
 5
 4
 3
 2
 1

In [37]:
"""
    make_ai_pref(data)

Compute the AI-preference score of every row of `data` as the mean of the
`task1.entry-ai-3` to `task1.entry-ai-10` answers, after flipping
`task1.entry-ai-8`, which is on an inverted scale.

Returns one value per row of `data`. `data` itself is not modified.
"""
function make_ai_pref(data)
    # Questions 3 to 10 are the questions about AI preference
    ai_related = ["task1.entry-ai-$n" for n in 3:10]

    # Keep only the preference columns actually present in `data`
    ai_pref_columns = filter(in(ai_related), names(data))

    # Selecting several columns builds a new DataFrame, so replacing a column
    # below leaves `data` untouched
    ai_pref_table = data[!, ai_pref_columns]

    # Flip the values for question 8, inverted scale!
    ai_pref_table[!, "task1.entry-ai-8"] =
        map(flip_likert, ai_pref_table[!, "task1.entry-ai-8"])

    # Row-wise mean over the preference answers
    return map(mean, eachrow(ai_pref_table))
end

make_ai_pref (generic function with 1 method)

In [38]:
# Store the aggregated AI-preference score as a new column, then display the table
merged_data[!,"task1.ai_pref"] = make_ai_pref(merged_data)
merged_data

Row,dev1,task1.ai,task1.codehealth,task1.coverage,task1.measured_time,task1.estimated_time,task1.entry-demo-4,task1.entry-demo-5,task1.entry-ai-1,task1.entry-ai-2,task1.entry-ai-3,task1.entry-ai-4,task1.entry-ai-5,task1.entry-ai-6,task1.entry-ai-7,task1.entry-ai-8,task1.entry-ai-9,task1.entry-ai-10,task1.exit-uninterrupted,task1.exit-space-1,task1.exit-space-2,task1.exit-space-3,task1.exit-space-4,task1.exit-space-5,task1.exit-space-6,task1.exit-space-7,task1.exit-space-8,task1.exit-space-9,task1.exit-space-10,task2.dev2,task2.treatment,task2.codehealth,task2.coverage,task2.measured_time,task2.estimated_time,task2.exit-uninterrupted,task2.exit-space-1,task2.exit-space-2,task2.exit-space-3,task2.exit-space-4,task2.exit-space-5,task2.exit-space-6,task2.exit-space-7,task2.exit-space-8,task2.exit-space-9,task2.exit-space-10,task1.ai_xp,task1.ai_pref
Unnamed: 0_level_1,String7?,Bool?,Float64?,Float64?,String7?,String7?,String15?,String15?,Bool?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String7,String7?,Float64?,Float64?,Float64?,Float64?,String15?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Int64,Float64?
1,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0,5,5.0
2,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0,5,5.0
3,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5,5.0
4,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0,5,5.0
5,anon094,true,8.5,0.71,6601,missing,Professional,Advanced,true,5.0,5.0,5.0,5.0,5.0,4.0,1.0,3.0,4.0,"Yes, but breaks",4.0,4.0,5.0,1.0,5.0,5.0,5.0,4.0,4.0,2.0,anon005,ai,8.64,0.71,391708.0,5400.0,Yes,5.0,3.0,4.0,2.0,4.0,3.0,3.0,4.0,5.0,3.0,5,4.5
6,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0,5,5.0
7,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0,5,5.0
8,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5,5.0
9,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0,5,5.0
10,anon047,true,8.34,0.71,13201,10800.0,Professional,Advanced,true,5.0,4.0,4.0,2.0,5.0,3.0,2.0,2.0,3.0,"Yes, but breaks",5.0,4.0,4.0,2.0,5.0,4.0,2.0,4.0,5.0,1.0,anon143,ai,8.66,0.72,9746.0,9746.0,"Yes, but breaks",4.0,4.0,5.0,2.0,3.0,4.0,4.0,5.0,3.0,2.0,5,3.375


In [39]:
any(ismissing, merged_data[!,"task1.ai_pref"])

true

There are missing values; we might want to use some imputation at some point.

# Saving the data

In [40]:
# Replace every dash in the column names with an underscore
# (e.g. `task1.entry-ai-1` becomes `task1.entry_ai_1`)
merged_data = rename(s -> replace(s, "-" => "_"), merged_data)

Row,dev1,task1.ai,task1.codehealth,task1.coverage,task1.measured_time,task1.estimated_time,task1.entry_demo_4,task1.entry_demo_5,task1.entry_ai_1,task1.entry_ai_2,task1.entry_ai_3,task1.entry_ai_4,task1.entry_ai_5,task1.entry_ai_6,task1.entry_ai_7,task1.entry_ai_8,task1.entry_ai_9,task1.entry_ai_10,task1.exit_uninterrupted,task1.exit_space_1,task1.exit_space_2,task1.exit_space_3,task1.exit_space_4,task1.exit_space_5,task1.exit_space_6,task1.exit_space_7,task1.exit_space_8,task1.exit_space_9,task1.exit_space_10,task2.dev2,task2.treatment,task2.codehealth,task2.coverage,task2.measured_time,task2.estimated_time,task2.exit_uninterrupted,task2.exit_space_1,task2.exit_space_2,task2.exit_space_3,task2.exit_space_4,task2.exit_space_5,task2.exit_space_6,task2.exit_space_7,task2.exit_space_8,task2.exit_space_9,task2.exit_space_10,task1.ai_xp,task1.ai_pref
Unnamed: 0_level_1,String7?,Bool?,Float64?,Float64?,String7?,String7?,String15?,String15?,Bool?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String31?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,String7,String7?,Float64?,Float64?,Float64?,Float64?,String15?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Float64?,Int64,Float64?
1,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0,5,5.0
2,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0,5,5.0
3,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5,5.0
4,anon126,true,8.34,0.74,3290,missing,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,Yes,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0,5,5.0
5,anon094,true,8.5,0.71,6601,missing,Professional,Advanced,true,5.0,5.0,5.0,5.0,5.0,4.0,1.0,3.0,4.0,"Yes, but breaks",4.0,4.0,5.0,1.0,5.0,5.0,5.0,4.0,4.0,2.0,anon005,ai,8.64,0.71,391708.0,5400.0,Yes,5.0,3.0,4.0,2.0,4.0,3.0,3.0,4.0,5.0,3.0,5,4.5
6,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon136,ai,8.42,0.73,4773.0,missing,Yes,4.0,4.0,1.0,5.0,5.0,4.0,1.0,2.0,3.0,1.0,5,5.0
7,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon106,ai,8.07,0.75,15671.0,missing,Yes,5.0,4.0,3.0,1.0,4.0,5.0,5.0,5.0,5.0,2.0,5,5.0
8,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon037,ai,8.43,0.73,6327.0,missing,Yes,5.0,4.0,5.0,4.0,4.0,4.0,4.0,5.0,4.0,4.0,5,5.0
9,anon126,true,8.88,0.91,462632,10800.0,Professional,Beginner,true,5.0,5.0,5.0,5.0,5.0,5.0,1.0,5.0,5.0,No,5.0,5.0,5.0,1.0,5.0,5.0,5.0,5.0,5.0,1.0,anon050,ai,8.84,0.9,94967.0,19800.0,No,5.0,5.0,5.0,2.0,5.0,4.0,5.0,5.0,5.0,5.0,5,5.0
10,anon047,true,8.34,0.71,13201,10800.0,Professional,Advanced,true,5.0,4.0,4.0,2.0,5.0,3.0,2.0,2.0,3.0,"Yes, but breaks",5.0,4.0,4.0,2.0,5.0,4.0,2.0,4.0,5.0,1.0,anon143,ai,8.66,0.72,9746.0,9746.0,"Yes, but breaks",4.0,4.0,5.0,2.0,3.0,4.0,4.0,5.0,3.0,2.0,5,3.375


In [41]:
# Save the merged data to a CSV file in the shared `data` folder
CSV.write("../../data/tasks_merged.csv",merged_data)

"../../data/tasks_merged.csv"