# Simulated Foraging Scenario - Single Transect v2

### This notebook describes expert geoscientists' behavior in a simulated data foraging scenario. In the scenario, participants were asked to evaluate a hypothesis along a single environmental transect by using a mobile robot to collect environmental data. At any point, participants could stop the robot and either change their data collection strategy or draw a conclusion about the hypothesis.
#### *The scenario was web-based, and each participant's data was saved as a json file. First, we extract the relevant indices from the json files for visualization and analysis.*

In [2]:
# Install any necessary packages that are not yet available, then load them.
# Installing only what is missing avoids re-downloading on every run and the
# "packages ... are in use and will not be installed" warning.
required.packages <- c("tidyverse", "jsonlite", "data.table")
missing.packages <- required.packages[
    !vapply(required.packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing.packages) > 0) {
    install.packages(missing.packages)
}
library("tidyverse")
library("jsonlite")
library("data.table")

"packages 'tidyverse', 'jsonlite', 'data.table' are in use and will not be installed"


In [3]:
# Participant json files, kept in two groups: the files in ID.list.1 are
# tagged as Version 1 and the files in ID.list.2 as Version 2 further below.
ID.list.1 <- c(
    "14432228.json", "15240948.json", "22375511.json", "23299149.json", "28074220.json",
    "42776583.json", "45431255.json", "58784411.json", "71319116.json", "77520674.json",
    "82399189.json", "82839541.json", "83386066.json", "90086100.json", "1583193385731.json"
)
IDlength1 <- length(ID.list.1)

ID.list.2 <- c(
    "1585589311666.json", "1585589948025.json", "1585676430169.json", "1585752031323.json",
    "1585849879621.json", "1586969203252.json", "1587010200660.json", "1587094272760.json",
    "1589205081687.json", "1589986003069.json", "1590092884324.json", "1590520586325.json",
    "1591008806675.json", "1591026198006.json", "1591223988481.json", "1591276928709.json",
    "1591813810309.json", "1592392156413.json", "1592855181400.json", "1592855247619.json",
    "1592855262696.json", "1592855880739.json", "1592856791709.json", "1592856888631.json",
    "1592858044241.json", "1592860543761.json", "1592861825629.json", "1592861857839.json",
    "1592862833109.json", "1592865277247.json", "1592866257663.json", "1592866998514.json",
    "1592867506102.json", "1592869775947.json", "1592870684331.json", "1592881202024.json",
    "1592894931507.json", "1592895912483.json", "1592899206181.json", "1592899663788.json",
    "1592910272301.json", "1592911065663.json", "1592917572936.json", "1592919636883.json",
    "1592926425780.json", "1592932009725.json", "1592933989451.json", "1592934462741.json",
    "1592938994367.json", "1592968715097.json", "1593003430648.json", "1593029401575.json",
    "1593030276249.json", "1593168428882.json", "1593203495508.json", "1593241530097.json",
    "1593427899378.json", "1593535171178.json", "1593535853051.json", "1593536206896.json",
    "1593537539734.json", "1593537767510.json", "1593538821131.json", "1593539448066.json",
    "1593541194718.json", "1593541516413.json", "1593542909385.json", "1593543098394.json",
    "1593543133450.json", "1593545669570.json", "1593547336844.json", "1593547951118.json",
    "1593548058969.json", "1593550103153.json", "1593550674370.json", "1593583822482.json",
    "1593589653022.json", "1593590736190.json", "1593596427201.json", "1593712260273.json",
    "1593765834467.json", "1593781123178.json", "1594214792295.json", "1594390983351.json",
    "1594667245382.json", "1594830490636.json", "1594942395526.json", "1595191404495.json",
    "1595358098258.json", "1595498691209.json", "1595505290181.json", "1595515252990.json",
    "1595516644018.json", "1595521002079.json", "1595521790031.json", "1595613604129.json",
    "1595702725300.json", "1595957562195.json", "1596256377223.json", "1596671816427.json",
    "1597503133759.json"
)
IDlength2 <- length(ID.list.2)

# Full list of files: the first group followed by the second group.
ID.list <- c(ID.list.1, ID.list.2)
IDlength <- length(ID.list)

# Pre-allocate the data frames that the json fields are written into.
# Character columns start as "" and integer columns as 0, one row per file.

# Quantitative answers for every participant (both groups of files).
DataRaw <- data.frame(
    ID = vector("character", IDlength),
    Conclusion = vector("character", IDlength),
    ConclusionConfidence = vector("character", IDlength),
    ConclusionACC = vector("integer", IDlength),
    MagicNumber.SelfReport = vector("integer", IDlength),
    EqualSpacing.SelfReport = vector("integer", IDlength),
    SupportingPlot.33dataACC = vector("integer", IDlength),
    SupportingPlot.33data.Confidence = vector("character", IDlength),
    RefutingPlot.33dataACC = vector("integer", IDlength),
    RefutingPlot.33data.Confidence = vector("character", IDlength),
    SupportingPlot.220dataACC = vector("integer", IDlength),
    SupportingPlot.220data.Confidence = vector("character", IDlength),
    RefutingPlot.220dataACC = vector("integer", IDlength),
    RefutingPlot.220data.Confidence = vector("character", IDlength),
    Age = vector("integer", IDlength),
    Gender = vector("character", IDlength),
    stringsAsFactors = FALSE
)

# Background fields read only from the files in ID.list.1.
tempRaw.1 <- data.frame(
    ID = vector("character", IDlength1),
    Version = vector("integer", IDlength1),
    Familiar = vector("integer", IDlength1),
    YearsPractice = vector("integer", IDlength1),
    AcademicPosition = vector("character", IDlength1),
    GeoIdentity = vector("character", IDlength1),
    GeoSpecialization = vector("character", IDlength1),
    stringsAsFactors = FALSE
)

# Background fields read only from the files in ID.list.2.
tempRaw.2 <- data.frame(
    ID = vector("character", IDlength2),
    Version = vector("integer", IDlength2),
    Bachelors = vector("integer", IDlength2),
    YearsUG = vector("character", IDlength2),
    MajorUG = vector("character", IDlength2),
    MinorUG = vector("character", IDlength2),
    YearsPractice = vector("integer", IDlength2),
    Familiar = vector("integer", IDlength2),
    AcademicPosition = vector("character", IDlength2),
    GeoIdentity = vector("character", IDlength2),
    GeoSpecialization = vector("character", IDlength2),
    stringsAsFactors = FALSE
)

# Free-text answers for every participant.
DataQual <- data.frame(
    ID = vector("character", IDlength),
    How.Improve.Confidence = vector("character", IDlength),
    Why.MagicNumber = vector("character", IDlength),
    Why.EqualSpacing = vector("character", IDlength),
    stringsAsFactors = FALSE
)

# Free-text familiarity answers, one frame per group of files.
tempQual.1 <- data.frame(
    ID = vector("character", IDlength1),
    Version = vector("integer", IDlength1),
    How.Familiar = vector("character", IDlength1),
    stringsAsFactors = FALSE
)

tempQual.2 <- data.frame(
    ID = vector("character", IDlength2),
    Version = vector("integer", IDlength2),
    How.Familiar = vector("character", IDlength2),
    stringsAsFactors = FALSE
)

In [4]:
# Loops through ID.list and loads the json files: fields present for every
# participant are copied into DataRaw (quantitative) and DataQual (free text).

# Stop early, naming the offending files, if any data file cannot be found.
missing.files <- ID.list[!file.exists(ID.list)]
if (length(missing.files) > 0) {
    stop("Missing participant json file(s): ",
         paste(missing.files, collapse = ", "), call. = FALSE)
}

# seq_len() gives an empty sequence when there are no IDs, whereas 1:0 would
# iterate over c(1, 0).
for (i in seq_len(IDlength)) {
    #json file of corresponding ID is opened
    SubData <- fromJSON(ID.list[i])
    #print (ID.list[i]) #print current ID for debugging purposes

    # NOTE(review): `$` on a list allows partial name matching - confirm that
    # every key used below is spelled exactly as in the json files.
    form <- SubData$form
    conclude <- SubData$concludeQuestions

    #data from json is read to new data frame
    DataRaw$ID[i] <- ID.list[i]
    DataQual$ID[i] <- ID.list[i]

    # final conclusion about the hypothesis, its confidence and free-text follow-up
    DataRaw$Conclusion[i] <- ifelse(conclude$support == "Yes", "Hypothesis Supported", "Hypothesis Not Supported")
    DataRaw$ConclusionConfidence[i] <- conclude$confidence
    DataQual$How.Improve.Confidence[i] <- conclude$improve

    # scored 1 when isAlternativeHypo is TRUE and the answer is "No", or when
    # isAlternativeHypo is FALSE and the answer is "Yes"; 0 otherwise
    DataRaw$ConclusionACC[i] <- ifelse(SubData$isAlternativeHypo == TRUE & conclude$support == "No", 1,
                                       ifelse(SubData$isAlternativeHypo == FALSE & conclude$support == "Yes", 1, 0))

    # self-reported strategy questions ("Yes" -> 1) and their free-text reasons
    DataRaw$MagicNumber.SelfReport[i] <- ifelse(form$consistent == "Yes", 1, 0)
    DataRaw$EqualSpacing.SelfReport[i] <- ifelse(form$`evenly-space` == "Yes", 1, 0)
    DataQual$Why.MagicNumber[i] <- form$`consistent-reason`
    DataQual$Why.EqualSpacing[i] <- form$`evenly-space-reason`

    # plot judgements p1-p4: "Yes" scores 1 on the supporting plots (p1, p3)
    # and 0 on the refuting plots (p2, p4)
    DataRaw$SupportingPlot.33dataACC[i] <- ifelse(form$`p1-support` == "Yes", 1, 0)
    DataRaw$SupportingPlot.33data.Confidence[i] <- form$`p1-confidence`
    DataRaw$RefutingPlot.33dataACC[i] <- ifelse(form$`p2-support` == "Yes", 0, 1)
    DataRaw$RefutingPlot.33data.Confidence[i] <- form$`p2-confidence`
    DataRaw$SupportingPlot.220dataACC[i] <- ifelse(form$`p3-support` == "Yes", 1, 0)
    DataRaw$SupportingPlot.220data.Confidence[i] <- form$`p3-confidence`
    DataRaw$RefutingPlot.220dataACC[i] <- ifelse(form$`p4-support` == "Yes", 0, 1)
    DataRaw$RefutingPlot.220data.Confidence[i] <- form$`p4-confidence`

    # demographics; an "Other" gender is replaced by the free-text entry
    DataRaw$Age[i] <- form$age
    DataRaw$Gender[i] <- ifelse(form$gender$value == "Other", form$gender$reason, form$gender$value)
}

#DataRaw # print data frames for debugging purposes
#DataQual

# Loops through ID.list.1 and loads the json files: background fields for
# these participants go to tempRaw.1 / tempQual.1, tagged as Version 1.

# Stop early, naming the offending files, if any data file cannot be found.
missing.files <- ID.list.1[!file.exists(ID.list.1)]
if (length(missing.files) > 0) {
    stop("Missing participant json file(s): ",
         paste(missing.files, collapse = ", "), call. = FALSE)
}

# seq_len() gives an empty sequence when there are no IDs, whereas 1:0 would
# iterate over c(1, 0).
for (i in seq_len(IDlength1)) {
    #json file of corresponding ID is opened
    SubData <- fromJSON(ID.list.1[i])
    #print (ID.list.1[i]) #print current ID for debugging purposes

    # NOTE(review): `$` on a list allows partial name matching - confirm that
    # every key used below is spelled exactly as in the json files.
    form <- SubData$form

    #data from json is read to new data frame
    tempRaw.1$ID[i] <- ID.list.1[i]
    tempQual.1$ID[i] <- ID.list.1[i]
    tempRaw.1$Version[i] <- 1
    tempQual.1$Version[i] <- 1

    # familiarity: 0 only for the "not familiar" answer; the free-text reason
    # is kept only when the participant is familiar
    tempRaw.1$Familiar[i] <- ifelse(form$familiarity$value == "I am not familiar with any features",0,1)
    tempQual.1$How.Familiar[i] <- ifelse(tempRaw.1$Familiar[i] == 1, form$familiarity$reason, "")

    # professional background; an "Other" identification is replaced by the
    # free-text entry
    tempRaw.1$YearsPractice[i] <- form$`years-of-practice`
    tempRaw.1$AcademicPosition[i] <- form$`academia-position`
    tempRaw.1$GeoIdentity[i] <- ifelse(form$identification$value == "Other", form$identification$reason, form$identification$value)
    tempRaw.1$GeoSpecialization[i] <- form$`geoscience-specialization`
}

#tempRaw.1 # print data frame for debugging purposes
#tempQual.1

# Loops through ID.list.2 and loads the json files: background fields for
# these participants go to tempRaw.2 / tempQual.2, tagged as Version 2.

# Stop early, naming the offending files, if any data file cannot be found.
missing.files <- ID.list.2[!file.exists(ID.list.2)]
if (length(missing.files) > 0) {
    stop("Missing participant json file(s): ",
         paste(missing.files, collapse = ", "), call. = FALSE)
}

# seq_len() gives an empty sequence when there are no IDs, whereas 1:0 would
# iterate over c(1, 0).
for (i in seq_len(IDlength2)) {
    #json file of corresponding ID is opened
    SubData <- fromJSON(ID.list.2[i])
    #print (ID.list.2[i]) #print current ID for debugging purposes

    # NOTE(review): `$` on a list allows partial name matching - confirm that
    # every key used below is spelled exactly as in the json files.
    form <- SubData$form

    #data from json is read to new data frame
    tempRaw.2$ID[i] <- ID.list.2[i]
    tempQual.2$ID[i] <- ID.list.2[i]
    tempRaw.2$Version[i] <- 2
    tempQual.2$Version[i] <- 2
    tempRaw.2$Bachelors[i] <- ifelse(form$`if-bachelor` == "Yes", 1, 0)

    # NOTE(review): fields not assigned in a branch below keep the value the
    # row was pre-filled with (0 or ""), e.g. Familiar and YearsPractice for
    # undergraduates - confirm this is intended rather than a missing value.
    if (tempRaw.2$Bachelors[i] == 0) { #if undergrad participant
        tempRaw.2$YearsUG[i] <- form$`years-of-undergraduate`
        tempRaw.2$MajorUG[i] <- form$`undergraduate-major`
        tempRaw.2$MinorUG[i] <- form$`undergraduate-minor`
    } else { #if expert participant
        tempRaw.2$YearsPractice[i] <- form$`years-of-practice`
        # the geoscience background questions are read for geoscientists only
        if (form$`geoscientist-or-psychologist`$value == "Geoscientist") {
            tempRaw.2$Familiar[i] <- ifelse(form$familiarity$value == "I am not familiar with any features",0,1)
            tempQual.2$How.Familiar[i] <- ifelse(tempRaw.2$Familiar[i] == 1, form$familiarity$reason, "")
            # academia -> the academic position; industry -> "Industry";
            # anything else -> the free-text entry
            tempRaw.2$AcademicPosition[i] <- ifelse(form$`academia-or-industry`$value == "Academia", form$`academia-position`,
                                                    ifelse(form$`academia-or-industry`$value == "Industry", "Industry", form$`academia-or-industry`$reason))
            tempRaw.2$GeoIdentity[i] <- ifelse(form$identification$value == "Other", form$identification$reason, form$identification$value)
            tempRaw.2$GeoSpecialization[i] <- form$`geoscience-specialization`
        }
    }
}

#tempRaw.2 # print data frame for debugging purposes
#tempQual.2

In [5]:
# Build the final tables: stack the two version-specific frames, attach them
# to the shared frames by ID, tidy the duplicated columns, and write csv files.

# quantitative data
temp <- tempRaw.1 %>%
    full_join(tempRaw.2, by = c("ID", "Version", "Familiar", "GeoIdentity"))

DataFinal <- DataRaw %>%
    full_join(temp, by = "ID") %>%
    # columns present in both version frames arrive as .x (version 1) and
    # .y (version 2); keep the one that belongs to the row's version
    mutate(
        YearsPractice = ifelse(Version == 1, YearsPractice.x, YearsPractice.y),
        GeoSpecialization = ifelse(Version == 1, GeoSpecialization.x, GeoSpecialization.y),
        AcademicPosition = ifelse(Version == 1, AcademicPosition.x, AcademicPosition.y)
    ) %>%
    select(-c(YearsPractice.x, YearsPractice.y,
              GeoSpecialization.x, GeoSpecialization.y,
              AcademicPosition.x, AcademicPosition.y)) %>%
    # fill cells the join left empty: missing Bachelors becomes 1 and the
    # undergraduate fields of version 1 rows become ""
    mutate(
        Bachelors = ifelse(is.na(Bachelors), 1, Bachelors),
        YearsUG = ifelse(Version == 1, "", YearsUG),
        MajorUG = ifelse(Version == 1, "", MajorUG),
        MinorUG = ifelse(Version == 1, "", MinorUG)
    )

# qualitative data
temp <- tempQual.1 %>%
    full_join(tempQual.2, by = c("ID", "Version", "How.Familiar"))

QualFinal <- DataQual %>%
    full_join(temp, by = "ID") %>%
    select(-Version)

# write final dataframes to files
fwrite(DataFinal, file = "DataRaw.csv")
fwrite(QualFinal, file = "DataQual.csv")

#DataFinal # print data frame for debugging purposes
#QualFinal

In [None]:
# Yuanfeng's code for handling the "rows" behavioral data in the json files

#### *Next...*