<a href="https://colab.research.google.com/github/ZhijiaoGao/Programming-Course/blob/main/Rweek1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
# Load required packages
library(dplyr)
library(readr)
#library(scale)

# Load the nutrient data
foods <- read.csv("https://raw.githubusercontent.com/CaitlinLloyd/Psychology_Programming2025/refs/heads/main/Data/nutrient_info.csv")

# Standardize relevant nutrients
nutrients_scaled <- scale(foods %>% dplyr::select(Fat_g, CHO_g, PRO_g, Energy_Density))
foods <- bind_cols(foods, as.data.frame(nutrients_scaled))
colnames(foods)[(ncol(foods)-3):ncol(foods)] <- c("Fat_s", "CHO_s", "PRO_s", "ED_s")

# Set up parameters
n_participants <- 20

# Function to simulate ratings for one participant
simulate_participant <- function(pid, foods_df) {
  n_foods <- nrow(foods_df)

  health_rating <- round(3 + (-0.6 * foods_df$Fat_s + 0.4 * foods_df$PRO_s +
                              0.3 * foods_df$CHO_s + rnorm(n_foods, 0, 0.5)))
  health_rating <- pmin(pmax(health_rating, 1), 5)

  taste_rating <- round(3 + (0.6 * foods_df$Fat_s + 0.4 * foods_df$ED_s +
                             rnorm(n_foods, 0, 0.5)))
  taste_rating <- pmin(pmax(taste_rating, 1), 5)

  choice <- round(3 + 0.5 * (taste_rating - 3) + 0.3 * (health_rating - 3) +
                    rnorm(n_foods, 0, 0.7))
  choice <- pmin(pmax(choice, 1), 5)

  # Reaction time: faster (closer to 1s) for more extreme choices
  preference_strength <- abs(choice - 3)
  reaction_time <- round(runif(n_foods, 3, 5) - 0.5 * preference_strength + rnorm(n_foods, 0, 0.2), 2)
  reaction_time <- pmax(pmin(reaction_time, 5), 1)

  df <- data.frame(
    participant = pid,
    stimulus = foods_df$stimulus,
    Fat_g = foods_df$Fat_g,
    CHO_g = foods_df$CHO_g,
    PRO_g = foods_df$PRO_g,
    Energy_Density = foods_df$Energy_Density,
    health_rating = health_rating,
    taste_rating = taste_rating,
    choice = choice,
    reaction_time = reaction_time
  )

  # Introduce missing values
  n_missing <- round(0.05 * n_foods)
  miss_idx <- sample(1:n_foods, n_missing)
  df$choice[miss_idx] <- NA
  df$reaction_time[miss_idx] <- NA

  return(df)
}

simulated_data <- bind_rows(lapply(1:n_participants, simulate_participant, foods_df = foods))

[1m[22mNew names:
[36m•[39m `CHO_g` -> `CHO_g...5`
[36m•[39m `PRO_g` -> `PRO_g...6`
[36m•[39m `Fat_g` -> `Fat_g...7`
[36m•[39m `Energy_Density` -> `Energy_Density...11`
[36m•[39m `...13` -> `...12`
[36m•[39m `Fat_g` -> `Fat_g...14`
[36m•[39m `CHO_g` -> `CHO_g...15`
[36m•[39m `PRO_g` -> `PRO_g...16`
[36m•[39m `Energy_Density` -> `Energy_Density...17`


In [32]:
#Now access the food column (hint, use $)
simulated_data$stimulus
#And the RT
simulated_data$reaction_time

mean(simulated_data$reaction_time, na.rm = TRUE)
sd(simulated_data$reaction_time, na.rm = TRUE)

In [33]:
#Remove missing values
simulated_data_complete <- simulated_data[complete.cases(simulated_data),]

In [34]:
simulated_data_complete <- subset(simulated_data,!is.na(simulated_data$choice))

In [35]:
#Summarize health for one person
# Filter for one participant (e.g., participant 1)
participant_1_data <- simulated_data %>% filter(participant == 1)

# Summary statistics for health rating
mean(participant_1_data$health_rating)

#print this value

print(paste0("the average health rating for participant one is: ",mean(participant_1_data$health_rating)))

[1] "the average health rating for participant one is: 2.97101449275362"


In [36]:
for (pt in c(unique(simulated_data$participant))) {
participant_data <- simulated_data %>% filter(participant == pt)

# Summary statistics for health rating
mean(participant_data$health_rating)

#print this value

print(paste0("the average health rating for participant ",pt, " is: ",mean(participant_data$health_rating)))
}

[1] "the average health rating for participant 1 is: 2.97101449275362"
[1] "the average health rating for participant 2 is: 3.03623188405797"
[1] "the average health rating for participant 3 is: 3.03623188405797"
[1] "the average health rating for participant 4 is: 2.97826086956522"
[1] "the average health rating for participant 5 is: 3.04347826086957"
[1] "the average health rating for participant 6 is: 2.90579710144928"
[1] "the average health rating for participant 7 is: 2.98550724637681"
[1] "the average health rating for participant 8 is: 3.01449275362319"
[1] "the average health rating for participant 9 is: 3.05072463768116"
[1] "the average health rating for participant 10 is: 3"
[1] "the average health rating for participant 11 is: 3.06521739130435"
[1] "the average health rating for participant 12 is: 3.02173913043478"
[1] "the average health rating for participant 13 is: 2.93478260869565"
[1] "the average health rating for participant 14 is: 2.98550724637681"
[1] "the average

In [41]:
#Now lets merge the rating file with the info file
simulated_data_complete <- merge(simulated_data_complete,foods,by.x="stimulus",by.y="stimulus")

In [55]:
mean

In [53]:
simulated_data_complete$reaction_time

In [54]:
 simulated_data_complete %>% dplyr::group_by(participant,HI_LO_fat) %>% dplyr::summarize_at(c('choice',"reaction_time"),c(sd))

participant,HI_LO_fat,choice,reaction_time
<int>,<int>,<dbl>,<dbl>
1,0,0.7495381,0.7420552
1,1,0.8196748,0.6413005
2,0,0.7974811,0.6281066
2,1,0.9654256,0.7122748
3,0,0.7788224,0.6578936
3,1,0.8624305,0.5500214
4,0,0.8390076,0.6664349
4,1,0.7465769,0.659928
5,0,0.8662047,0.75312
5,1,0.8639723,0.7367612


In [48]:
summary <- simulated_data_complete %>% dplyr::group_by(participant,HI_LO_fat) %>% summarize_at(c('choice',"reaction_time"),c(mean))

#replace the 0 1 values with high and low fat
summary$HI_LO_fat <- ifelse(summary$HI_LO_fat==0,"low","high")

# what are other ways we can do this?

In [None]:
#pivot summary frame to wide
#now lets group by fat content and summarize for each participant
wide <- summary  %>% pivot_wider(1,names_from = "HI_LO_fat",values_from = c("choice","reaction_time"))

# let's compare the average choice ratings for each participant for high and low-fat foods

t.test(wide$choice_low,wide$choice_high)

In [None]:
# OPTIONAL: Here try and simulate a different dataset - a monetary choice task where the participant
# selects between an immediate vs delayed reward. Compare the RT between when the participant chooses the immediate vs delayed option

In [None]:
#Now load in a dataset we created
dd <- read.csv("https://raw.githubusercontent.com/CaitlinLloyd/Psychology_Programming2025/refs/heads/main/Data/DelayDisc_example.csv")

In [None]:
#Load in the Delay Discounting dataset
dd <- read.csv("https://raw.githubusercontent.com/CaitlinLloyd/Psychology_Programming2025/refs/heads/main/Data/DelayDisc_example.csv")
##HOMEWORK
#Use if statements to figure out which is delayed option
# Summarize the RT for each person when they chose delayed vs chose sooner reward
# Calculate the average earnings per person (average value of choices across participants) and the number of times they chose delayed vs sooner (you will need group_by and count functions)
# Copy and paste the code into canvas
