In [1]:
library(tidyverse)

"package 'ggplot2' was built under R version 4.3.3"
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


# Creating grade-like data
This simulated file contains grade data similar to what one could export from the eLC gradebook. Each student occupies a single row, with each exam in a separate column.

In [177]:
# Simulate three exam scores for one group of students.
#
# Arguments:
#   label       value stored in the `group` column
#   seed        passed to set.seed() so the draws are reproducible
#   exam1_mean  mean of the normal distribution exam1 is drawn from
#   exam1_sd    standard deviation of that distribution
#   gain        mean of the (sd = 1) normal term added for exam2 and exam3
#   n_students  number of rows to generate
#
# Returns a data frame with columns id, group, exam1, exam2, exam3.
simulate_exam_group <- function(label, seed, exam1_mean, exam1_sd, gain,
                                n_students = 100) {
  set.seed(seed)
  data.frame(id = seq_len(n_students)) %>%
    mutate(
      group = label,
      exam1 = round(rnorm(n_students, exam1_mean, exam1_sd), 0),
      exam2 = exam1 + round(rnorm(n_students, gain, 1), 0),
      exam3 = round((exam1 + exam2) / 2 + rnorm(n_students, gain, 1), 0),
      # Cap at 100 only after all three exams exist, so exam2 and exam3 are
      # derived from the uncapped earlier scores.
      across(c(exam1, exam2, exam3), ~ pmin(.x, 100))
    )
}

data1 <- simulate_exam_group(
  "group1",
  seed = 42, exam1_mean = 85, exam1_sd = 5, gain = 5
)
data2 <- simulate_exam_group(
  "group2",
  seed = 1785, exam1_mean = 82, exam1_sd = 4, gain = 4
)

data <- bind_rows(data1, data2)

# Shuffle the rows so the groups are interleaved, then hand out fresh ids
# (1001, 1002, ...) in the shuffled order.
set.seed(1)
data <- data %>%
  slice_sample(n = nrow(data)) %>%
  mutate(id = 1000 + seq_len(n()))

In [179]:
# Preview the first ten rows of the simulated grade data
head(data, n = 10)

Unnamed: 0_level_0,id,group,exam1,exam2,exam3
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
1,1001,group1,90,95,97
2,1002,group2,77,80,81
3,1003,group2,80,84,86
4,1004,group2,89,93,95
5,1005,group1,89,94,98
6,1006,group1,84,88,91
7,1007,group2,83,87,89
8,1008,group1,87,92,93
9,1009,group1,79,84,87
10,1010,group1,83,87,90


In [181]:
# Save the complete grade data; row.names = FALSE keeps R's row names out of
# the file.
write.csv(
  x = data,
  file = "exam_data.csv",
  row.names = FALSE
)

## With missing data

In [225]:
set.seed(123)

# Draw row positions from the actual size of `data` instead of a hard-coded
# 200, so this cell stays correct if the number of students changes.
n_students <- nrow(data)

# Rows that lose all three exam scores.
missing_rows <- sample.int(n_students, size = 5, replace = FALSE)

# Despite their names, these hold ROW positions: the rows that lose only
# exam1, only exam2, or only exam3, respectively.
missing_columns1 <- sample.int(n_students, size = 5, replace = FALSE)
missing_columns2 <- sample.int(n_students, size = 5, replace = FALSE)
missing_columns3 <- sample.int(n_students, size = 5, replace = FALSE)

# Blank out each exam at the union of the "whole row" positions and that
# exam's own positions.
data1m <- data %>%
  mutate(
    exam1 = replace(exam1, c(missing_rows, missing_columns1), NA_real_),
    exam2 = replace(exam2, c(missing_rows, missing_columns2), NA_real_),
    exam3 = replace(exam3, c(missing_rows, missing_columns3), NA_real_)
  )
            

In [227]:
# Save the version of the grade data that contains missing scores
write.csv(
  x = data1m,
  file = "exam_data_missing.csv",
  row.names = FALSE
)

# Test question data

In [142]:
# Two id-only data frames of 100 rows each: ids 100-199 and ids 200-299
data3 <- data.frame(id = seq.int(from = 100L, to = 199L))
data4 <- data.frame(id = seq.int(from = 200L, to = 299L))

In [170]:
set.seed(42)

# Morning section: every question is a 0/1 outcome (presumably
# incorrect/correct -- confirm). The sample size comes from n() so it always
# matches the number of rows in data3.
data3 <- data3 %>%
  mutate(
    section = "morning",
    # Q1 and Q2: both outcomes equally likely
    Q1 = sample(c(0, 1), size = n(), replace = TRUE),
    Q2 = sample(c(0, 1), size = n(), replace = TRUE),
    # prob lines up with c(0, 1): P(1) is 0.6 for Q3 and 0.2 for Q4
    Q3 = sample(c(0, 1), size = n(), replace = TRUE, prob = c(0.4, 0.6)),
    Q4 = sample(c(0, 1), size = n(), replace = TRUE, prob = c(0.8, 0.2))
  )

write.csv(data3, file = "morning_exam.csv", row.names = FALSE)

In [172]:
set.seed(1785)

# Afternoon section. The sample size comes from n() so it always matches the
# number of rows in data4.
data4 <- data4 %>%
  mutate(
    section = "afternoon",
    Q1 = sample(c(0, 1), size = n(), replace = TRUE),
    # prob lines up with c(0, 1): P(1) is 0.64
    Q2 = sample(c(0, 1), size = n(), replace = TRUE, prob = c(0.36, 0.64)),
    # NOTE(review): Q3 is stored as an answer letter (A-D) here, whereas the
    # other questions are 0/1 -- presumably deliberate so the file needs
    # recoding; confirm.
    Q3 = sample(
      c("A", "B", "C", "D"),
      size = n(),
      replace = TRUE,
      prob = c(0.1, 0.5, 0.25, 0.15)
    ),
    # P(1) is 0.18
    Q4 = sample(c(0, 1), size = n(), replace = TRUE, prob = c(0.82, 0.18))
  )

write.csv(data4, file = "afternoon_exam.csv", row.names = FALSE)

In [174]:
set.seed(1776)

# One row per id from 100 to 299, with two 0/1 indicator columns. The sample
# size comes from n() so it always matches the number of ids.
demographics <- data.frame(id = 100:299) %>%
  mutate(
    # group1: prob lines up with c(0, 1), so P(1) is 0.25
    group1 = sample(
      c(0, 1),
      size = n(),
      replace = TRUE,
      prob = c(0.75, 0.25)
    ),
    # group2: both values equally likely
    group2 = sample(c(0, 1), size = n(), replace = TRUE)
  )

write.csv(demographics, file = "class_demographics.csv", row.names = FALSE)