diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..cb8f050 --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,22 @@ +Package: DAKS +Type: Package +Title: Data Analysis and Knowledge Spaces +Version: 1.0-0 +Date: 2009-02-26 +Author: Anatol Sargin <anatol.sargin@math.uni-augsburg.de>, + Ali Uenlue <ali.uenlue@math.uni-augsburg.de> +Maintainer: Anatol Sargin <anatol.sargin@math.uni-augsburg.de>, + Ali Uenlue <ali.uenlue@math.uni-augsburg.de> +Description: Functions and example datasets for the psychometric + theory of knowledge spaces. This package implements data + analysis methods and procedures for simulating data and + transforming different formulations in knowledge space + theory. +Depends: R (>= 2.8.1), relations, sets +Suggests: Rgraphviz +LazyLoad: yes +LazyData: yes +License: GPL (>= 2) +URL: http://stats.math.uni-augsburg.de/mitarbeiter/sargin/, + http://www.math.uni-augsburg.de/~uenlueal/ +Packaged: Thu Feb 26 14:42:35 2009; uenlueal diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..b8444ad --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,8 @@ +export(corr_iita, hasse, iita, imp2state, ind_gen, mini_iita, + ob_counter, orig_iita, pattern, pop_iita, pop_variance, simu, + state2imp, variance) + +# internal functions; not intended to be visible to users +# export() + +import(graphics, relations, sets) diff --git a/R/corr_iita.r b/R/corr_iita.r new file mode 100644 index 0000000..de72eec --- /dev/null +++ b/R/corr_iita.r @@ -0,0 +1,48 @@ +################## +# corrected IITA # +################## + +############################################## +# # +# This function performs the corrected # +# inductive item tree analysis procedure # +# and returns the corresponding diff values. # +# # +############################################## + +corr_iita<-function(dataset, A){ +b<-ob_counter(dataset) +m<-ncol(dataset) +n<-nrow(dataset) + +bs_neu<-list() +for(i in 1:length(A)){ +bs_neu[[i]]<-matrix(0,ncol = m, nrow = m) +} +diff_value_neu<-rep(0,length(A)) + +error_neu<-rep(0,length(A)) + +#computation of error rate +for(k in 1:length(A)){ +for(i in A[[k]]){ +error_neu[k]<-error_neu[k] + ((b[as.integer(i[1]), as.integer(i[2])]) / sum(dataset[,as.integer(i[2])])) +} +if(set_is_empty(A[[k]])){error_neu[k]<-NA} +if(set_is_empty(A[[k]]) == FALSE) {error_neu[k]<-error_neu[k] / length(A[[k]])} +} + +#computation of diff values +for(k in 1:length(A)){ +if(set_is_empty(A[[k]])){diff_value_neu[k]<-NA} +else{ +for(i in A[[length(A)]]){ +if(is.element(set(i), A[[k]])) {bs_neu[[k]][as.integer(i[1]),as.integer(i[2])]<-error_neu[k] * sum(dataset[,as.integer(i[2])])} +if(is.element(set(i), A[[k]]) == FALSE && is.element(set(tuple(as.integer(i[2]),as.integer(i[1]))), A[[k]]) == FALSE){bs_neu[[k]][as.integer(i[1]),as.integer(i[2])]<-(1- sum(dataset[,as.integer(i[1])]) / n) * sum(dataset[,as.integer(i[2])])} +if(is.element(set(i), A[[k]]) == FALSE && is.element(set(tuple(as.integer(i[2]),as.integer(i[1]))), A[[k]]) == TRUE){bs_neu[[k]][as.integer(i[1]),as.integer(i[2])]<-sum(dataset[,as.integer(i[2])]) - sum(dataset[,as.integer(i[1])]) + sum(dataset[,as.integer(i[1])]) * error_neu[k]} +} +diff_value_neu[k]<-sum((b - bs_neu[[k]])^2) / (m^2 - m) +} +} +return(diff_value_neu) +} \ No newline at end of file diff --git a/R/hasse.r b/R/hasse.r new file mode 100644 index 0000000..44fe5f8 --- /dev/null +++ b/R/hasse.r @@ -0,0 +1,44 @@ +############ +# Plotting # +############ + +################################################################ +# # +# This function plots the Hasse diagram of a surmise relation.
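+# # +# A minimal usage sketch (illustrative, kept as a comment so that # +# nothing is executed at package load; it mirrors the example in # +# man/hasse.Rd and uses the pisa dataset shipped with the package): # +# hasse(iita(pisa, v = 2)$implications, 5) #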
# +# # +################################################################ + +hasse<-function(imp, items){ +struct<-relation(domain = list(1:items,1:items),graph = imp) + +#computation of parallel items +parallel<-list() +k<-1 +for(i in 1:items){ +for(j in i:items){ +if(relation_incidence(struct)[i,j] ==1 && relation_incidence(struct)[j,i] ==1){ +parallel[[k]]<-tuple(i,j) +k<-k+1 +} +} +} + +#collapsing of parallel items +if(length(parallel) > 0){ +pardrop<-vector(length = length(parallel)) +for(i in 1:length(parallel)){ +pardrop[i]<-as.integer(parallel[[i]][2]) +} +pardrop<-pardrop[!duplicated(pardrop)] + +nparitems<-1:items +nparitems<-nparitems[-pardrop] +struct<-relation(domain = list(nparitems, nparitems), incidence = relation_incidence(struct)[-pardrop, -pardrop]) +} + +#plotting +plot(struct) + +#returning a list of parallel items +return(parallel) +} \ No newline at end of file diff --git a/R/iita.r b/R/iita.r new file mode 100644 index 0000000..6d05174 --- /dev/null +++ b/R/iita.r @@ -0,0 +1,44 @@ +########################################### +# Inductive item tree analysis algorithms # +########################################### + +############################################# +# # +# This function can be used to perform one # +# of the three inductive item tree analysis # +# algorithms (original, corrected, and # +# minimized corrected) selectively. # +# # +############################################# + +iita<-function(dataset, v){ +if ((!is.data.frame(dataset) & !is.matrix(dataset)) || ncol(dataset) == 1){ +stop("data must be either a numeric matrix or a data.frame, with at least two columns.\n") +} + +if(sum(!(dataset == 0 | dataset == 1) | is.na(dataset)) != 0){ +stop("data must contain only 0 and 1") +} + +if(v != 1 && v != 2 && v !=3){ +stop("IITA version must be specified") +} + +# call to the chosen algorithm +if(v == 3){ +i<-ind_gen(ob_counter(dataset)) +ii<-orig_iita(dataset, ind_gen(ob_counter(dataset))) +} + +if(v == 2){ +i<-ind_gen(ob_counter(dataset)) +ii<-corr_iita(dataset, ind_gen(ob_counter(dataset))) +} + +if(v == 1){ +i<-ind_gen(ob_counter(dataset)) +ii<-mini_iita(dataset, ind_gen(ob_counter(dataset))) +} + +return(list(diff = ii, implications = i[which(ii == min(ii))][[1]])) +} diff --git a/R/imp2state.r b/R/imp2state.r new file mode 100644 index 0000000..65bc942 --- /dev/null +++ b/R/imp2state.r @@ -0,0 +1,78 @@ +################## +# tranformations # +################## + +################################################## +# # +# This function transforms a set of implications # +# to the corresponding set of knowledge states. 
# +# # +################################################## + +imp2state<-function(imp, items){ +R_2<-matrix(1, ncol = items, nrow = items) +for(i in 1:items){ +for(j in 1:items){ +if(!is.element(set(tuple(i,j)), imp) && i != j){R_2[j,i]<-0} +} +} + +#Base +base<-list() + +for(i in 1:items){ +tmp<-vector() +for(j in 1:items){ +if(R_2[i,j] == 1){tmp<-c(tmp, j)} +} +base[[i]]<-sort(tmp) +} + +base_list<-list() +for(i in 1:items){ +base_list[[i]]<-set() +for(j in 1:length(base[[i]])) +base_list[[i]]<-set_union(base_list[[i]], set(base[[i]][j])) +} + +#span of base +G<-list() +G[[1]]<-set(set()) +G[[2]]<-set() +for(i in 1:length(base[[1]])){G[[2]]<-set_union(G[[2]], base[[1]][i])} +G[[2]]<-set(set(), G[[2]]) + +for(i in 2:items){ +H<-set(set()) +for(j in G[[i]]){ +check<-0 +if(set_is_subset(base_list[[i]], j) == FALSE){ +for(d in 1:i){ +if(set_is_subset(base_list[[d]], set(j, base_list[[i]])) == TRUE){ +if(set_is_subset(base_list[[d]], j)){ +H<-set_union(H,set(set_union(j,base_list[[i]]))) +} +} +if(set_is_subset(base_list[[d]], set(j, base_list[[i]])) == FALSE){ +H<-set_union(H,set(set_union(j,base_list[[i]]))) +} +} +} +} +G[[i+1]]<-set_union(G[[i]], H) +} + +#Patterns + +P<-matrix(0, ncol = items, nrow = length(G[[items+1]])) +i<-1 + +for(k in (G[[items+1]])){ +for(j in 1:items){ +if(is.element(j, k)){P[i,j]<-1} +} +i<-i+1 +} + +return(P) +} \ No newline at end of file diff --git a/R/ind_gen.r b/R/ind_gen.r new file mode 100644 index 0000000..27a5961 --- /dev/null +++ b/R/ind_gen.r @@ -0,0 +1,71 @@ +######################## +# inductive generation # +######################## + +####################################### +# # +# This function generates inductively # +# a set of competing quasi orders. # +# # +####################################### + +ind_gen<-function(b){ +m<-nrow(b) +#set of all pairs with a maximum of k-1 counterexamples +S<-list() + +#constructed relation for a maximum of k-1 counterexamples +A<-list() + +#set of non-transitive triples +M<-list() +M[[1]]<-set() +S[[1]]<-set() +for(i in 1:m){ +for(j in 1:m){ +if(b[i,j] == min(b) & i != j) {S[[1]]<-set_union(S[[1]],set(tuple(i,j)))} +} +} + +A[[1]]<-S[[1]] + +#inductive gneration process +elements<-sort(b)[!duplicated(sort(b))] +if(is.element(0,elements)){elements<-elements[2:(length(elements))]} + +k<-2 + +for(elem in elements){ +S[[k]]<-set() +A[[k]]<-set() +M[[k]]<-set() +#building of S +for(i in 1:m){ +for(j in 1:m){ +if(b[i,j] <= elem && i !=j && is.element(set(tuple(i,j)), A[[k-1]]) == FALSE){S[[k]]<-set_union(S[[k]], set(tuple(i,j)))} +} +} +#transitivity test +if(set_is_empty(S[[k]]) == FALSE){ +M[[k]]<-S[[k]] +brake_test<-1 +while(brake_test != 0){ +brake<-M[[k]] +for(i in M[[k]]){ +for(h in 1:m){ +if(h != as.integer(i)[1] && h != as.integer(i)[2] && is.element(set(tuple(as.integer(i)[2],h)), set_union(A[[k-1]], M[[k]])) == TRUE && is.element(set(tuple(as.integer(i)[1],h)), set_union(A[[k-1]], M[[k]])) == FALSE){M[[k]]<-M[[k]] - set(i)} +if(h != as.integer(i)[1] && h != as.integer(i)[2] && is.element(set(tuple(h,as.integer(i)[1])), set_union(A[[k-1]], M[[k]])) == TRUE && is.element(set(tuple(h,as.integer(i)[2])), set_union(A[[k-1]], M[[k]])) == FALSE){M[[k]]<-M[[k]] - set(i)} +} +} +if(brake == M[[k]]){brake_test<-0} +} +A[[k]]<-set_union(A[[k-1]], (M[[k]])) +} +k<-k+1 +} + +#deletion of empty and duplicated quasi orders +A<-A[(!duplicated(A))] +A<-A[!set_is_empty(A)] +return(A) +} \ No newline at end of file diff --git a/R/mini_iita.r b/R/mini_iita.r new file mode 100644 index 0000000..aff32f8 --- /dev/null +++ 
b/R/mini_iita.r @@ -0,0 +1,61 @@ +############################ +# minimized corrected IITA # +############################ + +################################################## +# # +# This function performs the minimized corrected # +# inductive item tree analysis procedure and # +# returns the corresponding diff values. # +# # +################################################## + +mini_iita<-function(dataset, A){ +b<-ob_counter(dataset) +m<-ncol(dataset) +n<-nrow(dataset) + +bs_num<-list() +for(i in 1:length(A)){ +bs_num[[i]]<-matrix(0,ncol = m, nrow = m) +} + +p<-rep(0,m) +for(i in 1:m){p[i]<-sum(dataset[,i])} + +error_num<-rep(0,length(A)) +diff_value_num<-rep(0,length(A)) + +#computation of error rate +for(k in 1:length(A)){ +x<-rep(0,4) +for(i in 1:m){ +for(j in 1:m){ +if(is.element(set(tuple(i,j)), A[[k]]) == TRUE && i != j){ +x[2]<-x[2]-2*b[i,j] * p[j] +x[4]<-x[4]+2 * p[j]^2 +} +if(is.element(set(tuple(i,j)), A[[k]]) == FALSE && is.element(set(tuple(j,i)), A[[k]]) == TRUE && i != j){ +x[1]<-x[1]-2*b[i,j]*p[i] + 2 * p[i] * p[j] - 2 * p[i]^2 +x[3]<-x[3]+2*p[i]^2 +} +} +} +error_num[k]<- -(x[1] + x[2]) / (x[3] + x[4]) +} + +#computation of diff values +for(k in 1:length(A)){ +if(set_is_empty(A[[k]])){diff_value_num[k]<-NA} +else{ +for(i in A[[length(A)]]){ +if(is.element(set(i), A[[k]])) {bs_num[[k]][as.integer(i[1]),as.integer(i[2])]<-error_num[k] * sum(dataset[,as.integer(i[2])])} +if(is.element(set(i), A[[k]]) == FALSE && is.element(set(tuple(as.integer(i[2]),as.integer(i[1]))), A[[k]]) == FALSE){bs_num[[k]][as.integer(i[1]),as.integer(i[2])]<-(1- sum(dataset[,as.integer(i[1])]) / n) * sum(dataset[,as.integer(i[2])])} +if(is.element(set(i), A[[k]]) == FALSE && is.element(set(tuple(as.integer(i[2]),as.integer(i[1]))), A[[k]]) == TRUE){bs_num[[k]][as.integer(i[1]),as.integer(i[2])]<-sum(dataset[,as.integer(i[2])]) - sum(dataset[,as.integer(i[1])]) + sum(dataset[,as.integer(i[1])]) * error_num[k]} +} +diff_value_num[k]<-sum((b - bs_num[[k]])^2) / (m^2 - m) +} +} + +return(diff_value_num) +} \ No newline at end of file diff --git a/R/ob_counter.r b/R/ob_counter.r new file mode 100644 index 0000000..58b7f11 --- /dev/null +++ b/R/ob_counter.r @@ -0,0 +1,23 @@ +############################ +# observed counterexamples # +############################ + +######################################### +# # +# This function computes from a dataset # +# for all item pairs the corresponding # +# numbers of counterexamples. # +# # +######################################### + +ob_counter<-function(dataset){ +m<-ncol(dataset) +n<-nrow(dataset) +b<-matrix(0,ncol = m, nrow = m) +for(i in 1:m){ +for(j in 1:m){ +if(i != j) b[i,j]<-sum(dataset[,i] == 0 & dataset[,j] == 1) +} +} +return(b) +} \ No newline at end of file diff --git a/R/orig_iita.r b/R/orig_iita.r new file mode 100644 index 0000000..443125e --- /dev/null +++ b/R/orig_iita.r @@ -0,0 +1,49 @@ +################# +# original IITA # +################# + +############################################## +# # +# This function performs the original # +# inductive item tree analysis procedure # +# and returns the corresponding diff values. 
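+# # +# A minimal usage sketch (illustrative, kept as a comment; as in # +# man/corr_iita.Rd, the selection set comes from ind_gen applied # +# to the observed numbers of counterexamples): # +# orig_iita(pisa, ind_gen(ob_counter(pisa))) #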
# +# # +############################################## + +orig_iita<-function(dataset, A){ +b<-ob_counter(dataset) +m<-ncol(dataset) +n<-nrow(dataset) + +bs<-list() +for(i in 1:length(A)){ +bs[[i]]<-matrix(0,ncol = ncol(b), nrow = nrow(b)) +} +diff_value_alt<-rep(0,length(A)) + +error<-rep(0,length(A)) + +#computation of error rate +for(k in 1:length(A)){ +for(i in A[[k]]){ +error[k]<-error[k] + ((b[as.integer(i[1]), as.integer(i[2])]) / sum(dataset[,as.integer(i[2])])) +} +if(set_is_empty(A[[k]])){error[k]<-NA} +if(set_is_empty(A[[k]]) == FALSE) {error[k]<-error[k] / length(A[[k]])} +} + +#computation of diff values +for(k in 1:length(A)){ +if(set_is_empty(A[[k]])){diff_value_alt[k]<-NA} +else{ +for(i in A[[length(A)]]){ +if(is.element(set(i), A[[k]])) {bs[[k]][as.integer(i[1]), as.integer(i[2])]<-error[k] * sum(dataset[,as.integer(i[2])])} +else{bs[[k]][as.integer(i[1]), as.integer(i[2])]<-(1- sum(dataset[,as.integer(i[1])]) / n) * sum(dataset[,as.integer(i[2])]) * (1-error[k]) +} +} +diff_value_alt[k]<-sum((b - bs[[k]])^2) / (m^2 - m) +} +} + +return(diff_value_alt) +} \ No newline at end of file diff --git a/R/pattern.r b/R/pattern.r new file mode 100644 index 0000000..fa225c2 --- /dev/null +++ b/R/pattern.r @@ -0,0 +1,29 @@ +##################### +# pattern frequency # +##################### + +############################################ +# # +# This function computes the absolute # +# frequencies of the response patterns, # +# and optionally, the absolute frequencies # +# of a collection of specified knowledge # +# states in a dataset. # +# # +############################################ + +pattern<-function(dataset, n = 5, P = NULL){ + +pattern<-sort(table(apply(dataset,1, function(x) paste(x, collapse = ""))), decreasing = TRUE) + +if(n < 1) stop("Number of patterns must be greater than zero.\n") +if(n > length(pattern)) n = length(pattern) + +if(is.null(P)){ +return(list(response.pattern = pattern[1:n], states = P)) +}else{ +states<-cbind(P, 0) +states[,ncol(states)] <- sapply(apply(P, 1, function(x) pattern[names(pattern) == paste(x, collapse = "")]), function(y) max(0, y)) +return(list(response.pattern = pattern[1:n],states = states)) +} +} \ No newline at end of file diff --git a/R/pop_iita.r b/R/pop_iita.r new file mode 100644 index 0000000..db164d7 --- /dev/null +++ b/R/pop_iita.r @@ -0,0 +1,264 @@ +##################### +# Population values # +##################### + +###################################################### +# # +# This function can be used to perform one of # +# the three inductive item tree analysis algorithms # +# in population quantities selectively. 
# +# # +###################################################### + +pop_iita<-function(imp, ce, lg, items, dataset = NULL, v){ + +if(v != 1 && v != 2 && v !=3){ +stop("IITA version must be specified") +} + +p_pop<-vector(length = items) +b_pop<-matrix(0,ncol = items, nrow = items) +bs_pop_alt<-matrix(0,ncol = items, nrow = items) +bs_pop_neu<-matrix(0,ncol = items, nrow = items) +bs_pop_num<-matrix(0,ncol = items, nrow = items) +error_pop_theo<-matrix(ncol = items, nrow = items) + +#Population matrix +pop_matrix<-matrix(0,nrow = 2^items, ncol = items+1) +for(i in 1:items){ +pop_matrix[,i]<-c(rep(0, 2^(items-i)), rep(1, 2^(items-i))) +} + +#Probabilities of all patterns + +R_2<-matrix(1, ncol = items, nrow = items) +for(i in 1:items){ +for(j in 1:items){ +if(!is.element(set(tuple(i,j)), imp) && i != j){R_2[j,i]<-0} +} +} + +#Base +base<-list() + +for(i in 1:items){ +tmp<-vector() +for(j in 1:items){ +if(R_2[i,j] == 1){tmp<-c(tmp, j)} +} +base[[i]]<-sort(tmp) +} + +base_list<-list() +for(i in 1:items){ +base_list[[i]]<-set() +for(j in 1:length(base[[i]])) +base_list[[i]]<-set_union(base_list[[i]], set(base[[i]][j])) +} + +#span of base +G<-list() +G[[1]]<-set(set()) +G[[2]]<-set() +for(i in 1:length(base[[1]])){G[[2]]<-set_union(G[[2]], base[[1]][i])} +G[[2]]<-set(set(), G[[2]]) + +for(i in 2:items){ +H<-set(set()) +for(j in G[[i]]){ +check<-0 +if(set_is_subset(base_list[[i]], j) == FALSE){ +for(d in 1:i){ +if(set_is_subset(base_list[[d]], set(j, base_list[[i]])) == TRUE){ +if(set_is_subset(base_list[[d]], j)){ +H<-set_union(H,set(set_union(j,base_list[[i]]))) +} +} +if(set_is_subset(base_list[[d]], set(j, base_list[[i]])) == FALSE){ +H<-set_union(H,set(set_union(j,base_list[[i]]))) +} +} +} +} +G[[i+1]]<-set_union(G[[i]], H) +} + +#Patterns + +P<-matrix(0, ncol = items, nrow = length(G[[items+1]])) +i<-1 + +for(k in (G[[items+1]])){ +for(j in 1:items){ +if(is.element(j, k)){P[i,j]<-1} +} +i<-i+1 +} + +#computation of population matrix +r_pop<-matrix(1, ncol = length(G[[items+1]]), nrow = 2^items) + +for(i in 1:2^items){ +for(j in 1:(length(G[[items+1]]))){ +for(k in 1:items){ +if(pop_matrix[i,k] == 0 && P[j,k] == 1){r_pop[i,j]<-r_pop[i,j] * ce} +if(pop_matrix[i,k] == 1 && P[j,k] == 1){r_pop[i,j]<-r_pop[i,j] * (1-ce)} +if(pop_matrix[i,k] == 1 && P[j,k] == 0){r_pop[i,j]<-r_pop[i,j] * lg} +if(pop_matrix[i,k] == 0 && P[j,k] == 0){r_pop[i,j]<-r_pop[i,j] * (1-lg)} +} +pop_matrix[i,items+1]<-pop_matrix[i,items+1] + r_pop[i,j]/length(G[[items+1]]) +} +} + +#item probabilities and b_ij +for(i in 1:items){ +p_pop[i]<-sum(pop_matrix[pop_matrix[,i] == 1,items+1 ]) +for(j in 1:items){ +tmp1<-which(pop_matrix[,i] == 0) +tmp2<-which(pop_matrix[,j] == 1) +if(i != j){b_pop[i,j]<-sum(pop_matrix[c(tmp1,tmp2)[duplicated(c(tmp1, tmp2))] ,items+1])} +} +} + +if(is.null(dataset)){ +#inductive generation process on population values +S<-list() +A<-list() +M<-list() + +S[[1]]<-set() +A[[1]]<-set() +M[[1]]<-set() + +elements<-sort(b_pop)[!duplicated(sort(b_pop))] +elements<-elements[2:(length(elements))] + +k<-2 + +for(elem in elements){ +S[[k]]<-set() +A[[k]]<-set() +M[[k]]<-set() +for(i in 1:items){ +for(j in 1:items){ +if(b_pop[i,j] <= elem && i !=j && is.element(set(tuple(i,j)), A[[k-1]]) == FALSE){S[[k]]<-set_union(S[[k]], set(tuple(i,j)))} +} +} +#transitivity test +if(set_is_empty(S[[k]])){A[[k]]<-A[[k-1]]} +if(set_is_empty(S[[k]]) == FALSE){ +M[[k]]<-S[[k]] +brake_test<-1 +while(brake_test != 0){ +brake<-M[[k]] +for(i in M[[k]]){ +for(h in 1:items){ +if(h != as.integer(i)[1] && h != as.integer(i)[2] && 
is.element(set(tuple(as.integer(i)[2],h)), set_union(A[[k-1]], M[[k]])) == TRUE && is.element(set(tuple(as.integer(i)[1],h)), set_union(A[[k-1]], M[[k]])) == FALSE){M[[k]]<-set_intersection(M[[k]], set_symdiff(M[[k]],set(i)))} +if(h != as.integer(i)[1] && h != as.integer(i)[2] && is.element(set(tuple(h,as.integer(i)[1])), set_union(A[[k-1]], M[[k]])) == TRUE && is.element(set(tuple(h,as.integer(i)[2])), set_union(A[[k-1]], M[[k]])) == FALSE){M[[k]]<-set_intersection(M[[k]], set_symdiff(M[[k]],set(i)))} +} +} +if(brake == M[[k]]){brake_test<-0} +} +A[[k]]<-set_union(A[[k-1]], (M[[k]])) +} +k<-k+1 +} + +#deletion of empty and duplicated quasi orders +A<-A[(!duplicated(A))] +A<-A[!set_is_empty(A)] +}else{ +A<-ind_gen(ob_counter(dataset)) +} + +error_pop<-vector(length = (length(A))) +error_pop_num<-vector(length = (length(A))) + +#Gamma_ij +for(i in 1:items){ +error_pop_theo[,i]<-b_pop[,i] / p_pop[i] +} + +if(v == 3 | v == 2){ +#Gamma_L +for(k in 1:length(A)){ +for(i in A[[k]]){ +error_pop[k]<-error_pop[k] + ((b_pop[as.integer(i[1]), as.integer(i[2])]) / sum(p_pop[as.integer(i[2])])) +} +if(set_is_empty(A[[k]])){error_pop[k]<-NA} +if(set_is_empty(A[[k]]) == FALSE) {error_pop[k]<-error_pop[k] / length(A[[k]])} +} +} + +if(v == 1){ +#Gamma_min +for(k in 1:length(A)){ +x<-rep(0,4) +for(i in 1:items){ +for(j in 1:items){ +if(is.element(set(tuple(i,j)), A[[k]]) == TRUE && i != j){ +x[2]<-x[2]-2*b_pop[i,j] * p_pop[j] +x[4]<-x[4]+2 * p_pop[j]^2 +} +if(is.element(set(tuple(i,j)), A[[k]]) == FALSE && is.element(set(tuple(j,i)), A[[k]]) == TRUE && i != j){ +x[1]<-x[1]-2*b_pop[i,j]*p_pop[i] + 2 * p_pop[i] * p_pop[j] - 2 * p_pop[i]^2 +x[3]<-x[3]+2*p_pop[i]^2 +} +} +} +error_pop_num[k]<- -(x[1] + x[2]) / (x[3] + x[4]) +} +} + +#bs_ij and diff + +if(v == 3){ +#original +diff_value_pop_alt<-vector(length = length(A)) +for(k in 1:length(A)){ +for(i in 1:items){ +for(j in 1:items){ +if(is.element(set(tuple(i,j)), A[[k]]) == TRUE && i != j){bs_pop_alt[i,j]<-error_pop[k] * p_pop[j]} +if(is.element(set(tuple(i,j)), A[[k]]) == FALSE && i != j){bs_pop_alt[i,j]<-(1-p_pop[i]) * p_pop[j] * (1-error_pop[k])} +} +} +if(set_is_empty(A[[k]])){diff_value_pop_alt[k]<-NA} +if(set_is_empty(A[[k]]) == FALSE) {diff_value_pop_alt[k]<-sum((b_pop - bs_pop_alt)^2) / (items^2 - items)} +} +return(list(pop.diff = diff_value_pop_alt, pop.matrix = pop_matrix, error.pop = error_pop, selection.set = A)) +} + +if(v == 2){ +#corrected +diff_value_pop_neu<-vector(length = length(A)) +for(k in 1:length(A)){ +for(i in 1:items){ +for(j in 1:items){ +if(is.element(set(tuple(i,j)), A[[k]]) == TRUE && i != j){bs_pop_neu[i,j]<-error_pop[k] * p_pop[j]} +if(is.element(set(tuple(i,j)), A[[k]]) == FALSE && is.element(set(tuple(j,i)), A[[k]]) == FALSE && i != j){bs_pop_neu[i,j]<-(1-p_pop[i]) * p_pop[j]} +if(is.element(set(tuple(i,j)), A[[k]]) == FALSE && is.element(set(tuple(j,i)), A[[k]]) == TRUE && i != j){bs_pop_neu[i,j]<-p_pop[j] - p_pop[i] + p_pop[i] * error_pop[k]} +} +} +if(set_is_empty(A[[k]])){diff_value_pop_neu[k]<-NA} +if(set_is_empty(A[[k]]) == FALSE) {diff_value_pop_neu[k]<-sum((b_pop - bs_pop_neu)^2) / (items^2 - items)} +} +return(list(pop.diff = diff_value_pop_neu, pop.matrix = pop_matrix, error.pop = error_pop, selection.set = A)) +} + +if(v == 1){ +#minimized corrected +diff_value_pop_num<-vector(length = length(A)) +for(k in 1:length(A)){ +for(i in 1:items){ +for(j in 1:items){ +if(is.element(set(tuple(i,j)), A[[k]]) == TRUE && i != j){bs_pop_num[i,j]<-error_pop_num[k] * p_pop[j]} +if(is.element(set(tuple(i,j)), A[[k]]) == FALSE && 
is.element(set(tuple(j,i)), A[[k]]) == FALSE && i != j){bs_pop_num[i,j]<-(1-p_pop[i]) * p_pop[j]} +if(is.element(set(tuple(i,j)), A[[k]]) == FALSE && is.element(set(tuple(j,i)), A[[k]]) == TRUE && i != j){bs_pop_num[i,j]<-p_pop[j] - p_pop[i] + p_pop[i] * error_pop_num[k]} +} +} +if(set_is_empty(A[[k]])){diff_value_pop_num[k]<-NA} +if(set_is_empty(A[[k]]) == FALSE) {diff_value_pop_num[k]<-sum((b_pop - bs_pop_num)^2) / (items^2 - items)} +} +return(list(pop.diff = diff_value_pop_num, pop.matrix = pop_matrix, error.pop = error_pop_num, selection.set = A)) +} +} \ No newline at end of file diff --git a/R/pop_variance.r b/R/pop_variance.r new file mode 100644 index 0000000..885d433 --- /dev/null +++ b/R/pop_variance.r @@ -0,0 +1,175 @@ +####################### +# population variance # +####################### + +######################################################## +# # +# This function computes the population asymptotic # +# variances of the maximum likelihood estimators diff, # +# assuming a multinomial probability distribution on # +# the set of all response patterns. # +# # +######################################################## + +pop_variance<-function(pop_matrix, imp, error_pop, v){ +if(length(imp) == 0){ +stop("Number of implications must be greater than zero.\n") +} + +if(v != 1 && v != 2){ +stop("IITA version must be specified") +} + +items<-ncol(pop_matrix)-1 + +#expected fisher information +exp_fish<-matrix(0, ncol = 2^items -1, nrow = 2^items -1) + +for(i in 2:2^items){ +for(j in 2:2^items){ +if(i == j){exp_fish[i-1,j-1]<-pop_matrix[i,items+1] * (1-pop_matrix[i,items+1])} +if(i != j){exp_fish[i-1,j-1]<- (-1) * pop_matrix[i,items+1] * pop_matrix[j,items+1]} +} +} + +#Sum of the rho, to avoid computation every single time +rho_sum<-vector(length = items) +for(i in 1:items){ +rho_sum[i]<-sum(pop_matrix[which(pop_matrix[,i] == 1),items+1]) +} + +rho_sum_counter<-matrix(0, ncol = items, nrow = items) +for(i in 1:items){ +for(j in 1:items){ +if(i !=j){rho_sum_counter[i,j]<-sum(pop_matrix[which(pop_matrix[,i] == 0 & pop_matrix[,j] == 1), items+1])} +} +} + +#gamma derivative + +#corrected and original + +if(v == 2){ +gamma_deriv<-rep(0,2^items -1) +for(i in 2:2^items){ +for(j in imp){ +if(pop_matrix[i,as.integer(j[1])] == 0 && pop_matrix[i,as.integer(j[2])] == 1){ +gamma_deriv[i-1]<-gamma_deriv[i-1] + (rho_sum[as.integer(j[2])] - rho_sum_counter[as.integer(j[1]), as.integer(j[2])])/(rho_sum[as.integer(j[2])]^2) +}else{ +if(pop_matrix[i,as.integer(j[2])] ==1) +gamma_deriv[i-1]<-gamma_deriv[i-1] + rho_sum_counter[as.integer(j[1]), as.integer(j[2])]/(rho_sum[as.integer(j[2])]^2) +} +} +} + +gamma_deriv<-gamma_deriv/length(imp) +} + +#minimized corrected +if(v == 1){ +gamma_deriv<-rep(0, 2^items -1) +x<-rep(0,4) + +for(k in 1:items){ +for(h in 1:items){ +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) && k !=h){ +x[1]<- x[1] + (-2) * rho_sum_counter[k,h] * rho_sum[k] + 2 * rho_sum[k] * rho_sum[h] - 2 * (rho_sum[k])^2 +x[3]<- x[3] + 2 * (rho_sum[k])^2 +} +if(is.element(set(tuple(k,h)), imp) && k !=h){ +x[2]<- x[2] + (-2) * rho_sum_counter[k,h] * rho_sum[h] +x[4]<- x[4] + 2 * (rho_sum[h])^2 +} +} +} + +tmp1<-0 +tmp2<-0 +for(i in 2:2^items){ +for(k in 1:items){ +for(h in 1:items){ +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) && k !=h){ +if(pop_matrix[i,k] == 0 && pop_matrix[i,h] == 1){ +tmp1<-tmp1 + (-2) * rho_sum[k] +} +if(pop_matrix[i,k] == 1){ +tmp1<-tmp1 + (-2) * rho_sum_counter[k,h] + 2 * rho_sum[h] + (-4) *
rho_sum[k] +tmp2<-tmp2 + 4 * rho_sum[k] +} +if(pop_matrix[i,h] == 1){ +tmp1<-tmp1 + 2 * rho_sum[k] +} +} +if(is.element(set(tuple(h,k)), imp) && k !=h){ +if(pop_matrix[i,k] == 0 && pop_matrix[i,h] == 1){ +tmp1<-tmp1 + (-2) * rho_sum[h] +} +if(pop_matrix[i,h] == 1){ +tmp1<-tmp1 + (-2) * rho_sum_counter[k,h] +tmp2<-tmp2 + 4 * (rho_sum[h]) +} +} +} +} +gamma_deriv[i-1]<- (-1) * (tmp1 * (x[3] + x[4]) - tmp2 * (x[1] + x[2])) / ((x[3] + x[4])^2 ) +} +} + + +#gradient of diff for corrected and minimized corrected +grad<-rep(0, 2^items -1) +for(i in 2:2^items){ +for(k in 1:items){ +for(h in 1:items){ +if(is.element(set(tuple(k,h)), imp) && k !=h){ +if(pop_matrix[i,k] == 0 && pop_matrix[i,h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] * error_pop) * (1-error_pop - (rho_sum[h])*gamma_deriv[i-1]) +}else{ +if(pop_matrix[i, h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] * error_pop) * (-error_pop - rho_sum[h] * gamma_deriv[i-1] ) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] * error_pop) * (-rho_sum[h] * gamma_deriv[i-1]) +} +} +} +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) && k !=h){ +if(pop_matrix[i,k] == 0 && pop_matrix[i,h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error_pop) * (-rho_sum[k] * gamma_deriv[i-1]) +}else{ +if(pop_matrix[i,h] == 1){ +if(pop_matrix[i,k] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error_pop) * (-1 + 1 - error_pop - rho_sum[k] * gamma_deriv[i-1]) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error_pop) * (-1 - rho_sum[k] * gamma_deriv[i-1]) +} +}else{ +if(pop_matrix[i,k] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error_pop) * (1 - error_pop - rho_sum[k] * gamma_deriv[i-1]) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error_pop) * (-rho_sum[k] * gamma_deriv[i-1]) +} +} +} +} +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) == FALSE && k !=h){ +if(pop_matrix[i,k] == 0 && pop_matrix[i,h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - (1-rho_sum[k]) * rho_sum[h]) * rho_sum[k] +}else{ +if(pop_matrix[i,h] == 1){ +if(pop_matrix[i,k] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - (1-rho_sum[k]) * rho_sum[h]) * (rho_sum[h] - (1 - rho_sum[k])) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - (1-rho_sum[k]) * rho_sum[h]) * rho_sum[h] +} +} +} +} +} +} +} +grad<-grad / (items * (items-1)) + +#final computation +variance<- grad%*%exp_fish%*%grad +return(variance) +} \ No newline at end of file diff --git a/R/simu.r b/R/simu.r new file mode 100644 index 0000000..f09d724 --- /dev/null +++ b/R/simu.r @@ -0,0 +1,115 @@ +################### +# data simulation # +################### + +######################################################### +# # +# This function simulates a dataset using a basic local # +# independence model. The number of items, the sample # +# size, and two parameters for the careless error and # +# lucky guess probabilities can be set explicitly. The # +# underlying combinatorial structure can either be # +# specified manually or is generated randomly. 
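+# # +# A minimal usage sketch (illustrative, kept as a comment; the # +# parameter values are arbitrary): 200 response patterns on 5 # +# items, 5% careless error and lucky guess rates, and a randomly # +# generated underlying quasi order: # +# ex <- simu(items = 5, size = 200, ce = 0.05, lg = 0.05, delta = 0.15) # +# ex$dataset; ex$implications; ex$states #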
# +# # +######################################################### + +simu<-function(items, size, ce, lg, imp = NULL, delta){ + +R<-set() + +if(is.null(imp)){ +#computation of transitive relations +for(i in 1:items){ +for(j in 1:items){ +if(i != j && delta > runif(1,0,1)){R<-set_union(R, set(tuple(i,j)))} +if(i == j) {R<-set_union(R, set(tuple(i,j)))} +} +} + +R_2<-relation_incidence(transitive_closure(relation(domain = list(1:items,1:items), graph = R))) + +#Base +base<-list() + +for(i in 1:items){ +tmp<-vector() +for(j in 1:items){ +if(R_2[i,j] == 1){tmp<-c(tmp, j)} +} +base[[i]]<-sort(tmp) +} + +base_list<-list() +for(i in 1:items){ +base_list[[i]]<-set() +for(j in 1:length(base[[i]])) +base_list[[i]]<-set_union(base_list[[i]], set(base[[i]][j])) +} + +#span of base +G<-list() +G[[1]]<-set(set()) +G[[2]]<-set() +for(i in 1:length(base[[1]])){G[[2]]<-set_union(G[[2]], base[[1]][i])} +G[[2]]<-set(set(), G[[2]]) + +for(i in 2:items){ +H<-set(set()) +for(j in G[[i]]){ +check<-0 +if(set_is_subset(base_list[[i]], j) == FALSE){ +for(d in 1:i){ +if(set_is_subset(base_list[[d]], set(j, base_list[[i]])) == TRUE){ +if(set_is_subset(base_list[[d]], j)){ +H<-set_union(H,set(set_union(j,base_list[[i]]))) +} +} +if(set_is_subset(base_list[[d]], set(j, base_list[[i]])) == FALSE){ +H<-set_union(H,set(set_union(j,base_list[[i]]))) +} +} +} +} +G[[i+1]]<-set_union(G[[i]], H) +} + +#Patterns + +P<-matrix(0, ncol = items, nrow = length(G[[items+1]])) +i<-1 + +for(k in (G[[items+1]])){ +for(j in 1:items){ +if(is.element(j, k)){P[i,j]<-1} +} +i<-i+1 +} + +#implications + +imp<-set() +for(i in 1:items){ +for(j in 1:items){ +if(i != j && set_is_subset(base_list[[i]], base_list[[j]])){imp<-set_union(imp,set(tuple(i,j)))} +} +} + +#for specified imp +}else{ +#Patterns +P<-imp2state(imp, items) +} + +#simulating the dataset +sim<-matrix(ncol = items, nrow = size) + +for(i in 1:size){ +sim[i,]<-P[sample(1:nrow(P), 1),] +for(j in 1:items){ +if(sim[i,j] == 1 && runif(1,0,1) < ce) {sim[i,j]<-0} +if(sim[i,j] == 0 && runif(1,0,1) < lg) {sim[i,j]<-1} +} +} + +list(dataset = sim, implications = imp, states = P) +} diff --git a/R/state2imp.r b/R/state2imp.r new file mode 100644 index 0000000..22ab784 --- /dev/null +++ b/R/state2imp.r @@ -0,0 +1,33 @@ +################## +# tranformations # +################## + +#################################################### +# # +# This function transforms a set of knowledge # +# states to the corresponding set of implications. 
# +# # +#################################################### + +state2imp<-function(P){ + +#Base +base_list<-list() +for(i in 1:ncol(P)){ +base_list[[i]]<-set(i) +tmp<-P[which(P[,i] == 1),] +for(j in (1:ncol(P))[-i]){ +if(length(which(P[,i] == 1)) == 1){if(sum(tmp[j]) == 1){base_list[[i]]<-set_union(base_list[[i]], set(j))}} +if(length(which(P[,i] == 1)) > 1){if(sum(tmp[,j]) == nrow(tmp)){base_list[[i]]<-set_union(base_list[[i]], set(j))}} +} +} + +imp<-set() +for(i in 1:ncol(P)){ +for(j in 1:ncol(P)){ +if(i != j && set_is_subset(base_list[[i]], base_list[[j]])){imp<-set_union(imp,set(tuple(i,j)))} +} +} + +return(imp) +} \ No newline at end of file diff --git a/R/variance.r b/R/variance.r new file mode 100644 index 0000000..5d97ab5 --- /dev/null +++ b/R/variance.r @@ -0,0 +1,228 @@ +################################# +# estimated asymptotic variance # +################################# + +################################################### +# # +# This function computes estimated asymptotic # +# variances of the maximum likelihood estimators # +# diff from data, assuming a multinomial # +# probability distribution on the set of all # +# response patterns. # +# # +################################################### + + +variance<-function(dataset, imp, v){ +if(length(imp) == 0){ +stop("Number of implications must be greater than zero.\n") +} + +if(v != 1 && v != 2){ +stop("IITA version must be specified") +} + +items<-ncol(dataset) + +#Number of times a pattern occurs +patterns<-matrix(0,ncol = ncol(dataset)+1, nrow = 2^ncol(dataset)) +for(j in 1:ncol(patterns)-1){ +patterns[,j]<-c(rep(0,2^(ncol(dataset)-j)), rep(1,2^(ncol(dataset)-j))) +} +dataset_2<-matrix(nrow = nrow(dataset),ncol = 1 ) +for(i in 1:nrow(dataset)){ +dataset_2[i,]<-toString(dataset[i,]) +} +tot_cases<-table(dataset_2) +if(length(tot_cases) == 2^(ncol(patterns)-1)){ +patterns[,ncol(patterns)]<-tot_cases +}else{ +for(i in 1:length(tot_cases)){ +for(j in i:nrow(patterns)){ +if(sum(as.integer(strsplit(dimnames(tot_cases)$dataset_2[i], ", ")[[1]][1:(ncol(patterns)-1)]) == patterns[j,1:(ncol(patterns)-1)]) == ncol(patterns)-1){ +patterns[j,ncol(patterns)]<-tot_cases[[i]] +break +} +} +} +} + +#relative frequencies +rho_sum<-vector(length = items) +for(i in 1:items){ +rho_sum[i]<-sum(patterns[which(patterns[,i] == 1),items+1]) / nrow(dataset) +} + +rho_sum_counter<-matrix(0, ncol = items, nrow = items) +for(i in 1:items){ +for(j in 1:items){ +if(i !=j){rho_sum_counter[i,j]<-sum(patterns[which(patterns[,i] == 0 & patterns[,j] == 1), items+1]) / nrow(dataset)} +} +} + +#expected fisher information + +exp_fish<-matrix(0, ncol = 2^items -1, nrow = 2^items -1) + +for(i in 2:2^items){ +for(j in 2:2^items){ +if(i == j){exp_fish[i-1,j-1]<-(patterns[i,ncol(patterns)] / nrow(dataset)) * (1 - patterns[i,items+1]/nrow(dataset))} +if(i != j){exp_fish[i-1,j-1]<- (-1) * (patterns[i,ncol(patterns)] / nrow(dataset)) * (patterns[j,items+1] / nrow(dataset))} +} +} + +#error + +#original and corrected +error<-0 +if(v == 2){ +for(i in imp){ +error<-error + ((rho_sum_counter[as.integer(i[1]), as.integer(i[2])]) * ncol(dataset) / sum(dataset[,as.integer(i[2])])) +} +error<-error / length(imp) +} + +#minimized corrected +if(v == 1){ +x<-rep(0,4) +for(i in 1:items){ +for(j in 1:items){ +if(is.element(set(tuple(i,j)), imp) == TRUE && i != j){ +x[2]<-x[2]-2*rho_sum_counter[i,j] * rho_sum[j] +x[4]<-x[4]+2 * rho_sum[j]^2 +} +if(is.element(set(tuple(i,j)), imp) == FALSE && is.element(set(tuple(j,i)), imp) == TRUE && i != j){ 
+x[1]<-x[1]-2*rho_sum_counter[i,j]*rho_sum[i] + 2 * rho_sum[i] * rho_sum[j] - 2 * rho_sum[i]^2 +x[3]<-x[3]+2*rho_sum[i]^2 +} +} +} +error<- -(x[1] + x[2]) / (x[3] + x[4]) +} + +#gamma derivative + +# original and corrected +if(v == 2){ +gamma_deriv<-rep(0,2^items -1) +for(i in 2:2^items){ +for(j in imp){ +if(patterns[i,as.integer(j[1])] == 0 && patterns[i,as.integer(j[2])] == 1){ +gamma_deriv[i-1]<-gamma_deriv[i-1] + (rho_sum[as.integer(j[2])] - rho_sum_counter[as.integer(j[1]), as.integer(j[2])])/(rho_sum[as.integer(j[2])]^2) +}else{ +if(patterns[i,as.integer(j[2])] ==1) +gamma_deriv[i-1]<-gamma_deriv[i-1] + rho_sum_counter[as.integer(j[1]), as.integer(j[2])]/(rho_sum[as.integer(j[2])]^2) +} +} +} + +gamma_deriv<-gamma_deriv/length(imp) +} + +#minimized corrected +if(v == 1){ +gamma_deriv<-rep(0, 2^items -1) +x<-rep(0,4) + +for(k in 1:items){ +for(h in 1:items){ +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) && k !=h){ +x[1]<- x[1] + (-2) * rho_sum_counter[k,h] * rho_sum[k] + 2 * rho_sum[k] * rho_sum[h] - 2 * (rho_sum[k])^2 +x[3]<- x[3] + 2 * (rho_sum[k])^2 +} +if(is.element(set(tuple(k,h)), imp) && k !=h){ +x[2]<- x[2] + (-2) * rho_sum_counter[k,h] * rho_sum[h] +x[4]<- x[4] + 2 * (rho_sum[h])^2 +} +} +} + +tmp1<-0 +tmp2<-0 +for(i in 2:2^items){ +for(k in 1:items){ +for(h in 1:items){ +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) && k !=h){ +if(patterns[i,k] == 0 && patterns[i,h] == 1){ +tmp1<-tmp1 + (-2) * rho_sum[k] +} +if(patterns[i,k] == 1){ +tmp1<-tmp1 + (-2) * rho_sum_counter[k,h] + 2 * rho_sum[h] + (-4) * rho_sum[k] +tmp2<-tmp2 + 4 * rho_sum[k] +} +if(patterns[i,h] == 1){ +tmp1<-tmp1 + 2 * rho_sum[k] +} +} +if(is.element(set(tuple(h,k)), imp) && k !=h){ +if(patterns[i,k] == 0 && patterns[i,h] == 1){ +tmp1<-tmp1 + (-2) * rho_sum[h] +} +if(patterns[i,h] == 1){ +tmp1<-tmp1 + (-2) * rho_sum_counter[k,h] +tmp2<-tmp2 + 4 * (rho_sum[h]) +} +} +} +} +gamma_deriv[i-1]<- (-1) * (tmp1 * (x[3] + x[4]) - tmp2 * (x[1] + x[2])) / ((x[3] + x[4])^2 ) +} +} + +#gradient of diff for corrected and minimized corrected +grad<-rep(0, 2^items -1) +for(i in 2:2^items){ +for(k in 1:items){ +for(h in 1:items){ +if(is.element(set(tuple(k,h)), imp) && k !=h){ +if(patterns[i,k] == 0 && patterns[i,h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] * error) * (1-error - (rho_sum[h])*gamma_deriv[i-1]) +}else{ +if(patterns[i, h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] * error) * (-error - rho_sum[h] * gamma_deriv[i-1] ) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] * error) * (-rho_sum[h] * gamma_deriv[i-1]) +} +} +} +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) && k !=h){ +if(patterns[i,k] == 0 && patterns[i,h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error) * (-rho_sum[k] * gamma_deriv[i-1]) +}else{ +if(patterns[i,h] == 1){ +if(patterns[i,k] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error) * (-1 + 1 - error - rho_sum[k] * gamma_deriv[i-1]) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error) * (-1 - rho_sum[k] * gamma_deriv[i-1]) +} +}else{ +if(patterns[i,k] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - rho_sum[h] + rho_sum[k] - rho_sum[k] * error) * (1 - error - rho_sum[k] * gamma_deriv[i-1]) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - 
rho_sum[h] + rho_sum[k] - rho_sum[k] * error) * (-rho_sum[k] * gamma_deriv[i-1]) +} +} +} +} +if(is.element(set(tuple(k,h)), imp) == FALSE && is.element(set(tuple(h,k)), imp) == FALSE && k !=h){ +if(patterns[i,k] == 0 && patterns[i,h] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - (1-rho_sum[k]) * rho_sum[h]) * rho_sum[k] +}else{ +if(patterns[i,h] == 1){ +if(patterns[i,k] == 1){ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - (1-rho_sum[k]) * rho_sum[h]) * (rho_sum[h] - (1 - rho_sum[k])) +}else{ +grad[i-1]<-grad[i-1] + 2 * (rho_sum_counter[k,h] - (1-rho_sum[k]) * rho_sum[h]) * rho_sum[h] +} +} +} +} +} +} +} +grad<-grad / (items * (items-1)) + +#final computation +variance<- grad%*%exp_fish%*%grad +return(variance) +} \ No newline at end of file diff --git a/data/pisa.rda b/data/pisa.rda new file mode 100644 index 0000000..ca2e534 Binary files /dev/null and b/data/pisa.rda differ diff --git a/man/DAKS-package.Rd b/man/DAKS-package.Rd new file mode 100644 index 0000000..ee6e99c --- /dev/null +++ b/man/DAKS-package.Rd @@ -0,0 +1,82 @@ +\name{DAKS-package} +\alias{DAKS-package} +\concept{knowledge space theory} +\concept{inductive item tree analysis} +\docType{package} +\title{ + Data Analysis and Knowledge Spaces: The R Package DAKS +} +\description{ + The package \pkg{DAKS} implements three inductive item tree analysis + algorithms for deriving quasi orders from binary data, the original, + corrected, and minimized corrected algorithms. It provides + functions for computing population and estimated asymptotic + variances of the \emph{diff} fit measures, and for switching between + test item and knowledge state representations. Other features are a + Hasse diagram drawing device, a data simulation tool based on a + finite mixture latent variable model, and a function for computing + response pattern and knowledge state frequencies. +} +\details{ + \tabular{ll}{ + Package: \tab \pkg{DAKS}\cr + Type: \tab Package\cr + Version: \tab 1.0-0\cr + Date: \tab 2009-02-26\cr + License: \tab \acronym{GPL} (>= 2) + } + Knowledge space theory is a recent psychometric test theory based on + combinatorial mathematical structures (order and lattice theory); + see \cite{Doignon and Falmagne (1999)}. Solvability dependencies + between dichotomous test items play an important role in knowledge + space theory. Utilizing hypothesized dependencies between items, + knowledge space theory has been successfully applied for the + computerized, adaptive assessment and training of knowledge. For + instance, see the \acronym{ALEKS} system, a fully automated math + tutor on the Internet (\url{http://www.aleks.com/}). + + The package \pkg{DAKS} is implemented based on the S3 system. It + comes with a namespace and consists of the following functions (all + functions are external, there are no internal functions): + \code{\link{corr_iita}}, \code{\link{hasse}}, \code{\link{iita}}, + \code{\link{imp2state}}, \code{\link{ind_gen}}, + \code{\link{mini_iita}}, \code{\link{ob_counter}}, + \code{\link{orig_iita}}, \code{\link{pattern}}, + \code{\link{pop_iita}}, \code{\link{pop_variance}}, + \code{\link{simu}}, \code{\link{state2imp}}, and + \code{\link{variance}}. There is an empirical dataset, + \code{\link{pisa}}, accompanying the package \pkg{DAKS}. This + dataset is part of the 2003 Programme for International Student + Assessment (PISA; \url{http://www.pisa.oecd.org/}). +} +\author{ + Anatol Sargin, Ali Uenlue + + Maintainer: Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de}, + Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de} +} +\references{ + Doignon, J.-P. and Falmagne, J.-C.
(1999) \emph{Knowledge Spaces}. + Berlin, Heidelberg, and New York: Springer-Verlag. + + Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis: + Corrections, improvements, and comparisons. Manuscript under + revision. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data + analysis in knowledge space theory. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Schrepp, M. (1999) On the empirical construction of implications + between bi-valued test items. \emph{Mathematical Social Sciences}, + \bold{38}, 361--375. + + Schrepp, M. (2003) A method for the analysis of hierarchical + dependencies between items of a questionnaire. \emph{Methods of + Psychological Research}, \bold{19}, 43--79. + + Uenlue, A. and Sargin, A. (2008) Maximum likelihood methodology for + \emph{diff} fit measures for quasi orders. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} +} +\keyword{package} diff --git a/man/corr_iita.Rd b/man/corr_iita.Rd new file mode 100644 index 0000000..32c2111 --- /dev/null +++ b/man/corr_iita.Rd @@ -0,0 +1,91 @@ +\name{corr_iita} +\alias{corr_iita} +\concept{corrected inductive item tree analysis} +\concept{IITA} +\title{Corrected Inductive Item Tree Analysis} +\description{ + \code{corr_iita} performs the corrected inductive item tree analysis + procedure and returns the corresponding \emph{diff} values. +} +\usage{ +corr_iita(dataset, A) +} +\arguments{ + \item{dataset}{a required data frame or matrix consisting of binary, + \eqn{1} or \eqn{0}, numeric data.} + \item{A}{a required list of competing quasi orders (surmise + relations), for instance obtained from a call to + \code{\link{ind_gen}}.} +} +\details{ + Corrected inductive item tree analysis is a data analysis method for + deriving knowledge structures (more precisely, surmise relations) + from binary data. Details on this procedure can be found in + \code{\link{iita}}. The set of competing quasi orders is passed via + the argument \code{A}, so any selection set of quasi orders can be + used. + + The set of competing quasi orders must be a list of objects of the + class \code{\link[sets]{set}}. These objects (quasi orders) consist + of \eqn{2}-tuples \eqn{(i, j)} of the class + \code{\link[sets]{tuple}}, where a \eqn{2}-tuple \eqn{(i, j)} is + interpreted as `mastering item \eqn{j} implies mastering item + \eqn{i}.' + + The data must contain only ones and zeros, which encode solving or + failing to solve an item, respectively. +} +\value{ + If the arguments \code{dataset} and \code{A} are of required types, + \code{corr_iita} returns a vector of the \emph{diff} values + corresponding to the competing quasi orders in \code{A}. +} +\references{ + Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis: + Corrections, improvements, and comparisons. Manuscript under + revision. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data + analysis in knowledge space theory. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} +} +\author{ + Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de}, + Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de} +} +\note{ + The function \code{\link{iita}} can be used to perform one of the + three inductive item tree analysis procedures (including the + corrected inductive item tree analysis method) selectively. 
Whereas + for the function \code{corr_iita} a selection set of competing quasi + orders has to be passed via the argument \code{A} manually, + \code{iita} automatically generates a selection set from the data + using the inductive generation procedure implemented in + \code{\link{ind_gen}}. + + The latter approach using \code{\link{iita}} is common so far, in + knowledge space theory, where the inductive data analysis methods + have been utilized for exploratory derivations of surmise relations + from data. The function \code{corr_iita}, on the other hand, can be + used to select among surmise relations for instance obtained from + querying experts or from competing psychological theories. +} +\seealso{ + \code{\link{orig_iita}} for original inductive item tree analysis; + \code{\link{mini_iita}} for minimized corrected inductive item tree + analysis; \code{\link{iita}}, the interface that provides the three + inductive item tree analysis methods under one umbrella; + \code{\link{pop_variance}} for population asymptotic variances of + \emph{diff} coefficients; \code{\link{variance}} for estimated + asymptotic variances of \emph{diff} coefficients; + \code{\link{pop_iita}} for population inductive item tree analysis. + See also \code{\link{DAKS-package}} for general information about + this package. +} +\examples{ +ind <- ind_gen(ob_counter(pisa)) +corr_iita(pisa, ind) +} +\keyword{manip} +\keyword{models} +\keyword{univar} diff --git a/man/hasse.Rd b/man/hasse.Rd new file mode 100644 index 0000000..54422f8 --- /dev/null +++ b/man/hasse.Rd @@ -0,0 +1,60 @@ +\name{hasse} +\alias{hasse} +\concept{Hasse diagram} +\title{ + Hasse Diagram of Surmise Relation +} +\description{ + \code{hasse} plots the Hasse diagram of a surmise relation (more + precisely, of its corresponding quotient set). +} +\usage{ +hasse(imp, items) +} +\arguments{ + \item{imp}{a required object of class \code{\link[sets]{set}} + representing the set of implications, for instance obtained from a + call to \code{\link{iita}}.} + \item{items}{a required numeric giving the number of items of the + domain taken as basis for \code{imp}.} +} +\value{ + If the arguments \code{imp} and \code{items} are of required types, + \code{hasse} produces a plot, and returns a list of the equally + informative items. +} +\references{ + Doignon, J.-P. and Falmagne, J.-C. (1999) \emph{Knowledge Spaces}. + Berlin, Heidelberg, and New York: Springer-Verlag. + + Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data + analysis in knowledge space theory. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} +} +\author{ + Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de}, + Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de} +} +\note{ + The function \code{hasse} is not capable of plotting equally + informative items. This is why equally informative items are + returned in a list. + + A set of implications, an object of the class + \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of + the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple + \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies + mastering item \eqn{i}.' +} +\seealso{ + \code{\link{iita}}, the interface that provides the three + inductive item tree analysis methods under one umbrella. See also + \code{\link{DAKS-package}} for general information about this + package. 
+} +\examples{ +hasse(iita(pisa, v = 2)$implications, 5) +} +\keyword{graphs} +\keyword{hplot} +\keyword{print} diff --git a/man/iita.Rd b/man/iita.Rd new file mode 100644 index 0000000..5600e53 --- /dev/null +++ b/man/iita.Rd @@ -0,0 +1,134 @@ +\name{iita} +\alias{iita} +\concept{inductive item tree analysis} +\concept{IITA} +\title{ + Inductive Item Tree Analysis +} +\description{ + \code{iita} can be used to perform one of the three inductive item + tree analysis algorithms (original, corrected, and minimized + corrected) selectively. +} +\usage{ +iita(dataset, v) +} +\arguments{ + \item{dataset}{a required data frame or matrix consisting of binary, + \eqn{1} or \eqn{0}, numeric data.} + \item{v}{a required numeric giving the inductive item tree analysis + algorithm to be performed; \code{v = 1} (minimized corrected), + \code{v = 2} (corrected), and \code{v = 3} (original).} +} +\details{ + The three inductive item tree analysis algorithms are exploratory + methods for extracting quasi orders (surmise relations) from data. + In each algorithm, competing binary relations are generated (in the + same way for all three versions), and a fit measure (differing from + version to version) is computed for every relation of the selection + set in order to find the quasi order that fits the data best. In + all three algorithms, the idea is to estimate the numbers of + counterexamples for each quasi order, and to find, over all + competing quasi orders, the minimum value for the discrepancy + between the observed and expected numbers of counterexamples. The + three data analysis methods differ in their choices of estimates for + the expected numbers of counterexamples. (For an item pair + \eqn{(i, j)}, the number of subjects solving item \eqn{j} but + failing to solve item \eqn{i}, is the corresponding number of + counterexamples. Their response patterns contradict the + interpretation of \eqn{(i, j)} as `mastering item \eqn{j} implies + mastering item \eqn{i}.') The algorithms are described in the paper + about the \pkg{DAKS} package by \cite{Sargin and Uenlue (2009)}, and + in the papers by \cite{Sargin and Uenlue (2009)} and + \cite{Uenlue and Sargin (2008)}. + + \code{iita} calls \code{\link{ind_gen}} for constructing the set of + competing quasi orders according to the inductive generation + procedure. Subject to the selected version to be performed, + \code{iita} computes the discrepancies between observed and expected + numbers of counterexamples under each relation, and finds a quasi + order with the minimum discrepancy (\emph{diff}) value. + + A set of implications, an object of the class + \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of + the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple + \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies + mastering item \eqn{i}.' + + The data must contain only ones and zeros, which encode solving or + failing to solve an item, respectively. 
+} +\value{ + If the arguments \code{dataset} and \code{v} are of required types, + \code{iita} returns a named list consisting of the following three + components: + + \item{diff}{a vector giving the \emph{diff} values corresponding to + the (inductively generated) competing quasi orders.} + \item{implications}{an object of class \code{\link[sets]{set}} + representing the solution quasi order (with smallest \emph{diff} + value) under the selected algorithm.} + \item{selection_set_index}{a numeric giving the index of the + solution quasi order in the selection set.} +} +\references{ + Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis: + Corrections, improvements, and comparisons. Manuscript under + revision. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data + analysis in knowledge space theory. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Schrepp, M. (1999) On the empirical construction of implications + between bi-valued test items. \emph{Mathematical Social Sciences}, + \bold{38}, 361--375. + + Schrepp, M. (2003) A method for the analysis of hierarchical + dependencies between items of a questionnaire. \emph{Methods of + Psychological Research}, \bold{19}, 43--79. + + Uenlue, A. and Sargin, A. (2008) Maximum likelihood methodology for + \emph{diff} fit measures for quasi orders. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} +} +\author{ + Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de}, + Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de} +} +\note{ + The function \code{\link{iita}} can be used to perform one of the + three inductive item tree analysis procedures selectively. Whereas + for the functions \code{\link{orig_iita}}, \code{\link{corr_iita}}, + \code{\link{mini_iita}} selection sets of competing quasi orders + have to be passed via an argument manually, \code{iita} + automatically generates a selection set from the data using the + inductive generation procedure implemented in \code{\link{ind_gen}}. + + The latter approach using \code{iita} is common so far, in knowledge + space theory, where the inductive data analysis methods have been + utilized for exploratory derivations of surmise relations from data. + The functions \code{\link{orig_iita}}, \code{\link{corr_iita}}, + \code{\link{mini_iita}}, on the other hand, can be used to select + among surmise relations for instance obtained from querying experts + or from competing psychological theories. +} +\seealso{ + \code{\link{orig_iita}} for original inductive item tree analysis; + \code{\link{corr_iita}} for corrected inductive item tree analysis; + \code{\link{mini_iita}} for minimized corrected inductive item tree + analysis; \code{\link{ind_gen}} for inductive generation procedure; + \code{\link{pop_variance}} for population asymptotic variances of + \emph{diff} coefficients; \code{\link{variance}} for estimated + asymptotic variances of \emph{diff} coefficients; + \code{\link{pop_iita}} for population inductive item tree analysis. + See also \code{\link{DAKS-package}} for general information about + this package. 
+} +\examples{ +iita(pisa, v = 1) +iita(pisa, v = 3) +} +\keyword{manip} +\keyword{models} +\keyword{univar} diff --git a/man/imp2state.Rd b/man/imp2state.Rd new file mode 100644 index 0000000..d6153ee --- /dev/null +++ b/man/imp2state.Rd @@ -0,0 +1,66 @@ +\name{imp2state} +\alias{imp2state} +\concept{surmise relation} +\concept{quasi ordinal knowledge space} +\concept{Birkhoff theorem} +\title{ + Transformation from Implications to Knowledge States +} +\description{ + \code{imp2state} transforms a set of implications (ought to be + a surmise relation) to the corresponding set of knowledge states + (the quasi ordinal knowledge space). +} +\usage{ +imp2state(imp, items) +} +\arguments{ + \item{imp}{a required object of class \code{\link[sets]{set}} + representing the set of implications, for instance obtained from a + call to \code{\link{iita}}.} + \item{items}{a required numeric giving the number of items of the + domain taken as basis for \code{imp}.} +} +\value{ + If the arguments \code{imp} and \code{items} are of required types, + \code{imp2state} returns a matrix consisting of ones or zeros (the + quasi ordinal knowledge space), in which each row represents the + \eqn{1}/\eqn{0}-pattern of a knowledge state. +} +\references{ + Doignon, J.-P. and Falmagne, J.-C. (1999) \emph{Knowledge Spaces}. + Berlin, Heidelberg, and New York: Springer-Verlag. + + Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data + analysis in knowledge space theory. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} +} +\author{ + Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de}, + Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de} +} +\note{ + For any set of implications the returned knowledge structure is a + quasi ordinal knowledge space. In case of a surmise relation this + is Birkhoff's theorem. For details refer to \cite{Doignon and + Falmagne (1999, Theorem 1.49)}. + + A set of implications, an object of the class + \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of + the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple + \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies + mastering item \eqn{i}.' +} +\seealso{ + \code{\link{state2imp}} for transformation from knowledge states to + implications. See also \code{\link{DAKS-package}} for general + information about this package. +} +\examples{ +x <- iita(pisa, v = 1) +imp2state(x$implications, ncol(pisa)) +} +\keyword{manip} +\keyword{math} +\keyword{models} +\keyword{utilities} diff --git a/man/ind_gen.Rd b/man/ind_gen.Rd new file mode 100644 index 0000000..e614026 --- /dev/null +++ b/man/ind_gen.Rd @@ -0,0 +1,66 @@ +\name{ind_gen} +\alias{ind_gen} +\concept{inductive generation} +\concept{counterexample} +\title{Inductive Generation Procedure} +\description{ + \code{ind_gen} generates inductively a set of competing quasi + orders. +} +\usage{ +ind_gen(b) +} +\arguments{ + \item{b}{a required matrix of the numbers of counterexamples for all + pairs of items, for instance obtained from a call to + \code{\link{ob_counter}}.} +} +\value{ + If the argument \code{b} is of required type, \code{ind_gen} returns + a list of the inductively generated quasi orders. +} +\references{ + Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis: + Corrections, improvements, and comparisons. Manuscript under + revision. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Sargin, A. and Uenlue, A. 
(2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Schrepp, M. (1999) On the empirical construction of implications
+  between bi-valued test items. \emph{Mathematical Social Sciences},
+  \bold{38}, 361--375.
+
+  Schrepp, M. (2003) A method for the analysis of hierarchical
+  dependencies between items of a questionnaire. \emph{Methods of
+  Psychological Research}, \bold{19}, 43--79.
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  The function \code{\link{iita}} calls \code{\link{ind_gen}} for
+  constructing the set of competing quasi orders according to the
+  inductive generation procedure.
+
+  The set of competing quasi orders is a list of objects of the class
+  \code{\link[sets]{set}}. These objects (quasi orders) consist of
+  \eqn{2}-tuples \eqn{(i, j)} of the class \code{\link[sets]{tuple}},
+  where a \eqn{2}-tuple \eqn{(i, j)} is interpreted as `mastering item
+  \eqn{j} implies mastering item \eqn{i}.'
+}
+\seealso{
+  \code{\link{ob_counter}} for computation of numbers of
+  counterexamples; \code{\link{iita}}, the interface that provides the
+  three inductive item tree analysis methods under one umbrella. See
+  also \code{\link{DAKS-package}} for general information about this
+  package.
+}
+\examples{
+ob <- ob_counter(pisa)
+ind_gen(ob)
+}
+\keyword{manip}
+\keyword{models}
diff --git a/man/mini_iita.Rd b/man/mini_iita.Rd
new file mode 100644
index 0000000..06d79c2
--- /dev/null
+++ b/man/mini_iita.Rd
@@ -0,0 +1,92 @@
+\name{mini_iita}
+\alias{mini_iita}
+\concept{minimized corrected inductive item tree analysis}
+\concept{IITA}
+\title{Minimized Corrected Inductive Item Tree Analysis}
+\description{
+  \code{mini_iita} performs the minimized corrected inductive item
+  tree analysis procedure and returns the corresponding \emph{diff}
+  values.
+}
+\usage{
+mini_iita(dataset, A)
+}
+\arguments{
+  \item{dataset}{a required data frame or matrix consisting of binary,
+    \eqn{1} or \eqn{0}, numeric data.}
+  \item{A}{a required list of competing quasi orders (surmise
+    relations), for instance obtained from a call to
+    \code{\link{ind_gen}}.}
+}
+\details{
+  Minimized corrected inductive item tree analysis is a data analysis
+  method for deriving knowledge structures (more precisely, surmise
+  relations) from binary data. Details on this procedure can be found
+  in \code{\link{iita}}. The set of competing quasi orders is passed
+  via the argument \code{A}, so any selection set of quasi orders can
+  be used.
+
+  The set of competing quasi orders must be a list of objects of the
+  class \code{\link[sets]{set}}. These objects (quasi orders) consist
+  of \eqn{2}-tuples \eqn{(i, j)} of the class
+  \code{\link[sets]{tuple}}, where a \eqn{2}-tuple \eqn{(i, j)} is
+  interpreted as `mastering item \eqn{j} implies mastering item
+  \eqn{i}.'
+
+  The data must contain only ones and zeros, which encode solving or
+  failing to solve an item, respectively.
+}
+\value{
+  If the arguments \code{dataset} and \code{A} are of required types,
+  \code{mini_iita} returns a vector of the \emph{diff} values
+  corresponding to the competing quasi orders in \code{A}.
+}
+\references{
+  Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis:
+  Corrections, improvements, and comparisons. Manuscript under
+  revision. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Sargin, A. and Uenlue, A.
(2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  The function \code{\link{iita}} can be used to perform one of the
+  three inductive item tree analysis procedures (including the
+  minimized corrected inductive item tree analysis method)
+  selectively. Whereas for the function \code{mini_iita} a selection
+  set of competing quasi orders has to be passed via the argument
+  \code{A} manually, \code{iita} automatically generates a selection
+  set from the data using the inductive generation procedure
+  implemented in \code{\link{ind_gen}}.
+
+  The latter approach, using \code{\link{iita}}, has so far been the
+  common one in knowledge space theory, where the inductive data
+  analysis methods have been utilized for exploratory derivations of
+  surmise relations from data. The function \code{mini_iita}, on the
+  other hand, can be used to select among surmise relations obtained,
+  for instance, from querying experts or from competing psychological
+  theories.
+}
+\seealso{
+  \code{\link{orig_iita}} for original inductive item tree analysis;
+  \code{\link{corr_iita}} for corrected inductive item tree analysis;
+  \code{\link{iita}}, the interface that provides the three inductive
+  item tree analysis methods under one umbrella;
+  \code{\link{pop_variance}} for population asymptotic variances of
+  \emph{diff} coefficients; \code{\link{variance}} for estimated
+  asymptotic variances of \emph{diff} coefficients;
+  \code{\link{pop_iita}} for population inductive item tree analysis.
+  See also \code{\link{DAKS-package}} for general information about
+  this package.
+}
+\examples{
+ind <- ind_gen(ob_counter(pisa))
+mini_iita(pisa, ind)
+}
+\keyword{manip}
+\keyword{models}
+\keyword{univar}
diff --git a/man/ob_counter.Rd b/man/ob_counter.Rd
new file mode 100644
index 0000000..dd3b2f0
--- /dev/null
+++ b/man/ob_counter.Rd
@@ -0,0 +1,63 @@
+\name{ob_counter}
+\alias{ob_counter}
+\concept{counterexample}
+\title{Computation of Numbers of Counterexamples}
+\description{
+  \code{ob_counter} computes, for all pairs of items in a dataset, the
+  corresponding numbers of counterexamples.
+}
+\usage{
+ob_counter(dataset)
+}
+\arguments{
+  \item{dataset}{a required data frame or matrix consisting of binary,
+    \eqn{1} or \eqn{0}, numeric data.}
+}
+\details{
+  For an item pair \eqn{(i, j)}, the number of subjects solving item
+  \eqn{j} but failing to solve item \eqn{i} is the corresponding
+  number of counterexamples. Such response patterns contradict the
+  interpretation of \eqn{(i, j)} as `mastering item \eqn{j} implies
+  mastering item \eqn{i}.'
+
+  The data must contain only ones and zeros, which encode solving or
+  failing to solve an item, respectively.
+}
+\value{
+  If the argument \code{dataset} is of required type,
+  \code{ob_counter} returns a matrix of the numbers of counterexamples
+  for all pairs of items.
+}
+\references{
+  Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis:
+  Corrections, improvements, and comparisons. Manuscript under
+  revision. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication.
\url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  The function \code{ind_gen} can be used to inductively generate a
+  set of quasi orders from the returned matrix of the numbers of
+  counterexamples.
+
+  The function \code{\link{iita}} calls \code{\link{ob_counter}}.
+}
+\seealso{
+  \code{\link{ind_gen}} for the inductive generation procedure using
+  numbers of counterexamples; \code{\link{iita}}, the interface that
+  provides the three inductive item tree analysis methods under one
+  umbrella. See also \code{\link{DAKS-package}} for general
+  information about this package.
+}
+\examples{
+ob_counter(pisa)
+}
+\keyword{manip}
+\keyword{univar}
+\keyword{utilities}
diff --git a/man/orig_iita.Rd b/man/orig_iita.Rd
new file mode 100644
index 0000000..773dd3a
--- /dev/null
+++ b/man/orig_iita.Rd
@@ -0,0 +1,91 @@
+\name{orig_iita}
+\alias{orig_iita}
+\concept{original inductive item tree analysis}
+\concept{IITA}
+\title{Original Inductive Item Tree Analysis}
+\description{
+  \code{orig_iita} performs the original inductive item tree analysis
+  procedure and returns the corresponding \emph{diff} values.
+}
+\usage{
+orig_iita(dataset, A)
+}
+\arguments{
+  \item{dataset}{a required data frame or matrix consisting of binary,
+    \eqn{1} or \eqn{0}, numeric data.}
+  \item{A}{a required list of competing quasi orders (surmise
+    relations), for instance obtained from a call to
+    \code{\link{ind_gen}}.}
+}
+\details{
+  Original inductive item tree analysis is a data analysis method for
+  deriving knowledge structures (more precisely, surmise relations)
+  from binary data. Details on this procedure can be found in
+  \code{\link{iita}}. The set of competing quasi orders is passed via
+  the argument \code{A}, so any selection set of quasi orders can be
+  used.
+
+  The set of competing quasi orders must be a list of objects of the
+  class \code{\link[sets]{set}}. These objects (quasi orders) consist
+  of \eqn{2}-tuples \eqn{(i, j)} of the class
+  \code{\link[sets]{tuple}}, where a \eqn{2}-tuple \eqn{(i, j)} is
+  interpreted as `mastering item \eqn{j} implies mastering item
+  \eqn{i}.'
+
+  The data must contain only ones and zeros, which encode solving or
+  failing to solve an item, respectively.
+}
+\value{
+  If the arguments \code{dataset} and \code{A} are of required types,
+  \code{orig_iita} returns a vector of the \emph{diff} values
+  corresponding to the competing quasi orders in \code{A}.
+}
+\references{
+  Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis:
+  Corrections, improvements, and comparisons. Manuscript under
+  revision. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  The function \code{\link{iita}} can be used to perform one of the
+  three inductive item tree analysis procedures (including the
+  original inductive item tree analysis method) selectively. Whereas
+  for the function \code{orig_iita} a selection set of competing quasi
+  orders has to be passed via the argument \code{A} manually,
+  \code{iita} automatically generates a selection set from the data
+  using the inductive generation procedure implemented in
+  \code{\link{ind_gen}}.
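Because any selection set can be passed via A, quasi orders from sources other than ind_gen can be compared directly. A schematic sketch, with two invented competing quasi orders on the five pisa items (purely illustrative, not expert-validated):

  library(DAKS)
  library(sets)
  ## tuple(i, j) encodes 'mastering item j implies mastering item i'
  A <- list(set(tuple(1, 2)),
            set(tuple(1, 2), tuple(1, 3)))
  orig_iita(pisa, A)   # one diff value per competing quasi order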
+
+  The latter approach, using \code{\link{iita}}, has so far been the
+  common one in knowledge space theory, where the inductive data
+  analysis methods have been utilized for exploratory derivations of
+  surmise relations from data. The function \code{orig_iita}, on the
+  other hand, can be used to select among surmise relations obtained,
+  for instance, from querying experts or from competing psychological
+  theories.
+}
+\seealso{
+  \code{\link{corr_iita}} for corrected inductive item tree analysis;
+  \code{\link{mini_iita}} for minimized corrected inductive item tree
+  analysis; \code{\link{iita}}, the interface that provides the three
+  inductive item tree analysis methods under one umbrella;
+  \code{\link{pop_variance}} for population asymptotic variances of
+  \emph{diff} coefficients; \code{\link{variance}} for estimated
+  asymptotic variances of \emph{diff} coefficients;
+  \code{\link{pop_iita}} for population inductive item tree analysis.
+  See also \code{\link{DAKS-package}} for general information about
+  this package.
+}
+\examples{
+ind <- ind_gen(ob_counter(pisa))
+orig_iita(pisa, ind)
+}
+\keyword{manip}
+\keyword{models}
+\keyword{univar}
diff --git a/man/pattern.Rd b/man/pattern.Rd
new file mode 100644
index 0000000..bab8f06
--- /dev/null
+++ b/man/pattern.Rd
@@ -0,0 +1,87 @@
+\name{pattern}
+\alias{pattern}
+\concept{response pattern}
+\concept{absolute frequency}
+\title{Frequencies of Response Patterns and Knowledge States}
+\description{
+  \code{pattern} computes the absolute frequencies of the (occurring)
+  response patterns, and optionally, the absolute frequencies of a
+  collection of specified knowledge states in a dataset.
+}
+\usage{
+pattern(dataset, n = 5, P = NULL)
+}
+\arguments{
+  \item{dataset}{a required data frame or matrix consisting of binary,
+    \eqn{1} or \eqn{0}, numeric data.}
+  \item{n}{an optional numeric, with default \code{n = 5}, giving the
+    number \eqn{n} of response patterns with the highest frequencies
+    to be returned (along with their frequencies).}
+  \item{P}{an optional matrix of ones and zeros giving the knowledge
+    states to be used. The default \code{P = NULL} corresponds to no
+    knowledge states being specified.}
+}
+\details{
+  This function can be used to retrieve information about how often
+  response patterns and knowledge states occur in a dataset. The
+  argument \code{n} refers to response patterns, not knowledge states,
+  and in particular is independent of specifications of the argument
+  \code{P}. If \code{pattern} is called without specifying \eqn{n}
+  explicitly, the response patterns with the five highest frequencies
+  are returned (along with their frequencies). If \eqn{n} is
+  specified, the response patterns with the \eqn{n} highest
+  frequencies are returned (along with their frequencies). If \eqn{n}
+  is larger than the number of different response patterns in the
+  dataset, \eqn{n} is set to the number of different response
+  patterns.
+
+  The knowledge states are represented as \eqn{1}/\eqn{0}-patterns and
+  are the rows of the argument matrix \code{P}. The matrix \code{P}
+  must contain only ones and zeros, which encode whether or not an
+  item belongs to a knowledge state, respectively. If \code{P} is not
+  specified, \code{pattern} only returns information about response
+  patterns (as described previously).
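A small sketch of both uses together (the states matrix below is invented for illustration on the five pisa items):

  library(DAKS)
  ## four hypothetical knowledge states, one per row
  P <- matrix(c(0, 0, 0, 0, 0,
                1, 0, 0, 0, 0,
                1, 1, 0, 0, 0,
                1, 1, 1, 1, 1), ncol = 5, byrow = TRUE)
  pattern(pisa, n = 2, P = P)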
+
+  The data must contain only ones and zeros, which encode solving or
+  failing to solve an item, respectively.
+}
+\value{
+  If the arguments \code{dataset}, \code{n}, and \code{P} are of
+  required types, \code{pattern} returns a named list consisting of
+  the following two components:
+
+  \item{response.patterns}{an array giving the response patterns (with
+    the \eqn{n} highest frequencies) and their absolute frequencies in
+    \code{dataset}.}
+  \item{states}{a matrix of the knowledge states and their absolute
+    frequencies in \code{dataset}. Each row represents a knowledge
+    state; the last column gives the frequencies of the states. If
+    \code{P = NULL}, the component \code{states} is \code{NULL}.}
+}
+\references{
+  Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis:
+  Corrections, improvements, and comparisons. Manuscript under
+  revision. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\seealso{
+  \code{\link{ob_counter}} for computation of numbers of
+  counterexamples; \code{\link{simu}} for the data simulation tool;
+  \code{\link{iita}}, the interface that provides the three inductive
+  item tree analysis methods under one umbrella. See also
+  \code{\link{DAKS-package}} for general information about this
+  package.
+}
+\examples{
+pattern(pisa, n = 3)
+pattern(pisa)
+}
+\keyword{manip}
+\keyword{univar}
+\keyword{utilities}
diff --git a/man/pisa.Rd b/man/pisa.Rd
new file mode 100755
index 0000000..beeed4e
--- /dev/null
+++ b/man/pisa.Rd
@@ -0,0 +1,43 @@
+\name{pisa}
+\alias{pisa}
+\docType{data}
+\title{Programme for International Student Assessment (PISA) Data}
+\description{
+  The accompanying binary dataset is part of the empirical 2003
+  Programme for International Student Assessment (PISA) data. It
+  contains the item responses by \eqn{340} German students on a
+  \eqn{5}-item dichotomously scored mathematical literacy test.
+}
+\usage{
+pisa
+}
+\format{
+  The \code{pisa} data frame consists of \eqn{340} rows and \eqn{5}
+  columns, representing the response patterns of the students to the
+  test items. Each number, an integer, \eqn{1} or \eqn{0}, encodes a
+  correct or incorrect response, respectively.
+}
+\note{
+  The dataset \code{pisa} was obtained by dichotomizing the original
+  multiple-choice or open format test data. The wording of the test
+  items used in the assessment is not publicly available.
+}
+\source{
+  OECD Programme for International Student Assessment (PISA).
+  \url{http://www.pisa.oecd.org/}
+}
+\references{
+  Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis:
+  Corrections, improvements, and comparisons. Manuscript under
+  revision. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\seealso{
+  See \code{\link{DAKS-package}} for general information about this
+  package.
+}
+\keyword{datasets}
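A quick sketch of inspecting the dataset (assuming the package is installed and attached):

  library(DAKS)
  dim(pisa)       # 340 students, 5 items
  colMeans(pisa)  # proportion of correct responses per item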
diff --git a/man/pop_iita.Rd b/man/pop_iita.Rd
new file mode 100644
index 0000000..4c4cc48
--- /dev/null
+++ b/man/pop_iita.Rd
@@ -0,0 +1,151 @@
+\name{pop_iita}
+\alias{pop_iita}
+\concept{population inductive item tree analysis}
+\title{
+  Population Inductive Item Tree Analysis
+}
+\description{
+  \code{pop_iita} can be used to perform one of the three inductive
+  item tree analysis algorithms (original, corrected, and minimized
+  corrected) selectively, in population quantities (that is, in a
+  known population).
+}
+\usage{
+pop_iita(imp, ce, lg, items, dataset = NULL, v)
+}
+\arguments{
+  \item{imp}{a required object of class \code{\link[sets]{set}}
+    representing the underlying set of implications (assumed to be a
+    quasi order), for instance obtained from a call to
+    \code{\link{iita}}.}
+  \item{ce}{a required numeric giving the probability for a careless
+    error.}
+  \item{lg}{a required numeric giving the probability for a lucky
+    guess.}
+  \item{items}{a required numeric giving the number of items of the
+    domain taken as basis for \code{imp}.}
+  \item{dataset}{an optional data frame or matrix consisting of
+    binary, \eqn{1} or \eqn{0}, numeric data. The default
+    \code{dataset = NULL} corresponds to no dataset being used.}
+  \item{v}{a required numeric giving the inductive item tree analysis
+    algorithm to be performed, in population quantities; \code{v = 1}
+    (minimized corrected), \code{v = 2} (corrected), and \code{v = 3}
+    (original).}
+}
+\details{
+  The three inductive item tree analysis algorithms are exploratory
+  methods for extracting quasi orders (surmise relations) from data.
+  In each algorithm, competing binary relations are generated (in the
+  same way for all three versions), and a fit measure (differing from
+  version to version) is computed for every relation of the selection
+  set in order to find the quasi order that fits the data best. In
+  all three algorithms, the idea is to estimate the numbers of
+  counterexamples for each quasi order, and to find, over all
+  competing quasi orders, the minimum value for the discrepancy
+  between the observed and expected numbers of counterexamples. The
+  three data analysis methods differ in their choices of estimates for
+  the expected numbers of counterexamples. For details see
+  \code{\link{iita}}. The algorithms are described in the paper about
+  the \pkg{DAKS} package by \cite{Sargin and Uenlue (2009)}, and in
+  the papers by \cite{Sargin and Uenlue (2009)} and
+  \cite{Uenlue and Sargin (2008)}.
+
+  Compared to \code{\link{iita}}, the function \code{pop_iita}
+  implements the three inductive item tree analysis algorithms in
+  population, \bold{not} sample, quantities. The argument \code{imp}
+  must give a quasi order, and equipped with the error probabilities
+  \code{ce} and \code{lg}, it is considered a special case of the
+  basic local independence model (\cite{Doignon \& Falmagne, 1999}).
+  The latter then is considered as the underlying population model.
+  If \code{dataset = NULL}, a set of competing quasi orders is
+  constructed based on a population analog of the inductive generation
+  procedure implemented in sample quantities in \code{\link{ind_gen}}.
+  If a dataset is specified explicitly, those data are used to
+  generate the set of competing quasi orders based on the sample
+  version of the inductive generation procedure.
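A minimal population-level sketch (the three-item quasi order below is invented for illustration; with dataset = NULL the competing quasi orders come from the population analog of the generation procedure):

  library(DAKS)
  library(sets)
  ## hypothetical quasi order: mastering item 2 or 3 implies mastering item 1
  imp <- set(tuple(1, 2), tuple(1, 3))
  z <- pop_iita(imp, ce = 0.05, lg = 0.05, items = 3, v = 2)
  z$pop.diff              # population diff values
  which.min(z$pop.diff)   # best fitting competing quasi order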
+
+  A set of implications, an object of the class
+  \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of
+  the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple
+  \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies
+  mastering item \eqn{i}.'
+
+  The data (in \code{dataset}) must contain only ones and zeros, which
+  encode solving or failing to solve an item, respectively.
+}
+\value{
+  If the arguments \code{imp}, \code{ce}, \code{lg}, \code{items},
+  \code{dataset}, and \code{v} are of required types, \code{pop_iita}
+  returns a named list consisting of the following four components:
+
+  \item{pop.diff}{a vector giving the population \emph{diff} values
+    corresponding to the (inductively generated) competing quasi
+    orders (subject to the selected version performed).}
+  \item{pop.matrix}{a matrix of all possible response patterns and
+    their corresponding population occurrence probabilities.}
+  \item{error.pop}{a vector of the population \eqn{\gamma}{gamma}
+    rates corresponding to the (inductively generated) competing quasi
+    orders (subject to the selected version performed).}
+  \item{selection.set}{a list of the (inductively generated) competing
+    quasi orders.}
+}
+\references{
+  Doignon, J.-P. and Falmagne, J.-C. (1999) \emph{Knowledge Spaces}.
+  Berlin, Heidelberg, and New York: Springer-Verlag.
+
+  Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis:
+  Corrections, improvements, and comparisons. Manuscript under
+  revision. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Uenlue, A. and Sargin, A. (2008) Maximum likelihood methodology for
+  \emph{diff} fit measures for quasi orders. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  The single careless error \code{ce} and lucky guess \code{lg}
+  probabilities are assumed to be constant over all items. The most
+  general case that can be specified thus includes two error
+  probabilities, which are the same for all items.
+
+  The sample \emph{diff} coefficients of the three inductive item tree
+  analysis algorithms can be transformed into maximum likelihood
+  estimators by dividing them by the square of the sample size. These
+  transformed \emph{diff} coefficients are considered in population
+  quantities. The \eqn{\gamma}{gamma} rates are the algorithms'
+  specific estimates of the postulated response error probability.
+
+  Population and estimated asymptotic variances of the maximum
+  likelihood estimators \emph{diff} are implemented in the functions
+  \code{\link{pop_variance}} and \code{\link{variance}}, respectively.
+}
+\seealso{
+  \code{\link{pop_variance}} for population asymptotic variances of
+  \emph{diff} coefficients; \code{\link{variance}} for estimated
+  asymptotic variances of \emph{diff} coefficients; \code{\link{simu}}
+  for the data simulation tool; \code{\link{ind_gen}} for the (sample)
+  inductive generation procedure; \code{\link{iita}}, the interface
+  that provides the three (sample) inductive item tree analysis
+  methods under one umbrella. See also \code{\link{DAKS-package}} for
+  general information about this package.
+} +\examples{ +x <- simu(7, 10000, ce = 0.01, lg = 0.01, delta = 0.12) +y <- iita(x$dataset, v = 2) +z <- pop_iita(x$implications, 0.01, 0.01, 7, x$dataset, v = 2) + +## similar sample and population diff values are obtained +(y$diff) / (10000^2) +z +} +\keyword{htest} +\keyword{manip} +\keyword{models} +\keyword{univar} diff --git a/man/pop_variance.Rd b/man/pop_variance.Rd new file mode 100644 index 0000000..cd0cdbc --- /dev/null +++ b/man/pop_variance.Rd @@ -0,0 +1,109 @@ +\name{pop_variance} +\alias{pop_variance} +\concept{population asymptotic variance} +\title{Population Asymptotic Variance} +\description{ + \code{pop_variance} computes the population (exact) asymptotic + variances of the maximum likelihood estimators \emph{diff}, assuming + a multinomial probability distribution on the set of all response + patterns. +} +\usage{ +pop_variance(pop_matrix, imp, error_pop, v) +} +\arguments{ + \item{pop_matrix}{a required matrix of all possible response + patterns and their corresponding population occurrence + probabilities, for instance obtained from a call to + \code{\link{pop_iita}}.} + \item{imp}{a required object of class \code{\link[sets]{set}} + representing the set of implications (ought to be a quasi order) + for which \emph{diff} is computed, for instance obtained from a + call to \code{\link{pop_iita}}.} + \item{error_pop}{a required numeric giving the \eqn{\gamma}{gamma} + rate to be used for computing \emph{diff}, for instance obtained + from a call to \code{\link{pop_iita}}.} + \item{v}{a required numeric giving the inductive item tree analysis + algorithm to be performed, in population quantities; \code{v = 1} + (minimized corrected) and \code{v = 2} (corrected).} +} +\details{ + Subject to the selected version to be performed, \code{pop_variance} + computes the population asymptotic variance of the maximum + likelihood estimator \emph{diff}, which here is formulated for the + relation specified in \code{imp} and for the \eqn{\gamma}{gamma} + rate in \code{error_pop}. This population variance is obtained + using the delta method, which requires calculating the Jacobian + matrix of the \emph{diff} coefficient and the inverse of the + expected Fisher information matrix for the multinomial distribution + with cell probabilities as specified in \code{pop_matrix}. + + A set of implications, an object of the class + \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of + the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple + \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies + mastering item \eqn{i}.' +} +\value{ + If the arguments \code{pop_matrix}, \code{imp}, \code{error_pop}, + and \code{v} are of required types, \code{pop_variance} returns a + numeric giving the population asymptotic variance of the maximum + likelihood estimator \emph{diff} (formulated for the relation in + \code{imp} and the \eqn{\gamma}{gamma} rate in \code{error_pop}). +} +\references{ + Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis: + Corrections, improvements, and comparisons. Manuscript under + revision. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data + analysis in knowledge space theory. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Uenlue, A. and Sargin, A. (2008) Maximum likelihood methodology for + \emph{diff} fit measures for quasi orders. Manuscript submitted for + publication. 
\url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  The current version of the package \pkg{DAKS} does not support
+  computing population asymptotic variances for the original inductive
+  item tree analysis algorithm; population asymptotic variances can be
+  calculated only for the corrected and minimized corrected
+  algorithms.
+
+  The sample \emph{diff} coefficients of the three inductive item tree
+  analysis algorithms can be transformed into maximum likelihood
+  estimators by dividing them by the square of the sample size. These
+  transformed \emph{diff} coefficients are considered in population
+  quantities. The \eqn{\gamma}{gamma} rates are the algorithms'
+  specific estimates of the postulated response error probability.
+
+  Estimated asymptotic variances of the maximum likelihood estimators
+  \emph{diff} are implemented in the function \code{\link{variance}}.
+}
+\seealso{
+  \code{\link{variance}} for estimated asymptotic variances of
+  \emph{diff} coefficients; \code{\link{pop_iita}} for population
+  inductive item tree analysis; \code{\link{ind_gen}} for the (sample)
+  inductive generation procedure; \code{\link{iita}}, the interface
+  that provides the three (sample) inductive item tree analysis
+  methods under one umbrella. See also \code{\link{DAKS-package}} for
+  general information about this package.
+}
+\examples{
+x <- simu(5, 100, 0.05, 0.05, delta = 0.15)
+y <- pop_iita(x$implications, 0.05, 0.05, 5, x$dataset, v = 2)
+pop_variance(y$pop.matrix,
+  y$selection.set[[which(y$pop.diff == min(y$pop.diff))]],
+  y$error.pop[which(y$pop.diff == min(y$pop.diff))], v = 2)
+}
+\keyword{htest}
+\keyword{math}
+\keyword{models}
+\keyword{multivariate}
+\keyword{univar}
+\keyword{utilities}
diff --git a/man/simu.Rd b/man/simu.Rd
new file mode 100644
index 0000000..66a4303
--- /dev/null
+++ b/man/simu.Rd
@@ -0,0 +1,145 @@
+\name{simu}
+\alias{simu}
+\concept{basic local independence model}
+\concept{simulation}
+\concept{simulated data}
+\concept{simulated quasi order}
+\title{
+  Data Simulation Tool
+}
+\description{
+  \code{simu} can be used to simulate binary, \eqn{1} or \eqn{0}, data
+  using a basic local independence model. The number of items, the
+  sample size, and two parameters for the careless error and lucky
+  guess probabilities can be set explicitly. The underlying
+  combinatorial structure used for simulating the data can either be
+  specified manually or is generated randomly.
+}
+\usage{
+simu(items, size, ce, lg, imp = NULL, delta)
+}
+\arguments{
+  \item{items}{a required numeric giving the number of items of the
+    domain taken as basis for the simulation.}
+  \item{size}{a required numeric giving the number of response
+    patterns to be simulated (the sample size).}
+  \item{ce}{a required numeric giving the probability for a careless
+    error.}
+  \item{lg}{a required numeric giving the probability for a lucky
+    guess.}
+  \item{imp}{an optional object of class \code{\link[sets]{set}}
+    representing the underlying set of implications (assumed to be a
+    quasi order) used for simulating the data, for instance obtained
+    from a call to \code{\link{iita}}.
The default \code{imp = NULL} + corresponds to generating the quasi order used for simulating the + data randomly.} + \item{delta}{a required (if \code{imp = NULL}) numeric giving the + probability for adding an item pair to the randomly generated + quasi order (reflexive pairs are always included a priori).} +} +\details{ + The function \code{simu} simulates data using a special case of the + basic local independence model, which is a fundamental restricted + latent class model in knowledge space theory + (\cite{Doignon \& Falmagne, 1999}). The single careless error + \code{ce} and lucky guess \code{lg} probabilities are assumed to be + constant over all items. The most general case that can be + specified thus includes two error probabilities at each item, the + same two rates for all items. The general form of the basic local + independence model allows for varying careless error and lucky guess + rates from item to item (not identifiable in general, however). + + If a quasi order is specified in \code{imp} explicitly, Birkhoff's + theorem is used to derive its corresponding quasi ordinal knowledge + space, which is equipped with the error probabilities \code{ce} and + \code{lg} to give the basic local independence model used for + simulating the data. If \code{imp = NULL}, the underlying quasi + order is generated randomly as follows. All reflexive pairs are + added to the relation. The constant specified in \code{delta} is + utilized as the probability for adding each of the remaining + non-reflexive item pairs to the relation. The transitive closure of + this relation is computed, and the resulting quasi order is then the + relation underlying the simulation. + + A set of implications, an object of the class + \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of + the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple + \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies + mastering item \eqn{i}.' + + The simulated dataset contains only ones and zeros, which encode + solving or failing to solve an item, respectively. +} +\value{ + If the arguments \code{items}, \code{size}, \code{ce}, \code{lg}, + \code{imp}, and \code{delta} are of required types, \code{simu} + returns a named list consisting of the following three components: + + \item{dataset}{a matrix of binary, \eqn{1} or \eqn{0}, numeric + data.} + \item{implications}{an object of class \code{\link[sets]{set}} + representing the underlying set of implications (assumed to be a + quasi order) used for simulating the data. If \code{imp = NULL}, + this is the quasi order that was randomly generated; otherwise + identical to the set of implications specified in the argument + \code{imp}.} + \item{states}{a matrix consisting of ones or zeros (the quasi + ordinal knowledge space), in which each row represents the + \eqn{1}/\eqn{0}-pattern of a knowledge state. This is the + knowledge structure corresponding to the set of implications + specified in \code{implications}.} +} +\references{ + Doignon, J.-P. and Falmagne, J.-C. (1999) \emph{Knowledge Spaces}. + Berlin, Heidelberg, and New York: Springer-Verlag. + + Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis: + Corrections, improvements, and comparisons. Manuscript under + revision. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data + analysis in knowledge space theory. Manuscript submitted for + publication. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Uenlue, A. 
and Sargin, A. (2008) Maximum likelihood methodology for
+  \emph{diff} fit measures for quasi orders. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  To pass a quasi order as the argument \code{imp} to \code{simu}, it
+  may be more convenient to transform from knowledge states to
+  implications using the function \code{\link{state2imp}}.
+
+  The probability specified in \code{delta} does not necessarily
+  correspond to the ratio of implications in the randomly generated
+  quasi order, because the transitive closure is formed after having
+  added item pairs. \cite{Sargin and Uenlue (2009)} propose a normal
+  sampling scheme for drawing \code{delta} values, with
+  \eqn{\mu = 0.16}{mean = 0.16} and \eqn{\sigma = 0.06}{sd = 0.06} for
+  nine items. This sampling scheme yields far more representative
+  samples of quasi orders than sampling \code{delta} values uniformly
+  from the unit interval.
+}
+\seealso{
+  \code{\link{state2imp}} for transformation from knowledge states to
+  implications; \code{\link{imp2state}} for transformation from
+  implications to knowledge states; \code{\link{pop_iita}} for
+  population inductive item tree analysis; \code{\link{iita}}, the
+  interface that provides the three (sample) inductive item tree
+  analysis methods under one umbrella. See also
+  \code{\link{DAKS-package}} for general information about this
+  package.
+}
+\examples{
+simu(7, 20, 0.1, 0.1, delta = 0.15)
+}
+\keyword{datagen}
+\keyword{distribution}
+\keyword{math}
+\keyword{models}
+\keyword{multivariate}
+\keyword{utilities}
diff --git a/man/state2imp.Rd b/man/state2imp.Rd
new file mode 100644
index 0000000..b77ec43
--- /dev/null
+++ b/man/state2imp.Rd
@@ -0,0 +1,70 @@
+\name{state2imp}
+\alias{state2imp}
+\concept{surmise relation}
+\concept{quasi ordinal knowledge space}
+\concept{Birkhoff theorem}
+\title{
+  Transformation from Knowledge States to Implications
+}
+\description{
+  \code{state2imp} transforms a set of knowledge states (ought to
+  be a quasi ordinal knowledge space) to the corresponding set of
+  implications (the surmise relation).
+}
+\usage{
+state2imp(P)
+}
+\arguments{
+  \item{P}{a required matrix of ones and zeros giving the knowledge
+    states to be used. Each row represents the
+    \eqn{1}/\eqn{0}-pattern of a knowledge state.}
+}
+\value{
+  If the argument \code{P} is of required type, \code{state2imp}
+  returns an object of class \code{\link[sets]{set}} (the surmise
+  relation) representing the set of implications.
+}
+\references{
+  Doignon, J.-P. and Falmagne, J.-C. (1999) \emph{Knowledge Spaces}.
+  Berlin, Heidelberg, and New York: Springer-Verlag.
+
+  Sargin, A. and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  For any set of knowledge states the returned binary relation is a
+  surmise relation. In case of a quasi ordinal knowledge space this
+  is Birkhoff's theorem. For details refer to \cite{Doignon and
+  Falmagne (1999, Theorem 1.49)}.
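A round-trip sketch illustrating this correspondence (the chain of states below is invented for illustration):

  library(DAKS)
  ## a quasi ordinal knowledge space on three items: a chain of states
  P <- matrix(c(0, 0, 0,
                1, 0, 0,
                1, 1, 0,
                1, 1, 1), ncol = 3, byrow = TRUE)
  imp <- state2imp(P)
  imp2state(imp, items = 3)   # recovers the states in P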
+ + A set of implications, an object of the class + \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of + the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple + \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies + mastering item \eqn{i}.' +} +\seealso{ + \code{\link{imp2state}} for transformation from implications to + knowledge states. See also \code{\link{DAKS-package}} for general + information about this package. +} +\examples{ +## an arbitrary matrix of knowledge states is defined +x <- matrix(0, nrow = 5, ncol = 3) +x[1, ] <- c(0, 0, 0) +x[2, ] <- c(0, 0, 1) +x[3, ] <- c(0, 1, 0) +x[4, ] <- c(0, 1, 1) +x[5, ] <- c(1, 1, 1) + +state2imp(x) +} +\keyword{manip} +\keyword{math} +\keyword{models} +\keyword{utilities} diff --git a/man/variance.Rd b/man/variance.Rd new file mode 100644 index 0000000..2c07643 --- /dev/null +++ b/man/variance.Rd @@ -0,0 +1,115 @@ +\name{variance} +\alias{variance} +\concept{estimated asymptotic variance} +\title{Estimated Asymptotic Variance} +\description{ + \code{variance} computes estimated asymptotic variances of the + maximum likelihood estimators \emph{diff} from data, assuming a + multinomial probability distribution on the set of all response + patterns. +} +\usage{ +variance(dataset, imp, v) +} +\arguments{ + \item{dataset}{a required data frame or matrix consisting of binary, + \eqn{1} or \eqn{0}, numeric data.} + \item{imp}{a required object of class \code{\link[sets]{set}} + representing the set of implications (ought to be a quasi order) + for which \emph{diff} is computed, for instance obtained from a + call to \code{\link{simu}}.} + \item{v}{a required numeric giving the inductive item tree analysis + algorithm to be performed; \code{v = 1} (minimized corrected) and + \code{v = 2} (corrected).} +} +\details{ + Subject to the selected version to be performed, \code{variance} + computes a consistent estimator for the population asymptotic + variance of the maximum likelihood estimator \emph{diff}, which here + is formulated for the relation specified in \code{imp} and for the + data in \code{dataset}. This estimated asymptotic variance is + obtained using the delta method, which requires calculating the + Jacobian matrix of the \emph{diff} coefficient and the inverse of + the expected Fisher information matrix for the multinomial + distribution on the set of all response patterns. In the expression + for the exact asymptotic variance, the true parameter vector of + multinomial probabilities is estimated by its corresponding maximum + likelihood estimate (vector of the relative frequencies of the + response patterns). + + A set of implications, an object of the class + \code{\link[sets]{set}}, consists of \eqn{2}-tuples \eqn{(i, j)} of + the class \code{\link[sets]{tuple}}, where a \eqn{2}-tuple + \eqn{(i, j)} is interpreted as `mastering item \eqn{j} implies + mastering item \eqn{i}.' + + The data must contain only ones and zeros, which encode solving or + failing to solve an item, respectively. +} +\value{ + If the arguments \code{dataset}, \code{imp}, and \code{v} are of + required types, \code{variance} returns a numeric giving the + estimated asymptotic variance of the maximum likelihood estimator + \emph{diff} (formulated for the relation in \code{imp} and the data + in \code{dataset}). +} +\references{ + Sargin, A. and Uenlue, A. (2009) Inductive item tree analysis: + Corrections, improvements, and comparisons. Manuscript under + revision. \url{http://www.math.uni-augsburg.de/~uenlueal/} + + Sargin, A. 
and Uenlue, A. (2009) \pkg{DAKS}: An \R package for data
+  analysis in knowledge space theory. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+
+  Uenlue, A. and Sargin, A. (2008) Maximum likelihood methodology for
+  \emph{diff} fit measures for quasi orders. Manuscript submitted for
+  publication. \url{http://www.math.uni-augsburg.de/~uenlueal/}
+}
+\author{
+  Anatol Sargin \email{anatol.sargin@math.uni-augsburg.de},
+  Ali Uenlue \email{ali.uenlue@math.uni-augsburg.de}
+}
+\note{
+  The current version of the package \pkg{DAKS} does not support
+  computing estimated asymptotic variances for the original inductive
+  item tree analysis algorithm; population asymptotic variances can be
+  estimated only for the corrected and minimized corrected algorithms.
+
+  The two types of estimators for the population asymptotic variances
+  of the \emph{diff} coefficients, obtained using the expected Fisher
+  information matrix on the one hand, and the observed Fisher
+  information matrix on the other, yield the same result in the case
+  of the multinomial distribution. Since computation based on
+  expected Fisher information is faster, this is implemented in
+  \code{variance}.
+
+  The sample \emph{diff} coefficients of the three inductive item tree
+  analysis algorithms can be transformed into maximum likelihood
+  estimators by dividing them by the square of the sample size. These
+  transformed \emph{diff} coefficients are considered in sample and
+  population quantities.
+
+  Population (exact) asymptotic variances of the maximum likelihood
+  estimators \emph{diff} are implemented in the function
+  \code{\link{pop_variance}}.
+}
+\seealso{
+  \code{\link{pop_variance}} for population asymptotic variances of
+  \emph{diff} coefficients; \code{\link{pop_iita}} for population
+  inductive item tree analysis; \code{\link{iita}}, the interface that
+  provides the three (sample) inductive item tree analysis methods
+  under one umbrella. See also \code{\link{DAKS-package}} for general
+  information about this package.
+}
+\examples{
+x <- simu(5, 100, 0.05, 0.05, delta = 0.15)
+variance(x$dataset, x$implications, v = 2)
+}
+\keyword{htest}
+\keyword{manip}
+\keyword{math}
+\keyword{models}
+\keyword{multivariate}
+\keyword{univar}
+\keyword{utilities}
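To close, a sketch combining the variance and pop_variance examples above, contrasting the estimated asymptotic variance with a population counterpart (simulation parameters are arbitrary; results vary by run):

  library(DAKS)
  x <- simu(5, 500, ce = 0.05, lg = 0.05, delta = 0.15)
  y <- pop_iita(x$implications, 0.05, 0.05, 5, x$dataset, v = 2)
  best <- which.min(y$pop.diff)

  ## estimated asymptotic variance of diff, computed from the simulated data
  variance(x$dataset, x$implications, v = 2)

  ## population (exact) asymptotic variance for the best fitting quasi order
  pop_variance(y$pop.matrix, y$selection.set[[best]],
               y$error.pop[best], v = 2)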