In [11]:
library("entropy")

#' Compute per-row entropies of a count histogram and summarise them.
#'
#' Reads the histogram CSV at
#' `../data/count_histogram/<dataset_name><binning_method>_<bycell>`,
#' computes one entropy value per row (ignoring `NA` entries in that row)
#' with `entropy()` from the "entropy" package, and writes the per-row
#' entropies to
#' `../data/variability_measure/<dataset_name><binning_method>_entropy_<bycell>`.
#'
#' @param dataset_name Dataset identifier used as the file name prefix.
#' @param binning_method Binning label appended directly after the dataset
#'   name (no separator), e.g. 'bayesian' or 'binary'.
#' @param bycell File name suffix including the extension, e.g.
#'   'bycell.csv' or 'bygene.csv'.
#' @return Numeric vector of length 2: the mean and the standard deviation
#'   of the per-row entropies.
compute_entropy <- function(dataset_name, binning_method, bycell){
    counts_path <- paste0(
        "../data/count_histogram/", dataset_name, binning_method, "_", bycell
    )
    counts <- read.csv(counts_path)

    # Convert once so each row can be pulled out as a plain (named) vector.
    count_matrix <- as.matrix(counts)

    # One entropy value per histogram row. seq_len() keeps the zero-row case
    # well defined (1:0 would iterate over c(1, 0)).
    entropies <- vapply(
        seq_len(nrow(count_matrix)),
        function(i) {
            row_counts <- count_matrix[i, ]
            # Rows may be padded with NA when histograms differ in bin count.
            row_counts <- row_counts[!is.na(row_counts)]
            entropy(row_counts)
        },
        numeric(1)
    )

    # The mean must be taken over all rows; averaging only the last row's
    # value would simply return that single entropy.
    mean_entropy <- mean(entropies)
    std_entropy <- sd(entropies)

    output_path <- paste0(
        "../data/variability_measure/", dataset_name, binning_method,
        "_entropy_", bycell
    )
    write.csv(entropies, output_path, row.names = FALSE)

    c(mean_entropy, std_entropy)
}

In [24]:
# Datasets to summarise; each name is passed to compute_entropy().
dataset_names <- c(
    'AE3', 'AE4', 'D0', 'D0_2', 'D6', 'D6_2', 'D15', 'D15_2',
    'LK', 'LK_2', 'LSK', 'LSK_2', 'LSKmix'
)

# Bayesian binning, "bycell" histograms: one (mean, std) row per dataset.
means <- data.frame(
    mean_entropy = numeric(length(dataset_names)),
    std_entropy = numeric(length(dataset_names)),
    row.names = dataset_names
)
for (idx in seq_along(dataset_names)) {
    means[idx, ] <- compute_entropy(dataset_names[idx], 'bayesian', 'bycell.csv')
}
write.csv(
    means,
    "../data/variability_measure/mean_entropy_bayesian_bycell.csv",
    row.names = TRUE
)

In [22]:
# Binary binning, "bycell" histograms: one (mean, std) row per dataset.
means <- data.frame(
    mean_entropy = numeric(length(dataset_names)),
    std_entropy = numeric(length(dataset_names)),
    row.names = dataset_names
)
for (idx in seq_along(dataset_names)) {
    means[idx, ] <- compute_entropy(dataset_names[idx], 'binary', 'bycell.csv')
}
write.csv(
    means,
    "../data/variability_measure/mean_entropy_binary_bycell.csv",
    row.names = TRUE
)

In [None]:
# Bayesian binning, "bygene" histograms: one (mean, std) row per dataset.
means <- data.frame(
    mean_entropy = numeric(length(dataset_names)),
    std_entropy = numeric(length(dataset_names)),
    row.names = dataset_names
)
for (idx in seq_along(dataset_names)) {
    means[idx, ] <- compute_entropy(dataset_names[idx], 'bayesian', 'bygene.csv')
}
write.csv(
    means,
    "../data/variability_measure/mean_entropy_bayesian_bygene.csv",
    row.names = TRUE
)

In [23]:
# Binary binning, "bygene" histograms: one (mean, std) row per dataset.
means <- data.frame(
    mean_entropy = numeric(length(dataset_names)),
    std_entropy = numeric(length(dataset_names)),
    row.names = dataset_names
)
for (idx in seq_along(dataset_names)) {
    means[idx, ] <- compute_entropy(dataset_names[idx], 'binary', 'bygene.csv')
}
write.csv(
    means,
    "../data/variability_measure/mean_entropy_binary_bygene.csv",
    row.names = TRUE
)