In [1]:
library(R6)

In [23]:
MDMM <- R6Class("MDMM",
  # public methods and parameters
  public = list(
  initialize = function(){},
      
  fit = function(Xs,Ks,intial.alpha=NULL, max.iter=200, threshold=0.001, learning.rate=0.1){
      
      ## input
      ## Xs: group of count vectors
      ## Ks: group of mask vectors
      ## Initial_alpha: the initial value of alpha, can be a vector, the default value is the mean value of all Xs
      
      ###### parameter validation ######
      if (!sum(dim(Xs)==dim(Ks))==2){
          print("Error! The dimension of input Xs and Ks must be same.")
          return(-1)
      }
      if (!is.null(intial.alpha) && !dim(Xs)[2]==length(intial.alpha)){
          print("Error! The column dimension of input Xs and the length of intial.alpha must be same.")
          return(-1)
      }
      
      ###### parameter initialize ######
      private$dims = dim(Xs)[2]
      private$learning_rate <- learning.rate
      private$max_iter <- max.iter
      private$threshold <- threshold
      
      if(is.null(intial.alpha)){
          initial.alpha <- colMeans(Xs)
      }
      private$initial_alpha <- initial.alpha
      alpha <- initial.alpha
    
      private$LL_list <- c(private$log.Likelihood(alpha,Xs,Ks))
      
      ###### estimation ######
      for(epoch in 1:private$max_iter){
        for(sample in 1:dim(Xs)[1]){
            gradient.alpha <- matrix(0, private$dims, 1)
            K <- Ks[sample,]
            X <- Xs[sample,]

            for(j in 1: private$dims){
                if(K[j]==1){
                    gradient.alpha[j] <- digamma(sum(alpha))-digamma(sum(alpha)+sum(X))+digamma(X[j]+alpha[j])-digamma(alpha[j])
                }
            }
            alpha <- alpha + private$learning_rate * as.double(gradient.alpha)
        }
        if(abs(private$LL_list[length(private$LL_list)]-private$log.Likelihood(alpha,Xs,Ks))<= private$threshold){
            break
        }else{
            private$LL_list <- c(private$LL_list, private$log.Likelihood(alpha,Xs,Ks))
        }  
    }
    
      ###### confidence range ######
      high_boundary <- matrix(1,1,private$dims)
      low_boundary <- matrix(1,1,private$dims)
  
      for(i in 1: private$dims){
          Second.derivative.i <- 0
          for(sample in 1:dim(Xs)[1]){
              K <- Ks[sample,]
              X <- Xs[sample,]
              Second.derivative.i <- Second.derivative.i + trigamma(sum(alpha))-trigamma(sum(alpha)+sum(X))+trigamma(X[i]+alpha[i])-trigamma(alpha[i])
          }
              high_boundary[i] <- alpha[i] + 1.96/sqrt(-1*Second.derivative.i)
              low_boundary[i] <- max(alpha[i] - 1.96/sqrt(-1*Second.derivative.i),0)
      }
      
      
      ###### save result ######
      result <- as.data.frame(rbind(as.double(high_boundary),alpha,as.double(low_boundary)))
      row.names(result)<- c("high_boundary","estimated_alpha","low_boundary")
      private$estimation <- result 
  },
      
      
  get_parameters = function(){
      print(paste("dims:",private$dims))
      print(paste("learning_rate:",private$learning_rate))
      print(paste("max_iter:",private$max_iter))
      print(paste("threshold:",private$threshold))
  },
  get_result = function(){return(private$estimation)},
  draw_trainning_process = function(){plot(private$LL_list,xlab =  "epoch", ylab ="Log.Likelihood")}
  ), 
                
  
  private = list(
      ## private parameters
      dims = NULL,
      learning_rate = NULL,
      initial_alpha = NULL,
      threshold = NULL,
      max_iter = NULL,
      
      estimation = NULL,
      LL_list = NULL,
      
      ## private methods
      log.Likelihood = function(alpha,Xs,Ks){
        Likelihood <- 0
        for(i in 1:dim(Xs)[1]){
            K_i <- Ks[i,]
            X_i <- as.double(Xs[i,K_i==1])
            alpha_i <- as.double(alpha[K_i==1])

            Likelihood <- Likelihood + log(gamma(sum(alpha_i))) - log(gamma(sum(alpha_i+X_i)))
            for(j in 1:length(alpha_i)){
                Likelihood <- Likelihood + log(gamma(alpha_i[j] + X_i[j])/gamma(alpha_i[j]))
            }
        }
        return(Likelihood)
    }
  )
)

# load data

In [18]:
df_proportion <- read.csv(file = "Cell_proportion.csv")
df_proportion <- df_proportion[15:234,]

In [19]:
subregions <- unique(df_proportion$subregion)

In [29]:
for (sr in subregions){
    df_test = df_proportion[df_proportion$subregion==sr,]
    Xs <- df_test[,4:12]
    Ks <- matrix(1,dim(Xs)[1],9)
    for(i in 1: dim(Xs)[1]){
        if(df_test$method[i]=="rm-CM"){
            Ks[i,2] <- 0
        }
        if(df_test$method[i]=="CD45+"){
            Ks[i,] <- c(1,0,1,0,1,1,0,1,0)
        }

    }
    Xs <- Xs/rowSums(Xs)
    
    my_model <- MDMM$new()
    my_model$fit(Xs,Ks,max.iter=300,learning.rate=0.1)
    df_result <- my_model$get_result()

    write.csv(df_result,paste0("/stor/public/chenyx/HHCAd/DM_result2/subregion/",sr,".csv"))
}

“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”
“NaNs produced”


In [38]:
df_result <- read.csv("/stor/public/chenyx/HHCAd/DM_result2/subregion/Left atria.csv",row.names =  1)
df_result/sum(df_result[2,])

Unnamed: 0_level_0,Endothelial.cell,Cardiomyocyte.cell,Myelocyte,Fibroblast,Lymphocyte,Smooth.muscle.cell,Neuron,Pericyte,Adipocyte
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
high_boundary,0.28498791,0.42399683,0.15346795,0.3344048,0.11227446,0.12771026,0.06484273,0.26335734,0.09596971
estimated_alpha,0.16240963,0.25020912,0.07404702,0.1953547,0.04910884,0.05846611,0.02360722,0.14763384,0.03916351
low_boundary,0.03983135,0.07642141,0.0,0.0563046,0.0,0.0,0.0,0.03191035,0.0


In [39]:
df_result <- read.csv("/stor/public/chenyx/HHCAd/DM_result2/subregion/Left ventricle.csv",row.names =  1)
df_result/sum(df_result[2,])

Unnamed: 0_level_0,Endothelial.cell,Cardiomyocyte.cell,Myelocyte,Fibroblast,Lymphocyte,Smooth.muscle.cell,Neuron,Pericyte,Adipocyte
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
high_boundary,0.324689,0.5274146,0.1014329882,0.2117711,0.07292507,0.0836615,0.03003386,0.26183527,0.03286035
estimated_alpha,0.2125388,0.3526041,0.0508353661,0.1246588,0.03297154,0.03967254,0.01044786,0.16481956,0.01145139
low_boundary,0.1003886,0.1777937,0.0002377441,0.03754649,0.0,0.0,0.0,0.06780386,0.0


In [40]:
df_result <- read.csv("/stor/public/chenyx/HHCAd/DM_result2/subregion/Right ventricle.csv",row.names =  1)
df_result/sum(df_result[2,])

Unnamed: 0_level_0,Endothelial.cell,Cardiomyocyte.cell,Myelocyte,Fibroblast,Lymphocyte,Smooth.muscle.cell,Neuron,Pericyte,Adipocyte
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
high_boundary,0.30588609,0.6094942,0.11224311,0.25009001,0.08124923,0.103535,0.03753476,0.27254552,0.06721205
estimated_alpha,0.17426999,0.3812007,0.04870105,0.13319267,0.03170772,0.0438001,0.0116395,0.15168738,0.02380084
low_boundary,0.04265389,0.1529073,0.0,0.01629534,0.0,0.0,0.0,0.03082924,0.0


In [41]:
df_result <- read.csv("/stor/public/chenyx/HHCAd/DM_result2/subregion/Right atria.csv",row.names =  1)
df_result/sum(df_result[2,])

Unnamed: 0_level_0,Endothelial.cell,Cardiomyocyte.cell,Myelocyte,Fibroblast,Lymphocyte,Smooth.muscle.cell,Neuron,Pericyte,Adipocyte
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
high_boundary,0.32545403,0.40526115,0.16398117,0.3953526,0.15470757,0.12039635,0.08195704,0.17279586,0.07968775
estimated_alpha,0.18649464,0.23181729,0.07812997,0.2347215,0.07205126,0.05232474,0.03118277,0.08365542,0.02962245
low_boundary,0.04753525,0.05837343,0.0,0.0740903,0.0,0.0,0.0,0.0,0.0


In [42]:
df_result <- read.csv("/stor/public/chenyx/HHCAd/DM_result2/subregion/Interatrial septum.csv",row.names =  1)
df_result/sum(df_result[2,])

Unnamed: 0_level_0,Endothelial.cell,Cardiomyocyte.cell,Myelocyte,Fibroblast,Lymphocyte,Smooth.muscle.cell,Neuron,Pericyte,Adipocyte
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
high_boundary,0.4839059,0.5578171,0.43118909,0.6287535,,0.37618674,0.29814098,0.41748669,
estimated_alpha,0.1102581,0.1465031,0.08814951,0.1879018,0.1344125,0.06847583,0.04465963,0.08293995,0.1366995
low_boundary,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,


In [43]:
df_result <- read.csv("/stor/public/chenyx/HHCAd/DM_result2/subregion/Interventricular septum.csv",row.names =  1)
df_result/sum(df_result[2,])

Unnamed: 0_level_0,Endothelial.cell,Cardiomyocyte.cell,Myelocyte,Fibroblast,Lymphocyte,Smooth.muscle.cell,Neuron,Pericyte,Adipocyte
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
high_boundary,0.3632135,0.5370635,0.12395911,0.2328911,0.09155712,0.1022261,0.04576474,0.29709489,
estimated_alpha,0.2058879,0.325844,0.04964595,0.1110796,0.03286596,0.03863349,0.01239499,0.15843532,0.06521277
low_boundary,0.0485624,0.1146244,0.0,0.0,0.0,0.0,0.0,0.01977575,
