This document contain all the relevant scripts used for **processing the raw data** acquired in `get_data`, producing **usable and formated tables**.

In [1]:
!jt -l

Available Themes: 
   chesterish
   grade3
   gruvboxd
   gruvboxl
   monokai
   oceans16
   onedork
   solarizedd
   solarizedl


In [7]:
library(tidyverse);

setwd("~/Documents/jupyter-workplace/laser-anoxia/") # geting raw folder
load("raw/list_skeleton.rdata")
skeleton.data     <- x
load("raw/list_branches_info.rdata")
branche.info.data <- x
load("raw/list_hull.rdata")
hull.data         <- x
load("raw/list_measures.rdata")
measure.data      <- x
load("raw/list_frac.rdata")
frac.data         <- x

# returning to current folder
setwd("~/Documents/jupyter-workplace/laser-anoxia/microglia-morphology/data-processing/") 
rm(x)

### Skeleton Data

In [2]:
# formating columns for the info slice data-frame (15 cells)
# (information about the origin of the cell data)
Grupo  <- vector(length = 15)  # $group
Animal <- vector(length = 15)  # $animal
Region <- vector(length = 15)  # $region
Corte  <- vector(length = 15)  # $slice

#checking if there is more, or less, than 15 cells in the slice (checking for errors)
for(i in seq_along(skeleton.data)){
  if(15 %in% attributes(skeleton.data[[i]])$row.names != TRUE){print(i)} 
}

# build data-frames
for(i in seq_along(skeleton.data)){
  for(cell in 1:15){ # adding info for each cell 
    Grupo[cell]  <- attributes(skeleton.data[[i]])$group
    Animal[cell] <- attributes(skeleton.data[[i]])$animal
    Region[cell] <- attributes(skeleton.data[[i]])$region
    Corte[cell]  <- attributes(skeleton.data[[i]])$slice
  }
  # creating a list with dataframes for each slice (group of 15 cells)
  skeleton.data[[i]] <- cbind(Animal, Corte, Grupo, Region, skeleton.data[[i]])
  skeleton.data[[i]] <- skeleton.data[[i]] %>% select(- X) # removing unwanted parameter
}

# column names
cnames <- c("Animal","Corte","Grupo", "Region","Branches","Junctions",
            "End.point.voxels","Juncction.voxels","Slab.voxels",
            "Average.branch.length","Triple.points","Quadruple.points",
            "Maximum.branch.length")

# head of the complete dataframe 
data <- skeleton.data[[1]]

for(i in 2:192){ # connecting by row all the data
  data <- rbind(data,skeleton.data[[i]])
}
skeleton.data <- data
colnames(skeleton.data) <- cnames # adding colnames

#saving dataframe
write_csv(skeleton.data,"skeleton-data.csv")

### Hull & Circularity Data

In [3]:
Grupo <- vector(length = 15)  # $group
Animal <- vector(length = 15) # $animal
Region <- vector(length = 15) # $region
Corte <- vector(length = 15)  # $slice

for(i in seq_along(hull.data)){ # checking errors
  if(15 %in% attributes(hull.data[[i]])$row.names != TRUE){print(i)}
  }

for(i in seq_along(hull.data)){
  for(cell in 1:15){
    Grupo[cell]  <- attributes(hull.data[[i]])$group
    Animal[cell] <- attributes(hull.data[[i]])$animal
    Region[cell] <- attributes(hull.data[[i]])$region
    Corte[cell]  <- attributes(hull.data[[i]])$slice
  }
  # removing unwanted variables/columns
  hull.data[[i]] <- hull.data[[i]] %>% select(-V1,-V2,-V4,-V7,-V8,-V19,-V20)
  hull.data[[i]] <- cbind(Animal, Corte, Grupo, Region, hull.data[[i]])
}

c.names <- c("Animal","Corte","Grupo","Region","Mean Foreground Pixels",
             "Density = Foreground Pixels/Hull Area","Span Ratio",
             "Maximum Span Across Hull","Area","Perimeter","Circularity",
             "Width of Bounding Rectangle","Height of Bounding Rectangle",
             "Maximum Radius from Hull's Centre of Mass","Max/Min Radii",
             "CV for all Radii","Mean Radius","Diameter of Bounding Circle",
             "Maximum Radius from Circle's Centre","Max/Min Radii from Circle Centre",
             "CV for all Radii from Circle's Centre","Mean Radius from Circle's Centre",
             "Method Used to Calculate Circle")

# formating the dataframe
data <- t(tibble(R1 = 1:23)) # first row reference to build a formated dataframe
colnames(data) <- c.names

for(i in seq_along(hull.data)){
  colnames(hull.data[[i]]) <- c.names
  data <- rbind(data,hull.data[[i]])
}

hull.data <- data[-1,] # removing the firsrow reference
write_csv(hull.data,"hull-data.csv")

### Cell Measures Data

In [4]:
library(tidyverse)

Grupo  <- vector(length = 15)  # $group
Animal <- vector(length = 15)  # $animal
Region <- vector(length = 15)  # $region
Corte  <- vector(length = 15)  # $slice

for(i in seq_along(measure.data)){ # checking errors
  if(15 %in% attributes(measure.data[[i]])$row.names != TRUE){print(i)}
  }

for(i in seq_along(measure.data)){
  for(cell in 1:15){ # para cada tabela
    Grupo[cell]  <- attributes(measure.data[[i]])$group
    Animal[cell] <- attributes(measure.data[[i]])$animal
    Region[cell] <- attributes(measure.data[[i]])$region
    Corte[cell]  <- attributes(measure.data[[i]])$slice
  }
  # removing unwanted variables/columns
  measure.data[[i]] <- cbind(Animal, Corte, Grupo, Region, measure.data[[i]])
  measure.data[[i]] <- measure.data[[i]] %>% select(- X.1, - Label)
}

# formating the dataframe

# first row reference to build a formated dataframe
# data: model for rbind
data <- tibble(Animal = 0,Corte =  0, Grupo = 0, Region = 0, Area = 0, 
               X = 0, Y = 0, XM = 0, YM = 0, Perim. = 0, Major = 0, 
               Minor = 0, Angle = 0, Circ. = 0, AR = 0, Round = 0, 
               Solidity = 0)

# check if columns are compatible
identical(colnames(measure.data[[1]]),colnames(data))

for(i in seq_along(measure.data)){ # connecting data by row
  data <- rbind(data,measure.data[[i]])
}

measure.data = data[-1,]
#saving dataframe
write_csv(measure.data,"measure-data.csv")

### Fractal and Lacunarity

In [5]:
Grupo <- vector(length = 15)  # $group
Animal <- vector(length = 15) # $animal
Region <- vector(length = 15) # $region
Corte <- vector(length = 15)  # $slice

for(i in 1:192){ # selecting the correct row in the raw data
  frac.data[[i]] <- frac.data[[i]][c(seq(2,45,by=3)),]
}

for(i in 1:192){
  for(cell in 1:15){
    Grupo[cell] <- attributes(frac.data[[i]])$group
    Animal[cell] <- attributes(frac.data[[i]])$animal
    Region[cell] <- attributes(frac.data[[i]])$region
    Corte[cell] <- attributes(frac.data[[i]])$slice
  }
  # selecting the correct columns/variables
  frac.data[[i]] <- frac.data[[i]] %>% select(V10,V15,V20)
  frac.data[[i]] <- cbind(Animal, Corte, Grupo, Region, frac.data[[i]])
}

cnames <- c("Animal","Corte","Grupo","Region","D","D.max(r²)","Lacunarity")

data <- frac.data[[1]] # head of the dataframe
for(i in 2:192){ # connecting by row
  data <- rbind(data,frac.data[[i]])
}

frac.data <- data
colnames(frac.data) <- cnames
#saving dataframe
write_csv(frac.data,"frac-data.csv")

### Final data-frame

**integrate all** in one dataframe.

In [34]:
library(tidyverse)

setwd("~/Documents/jupyter-workplace/laser-anoxia/microglia-morphology/data-processing/_m")

measure.data    <- read.csv("measure-data.csv")
hull.data       <- read.csv("hull-data.csv")
skeleton.data   <- read.csv("skeleton-data.csv")
frac.data       <- read.csv("frac-data.csv")

# MEASURE DATA
measure.data <- measure.data %>% filter(Grupo != "CL") %>% # we wont use CL experimental group
                 select(Area,Perim., Solidity,Round, Circ.)
colnames(measure.data) <- c("Cell.Area","Cell.Perimeter",
                             "Solidity","Round","Circularity")
# HULL & CIRCULARITY
hull.data <- hull.data %>% filter(Grupo != "CL") %>% 
    select(Animal,Corte,Grupo,Region,
           Density...Foreground.Pixels.Hull.Area,Area,
           Diameter.of.Bounding.Circle,
           Mean.Radius.from.Circle.s.Centre)
colnames(hull.data) <- c("Animal","Corte","Grupo","Region",
                         "Foreground.pixels.by.hull.area",
                         "Hull.Area","Diameter.of.Bounding.Circle",
                         "Mean.Radius")


# subtract cell area from hull area and generate a new variable: Hull void area
hull.and.measure.data <- cbind(hull.data,measure.data) %>%
                            mutate(Hull.void.area = Hull.Area - Cell.Area)

# FRAC & LAC
frac.data <- frac.data %>%  
    filter(Grupo != "CL") %>% 
    mutate_at(vars(D,Lacunarity), as.numeric) %>%
    select(D,Lacunarity)
colnames(frac.data) <- c("D","Lacunarity")

# SKELETON
skeleton.data <- skeleton.data %>% 
    filter(Grupo != "CL") %>%  
    select(Branches,Junctions,End.point.voxels,Triple.points,
           Quadruple.points,Average.branch.length, Maximum.branch.length)

# integrate
data       <- cbind(hull.and.measure.data,skeleton.data,frac.data) 
Grupos     <- as.vector(data$Grupo)
data$Grupo <- factor(Grupos, levels = c("CS","AS","AL")) # rearranging groups

# export complete data-frame
write_csv(data,"features_data.csv")

rm(hull.data,measure.data,skeleton.data,frac.data,hull.and.measure.data,Grupos)

A *quick peek* on the data and **parameters selected**.

In [35]:
head(data)

Animal,Corte,Grupo,Region,Foreground.pixels.by.hull.area,Hull.Area,Diameter.of.Bounding.Circle,Mean.Radius,Cell.Area,Cell.Perimeter,...,Hull.void.area,Branches,Junctions,End.point.voxels,Triple.points,Quadruple.points,Average.branch.length,Maximum.branch.length,D,Lacunarity
CS2,1,CS,CA1,0.5968,2282,74.9667,31.5814,1362,344.416,...,920,15,7,9,7,0,13.283,29.556,1140,801
CS2,1,CS,CA1,0.8191,376,32.8571,14.7844,308,91.983,...,68,3,1,3,1,0,13.876,22.142,41,1246
CS2,1,CS,CA1,0.4057,2036,78.4092,32.8051,826,246.635,...,1210,7,3,5,3,0,17.84,34.627,87,1271
CS2,1,CS,CA1,0.4576,2380,69.3097,30.9999,1089,341.061,...,1291,20,11,7,11,0,10.674,27.385,429,1659
CS2,1,CS,CA1,0.4241,2457,70.9627,32.733,1042,371.588,...,1415,18,9,9,9,0,12.222,20.071,585,1452
CS2,1,CS,CA1,0.2993,5362,119.4364,50.7948,1605,709.72,...,3757,40,19,20,17,1,10.588,31.799,538,1596


**Session Info**

In [36]:
# adicionar session info for reprodutibility
sessionInfo()

R version 3.6.1 (2019-07-05)
Platform: x86_64-conda_cos6-linux-gnu (64-bit)
Running under: Linux Mint 20.2

Matrix products: default
BLAS/LAPACK: /home/caio/miniconda3/envs/r-jupyter/lib/R/lib/libRblas.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] forcats_0.4.0   stringr_1.4.0   dplyr_0.8.0.1   purrr_0.3.2    
[5] readr_1.3.1     tidyr_0.8.3     tibble_2.1.1    ggplot2_3.1.1  
[9] tidyverse_1.2.1

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.1       cellranger_1.1.0 plyr_1.8.4       pillar_1.3.1    
 [5] compiler_3.6.1   base64enc_0.