<span style="font-size: 24px;">Data augmentation of CCs and MGCs using Gaussian noise (0h data; noise SD = 0.1% of each column's standard deviation; 312 CCs and 680 MGCs)</span>

In [None]:
library(dplyr)
library(purrr)

In [1]:
# Read the 0h input table (first row is the header) into a data frame
data <- read.csv(file = "for_seurat_0h_6.csv", header = TRUE)

#' Augment a data frame with noisy replicates of selected columns
#'
#' For every column in `columns`, `n_expand` new columns are appended. Each new
#' column is the source column plus Gaussian noise (mean 0, standard deviation
#' `noise_level` times the source column's standard deviation), with negative
#' results set to 0 and all values rounded to whole numbers.
#'
#' @param data A data frame holding the source columns.
#' @param columns Columns of `data` to replicate (as accepted by `data[[...]]`).
#' @param n_expand Number of noisy replicates to create per column; 0 returns
#'   `data` unchanged.
#' @param noise_level Noise standard deviation as a fraction of each source
#'   column's standard deviation.
#' @return `data` with the replicates appended on the right, named
#'   `<column>_expanded_<i>`, ordered by source column and then by `i`.
expand_dataset <- function(data, columns, n_expand, noise_level) {
  stopifnot(
    is.numeric(n_expand),
    length(n_expand) == 1L,
    !is.na(n_expand),
    n_expand >= 0
  )

  # seq_len() yields an empty sequence for 0, whereas 1:0 would iterate twice.
  replicate_ids <- seq_len(n_expand)
  n_rows <- nrow(data)

  # Collect the replicates in a preallocated list and bind once at the end;
  # calling cbind() inside the loop re-copies the growing frame every time.
  new_columns <- vector("list", length(columns) * length(replicate_ids))
  new_names <- character(length(new_columns))
  slot <- 0L

  for (column in columns) {
    values <- data[[column]]
    noise_sd <- sd(values, na.rm = TRUE) * noise_level
    for (i in replicate_ids) {
      noisy <- values + rnorm(n_rows, mean = 0, sd = noise_sd)
      noisy[noisy < 0] <- 0  # Replace negative values with 0
      slot <- slot + 1L
      new_columns[[slot]] <- round(noisy)  # Keep whole-number values
      new_names[slot] <- paste(column, "expanded", i, sep = "_")
    }
  }

  if (length(new_columns) == 0L) {
    return(data)
  }

  names(new_columns) <- new_names
  # check.names = FALSE keeps the generated names exactly as built above.
  cbind(data, as.data.frame(new_columns, check.names = FALSE))
}

# Column names of the input table; position 1 is not part of either group
columns <- colnames(data)

# Columns 2-4: append 103 noisy replicates of each column
# (noise SD = 0.1% of the column's standard deviation)
first_three_columns <- columns[seq(2, 4)]
data_first_expanded <- expand_dataset(
  data,
  columns = first_three_columns,
  n_expand = 103,
  noise_level = 0.001
)

# Columns 5-7: append 226 noisy replicates of each column, same noise level
last_three_columns <- columns[seq(5, 7)]
data_last_expanded <- expand_dataset(
  data,
  columns = last_three_columns,
  n_expand = 226,
  noise_level = 0.001
)


In [None]:
# Show the first six rows of each augmented table
head(data_first_expanded, n = 6L)
head(data_last_expanded, n = 6L)

In [4]:
# Keep only each group's own columns. Columns are dropped by name instead of
# by position so that re-running this cell does not remove further columns.
is_other_group <- names(data_first_expanded) %in% last_three_columns
data_first_expanded <- data_first_expanded[, !is_other_group, drop = FALSE]

is_other_group <- names(data_last_expanded) %in% first_three_columns
data_last_expanded <- data_last_expanded[, !is_other_group, drop = FALSE]

# Drop the final replicate of the last source column (the right-most column
# produced by expand_dataset). Its name is rebuilt from the replicate count of
# the first source column, so a second run finds nothing left to drop.
n_replicates <- sum(startsWith(
  names(data_last_expanded),
  paste0(last_three_columns[1], "_expanded_")
))
final_replicate <- paste(
  last_three_columns[length(last_three_columns)], "expanded", n_replicates,
  sep = "_"
)
data_last_expanded <- data_last_expanded[
  , names(data_last_expanded) != final_replicate, drop = FALSE
]

head(data_first_expanded)
head(data_last_expanded)

<span style="font-size: 24px;">Convert 0h data to Seurat-compatible files</span>


In [None]:
# Combine both augmented tables into one all-numeric data frame keyed by the
# values of column 1. Both tables start with that same column (each was built
# from `data`), so it is removed from each table before binding rather than
# through a hard-coded index into the combined table, which would silently hit
# the wrong column if the replicate counts ever changed.
row_ids <- data_first_expanded[, 1]
merged_data <- cbind(
  data_first_expanded[, -1, drop = FALSE],
  data_last_expanded[, -1, drop = FALSE]
)

# Force every column to numeric; going through character first means a factor
# column yields its labels rather than its integer codes.
merged_data[] <- lapply(merged_data, function(x) as.numeric(as.character(x)))
row.names(merged_data) <- row_ids
head(merged_data)

In [None]:
bulk_data <- merged_data
library(Matrix)

# Generate output directory
outdir <- "for_seurat_0h_6"
if (!dir.exists(outdir)) {
  dir.create(outdir)
}

# Write a character vector to a gzip-compressed text file, one element per
# line. Compression is done inside R instead of shelling out to `gzip`: the
# shell call needs the binary on PATH, breaks on paths containing spaces, and
# its exit status was never checked.
write_gz_lines <- function(lines, path) {
  con <- gzfile(path, open = "wt")
  on.exit(close(con), add = TRUE)
  writeLines(lines, con)
  invisible(path)
}

# Compress `path` into `<path>.gz`, replacing any existing archive, then
# delete the uncompressed file. (Command-line `gzip` refuses to overwrite an
# existing .gz, so re-running the export used to leave a stale archive behind.)
gzip_in_place <- function(path) {
  src <- file(path, open = "rb")
  dest <- gzfile(paste0(path, ".gz"), open = "wb")
  tryCatch(
    repeat {
      chunk <- readBin(src, what = "raw", n = 1048576L)
      if (length(chunk) == 0L) {
        break
      }
      writeBin(chunk, dest)
    },
    finally = {
      close(src)
      close(dest)
    }
  )
  invisible(file.remove(path))
}

# NOTE(review): row names are written as barcodes and column names as
# features, while matrix.mtx keeps rows as rows. 10x-style readers usually
# match matrix rows to features.tsv; confirm the downstream loader expects
# this layout.

# barcodes.tsv.gz
barcodes <- rownames(bulk_data)
write_gz_lines(barcodes, file.path(outdir, "barcodes.tsv.gz"))

# features.tsv.gz
features <- colnames(bulk_data)
write_gz_lines(features, file.path(outdir, "features.tsv.gz"))

# matrix.mtx.gz
data_matrix <- as.matrix(bulk_data)
Matrix::writeMM(as(data_matrix, "sparseMatrix"), file.path(outdir, "matrix.mtx"))
gzip_in_place(file.path(outdir, "matrix.mtx"))


<span style="font-size: 24px;">Data augmentation of CCs and MGCs using Gaussian noise (4h data; noise SD = 0.1% of each column's standard deviation; 312 CCs and 680 MGCs)</span>


In [168]:
# Read the 4h input table (first row is the header) into a data frame
data <- read.csv(file = "for_seurat_4h_6.csv", header = TRUE)

#' Augment a data frame with noisy replicates of selected columns
#'
#' For every column in `columns`, `n_expand` new columns are appended. Each new
#' column is the source column plus Gaussian noise (mean 0, standard deviation
#' `noise_level` times the source column's standard deviation), with negative
#' results set to 0 and all values rounded to whole numbers.
#'
#' @param data A data frame holding the source columns.
#' @param columns Columns of `data` to replicate (as accepted by `data[[...]]`).
#' @param n_expand Number of noisy replicates to create per column; 0 returns
#'   `data` unchanged.
#' @param noise_level Noise standard deviation as a fraction of each source
#'   column's standard deviation.
#' @return `data` with the replicates appended on the right, named
#'   `<column>_expanded_<i>`, ordered by source column and then by `i`.
expand_dataset <- function(data, columns, n_expand, noise_level) {
  stopifnot(
    is.numeric(n_expand),
    length(n_expand) == 1L,
    !is.na(n_expand),
    n_expand >= 0
  )

  # seq_len() yields an empty sequence for 0, whereas 1:0 would iterate twice.
  replicate_ids <- seq_len(n_expand)
  n_rows <- nrow(data)

  # Collect the replicates in a preallocated list and bind once at the end;
  # calling cbind() inside the loop re-copies the growing frame every time.
  new_columns <- vector("list", length(columns) * length(replicate_ids))
  new_names <- character(length(new_columns))
  slot <- 0L

  for (column in columns) {
    values <- data[[column]]
    noise_sd <- sd(values, na.rm = TRUE) * noise_level
    for (i in replicate_ids) {
      noisy <- values + rnorm(n_rows, mean = 0, sd = noise_sd)
      noisy[noisy < 0] <- 0  # Replace negative values with 0
      slot <- slot + 1L
      new_columns[[slot]] <- round(noisy)  # Keep whole-number values
      new_names[slot] <- paste(column, "expanded", i, sep = "_")
    }
  }

  if (length(new_columns) == 0L) {
    return(data)
  }

  names(new_columns) <- new_names
  # check.names = FALSE keeps the generated names exactly as built above.
  cbind(data, as.data.frame(new_columns, check.names = FALSE))
}

# Column names of the input table; position 1 is not part of either group
columns <- colnames(data)

# Columns 2-4: append 103 noisy replicates of each column
# (noise SD = 0.1% of the column's standard deviation)
first_three_columns <- columns[seq(2, 4)]
data_first_expanded <- expand_dataset(
  data,
  columns = first_three_columns,
  n_expand = 103,
  noise_level = 0.001
)

# Columns 5-7: append 226 noisy replicates of each column, same noise level
last_three_columns <- columns[seq(5, 7)]
data_last_expanded <- expand_dataset(
  data,
  columns = last_three_columns,
  n_expand = 226,
  noise_level = 0.001
)


In [169]:
# Show the first six rows of each augmented table
head(data_first_expanded, n = 6L)
head(data_last_expanded, n = 6L)

In [43]:
# Keep only each group's own columns. Columns are dropped by name instead of
# by position so that re-running this cell does not remove further columns.
is_other_group <- names(data_first_expanded) %in% last_three_columns
data_first_expanded <- data_first_expanded[, !is_other_group, drop = FALSE]

is_other_group <- names(data_last_expanded) %in% first_three_columns
data_last_expanded <- data_last_expanded[, !is_other_group, drop = FALSE]

# Drop the final replicate of the last source column (the right-most column
# produced by expand_dataset). Its name is rebuilt from the replicate count of
# the first source column, so a second run finds nothing left to drop.
n_replicates <- sum(startsWith(
  names(data_last_expanded),
  paste0(last_three_columns[1], "_expanded_")
))
final_replicate <- paste(
  last_three_columns[length(last_three_columns)], "expanded", n_replicates,
  sep = "_"
)
data_last_expanded <- data_last_expanded[
  , names(data_last_expanded) != final_replicate, drop = FALSE
]

head(data_first_expanded)
head(data_last_expanded)

<span style="font-size: 24px;">Convert 4h data to Seurat-compatible files</span>

In [None]:
# Combine both augmented tables into one all-numeric data frame keyed by the
# values of column 1. Both tables start with that same column (each was built
# from `data`), so it is removed from each table before binding rather than
# through a hard-coded index into the combined table, which would silently hit
# the wrong column if the replicate counts ever changed.
row_ids <- data_first_expanded[, 1]
merged_data <- cbind(
  data_first_expanded[, -1, drop = FALSE],
  data_last_expanded[, -1, drop = FALSE]
)

# Force every column to numeric; going through character first means a factor
# column yields its labels rather than its integer codes.
merged_data[] <- lapply(merged_data, function(x) as.numeric(as.character(x)))
row.names(merged_data) <- row_ids
head(merged_data)

In [None]:
bulk_data <- merged_data
library(Matrix)

# Generate output directory
outdir <- "for_seurat_4h_6"
if (!dir.exists(outdir)) {
  dir.create(outdir)
}

# Write a character vector to a gzip-compressed text file, one element per
# line. Compression is done inside R instead of shelling out to `gzip`: the
# shell call needs the binary on PATH, breaks on paths containing spaces, and
# its exit status was never checked.
write_gz_lines <- function(lines, path) {
  con <- gzfile(path, open = "wt")
  on.exit(close(con), add = TRUE)
  writeLines(lines, con)
  invisible(path)
}

# Compress `path` into `<path>.gz`, replacing any existing archive, then
# delete the uncompressed file. (Command-line `gzip` refuses to overwrite an
# existing .gz, so re-running the export used to leave a stale archive behind.)
gzip_in_place <- function(path) {
  src <- file(path, open = "rb")
  dest <- gzfile(paste0(path, ".gz"), open = "wb")
  tryCatch(
    repeat {
      chunk <- readBin(src, what = "raw", n = 1048576L)
      if (length(chunk) == 0L) {
        break
      }
      writeBin(chunk, dest)
    },
    finally = {
      close(src)
      close(dest)
    }
  )
  invisible(file.remove(path))
}

# NOTE(review): row names are written as barcodes and column names as
# features, while matrix.mtx keeps rows as rows. 10x-style readers usually
# match matrix rows to features.tsv; confirm the downstream loader expects
# this layout.

# barcodes.tsv.gz
barcodes <- rownames(bulk_data)
write_gz_lines(barcodes, file.path(outdir, "barcodes.tsv.gz"))

# features.tsv.gz
features <- colnames(bulk_data)
write_gz_lines(features, file.path(outdir, "features.tsv.gz"))

# matrix.mtx.gz
data_matrix <- as.matrix(bulk_data)
Matrix::writeMM(as(data_matrix, "sparseMatrix"), file.path(outdir, "matrix.mtx"))
gzip_in_place(file.path(outdir, "matrix.mtx"))


<span style="font-size: 24px;">Data augmentation of CCs and MGCs using Gaussian noise (12h data; noise SD = 0.1% of each column's standard deviation; 312 CCs and 680 MGCs)</span>

In [None]:
# Read the 12h input table (first row is the header) into a data frame
data <- read.csv(file = "for_seurat_12h_6.csv", header = TRUE)

#' Augment a data frame with noisy replicates of selected columns
#'
#' For every column in `columns`, `n_expand` new columns are appended. Each new
#' column is the source column plus Gaussian noise (mean 0, standard deviation
#' `noise_level` times the source column's standard deviation), with negative
#' results set to 0 and all values rounded to whole numbers.
#'
#' @param data A data frame holding the source columns.
#' @param columns Columns of `data` to replicate (as accepted by `data[[...]]`).
#' @param n_expand Number of noisy replicates to create per column; 0 returns
#'   `data` unchanged.
#' @param noise_level Noise standard deviation as a fraction of each source
#'   column's standard deviation.
#' @return `data` with the replicates appended on the right, named
#'   `<column>_expanded_<i>`, ordered by source column and then by `i`.
expand_dataset <- function(data, columns, n_expand, noise_level) {
  stopifnot(
    is.numeric(n_expand),
    length(n_expand) == 1L,
    !is.na(n_expand),
    n_expand >= 0
  )

  # seq_len() yields an empty sequence for 0, whereas 1:0 would iterate twice.
  replicate_ids <- seq_len(n_expand)
  n_rows <- nrow(data)

  # Collect the replicates in a preallocated list and bind once at the end;
  # calling cbind() inside the loop re-copies the growing frame every time.
  new_columns <- vector("list", length(columns) * length(replicate_ids))
  new_names <- character(length(new_columns))
  slot <- 0L

  for (column in columns) {
    values <- data[[column]]
    noise_sd <- sd(values, na.rm = TRUE) * noise_level
    for (i in replicate_ids) {
      noisy <- values + rnorm(n_rows, mean = 0, sd = noise_sd)
      noisy[noisy < 0] <- 0  # Replace negative values with 0
      slot <- slot + 1L
      new_columns[[slot]] <- round(noisy)  # Keep whole-number values
      new_names[slot] <- paste(column, "expanded", i, sep = "_")
    }
  }

  if (length(new_columns) == 0L) {
    return(data)
  }

  names(new_columns) <- new_names
  # check.names = FALSE keeps the generated names exactly as built above.
  cbind(data, as.data.frame(new_columns, check.names = FALSE))
}

# Column names of the input table; position 1 is not part of either group
columns <- colnames(data)

# Columns 2-4: append 103 noisy replicates of each column
# (noise SD = 0.1% of the column's standard deviation)
first_three_columns <- columns[seq(2, 4)]
data_first_expanded <- expand_dataset(
  data,
  columns = first_three_columns,
  n_expand = 103,
  noise_level = 0.001
)

# Columns 5-7: append 226 noisy replicates of each column, same noise level
last_three_columns <- columns[seq(5, 7)]
data_last_expanded <- expand_dataset(
  data,
  columns = last_three_columns,
  n_expand = 226,
  noise_level = 0.001
)


In [None]:
# Show the first six rows of each augmented table
head(data_first_expanded, n = 6L)
head(data_last_expanded, n = 6L)

In [None]:
# Keep only each group's own columns. Columns are dropped by name instead of
# by position so that re-running this cell does not remove further columns.
is_other_group <- names(data_first_expanded) %in% last_three_columns
data_first_expanded <- data_first_expanded[, !is_other_group, drop = FALSE]

is_other_group <- names(data_last_expanded) %in% first_three_columns
data_last_expanded <- data_last_expanded[, !is_other_group, drop = FALSE]

# Drop the final replicate of the last source column (the right-most column
# produced by expand_dataset). Its name is rebuilt from the replicate count of
# the first source column, so a second run finds nothing left to drop.
n_replicates <- sum(startsWith(
  names(data_last_expanded),
  paste0(last_three_columns[1], "_expanded_")
))
final_replicate <- paste(
  last_three_columns[length(last_three_columns)], "expanded", n_replicates,
  sep = "_"
)
data_last_expanded <- data_last_expanded[
  , names(data_last_expanded) != final_replicate, drop = FALSE
]

head(data_first_expanded)
head(data_last_expanded)

<span style="font-size: 24px;">Convert 12h data to Seurat-compatible files</span>

In [None]:
# Combine both augmented tables into one all-numeric data frame keyed by the
# values of column 1. Both tables start with that same column (each was built
# from `data`), so it is removed from each table before binding rather than
# through a hard-coded index into the combined table, which would silently hit
# the wrong column if the replicate counts ever changed.
row_ids <- data_first_expanded[, 1]
merged_data <- cbind(
  data_first_expanded[, -1, drop = FALSE],
  data_last_expanded[, -1, drop = FALSE]
)

# Force every column to numeric; going through character first means a factor
# column yields its labels rather than its integer codes.
merged_data[] <- lapply(merged_data, function(x) as.numeric(as.character(x)))
row.names(merged_data) <- row_ids
head(merged_data)

In [None]:
bulk_data <- merged_data
library(Matrix)

# Generate output directory
outdir <- "for_seurat_12h_6"
if (!dir.exists(outdir)) {
  dir.create(outdir)
}

# Write a character vector to a gzip-compressed text file, one element per
# line. Compression is done inside R instead of shelling out to `gzip`: the
# shell call needs the binary on PATH, breaks on paths containing spaces, and
# its exit status was never checked.
write_gz_lines <- function(lines, path) {
  con <- gzfile(path, open = "wt")
  on.exit(close(con), add = TRUE)
  writeLines(lines, con)
  invisible(path)
}

# Compress `path` into `<path>.gz`, replacing any existing archive, then
# delete the uncompressed file. (Command-line `gzip` refuses to overwrite an
# existing .gz, so re-running the export used to leave a stale archive behind.)
gzip_in_place <- function(path) {
  src <- file(path, open = "rb")
  dest <- gzfile(paste0(path, ".gz"), open = "wb")
  tryCatch(
    repeat {
      chunk <- readBin(src, what = "raw", n = 1048576L)
      if (length(chunk) == 0L) {
        break
      }
      writeBin(chunk, dest)
    },
    finally = {
      close(src)
      close(dest)
    }
  )
  invisible(file.remove(path))
}

# NOTE(review): row names are written as barcodes and column names as
# features, while matrix.mtx keeps rows as rows. 10x-style readers usually
# match matrix rows to features.tsv; confirm the downstream loader expects
# this layout.

# barcodes.tsv.gz
barcodes <- rownames(bulk_data)
write_gz_lines(barcodes, file.path(outdir, "barcodes.tsv.gz"))

# features.tsv.gz
features <- colnames(bulk_data)
write_gz_lines(features, file.path(outdir, "features.tsv.gz"))

# matrix.mtx.gz
data_matrix <- as.matrix(bulk_data)
Matrix::writeMM(as(data_matrix, "sparseMatrix"), file.path(outdir, "matrix.mtx"))
gzip_in_place(file.path(outdir, "matrix.mtx"))
