In [5]:
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(tibble))
suppressPackageStartupMessages(library(stringr))
library(parallel)
library(DESeq2)
library(hise)
source('/home//jupyter/BRI_Figures_Final_V1//helper_function/helper_function_IHA.r')

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:dplyr’:

    combine, intersect, setdiff, union


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min



Attaching package: ‘S4Vectors’


The following object is masked from ‘package:tidyr’:

    expand


The following objects are masked from ‘package:dplyr’:

    first, rename


The following object is masked from

# Read Meta Data

In [2]:
meta_data=read.csv("/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA_meta_data-2024-05-09.csv")

# Perform Deseq2 on Year 1 Day 0 samples vs Year 1 Day 7 samples

In [3]:
filtered_gene_set<-read.csv("filtered_gene_Y1D0_Y1D7.csv")

In [6]:
meta_data_subset=meta_data %>% filter(sample.visitName %in% c("Flu Year 1 Day 0","Flu Year 1 Day 7"))

aggregated_count_file_list<-paste0("/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA/BRI/Aggregated_Count/Aggregated_Raw_Expression_of_Celltypes_by_Sample_AIFI_L3/",meta_data_subset$pbmc_sample_id,".csv")

df_list<-read_pseudobulk_expression(aggregated_count_file_list)

[1] "Total reading time: 18.153 seconds"
[1] "The length of the list matches the length of the input path."


In [7]:
celltypes <- unique(unlist(lapply(df_list, names)))
celltypes<-unique(sub(".*:", "", celltypes))

In [9]:
length(celltypes)

In [10]:
res_list<-list()
for (celltype in celltypes){
    
    print(celltype)
    celltype_list <- lapply(df_list, function(df) { df <- df[, grep(celltype, names(df), fixed = TRUE),drop=FALSE] })
    exp_matrix<-do.call(cbind,celltype_list)
    colnames(exp_matrix)<-sub(":.*", "", colnames(exp_matrix))
    rownames(meta_data_subset)<-meta_data_subset$pbmc_sample_id
    filtered_gene_set_filtered<-filtered_gene_set%>% filter(AIFI_L3==celltype)%>% select(gene) %>%pull()
    
    res=deseq2_analysis(exp_matrix,
                     meta_data=meta_data_subset,
                     filtered_gene_set=filtered_gene_set_filtered,
                     formula= ~subject.subjectGuid+sample.visitName,
                     comparisons=list(c("sample.visitName", "Flu Year 1 Day 7", "Flu Year 1 Day 0")),
                     celltype=celltype)
    res_list[[celltype]]=res
}

[1] "ASDC"


converting counts to integer mode

“some variables in design formula are characters, converting to factors”
  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating size factors

  Note: levels of factors in the design contain characters other than
  letters, numbers, '_' and '.'. It is recommended (but not required) to use
  only letters, numbers, and delimiters '_' or '.', as these are safe characters

estimating dispersions

gene-wise dispersion estimates



In [None]:
res_all<-do.call(rbind,res_list)

In [None]:
write.csv(res_all,"Deseq2_Result_Y1D0_Y1D7.csv")

# Upload Files

In [12]:
input_uuid_meta_data=pull(read.csv("/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA_BRI_meta_data_uuid.csv")['id'])
input_uuid_h5ad=pull(read.csv("/home/jupyter/BRI_Figures_Final_V1/Dataset/scRNA_BRI_h5ad_uuid.csv")['id'])


In [14]:
study_space_uuid <- 'de025812-5e73-4b3c-9c3b-6d0eac412f2a'
date_today <- Sys.Date()
working_dir <- getwd()
notebook_name <- basename("BRI_Figures_Final_V1/Extended-Figure7/01-DEG/01B_DEG_analysis.ipynb")  

title <- sprintf("Cert-Pro_IHA_Figures_Files_%s_from_%s/%s", date_today, working_dir, notebook_name)
title <- str_replace_all(title, c("/" = "-", "\\." = "_"))

title

In [15]:
uploadFiles(
  list('/home/jupyter/BRI_Figures_Final_V1/Extended-Figure7/01-DEG/Deseq2_Result_Y2D0.csv'),
  studySpaceId = 'de025812-5e73-4b3c-9c3b-6d0eac412f2a',
  title = title, fileTypes = list('csv'),destination=title,
  inputFileIds = as.list(c(input_uuid_meta_data,input_uuid_h5ad))
)

[1] "Cannot determine the current notebook."
[1] "1) /home/jupyter/BRI_Figures_Final_V1/Extended-Figure7/01-DEG/01B_DEG_analysis.ipynb"
[1] "2) /home/jupyter/BRI_Figures_Final_V1/Extended-Figure7/01-DEG/01A_DEG_Filter_Genes.ipynb"
[1] "3) /home/jupyter/IHA-Figures_Final_V1/Figure2/04_CompositeScore/02A_Aging_CompositeScore_SF4.ipynb"


Please select (1-3)  1


You are trying to upload the following files:  /home/jupyter/BRI_Figures_Final_V1/Extended-Figure7/01-DEG/Deseq2_Result_Y2D0.csv



(y/n) y
