<a href="https://colab.research.google.com/github/dinux11/Mutations-within-the-NS1-immunodominant-T-cell-epitope-of-DENV2-in-SL/blob/main/T_cell_Paper.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load the necessary library
library(ggplot2)
library(tidyr)
library(tidyverse)
library(ggpubr)
library(ggsignif)
library(dplyr)
library(pheatmap)
library(FSA)
library(rstatix)
library(pheatmap)

In [None]:

# Figure 1A: T cell responses to DENV2 NS1 peptide pools (1-5)
#
# Reads the per-pool ELISpot counts, compares the pools with a Kruskal-Wallis
# test followed by Dunn's post-hoc test (Bonferroni-adjusted), and builds the
# box/dot plot `p_NS1_pools` that is reused as panel A of Figure 1.
df_pools <- read.csv("/Users/dinukaariyaratne/Desktop/Data/Pool1_5.csv")

# read.csv() can mangle the first header into "X...Pool1" (typically caused by
# a byte-order mark in the file); restore the intended name only when present.
names(df_pools)[names(df_pools) == "X...Pool1"] <- "Pool1"

# Reshape to long format: one row per (Pool, Value) observation
df_pools_long <- df_pools %>%
  pivot_longer(cols = everything(), names_to = "Pool", values_to = "Value")

# Perform the Kruskal-Wallis test
kruskal_result <- kruskal.test(Value ~ Pool, data = df_pools_long)

# Post-hoc Dunn's test, run once and reused below. The call is namespaced
# because the dunn.test package is not attached at the top of the file.
pairwise <- dunn.test::dunn.test(
  df_pools_long$Value,
  g = df_pools_long$Pool,
  method = "bonferroni"
)

# Data frame of pairwise comparisons and their adjusted p-values
comp_df <- data.frame(
  comparison = pairwise$comparisons,
  p_adj = pairwise$P.adjusted
)

# Base plot. It is built regardless of the test outcome so that the Figure 1
# assembly always has a panel A. The legend is switched off AFTER
# theme_minimal(): a complete theme added later would reset legend.position.
p_NS1_pools <- ggplot(
  df_pools_long,
  aes(x = Pool, y = Value, color = Pool, fill = Pool)
) +
  geom_jitter(shape = 21, size = 2, alpha = 0.6, width = 0.3) +
  geom_boxplot(alpha = 0.5, outlier.shape = NA) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "",
    x = "Stimulating peptide pool",
    y = "Spots per million cells"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(size = 8)
  )

# Add the significance bracket and p-value label only when the omnibus test
# is significant
if (kruskal_result$p.value < 0.05) {
  print("The Kruskal-Wallis result is significant")

  # Look the Pool1/Pool2 comparison up by label; fall back to the first row
  # (the previous behaviour) if the label format differs.
  pool_idx <- match("Pool1 - Pool2", comp_df$comparison, nomatch = 1L)
  significant_p_value <- format.pval(comp_df$p_adj[pool_idx], digits = 2)

  # NOTE(review): geom_signif() derives its stars from its own test
  # (Wilcoxon by default), not from the Dunn p-value printed next to it.
  # Confirm that this is intended.
  p_NS1_pools <- p_NS1_pools +
    geom_signif(
      comparisons = list(c("Pool1", "Pool2")),
      map_signif_level = TRUE,
      textsize = 3,
      vjust = -0.5
    ) +
    annotate(
      "text",
      x = 2,
      y = max(df_pools_long$Value, na.rm = TRUE),
      label = paste0("p = ", significant_p_value),
      size = 4
    )
} else {
  print("The Kruskal-Wallis result is not significant")
}

print(p_NS1_pools)

In [None]:
# Figure 1B: T cell responses to individual peptides (15-25)
#
# Draws the peptide heatmap, then combines it with the pool plot
# (`p_NS1_pools`, built in the Figure 1A section) into a two-panel Figure 1.
pep15_25 <- read.csv("/Users/dinukaariyaratne/Desktop/Data/pep_15_25.csv")

# Restore the first header if read.csv() mangled it into "X...pep5"
colnames(pep15_25)[colnames(pep15_25) == "X...pep5"] <- "pep5"

NS1Pools_heatmap <- pheatmap(
  as.data.frame(pep15_25),
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  fontsize_row = 10,
  fontsize_col = 6,  # column labels 4 pt smaller than the row labels
  angle_col = "45"   # rotate the column labels by 45 degrees
)

# Create Figure 1 (panels A and B)

# Panel A comes from the Figure 1A section; fail with a clear message if that
# section has not been run in this session.
if (!exists("p_NS1_pools")) {
  stop("p_NS1_pools not found: run the Figure 1A section first.", call. = FALSE)
}

# Convert the pheatmap drawing into a grob that ggplot2 can embed
NS1Pools_heatmap_grob <- cowplot::as_gtable(NS1Pools_heatmap$gtable)

# Tag the two panels "A" and "B"
p_NS1_pools_labeled <- p_NS1_pools + labs(tag = "A")
NS1Pools_heatmap_labeled <- ggplot() +
  annotation_custom(NS1Pools_heatmap_grob) +
  labs(tag = "B") +
  theme_void() # no axes or labels around the embedded heatmap

# Arrange the panels side by side. The call is namespaced because gridExtra
# is not attached at the top of the file. grid.arrange() also draws the figure.
figure1 <- gridExtra::grid.arrange(
  p_NS1_pools_labeled,
  NS1Pools_heatmap_labeled,
  ncol = 2,
  widths = c(7, 6.5)
)

# Save the final figure
ggsave(filename = "/Users/dinukaariyaratne/Library/Mobile Documents/com~apple~CloudDocs/Desktop/PhD USJP/PhD_Thesis/T Cell Paper/Figures/Figure1.tiff",  plot = figure1, dpi = 1200, width = 6, height = 6, units = "in")


In [None]:
# Figure 5: total T cell responses to all peptides
#
# One dot per measurement for each stimulating peptide, overlaid with a
# crossbar produced by the "median_hilow" summary.
df_T_all <- read.csv("/Users/dinukaariyaratne/Desktop/Data/Tcellresponses_DENV.csv")

# Long format: one row per (sample, peptide) measurement
df_T_all_long <- tidyr::pivot_longer(
  df_T_all,
  cols = c(R22, SL22, D1pep28, D3pep29),
  names_to = "Peptide",
  values_to = "Value"
)

# Dot plot with the summary crossbar drawn on top
p_T_all <- ggplot(df_T_all_long, aes(x = Peptide, y = Value, fill = Peptide)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0.35) +
  stat_summary(
    fun.data = "median_hilow",
    fun.args = list(conf.int = 0.35),
    geom = "crossbar",
    width = 0.2,
    colour = "black",
    alpha = 0.5
  ) +
  labs(
    title = "",
    x = "Stimulating Peptide",
    y = "Spots per million cells"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )

# Show the plot and write it to disk
print(p_T_all)
ggsave(filename = "/Users/dinukaariyaratne/Library/Mobile Documents/com~apple~CloudDocs/Desktop/PhD USJP/PhD_Thesis/T Cell Paper/Figures/Figure5.tiff", plot = p_T_all, dpi = 1200, width = 6, height = 6, units = "in")



In [None]:
# Figure 6: monotypic vs multitypic T cell responses to R22, SL22, D1pep28
# and D3pep29
#
# One facet per peptide; the two immune-status groups are compared with a
# Mann-Whitney U (Wilcoxon rank-sum) test.

# Load the response table and reshape it to one row per (sample, peptide)
df_T <- read.csv("/Users/dinukaariyaratne/Desktop/Data/Tcellresponses_DENV.csv")

df_long <- tidyr::pivot_longer(
  df_T,
  cols = c(R22, SL22, D1pep28, D3pep29),
  names_to = "Variable",
  values_to = "Value"
)

# Keep only the two immune-status groups being compared
df_filtered <- dplyr::filter(
  df_long,
  Immune_status %in% c("Monotypic", "Multitypic")
)

# Notched box plot with jittered points, faceted by peptide
p_1 <- ggboxplot(
  df_filtered,
  x = "Immune_status",
  y = "Value",
  add = "jitter",
  notch = TRUE,
  color = "Immune_status",
  palette = c("#00AFBB", "#E7B800"),
  width = 0.5,
  facet.by = "Variable",
  alpha = 0.7
) +
  stat_compare_means(
    method = "wilcox.test",
    comparisons = list(c("Monotypic", "Multitypic")),
    label.x = 1.3
  ) +
  labs(
    title = "T cell responses to NS1 pep154-171 with varying Immune Status",
    x = "Immune Status",
    y = "Spots per million cells",
    caption = "Statistical comparison by Mann-Whitney U test"
  ) +
  theme_pubr()

# Show the plot and write it to disk
print(p_1)

ggsave(filename = "/Users/dinukaariyaratne/Library/Mobile Documents/com~apple~CloudDocs/Desktop/PhD USJP/PhD_Thesis/T Cell Paper/Figures/Figure6.tiff", plot = p_1, dpi = 1200, width = 6, height = 6, units = "in")

In [None]:
# Supplementary Figure 3
#
# Box plots of the response to each peptide, grouped by immune status on the
# x axis and filled by dengue status, with pairwise Mann-Whitney U tests
# between the immune-status groups.

# Load the T cell data sheet
df_T <- read.csv("/Users/dinukaariyaratne/Desktop/Data/Tcellresponses_DENV.csv")

# Reshape the data into long format for ggplot2
df_T_long <- df_T %>%
  tidyr::pivot_longer(
    cols = c(R22, SL22, D1pep28, D3pep29),
    names_to = "Variable",
    values_to = "Value"
  )

# Pairs of immune-status groups to compare
supp_3_comparisons <- list(
  c("Monotypic", "Multitypic"),
  c("Primary", "Monotypic"),
  c("Primary", "Multitypic")
)

# Create the box plot. Layers that were later overridden in the chain
# (theme_minimal(), the rotated axis-text theme, an earlier title and y label)
# have been removed; the rendered theme and labels are unchanged.
supp_3 <- ggplot(
  data = df_T_long,
  aes(x = Immune_status, y = Value, fill = Dengue_status)
) +
  geom_boxplot() +
  facet_wrap(~ Variable, scales = "free") +
  # With `comparisons`, the label size is set through `size`, and the
  # documented label values are "p.format" / "p.signif".
  stat_compare_means(
    method = "wilcox.test",
    comparisons = supp_3_comparisons,
    label = "p.format",
    size = 2.5
  ) +
  labs(
    title = "T cell responses to NS1 pep154-171 with varying Immune Status",
    x = "Immune Status",
    y = "Spots per million cells",
    fill = "Dengue Status",
    caption = "Statistical comparison by Mann-Whitney U test"
  ) +
  coord_cartesian(ylim = c(0, 3000)) +
  theme_pubr()
print(supp_3)

# Define dpi
dpi <- 1200

# Save the plot.
# NOTE(review): no width/height is given, so ggsave() falls back to the size
# of the current graphics device; the other figures use 6 x 6 in.
ggsave(filename = "/Users/dinukaariyaratne/Library/Mobile Documents/com~apple~CloudDocs/Desktop/PhD USJP/PhD_Thesis/T Cell Paper/Figures/supp_3.tiff", plot = supp_3, dpi = dpi, device = "tiff")

In [None]:
# Supplementary Figure 5: DF vs DHF within the monotypic and multitypic groups
#
# Box plots of the response to each peptide for every combination of immune
# status and dengue status, comparing monotypic against multitypic within
# DF and within DHF.
df_5 <- read.csv("/Users/dinukaariyaratne/Desktop/Data/Tcellresponses_DENV.csv")

# Subset holding only the first 37 rows and first 7 columns
Subset_1 <- df_5[1:37, 1:7]

# Keep only the monotypic and multitypic individuals
df_6 <- dplyr::filter(df_5, Immune_status %in% c("Monotypic", "Multitypic"))

# Long format plus a combined immune-status/dengue-status factor
# (levels look like "Monotypic.DF")
df_6_long <- df_6 %>%
  tidyr::pivot_longer(
    cols = c(R22, SL22, D1pep28, D3pep29),
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  dplyr::mutate(
    Status_Interaction = interaction(Immune_status, Dengue_status)
  )

# Group pairs to test: monotypic vs multitypic within each dengue status
my_comparisons_1 <- list(
  c("Monotypic.DF", "Multitypic.DF"),
  c("Monotypic.DHF", "Multitypic.DHF")
)

# Box plot with jittered points, one facet per peptide
p_DEN_1 <- ggpubr::ggboxplot(
  df_6_long,
  x = "Status_Interaction",
  y = "Value",
  color = "Immune_status",
  palette = "jco",
  textsize = 2,
  add = "jitter",
  width = 0.5,
  alpha = 0.7
) +
  ggpubr::stat_compare_means(
    method = "wilcox.test",
    comparisons = my_comparisons_1,
    label.x = c(1.2, 1.7),
    label.y = c(2400, 3000)
  ) +
  facet_wrap(~ Variable, scales = "free") +
  labs(title = "T cell responses to NS1 pep154-171 in patients with past
                                  infections in varying severity
                                    of Dengue infection(DF/DHF)",
       x = "Dengue Status",
       y = "Spots per million cells",
       caption = "Statistical comparison by Mann Whitney U Test") +
  ggpubr::theme_pubr() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  coord_cartesian(ylim = c(0, 4000)) # visible y range

# Show the plot and write it to disk
print(p_DEN_1)

ggsave(filename = "/Users/dinukaariyaratne/Library/Mobile Documents/com~apple~CloudDocs/Desktop/PhD USJP/PhD_Thesis/T Cell Paper/Figures/Suppfig5.tiff", plot = p_DEN_1, dpi = 1200, width = 6, height = 6, units = "in")


In [None]:
# DF and DHF breakdown
# Cross-tabulate immune status (rows) against dengue status (columns)
immune_dengue_table <- table(df_5[["Immune_status"]], df_5[["Dengue_status"]])

# Show the contingency table
print(immune_dengue_table)

# The same breakdown as one row per combination with its count `n`
dplyr::count(df_5, Immune_status, Dengue_status)

In [None]:
# Supplementary Figure 6: DF vs DHF within the monotypic group
#
# Uses `df_5` loaded in the Supplementary Figure 5 section.
df_monotypic_1 <- df_5 %>%
  filter(Immune_status %in% c("Monotypic")) %>%
  select("R22", "SL22", "D1pep28", "D3pep29", "Dengue_status", "Immune_status")

# Convert the data to long format: one row per (sample, peptide)
df_monotypic_1_long <- df_monotypic_1 %>%
  tidyr::pivot_longer(
    cols = c(R22, SL22, D1pep28, D3pep29),
    names_to = "Variable",
    values_to = "Value"
  )

# The single comparison to test
my_comparisons_mono <- list(c("DF", "DHF"))

# Turn Dengue_status into a factor with a fixed level order (DF first)
df_monotypic_1_long$Dengue_status <- factor(
  df_monotypic_1_long$Dengue_status,
  levels = c("DF", "DHF")
)

# Create the box plot. Colours are set once through scale_color_manual();
# the former `palette = "jco"` was immediately replaced by it.
p_DEN_mono <- df_monotypic_1_long %>%
  ggpubr::ggboxplot(
    x = "Dengue_status",
    y = "Value",
    color = "Dengue_status",
    add = "jitter",
    width = 0.5,
    alpha = 0.7
  ) +
  scale_color_manual(values = c("blue", "red")) +
  # Mann-Whitney U test, stated explicitly to match the other figures
  ggpubr::stat_compare_means(
    method = "wilcox.test",
    comparisons = my_comparisons_mono,
    label.x = 1.2,
    label.y = 1500
  ) +
  facet_wrap(~ Variable, scales = "free") +
  # The x axis shows Dengue_status (DF/DHF), so it is labelled accordingly
  labs(
    title = "",
    x = "Dengue Status",
    y = "Spots per million cells"
  ) +
  ggpubr::theme_pubr() +
  coord_cartesian(ylim = c(0, 2000)) # visible y range

# Show the plot and write it to disk
print(p_DEN_mono)
ggsave(filename = "/Users/dinukaariyaratne/Library/Mobile Documents/com~apple~CloudDocs/Desktop/PhD USJP/PhD_Thesis/T Cell Paper/Figures/Suppfig6.tiff", plot = p_DEN_mono, dpi = 1200, width = 6, height = 6, units = "in")


In [None]:
# Supplementary Figure 7: DF vs DHF within the multitypic group
#
# Uses `df_5` loaded in the Supplementary Figure 5 section.
df_multitypic_1 <- df_5 %>%
  filter(Immune_status %in% c("Multitypic")) %>%
  select("R22", "SL22", "D1pep28", "D3pep29", "Dengue_status", "Immune_status")

# Convert the data to long format: one row per (sample, peptide)
df_multitypic_1_long <- df_multitypic_1 %>%
  tidyr::pivot_longer(
    cols = c(R22, SL22, D1pep28, D3pep29),
    names_to = "Variable",
    values_to = "Value"
  )

# The single comparison to test
my_comparisons <- list(c("DF", "DHF"))

# Turn Dengue_status into a factor with a fixed level order (DF first)
df_multitypic_1_long$Dengue_status <- factor(
  df_multitypic_1_long$Dengue_status,
  levels = c("DF", "DHF")
)

# Create the box plot. Colours are set once through scale_color_manual();
# the former `palette = "jco"` was immediately replaced by it.
p_DEN_multi <- df_multitypic_1_long %>%
  ggpubr::ggboxplot(
    x = "Dengue_status",
    y = "Value",
    color = "Dengue_status",
    add = "jitter",
    width = 0.5,
    alpha = 0.7
  ) +
  scale_color_manual(values = c("blue", "red")) +
  # Mann-Whitney U test, stated explicitly to match the other figures
  ggpubr::stat_compare_means(
    method = "wilcox.test",
    comparisons = my_comparisons,
    label.x = 1.2,
    label.y = 2500
  ) +
  facet_wrap(~ Variable, scales = "free") +
  # The x axis shows Dengue_status (DF/DHF), so it is labelled accordingly
  labs(
    title = "",
    x = "Dengue Status",
    y = "Spots per million cells"
  ) +
  ggpubr::theme_pubr() +
  coord_cartesian(ylim = c(0, 3000)) # visible y range

# Show the plot and write it to disk
print(p_DEN_multi)
ggsave(filename = "/Users/dinukaariyaratne/Library/Mobile Documents/com~apple~CloudDocs/Desktop/PhD USJP/PhD_Thesis/T Cell Paper/Figures/Suppfig7.tiff", plot = p_DEN_multi, dpi = 1200, width = 6, height = 6, units = "in")


In [None]:
# Age analysis
#
# Compares age between the non-primary immune-status groups with a
# Mann-Whitney U test, plots age by group, and counts individuals above and
# below 50 years in the full data set.
df_T_age <- read.csv("/Users/dinukaariyaratne/Desktop/Data/Tcellresponses_DENV.csv")
library(dplyr)
library(ggplot2)

# Filter out the 'Primary' category
df_T_age_filtered <- df_T_age %>%
  filter(Immune_status != "Primary")

# Ages are left as recorded. Tied ages are handled by the test itself (see
# `exact = FALSE` below) instead of overwriting the data with random jitter,
# which made the medians, p-value and plot differ from run to run.

# Calculate median age for each group
median_ages <- df_T_age_filtered %>%
  group_by(Immune_status) %>%
  summarise(median_age = median(Age, na.rm = TRUE))

print(median_ages)

# Perform Mann-Whitney U test. `exact = FALSE` requests the normal
# approximation, which is what wilcox.test() uses when the data contain ties.
test_result <- wilcox.test(
  Age ~ Immune_status,
  data = df_T_age_filtered,
  exact = FALSE
)

# Print the test statistic and p-value
print(test_result)

# Formatted p-value for the plot annotation
p_value <- formatC(test_result$p.value, digits = 2)

# Plot the data; the p-value label sits at the highest observed age
p <- ggplot(df_T_age_filtered, aes(x = Immune_status, y = Age)) +
  geom_jitter(width = 0, alpha = 0.6) +
  geom_boxplot(width = 0.5, alpha = 0.5) +
  labs(
    title = "Age by Immune Status",
    x = "Immune Status",
    y = "Age"
  ) +
  annotate(
    "text",
    x = 1.5,
    y = max(df_T_age_filtered$Age, na.rm = TRUE),
    label = paste("p =", p_value),
    hjust = 0.5
  ) +
  theme_bw()

print(p)

# Counts over the full data set (including 'Primary'). Individuals aged
# exactly 50 fall in neither count.
# Number of individuals with age above 50
above_50 <- sum(df_T_age$Age > 50, na.rm = TRUE)

# Number of individuals with age below 50
below_50 <- sum(df_T_age$Age < 50, na.rm = TRUE)

print(paste("Number of individuals above 50: ", above_50))
print(paste("Number of individuals below 50: ", below_50))

# Notebook output captured when this cell was run (IPython reported):
# SyntaxError: invalid syntax (<ipython-input-1-3272941716bc>, line 8)