# A3SS Statistical Analysis 

## 1. Load Libraries

## 2. Read Data

* Read unpaired_final_matrix.csv
* Read paired_final_matrix.csv

## 3. Data Transformation

* Convert paired and unpaired data to long format
* Calculate mean expressions
* Calculate slopes for paired and unpaired data
* Combine slope data for consistent and opposite analysis

## 4. Statistical Tests

* Perform Fisher's Exact Test for consistent slopes
* Perform Fisher's Exact Test for opposite slopes

## 5. Output Results

* Save cluster gene symbols to CSV files

In [1]:
getwd()

In [2]:
setwd("/Users/annedeslattesmays/Scitechcon\ Dropbox/Anne\ DeslattesMays/projects/post-rmats-single-run/TAM.AML.all/A3SS_calculate")

In [3]:
# Load necessary libraries
library(dplyr)
library(tidyr)
library(reshape2)
library(ggplot2)
library(pdftools)
library(gridExtra)

# Read the datasets
unpaired_data <- read.csv("unpaired_final_matrix.csv", stringsAsFactors = FALSE)
paired_data <- read.csv("paired_final_matrix.csv", stringsAsFactors = FALSE)

# Transform paired data to long format
paired_data_long <- paired_data %>%
  pivot_longer(cols = -X, names_to = "State", values_to = "Expression")

# Calculate mean expression for paired data
mean_AML_paired <- paired_data_long %>%
  filter(State == "AML") %>%
  group_by(X) %>%
  summarise(mean_AML = mean(Expression), .groups = 'drop')

# Calculate slopes for paired data
paired_slopes <- paired_data_long %>%
  filter(State %in% c("preAML", "AML")) %>%
  group_by(X, State) %>%
  summarise(Mean = mean(Expression), .groups = 'drop') %>%
  pivot_wider(names_from = State, values_from = Mean) %>%
  mutate(Paired_Slope = AML - preAML)

# Transform unpaired data to long format
unpaired_data_long <- unpaired_data %>%
  pivot_longer(cols = -X, names_to = "State", values_to = "Expression")

# Calculate slopes for unpaired data using mean_AML from paired data
unpaired_slopes <- unpaired_data_long %>%
  filter(State %in% c("preAML", "noAML")) %>%
  group_by(X, State) %>%
  summarise(Mean = mean(Expression), .groups = 'drop') %>%
  pivot_wider(names_from = State, values_from = Mean) %>%
  left_join(mean_AML_paired, by = "X") %>%
  mutate(Unpaired_Slope_preAML = mean_AML - preAML,
         Unpaired_Slope_noAML = mean_AML - noAML)

# Combine slopes data
slope_data_consistent <- paired_slopes %>%
  left_join(unpaired_slopes %>% select(X, Unpaired_Slope_preAML), by = "X") %>%
  filter(!is.na(Paired_Slope) & !is.na(Unpaired_Slope_preAML)) %>%
  mutate(Consistent = sign(Paired_Slope) == sign(Unpaired_Slope_preAML))

slope_data_opposite <- paired_slopes %>%
  left_join(unpaired_slopes %>% select(X, Unpaired_Slope_noAML), by = "X") %>%
  filter(!is.na(Paired_Slope) & !is.na(Unpaired_Slope_noAML)) %>%
  mutate(Opposite = sign(Paired_Slope) != sign(Unpaired_Slope_noAML))





Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘reshape2’


The following object is masked from ‘package:tidyr’:

    smiths


Using poppler version 24.04.0


Attaching package: ‘gridExtra’


The following object is masked from ‘package:dplyr’:

    combine




ERROR: [1m[33mError[39m in `mutate()`:[22m
[1m[22m[36mℹ[39m In argument: `Paired_Slope = AML - preAML`.
[1mCaused by error:[22m
[33m![39m object 'AML' not found


In [None]:
# Perform Fisher's Exact Test for consistent
consistent_table <- table(slope_data_consistent$Consistent)
if (length(consistent_table) == 2) {
  fisher_test_consistent <- fisher.test(matrix(consistent_table, ncol = 2))
} else {
  fisher_test_consistent <- NULL
  cat("Not enough data for Fisher's Exact Test on consistent slopes.\n")
}

# Perform Fisher's Exact Test for opposite
opposite_table <- table(slope_data_opposite$Opposite)
if (length(opposite_table) == 2) {
  fisher_test_opposite <- fisher.test(matrix(opposite_table, ncol = 2))
} else {
  fisher_test_opposite <- NULL
  cat("Not enough data for Fisher's Exact Test on opposite slopes.\n")
}
