In [None]:
library(xlsx)
library(ggplot2)
library(rtracklayer)
library(dplyr)
library(tidyr)
library(data.table)
library(matrixStats)
library(igraph)
library(purrr)
library(TIN)

In [None]:
# Gene names/IDs lookup table (per the file name); `save_input()` reads its
# `gene_name` column to pick out the gene rows.
genes.ids.names <- read.table(
    file = "/grehawi/splice-reg-prj/new-data/ARACNE/gene_names_ids_table.txt"
)
# Preview the first rows and show the table dimensions.
head(genes.ids.names)
dim(genes.ids.names)

In [None]:
# Transcript names/IDs lookup table (per the file name); `save_input()` reads
# its `name` column to pick out the transcript rows.
tx.ids.names <- read.table(
    file = "/grehawi/splice-reg-prj/new-data/ARACNE/trxs_names_ids_table.txt"
)
# Preview the first rows and show the table dimensions.
head(tx.ids.names)
dim(tx.ids.names)

## Functions:

In [None]:
#' Write the node feature tables of one network to disk.
#'
#' The count table is transposed so that every feature becomes a row, reduced
#' to the features that occur as a node in `processed_network`, and written as
#' four files named `<network_name>-net-*.txt` inside `output_dir`:
#' all node features, the node names, the features of nodes listed in
#' `gene_table$gene_name`, and the features of nodes listed in `tx_table$name`.
#'
#' @param TE.IR.count.data Table with a `samples` column plus one column per
#'   feature (anything `as.data.frame()` accepts).
#' @param processed_network Edge table with `node1` and `node2` columns.
#' @param network_name Prefix used for the four output file names.
#' @param gene_table Table whose `gene_name` column lists the gene nodes.
#'   Defaults to the global `genes.ids.names`.
#' @param tx_table Table whose `name` column lists the transcript nodes.
#'   Defaults to the global `tx.ids.names`.
#' @param output_dir Existing directory the files are written to.
#' @return `NULL`, invisibly; called for its side effect of writing files.
save_input <- function (TE.IR.count.data, processed_network, network_name,
                        gene_table = genes.ids.names,
                        tx_table = tx.ids.names,
                        output_dir = "/grehawi/splice-reg-prj/new-data/Graph-learning/input-data/") {
    # Fail loudly on a missing column instead of silently writing empty files.
    node1 <- processed_network$node1
    node2 <- processed_network$node2
    if (is.null(node1) || is.null(node2)) {
        stop("processed_network must have 'node1' and 'node2' columns", call. = FALSE)
    }
    gene_names <- gene_table$gene_name
    if (is.null(gene_names)) {
        stop("gene_table must have a 'gene_name' column", call. = FALSE)
    }
    tx_names <- tx_table$name
    if (is.null(tx_names)) {
        stop("tx_table must have a 'name' column", call. = FALSE)
    }

    # File names are appended with paste0(), so make sure a separator is there.
    if (!endsWith(output_dir, "/")) {
        output_dir <- paste0(output_dir, "/")
    }
    if (!dir.exists(output_dir)) {
        stop("output directory does not exist: ", output_dir, call. = FALSE)
    }

    # Samples become the row names, then the table is transposed so that each
    # row is one feature (node) and each column one sample.
    TE.IR.df <- as.data.frame(TE.IR.count.data)
    row.names(TE.IR.df) <- TE.IR.df$samples
    TE.IR.df$samples <- NULL
    TE.IR.df <- t(TE.IR.df)

    # as.character(): on R < 4.1, c() on factor columns yields integer codes,
    # which would never match the row names.
    nodes_names <- unique(c(as.character(node1), as.character(node2)))

    # drop = FALSE keeps a matrix even when a single row or sample is left;
    # otherwise the result collapses to a vector and loses its row names.
    TE.IR.df <- TE.IR.df[rownames(TE.IR.df) %in% nodes_names, , drop = FALSE]

    # The header of the node-names file is derived from this exact expression,
    # so the variable name is kept to leave the file format unchanged.
    all.nodes <- as.data.frame(rownames(TE.IR.df))

    TE.node.features <- TE.IR.df[rownames(TE.IR.df) %in% gene_names, , drop = FALSE]
    IR.node.features <- TE.IR.df[rownames(TE.IR.df) %in% tx_names, , drop = FALSE]

    node_features_file_name <- paste0(network_name, '-net-node-features.txt')
    node_names_file_name <- paste0(network_name, '-net-node-names.txt')
    TE_nodes_features_file_name <- paste0(network_name, '-net-TE-nodes-features.txt')
    IR_nodes_features_file_name <- paste0(network_name, '-net-IR-nodes-features.txt')

    # NOTE(review): fwrite() is documented to drop matrix row names, so the
    # feature files appear to carry values only; node order for the full table
    # is given by the node-names file. Confirm downstream readers expect this.
    fwrite(TE.IR.df, paste0(output_dir, node_features_file_name), sep="\t")
    write.table(all.nodes, paste0(output_dir, node_names_file_name))
    fwrite(TE.node.features, paste0(output_dir, TE_nodes_features_file_name), sep="\t")
    fwrite(IR.node.features, paste0(output_dir, IR_nodes_features_file_name), sep="\t")

    invisible(NULL)
}

## Prepare input matrices for GraphSAGE

In [None]:
# Load the gene/IR feature matrices, one for the controls and one for the cases.
TE.IR.count.data.controls <- fread(
    input = "/grehawi/splice-reg-prj/new-data/ARACNE/total_and_ratios_controls.txt"
)
TE.IR.count.data.cases.all <- fread(
    input = "/grehawi/splice-reg-prj/new-data/ARACNE/total_and_ratios_cases.txt"
)

In [None]:
# Preview the first six rows (the default for head()) of the controls matrix.
head(TE.IR.count.data.controls, n = 6L)

In [None]:
# Load the fully filtered case and control networks (final output of the
# filtering steps); `save_input()` uses their `node1` / `node2` columns.
cases.annotated.filtered.single.edges.thr <- read.table(
    file = "/grehawi/splice-reg-prj/new-data/ARACNE/filtered_thr_cases_withNames_singleEdges.txt"
)
controls.annotated.filtered.single.edges.thr <- read.table(
    file = "/grehawi/splice-reg-prj/new-data/ARACNE/filtered_thr_controls_withNames_singleEdges.txt"
)
# Preview the controls network.
head(controls.annotated.filtered.single.edges.thr)

In [None]:
# Write the node feature files for the controls network, then for the cases.
save_input(
    TE.IR.count.data = TE.IR.count.data.controls,
    processed_network = controls.annotated.filtered.single.edges.thr,
    network_name = "controls"
)
save_input(
    TE.IR.count.data = TE.IR.count.data.cases.all,
    processed_network = cases.annotated.filtered.single.edges.thr,
    network_name = "cases"
)