# info
This script generates a file with filtered DEGs derived from patient-specific DEG calling.

Filters are:
1. Filter on P < 0.05 and Q < 0.1 for DESeq2; Q < 0.05 in sleuth.
2. Filter out genes that in the comparison do not cross the 2 TPM barrier.
3. Discrete abs log2 FC > 1

First we identify the DEGs of a particular patient using the aforementioned filters, then we define the coherent DEGs as those DEGs present in n-1 patients.

In [12]:
import pandas, os

# 0. user-defined variables

In [10]:
# one single-letter label per hypothesis; the loops below iterate over them in this order
hypotheses = list('ABCDE')

In [13]:
# tab-separated TPM table; expression_reader() loads it using the first column as the index
tpm_file = '/home/adrian/projects/hegoi/results/tpm/DESeq2_TPM_values.tsv'
# folder whose contents are listed in the per-hypothesis loop (files such as hypothesis_A_154_up.tsv)
deseq2_folder = '/home/adrian/projects/hegoi/results/subsamples/DESeq2/'

# NOTE(review): presumably the destination for the filtered DEG tables; it is not referenced in the visible cells, so confirm
outputdir = '/home/adrian/projects/hegoi/results/subsamples/DEG_filtered/'

# 1. read expression data

In [7]:
def expression_reader(path=None):
    """Load the TPM expression table and its sample names.

    Parameters
    ----------
    path : str or path-like, optional
        Tab-separated file whose first column is used as the row index.
        When omitted, the module-level ``tpm_file`` is read, so existing
        no-argument calls behave exactly as before.

    Returns
    -------
    expression : pandas.DataFrame
        The table as read from disk, indexed by its first column.
    sample_names : list
        Column labels of ``expression``, in file order.
    """

    # fall back to the user-defined global only when no explicit path is given
    if path is None:
        path = tpm_file

    expression = pandas.read_csv(path, sep='\t', index_col=0)
    sample_names = expression.columns.to_list()

    return expression, sample_names

In [8]:
# load the TPM table once; both results become module-level names for the cells below
expression, sample_names = expression_reader()

# 2. iterate over hypotheses and patients

In [15]:
for hypothesis in hypotheses:
    # progress banner, one per hypothesis
    header = 'working with hypothesis {}...'.format(hypothesis)
    print(header)

    ### 1. define patients
    # so far only the raw listing of the DESeq2 folder is shown;
    # the same folder is listed again on every iteration
    elements = os.listdir(deseq2_folder)
    print(elements)

    ### 2. define DEGs

    ### 3. define patient-coherent DEGs

    ### 4. write a table for this particular hypothesis

working with hypothesis A...
['hypothesis_E_154_up.tsv', 'hypothesis_C_46_down.tsv', 'messages.txt', 'hypothesis_B_154_up.tsv', 'hypothesis_E_109_up.tsv', 'hypothesis_B_46_down.tsv', 'hypothesis_A_154_up.tsv', 'hypothesis_D_153_up.tsv', 'hypothesis_A_154_down.tsv', 'hypothesis_A_46_up.tsv', 'hypothesis_C_154_down.tsv', 'hypothesis_A_94_up.tsv', 'hypothesis_E_46_down.tsv', 'hypothesis_B_46_up.tsv', 'hypothesis_A_153_up.tsv', 'unformatted', 'hypothesis_A_46_down.tsv', 'hypothesis_C_46_up.tsv', 'hypothesis_E_46_up.tsv', 'hypothesis_D_94_down.tsv', 'hypothesis_D_154_down.tsv', 'hypothesis_E_109_down.tsv', 'hypothesis_E_153_up.tsv', 'hypothesis_A_94_down.tsv', 'hypothesis_D_153_down.tsv', 'hypothesis_C_153_down.tsv', 'hypothesis_A_153_down.tsv', 'hypothesis_B_154_down.tsv', 'hypothesis_C_154_up.tsv', 'hypothesis_C_153_up.tsv', 'hypothesis_D_94_up.tsv', 'hypothesis_D_46_down.tsv', 'hypothesis_D_46_up.tsv', 'hypothesis_B_153_down.tsv', 'hypothesis_E_153_down.tsv', 'hypothesis_D_154_up.tsv', '

In [None]:
for hypothesis in hypotheses:
    # NOTE(review): draft cell. `patients`, DEG_reader, DEG_filter,
    # consistency_check and table_generator are not defined in the visible
    # part of this file, and neither `hypothesis` nor `patient` is passed to
    # any of them yet; confirm the intended signatures before running.

    ### 1. define patients

    ### 2. define DEGs

    ### 3. define patient-coherent DEGs

    # define filtered DEGs across patients
    DEGs_across_patients = []
    for patient in patients:

        ## read DEGs
        approach = 'DESeq2'
        DEG_up, DEG_down = DEG_reader(approach)

        ## check the filters: one filtered set per trend, up first, then down
        filtered_DEGs = [DEG_filter(DEGs) for DEGs in (DEG_up, DEG_down)]

        ## append filtered DEGs into patient DEG list
        DEGs_across_patients.append(filtered_DEGs)

    ### find consistent set of genes across patients
    consistent_DEGs = consistency_check(DEGs_across_patients)

    ### generate a final table
    for trend in consistent_DEGs:
        table_generator(trend)