In [1]:
import os

In [2]:
import sys

In [3]:
import pandas as pd

In [4]:
from numpy import mean, median

In [5]:
# Put the project's pyRNAdeg directory on the import path (once), so that
# project modules located there can be imported by later cells.
nb_dir = '/data/parastou/RNAdeg/pyRNAdeg/'
if sys.path.count(nb_dir) == 0:
    sys.path.append(nb_dir)

In [6]:
import Viz

In [7]:
from Viz import prepare_4cat_data

In [8]:
# Short local aliases for three objects exported by the project's Viz module
# (their contents are defined in Viz, not in this notebook).
deg1, deg2, nondeg = Viz.deg1, Viz.deg2, Viz.non_degraded

--------------------------

In [9]:
# Output directory: every *_quantifications.csv table below is written here.
source_dir = '/data/parastou/RNAdeg/results/Quantifications/'

-------------

Calculate the `median` and/or `average` of **protein coding** genes, **heterochromatic** genes and **dg/dh** or **tlh/SPAC212.10** for:
- Pol II ChIP
- Pol II RIP
- pA RNA
- **Transcription Efficiency** (TE): Pol II RIP/ChIP
- **RNA Stability** (RS): pA RNA/Pol II RIP

## Load data

#### Pol II ChIP data

In [12]:
# Load the merged Pol II ChIP table; the file is parsed as tab-separated
# despite its .csv extension.
chip_df = pd.read_csv(
    '/data/parastou/RNAdeg/results/RipChip/xp_data/chip_pombe_tpm_merged.csv',
    sep='\t',
)

In [28]:
# Relabel the '591_ChIP' column as '510_ChIP'; all other column names are kept.
# The '510' sample id is the one used in the `samples` list further down.
chip_df.columns = ['510_ChIP' if col == '591_ChIP' else col for col in chip_df.columns]

#### Pol II RIP and pA RNA data

In [13]:
# Load the merged table that holds both the '<sample>_RNA' and '<sample>_RIP'
# columns used in the pA RNA and Pol II RIP sections below (tab-separated).
rna_df = pd.read_csv(
    '/data/parastou/RNAdeg/results/RipRna/xp_data/merged_tpm.csv',
    sep='\t',
)

#### pA RNA / Pol II RIP

In [19]:
# Load the RNA-stability table (tab-separated) and replace missing values
# with 0, so they enter the means/medians below as zeros.
rs_df = pd.read_csv(
    '/data/parastou/RNAdeg/results/RipRna/xp_data/rna_stability.csv',
    sep='\t',
).fillna(0)

#### Pol II RIP / Pol II ChIP

In [90]:
# Load the transcription-efficiency table (tab-separated) and replace missing
# values with 0, so they enter the means/medians below as zeros.
te_df = pd.read_csv(
    '/data/parastou/RNAdeg/results/RipChip/xp_data/transcription_efficiency.csv',
    sep='\t',
).fillna(0)

----

### Samples

In [65]:
# Sample identifiers, in the row order of every summary table below. They are
# used as-is for the RS/TE tables and with an assay suffix ('_ChIP', '_RNA',
# '_RIP') for the other tables.
samples = 'WT 80 638 301 324 491 302 504 530 510'.split()

----

## Calculate means and medians for groups

### Pol II ChIP

In [66]:
# ChIP column names: '<sample>_ChIP' for every sample id.
chip_samples = [name + '_ChIP' for name in samples]
# Group values for those columns (x -> 'gene', y -> 'dg,dh', z -> 'tlh,spc',
# w -> 'het' columns below); prepare_4cat_data is defined in the Viz module.
x, y, z, w = prepare_4cat_data(chip_df, chip_samples)

In [67]:
# Start the ChIP summary table with one row per ChIP sample column name.
df = pd.DataFrame({'sample': chip_samples})

In [68]:
# Mean of each entry of x (expected: one entry per sample row of df).
x_means = list(map(mean, x))
df['gene mean'] = pd.Series(x_means)

In [69]:
# Median of each entry of x (expected: one entry per sample row of df).
x_medians = list(map(median, x))
df['gene median'] = pd.Series(x_medians)

In [70]:
# Mean of each entry of y[0]; only the first element of y is summarised.
y_means = list(map(mean, y[0]))
df['dg,dh mean'] = pd.Series(y_means)

In [71]:
# Median of each entry of y[0]; only the first element of y is summarised.
y_medians = list(map(median, y[0]))
df['dg,dh median'] = pd.Series(y_medians)

In [72]:
# Mean of each entry of z[0]; only the first element of z is summarised.
z_means = list(map(mean, z[0]))
df['tlh,spc mean'] = pd.Series(z_means)

In [73]:
# Median of each entry of z[0]; only the first element of z is summarised.
z_medians = list(map(median, z[0]))
df['tlh,spc median'] = pd.Series(z_medians)

In [74]:
# Mean of each entry of w (expected: one entry per sample row of df).
w_means = list(map(mean, w))
df['het mean'] = pd.Series(w_means)

In [75]:
# Median of each entry of w (expected: one entry per sample row of df).
w_medians = [median(i) for i in w]
# Store the medians themselves; assigning w_means here would make the
# 'het median' column a copy of 'het mean'.
df['het median'] = pd.Series(w_medians)

In [76]:
# Write the Pol II ChIP summary as a tab-separated file without the row index.
# index=False is the documented boolean form of the option.
df.to_csv(os.path.join(source_dir, 'polIIChIP_quantifications.csv'), sep='\t', index=False)

### RNA Stability

In [78]:
# Group values from the RNA-stability table for the plain sample ids
# (x -> 'gene', y -> 'dg,dh', z -> 'tlh,spc', w -> 'het' columns below).
# prepare_4cat_data is defined in the Viz module, not in this notebook.
x, y, z, w = prepare_4cat_data(rs_df, samples)

In [79]:
# Start the RNA-stability summary table with one row per sample id.
df = pd.DataFrame({'sample': samples})

In [80]:
# Mean of each entry of x (expected: one entry per sample row of df).
x_means = list(map(mean, x))
df['gene mean'] = pd.Series(x_means)

In [81]:
# Median of each entry of x (expected: one entry per sample row of df).
x_medians = list(map(median, x))
df['gene median'] = pd.Series(x_medians)

In [82]:
# Mean of each entry of y[0]; only the first element of y is summarised.
y_means = list(map(mean, y[0]))
df['dg,dh mean'] = pd.Series(y_means)

In [83]:
# Median of each entry of y[0]; only the first element of y is summarised.
y_medians = list(map(median, y[0]))
df['dg,dh median'] = pd.Series(y_medians)

In [84]:
# Mean of each entry of z[0]; only the first element of z is summarised.
z_means = list(map(mean, z[0]))
df['tlh,spc mean'] = pd.Series(z_means)

In [85]:
# Median of each entry of z[0]; only the first element of z is summarised.
z_medians = list(map(median, z[0]))
df['tlh,spc median'] = pd.Series(z_medians)

In [86]:
# Mean of each entry of w (expected: one entry per sample row of df).
w_means = list(map(mean, w))
df['het mean'] = pd.Series(w_means)

In [87]:
# Median of each entry of w (expected: one entry per sample row of df).
w_medians = [median(i) for i in w]
# Store the medians themselves; assigning w_means here would make the
# 'het median' column a copy of 'het mean'.
df['het median'] = pd.Series(w_medians)

In [88]:
# Write the RNA-stability summary as a tab-separated file without the row index.
# index=False is the documented boolean form of the option.
df.to_csv(os.path.join(source_dir, 'rs_quantifications.csv'), sep='\t', index=False)

### Transcription Efficiency

In [92]:
# Group values from the transcription-efficiency table for the plain sample ids
# (x -> 'gene', y -> 'dg,dh', z -> 'tlh,spc', w -> 'het' columns below).
# prepare_4cat_data is defined in the Viz module, not in this notebook.
x, y, z, w = prepare_4cat_data(te_df, samples)

In [93]:
# Start the transcription-efficiency summary table with one row per sample id.
df = pd.DataFrame({'sample': samples})

In [94]:
# Mean of each entry of x (expected: one entry per sample row of df).
x_means = list(map(mean, x))
df['gene mean'] = pd.Series(x_means)

In [95]:
# Median of each entry of x (expected: one entry per sample row of df).
x_medians = list(map(median, x))
df['gene median'] = pd.Series(x_medians)

In [96]:
# Mean of each entry of y[0]; only the first element of y is summarised.
y_means = list(map(mean, y[0]))
df['dg,dh mean'] = pd.Series(y_means)

In [97]:
# Median of each entry of y[0]; only the first element of y is summarised.
y_medians = list(map(median, y[0]))
df['dg,dh median'] = pd.Series(y_medians)

In [98]:
# Mean of each entry of z[0]; only the first element of z is summarised.
z_means = list(map(mean, z[0]))
df['tlh,spc mean'] = pd.Series(z_means)

In [99]:
# Median of each entry of z[0]; only the first element of z is summarised.
z_medians = list(map(median, z[0]))
df['tlh,spc median'] = pd.Series(z_medians)

In [100]:
# Mean of each entry of w (expected: one entry per sample row of df).
w_means = list(map(mean, w))
df['het mean'] = pd.Series(w_means)

In [101]:
# Median of each entry of w (expected: one entry per sample row of df).
w_medians = [median(i) for i in w]
# Store the medians themselves; assigning w_means here would make the
# 'het median' column a copy of 'het mean'.
df['het median'] = pd.Series(w_medians)

In [102]:
# Write the transcription-efficiency summary as a tab-separated file without
# the row index. index=False is the documented boolean form of the option.
df.to_csv(os.path.join(source_dir, 'te_quantifications.csv'), sep='\t', index=False)

### pA RNA

In [104]:
# pA RNA column names: '<sample>_RNA' for every sample id.
rna_samples = [name + '_RNA' for name in samples]

# Group values for those columns (x -> 'gene', y -> 'dg,dh', z -> 'tlh,spc',
# w -> 'het' columns below); prepare_4cat_data is defined in the Viz module.
x, y, z, w = prepare_4cat_data(rna_df, rna_samples)

In [105]:
# Start the pA RNA summary table with one row per RNA sample column name.
df = pd.DataFrame({'sample': rna_samples})

In [107]:
# Mean of each entry of x (expected: one entry per sample row of df).
x_means = list(map(mean, x))
df['gene mean'] = pd.Series(x_means)

In [108]:
# Median of each entry of x (expected: one entry per sample row of df).
x_medians = list(map(median, x))
df['gene median'] = pd.Series(x_medians)

In [109]:
# Mean of each entry of y[0]; only the first element of y is summarised.
y_means = list(map(mean, y[0]))
df['dg,dh mean'] = pd.Series(y_means)

In [110]:
# Median of each entry of y[0]; only the first element of y is summarised.
y_medians = list(map(median, y[0]))
df['dg,dh median'] = pd.Series(y_medians)

In [111]:
# Mean of each entry of z[0]; only the first element of z is summarised.
z_means = list(map(mean, z[0]))
df['tlh,spc mean'] = pd.Series(z_means)

In [112]:
# Median of each entry of z[0]; only the first element of z is summarised.
z_medians = list(map(median, z[0]))
df['tlh,spc median'] = pd.Series(z_medians)

In [113]:
# Mean of each entry of w (expected: one entry per sample row of df).
w_means = list(map(mean, w))
df['het mean'] = pd.Series(w_means)

In [114]:
# Median of each entry of w (expected: one entry per sample row of df).
w_medians = [median(i) for i in w]
# Store the medians themselves; assigning w_means here would make the
# 'het median' column a copy of 'het mean'.
df['het median'] = pd.Series(w_medians)

In [116]:
# Write the pA RNA summary as a tab-separated file without the row index.
# index=False is the documented boolean form of the option.
df.to_csv(os.path.join(source_dir, 'paRNA_quantifications.csv'), sep='\t', index=False)

### Pol II RIP

In [118]:
# Pol II RIP column names: '<sample>_RIP' for every sample id.
rip_samples = [name + '_RIP' for name in samples]

# Group values for those columns (x -> 'gene', y -> 'dg,dh', z -> 'tlh,spc',
# w -> 'het' columns below); prepare_4cat_data is defined in the Viz module.
x, y, z, w = prepare_4cat_data(rna_df, rip_samples)

In [119]:
# Start the Pol II RIP summary table with one row per RIP sample column name.
df = pd.DataFrame({'sample': rip_samples})

In [120]:
# Mean of each entry of x (expected: one entry per sample row of df).
x_means = list(map(mean, x))
df['gene mean'] = pd.Series(x_means)

In [121]:
# Median of each entry of x (expected: one entry per sample row of df).
x_medians = list(map(median, x))
df['gene median'] = pd.Series(x_medians)

In [122]:
# Mean of each entry of y[0]; only the first element of y is summarised.
y_means = list(map(mean, y[0]))
df['dg,dh mean'] = pd.Series(y_means)

In [123]:
# Median of each entry of y[0]; only the first element of y is summarised.
y_medians = list(map(median, y[0]))
df['dg,dh median'] = pd.Series(y_medians)

In [124]:
# Mean of each entry of z[0]; only the first element of z is summarised.
z_means = list(map(mean, z[0]))
df['tlh,spc mean'] = pd.Series(z_means)

In [125]:
# Median of each entry of z[0]; only the first element of z is summarised.
z_medians = list(map(median, z[0]))
df['tlh,spc median'] = pd.Series(z_medians)

In [126]:
# Mean of each entry of w (expected: one entry per sample row of df).
w_means = list(map(mean, w))
df['het mean'] = pd.Series(w_means)

In [127]:
# Median of each entry of w (expected: one entry per sample row of df).
w_medians = [median(i) for i in w]
# Store the medians themselves; assigning w_means here would make the
# 'het median' column a copy of 'het mean'.
df['het median'] = pd.Series(w_medians)

In [129]:
# Write the Pol II RIP summary as a tab-separated file without the row index.
# index=False is the documented boolean form of the option.
df.to_csv(os.path.join(source_dir, 'polIIRIP_quantifications.csv'), sep='\t', index=False)

---