# Combine interactions from different replicates

## Setting up the notebook

In [1]:
import sys
import os
import pandas
# Widen displayed columns so long file paths are not truncated in tables.
# The fully qualified option name is used because abbreviated keys are
# resolved by pattern matching and can become ambiguous in other pandas versions.
pandas.set_option('display.max_colwidth', 400)
# Make the parent directory importable so the local `diachr` package resolves.
sys.path.append("..")
from diachr import DiachromaticInteractionSet

## Reading interactions from different replicates

In [2]:
# Parse every replicate file into one shared interaction set.
interaction_set = DiachromaticInteractionSet()

replicate_files = (
    "../tests/data/test_01/diachromatic_interaction_file_r1.tsv.gz",
    "../tests/data/test_01/diachromatic_interaction_file_r2.tsv.gz",
    "../tests/data/test_01/diachromatic_interaction_file_r3.tsv.gz",
    "../tests/data/test_01/diachromatic_interaction_file_r4.tsv.gz",
)
for replicate_file in replicate_files:
    interaction_set.parse_file(i_file=replicate_file)

# Per-file read statistics, displayed as a table.
read_file_info_dict = interaction_set.get_read_file_info_dict()
pandas.DataFrame(read_file_info_dict)

Unnamed: 0,I_FILE,I_NUM,I_UNION_NUM
0,../tests/data/test_01/diachromatic_interaction_file_r1.tsv.gz,1,1
1,../tests/data/test_01/diachromatic_interaction_file_r2.tsv.gz,2,2
2,../tests/data/test_01/diachromatic_interaction_file_r3.tsv.gz,3,3
3,../tests/data/test_01/diachromatic_interaction_file_r4.tsv.gz,4,4


## Write interactions that occur in a required number of replicates to an interaction file

In [12]:
# Output path and the replicate threshold handed to the writer.
target_file = "combined_interactions.tsv.gz"
required_replicates = 2

# The returned dictionary is rendered as a table in the next cell.
write_file_info_dict = interaction_set.write_diachromatic_interaction_file(
    target_file=target_file,
    required_replicates=required_replicates,
)

In [9]:
# Show the write statistics as a table.
pandas.DataFrame(data=write_file_info_dict)

Unnamed: 0,TARGET_FILE,REQUIRED_REPLICATES,N_INCOMPLETE_DATA,N_COMPLETE_DATA
0,combined_interactions.tsv.gz,3,2,2


In [5]:
# Plain-text report on the file-writing step.
write_report = interaction_set.get_write_file_info_report()
print(write_report)

[INFO] Report on writing files:
	[INFO] Wrote interactions that occur in at least 2 replicates to: combined_interactions.tsv.gz
	[INFO] Interactions that occur in at least 2 replicates: 3
	[INFO] Other interactions: 1
[INFO] End of report.



In [10]:
# The same statistics as a tab-separated header line plus a value line.
write_table_row = interaction_set.get_write_file_info_table_row()
print(write_table_row)

TARGET_FILE	REQUIRED_REPLICATES	N_INCOMPLETE_DATA	N_COMPLETE_DATA
combined_interactions.tsv.gz	3	2	2



In [11]:
# Read back the interaction file written earlier. `target_file` is reused
# (instead of repeating the path literal) so this cell cannot drift out of
# sync with the path that was actually written.
# The file is read without a header row, so column names are assigned explicitly.
interaction_columns = [
    'CHR_D1', 'STA_D1', 'END_D1', 'ENR_CAT_D1',
    'CHR_D2', 'STA_D2', 'END_D2', 'ENR_CAT_D2',
    'RP_S:RP_T',
]
df_interaction_file = pandas.read_csv(target_file, compression='gzip', sep='\t', header=None)
df_interaction_file.columns = interaction_columns
df_interaction_file

Unnamed: 0,CHR_D1,STA_D1,END_D1,ENR_CAT_D1,CHR_D2,STA_D2,END_D2,ENR_CAT_D2,RP_S:RP_T
0,chr1,46297999,46305684,E,chr1,51777391,51781717,N,8:4
1,chr17,72411026,72411616,N,chr17,72712662,72724357,N,9:6
