# Evaluate and categorize interactions

## Setting up the notebook

In [1]:
import sys
import os
import matplotlib.pyplot as plt
import scipy, scipy.stats, numpy
import pandas

%matplotlib inline
sys.path.append("..")

from diachr import DiachromaticInteractionSet

## Read a Diachromatic interaction file

In [2]:
# Parse the test interaction file into a fresh interaction set.
i_file_path = "../tests/data/test_04/diachromatic_interaction_file.tsv"
interaction_set = DiachromaticInteractionSet()
interaction_set.parse_file(i_file=i_file_path)

# Show the file info reported by the interaction set as a table.
file_dict_info = interaction_set.get_file_info_dict()
pandas.DataFrame(data=file_dict_info)

Unnamed: 0,I_FILE,I_NUM
0,../tests/data/test_04/diachromatic_interaction...,50
1,UNION,50


In [10]:
ls ../tests/data


__init__.py
__init__.pyc
[34m__pycache__[m[m/
[34mdata[m[m/
test_binomial.py
test_binomial.pyc
test_combine_interactions.py
test_diachromatic_interaction.py
test_diachromatic_interaction.pyc
test_eif_parser.py
test_eif_parser.pyc
test_rate_and_categorize_interactions.py


## Calculate P-values and divide into directed and undirected interactions

In [3]:
# Threshold on the negative natural logarithm of the P-value (P = 0.01).
p_val_thresh = 0.01
nln_p_val_thresh = -numpy.log(p_val_thresh)

# Rate and categorize the interactions using that threshold.
rate_and_cat_report_dict = interaction_set.rate_and_categorize_interactions(
    nln_pval_thresh=nln_p_val_thresh,
    verbose=True,
)

[INFO] Rate and categorize interactions ...
[INFO] ...done.


Use ``pandas`` to print out the dictionary returned by the function ``rate_and_categorize_interactions``.

In [5]:
# Render the report dictionary as a table.
pandas.DataFrame(data=rate_and_cat_report_dict)

Unnamed: 0,NLN_PVAL_THRESH,MIN_RP,MIN_RP_PVAL,N_PROCESSED,N_DISCARDED,N_UNDIRECTED,N_DIRECTED
0,4.60517,8,0.007813,50,4,28,18


## Select undirected reference interactions

In [6]:
# Select reference interactions and keep the returned report dictionary.
select_ref_report_dict = interaction_set.select_reference_interactions(
    verbose=True
)

[INFO] Select reference interactions ...
	[INFO] First pass: Count directed interactions for different read pair counts ...
	[INFO] Second pass: Select undirected reference interactions for different read pair counts ...
[INFO] ...done.


Use ``pandas`` to print out the dictionary returned by the function ``select_reference_interactions``.

In [43]:
# Build and transpose the report table once (each report entry becomes a row)
# instead of repeating the same construction for every slice.
select_ref_report_df = pandas.DataFrame(select_ref_report_dict).transpose()

# Four consecutive rows per enrichment category, selected by explicit position.
df_nn = select_ref_report_df.iloc[:4]
df_ne = select_ref_report_df.iloc[4:8]
df_en = select_ref_report_df.iloc[8:12]
df_ee = select_ref_report_df.iloc[12:16]

The first four columns contain the interaction counts for the enrichment category ``NN``. In this category, we have three directed interactions (``DI_NN``) for which three undirected reference interactions (``UIR_NN``) could be selected. Since a reference interaction could be selected for all directed interactions, there are no missing reference interactions (``M_UIR_NN``). Finally, we have three undirected interactions (``UI_NN``).

In [49]:
# Give the single value column a readable label, then display the table.
count_label = 'Interaction counts'
df_nn.columns = [count_label]
df_nn

Unnamed: 0,Interaction counts
DI_NN,3
UIR_NN,3
M_UIR_NN,0
UI_NN,3


Columns five to eight contain the interaction counts for the category ``NE``. In this category, there is one directed interaction for which no matching reference interaction could be selected (``M_UIR_NE``). 

In [50]:
# Give the single value column a readable label, then display the table.
count_label = 'Interaction counts'
df_ne.columns = [count_label]
df_ne

Unnamed: 0,Interaction counts
DI_NE,4
UIR_NE,3
M_UIR_NE,1
UI_NE,3


Columns nine to twelve contain the interaction counts for the category ``EN``.

In [51]:
# Give the single value column a readable label, then display the table.
count_label = 'Interaction counts'
df_en.columns = [count_label]
df_en

Unnamed: 0,Interaction counts
DI_EN,5
UIR_EN,5
M_UIR_EN,0
UI_EN,3


Columns thirteen to sixteen contain the interaction counts for the category ``EE``. In this category, there is again one directed interaction for which no matching reference interaction could be selected (``M_UIR_EE``).

In [52]:
# Give the single value column a readable label, then display the table.
count_label = 'Interaction counts'
df_ee.columns = [count_label]
df_ee

Unnamed: 0,Interaction counts
DI_EE,6
UIR_EE,5
M_UIR_EE,1
UI_EE,3


## Write evaluated and categorized interactions to a Diachromatic interaction file

In [10]:
# Write the interactions to a gzipped Diachromatic interaction file and keep
# the returned report dictionary.
target_file_path = "evaluated_and_categorized_interactions.tsv.gz"
write_report_dict = interaction_set.write_diachromatic_interaction_file(
    target_file=target_file_path
)

Use ``pandas`` to print out the dictionary returned by the function ``write_diachromatic_interaction_file``.

In [9]:
# Render the report dictionary as a table.
pandas.DataFrame(data=write_report_dict)

Unnamed: 0,TARGET_FILE,INTERACTIONS_NUMBERS,REQUIRED_REPLICATES,HAS_ALL_DATA,INCOMPLETE_DATA
0,evaluated_and_categorized_interactions.tsv.gz,[50],1,46,0


In [83]:
# Column labels for the interaction file, which is read without a header row.
interaction_file_columns = [
    'CHR_D1', 'STA_D1', 'END_D1', 'ENR_CAT_D1',
    'CHR_D2', 'STA_D2', 'END_D2', 'ENR_CAT_D2',
    'RP_S:RP_T',
    'NLN_PVAL',
    'I_CAT',
]

# Read the gzipped, tab-separated interaction file and label its columns.
df_interaction_file = pandas.read_csv(
    'evaluated_and_categorized_interactions.tsv.gz',
    compression='gzip',
    sep='\t',
    header=None,
)
df_interaction_file.columns = interaction_file_columns

# Preview the first 20 rows.
df_interaction_file.head(20)

Unnamed: 0,CHR_D1,STA_D1,END_D1,ENR_CAT_D1,CHR_D2,STA_D2,END_D2,ENR_CAT_D2,RP_S:RP_T,NLN_PVAL,I_CAT
0,chr14,43059116,43059494,N,chr14,43101212,43101810,N,100:1,64.69,DI
1,chr8,129042054,129044258,N,chr8,129121269,129121986,N,100:2,61.44,DI
2,chr15,73467156,73468652,N,chr15,73526903,73528438,N,100:3,58.59,DI
3,chr17,72411026,72411616,N,chr17,72712662,72724357,E,100:1,64.69,DI
4,chr18,38724804,38726198,N,chr18,76794986,76803172,E,100:2,61.44,DI
5,chr11,114362648,114362686,N,chr11,114396073,114404234,E,100:3,58.59,DI
6,chr15,56158017,56158267,N,chr15,56462978,56465983,E,100:4,56.01,DI
7,chr14,34714080,34716362,E,chr14,50135355,50139051,N,100:1,64.69,DI
8,chr1,91022201,91023797,E,chr1,116561813,116566655,N,100:2,61.44,DI
9,chr1,15681566,15697108,E,chr1,19411358,19417940,N,100:3,58.59,DI
