Author: Dan Shea  
Date: 2019.10.02  
#### Examination of N16 NERICA-1 AA Coupling and NLR genes at coupled loci
We have locus information for the coupled Hitomebore <=> Hitomebore loci that passed the significance threshold $\alpha < 0.001$. We also have locus information for all NLR genes identified by `RGAugury`. What we would like to do next is to correlate the NLR loci with the coupled loci and return a list of the NLR genes at the coupled loci.

Therefore, let's get started!

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Input tables (tab-separated), given relative to the notebook's working directory.
# AA_file: raw marker pairs from the N16 NERICA-1 AA coupling analysis.
AA_file = "./interchromosomal_linkage_analysis/N16_NERICA_1_AA_COUPLING_raw_markers.tsv"
# NLR_file: cleaned NLR gene positions.
NLR_file = "./NLR-position-cleaned.tsv"

In [3]:
# Load the coupled-marker table; the file's first row supplies the column names.
N16_AA = pd.read_csv(AA_file, delimiter="\t")

In [4]:
# Load the NLR gene positions. No row of the file is treated as a header;
# the four column names are supplied here instead.
NLR_df = pd.read_csv(
    NLR_file,
    delimiter="\t",
    header=None,
    names=["CHROM", "start", "end", "locus"],
)

In [5]:
# Preview the first ten marker pairs.
N16_AA.head(10)

Unnamed: 0,CHROM_a,POS_a,CHROM_b,POS_b,AA_obs,AB_obs,BA_obs,BB_obs,chisquare,pvalue,qvalue,significant
0,chr01,15388162,chr02,7222205,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
1,chr01,15388162,chr02,7265283,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
2,chr01,15388162,chr02,7598355,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
3,chr01,15388162,chr02,7801801,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
4,chr01,15388162,chr02,7828021,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
5,chr01,15388162,chr02,8310508,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
6,chr01,15388162,chr02,9053782,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
7,chr01,15388162,chr02,9518813,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
8,chr01,15388162,chr02,9597883,35,0,0,0,96.947368,7.043149e-21,6.115999e-17,True
9,chr01,18482824,chr02,7222205,38,0,0,0,114.0,1.511359e-24,1.0785869999999999e-19,True


In [6]:
# Preview the first ten NLR gene records.
NLR_df.head(10)

Unnamed: 0,CHROM,start,end,locus
0,chr01,689792,694013,LOC_Os01g02250
1,chr01,707340,714322,LOC_Os01g02280
2,chr01,2669952,2672924,LOC_Os01g05600
3,chr01,2682019,2684988,LOC_Os01g05620
4,chr01,8751071,8755448,LOC_Os01g15580
5,chr01,9293171,9299788,LOC_Os01g16370
6,chr01,9306108,9309845,LOC_Os01g16390
7,chr01,9314159,9318060,LOC_Os01g16400
8,chr01,11526651,11529893,LOC_Os01g20720
9,chr01,11855552,11858974,LOC_Os01g21240


In [19]:
# Distinct (chromosome, position) markers from the "a" side of each pair,
# kept in the order they first appear in the table.
a_vals = N16_AA[['CHROM_a', 'POS_a']].drop_duplicates(keep='first').copy()

# Distinct markers from the "b" side, ordered by position and then chromosome.
# NOTE(review): position is the primary sort key here, ahead of chromosome --
# confirm this ordering is intended.
b_vals = (
    N16_AA[['CHROM_b', 'POS_b']]
    .sort_values(by=['POS_b', 'CHROM_b'])
    .drop_duplicates(keep='first')
    .copy()
)

In [20]:
# Replace the row labels left over from de-duplication (and sorting) with a
# fresh 0..n-1 index; the old labels are discarded rather than kept as a column.
a_vals = a_vals.reset_index(drop=True)
b_vals = b_vals.reset_index(drop=True)

In [22]:
# Give both marker tables the same column names so they can be stacked.
a_vals = a_vals.rename({'CHROM_a': 'CHROM', 'POS_a': 'POS'}, axis='columns')
b_vals = b_vals.rename({'CHROM_b': 'CHROM', 'POS_b': 'POS'}, axis='columns')

In [24]:
# Stack the "a" markers on top of the "b" markers (row-wise), keeping each
# table's own row labels for now.
# NOTE: this rebinds N16_AA from the marker-pair table to a two-column
# CHROM/POS marker list.
N16_AA = pd.concat([a_vals, b_vals], axis=0, ignore_index=False).copy()

In [26]:
# Renumber the stacked marker list 0..n-1, discarding the per-table labels.
N16_AA = N16_AA.reset_index(drop=True)

In [44]:
from IPython.display import display

# Half-width of the search window added on each side of an NLR gene
# (same units as POS/start/end -- presumably base pairs; confirm).
flank = 50000

# For every marker, show the NLR genes on the same chromosome whose
# [start - flank, end + flank] window contains the marker position.
# One frame is displayed per marker, including empty frames when nothing matches.
for marker in N16_AA.itertuples(index=False):
    same_chrom = NLR_df.CHROM == marker.CHROM
    past_window_start = NLR_df.start - flank <= marker.POS
    before_window_end = NLR_df.end + flank >= marker.POS
    display(NLR_df.loc[same_chrom & past_window_start & before_window_end, :])

Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus
17,chr01,18531902,18540820,LOC_Os01g33684


Unnamed: 0,CHROM,start,end,locus
22,chr01,24022059,24027535,LOC_Os01g42330


Unnamed: 0,CHROM,start,end,locus
22,chr01,24022059,24027535,LOC_Os01g42330


Unnamed: 0,CHROM,start,end,locus
22,chr01,24022059,24027535,LOC_Os01g42330


Unnamed: 0,CHROM,start,end,locus
22,chr01,24022059,24027535,LOC_Os01g42330


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus
67,chr03,8122245,8126084,LOC_Os03g14900


Unnamed: 0,CHROM,start,end,locus
67,chr03,8122245,8126084,LOC_Os03g14900


Unnamed: 0,CHROM,start,end,locus
67,chr03,8122245,8126084,LOC_Os03g14900


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus
67,chr03,8122245,8126084,LOC_Os03g14900


Unnamed: 0,CHROM,start,end,locus
67,chr03,8122245,8126084,LOC_Os03g14900


Unnamed: 0,CHROM,start,end,locus
67,chr03,8122245,8126084,LOC_Os03g14900


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus


Unnamed: 0,CHROM,start,end,locus
