In [37]:
import pysam
from pysam import VariantFile as vcf
import operator
from math import log2
import pandas as pd
from pandas import DataFrame as dataframe
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial.distance import pdist, squareform
import scipy
import  os
import os.path
import matplotlib.colors as mcolors
from scipy import stats
import csv
from xml.etree import ElementTree as ET
from statsmodels.stats.multitest import multipletests

In [38]:

mhc_complementary_region={'ACB': {'mhc': [[32589647, 32805573]]},
'All':{'mhc':[[32453110, 32577355], [32589647, 32716541]]},
 'ASW': {'mhc': [[29753369, 29913914],
   [29939668, 30120966],
   [32627859, 32843772]]},
 'BEB': {},
 'CDX': {'mhc': [[31295439, 31432528]]},
 'CEU': {'mhc': [[32627859, 32776791]]},
 'CHB': {},
 'CHS': {'mhc': [[32589647, 32716541]]},
 'CLM': {'mhc': [[32453110, 32577355], [32589647, 32732048]]},
 'ESN': {'mhc': [[29720403, 29896285], [32644320, 32843772]]},
 'FIN': {},
 'GBR': {'mhc': [[32453110, 32577355]]},
 'GIH': {'mhc': [[32589647, 32698571]]},
 'GWD': {'mhc': [[29720403, 29913914],
   [29939668, 30085606],
   [32627859, 32732048]]},
 'IBS': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'ITU': {'mhc': [[32423532, 32554290]]},
 'JPT': {'mhc': [[32589647, 32732048]]},
 'KHV': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'LWK': {'mhc': [[32627859, 32805573]]},
 'MSL': {'mhc': [[29720403, 29913914],
   [31317765, 31528792],
   [32554291, 32776791]]},
 'MXL': {'mhc': [[32589647, 32716541]]},
 'PEL': {'mhc': [[32473902, 32616414]]},
 'PJL': {},
 'PUR': {'mhc': [[32589647, 32683157]]},
 'STU': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'TSI': {'mhc': [[32589647, 32698571]]},
 'YRI': {'mhc': [[29720403, 29913914],
   [29939668, 30120966],
   [32589647, 32882258]]}}

mhc_similarity_region={'ACB': {'mhc': [[29720403, 30011739],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'All':{'mhc':[[29720403, 30048796],[30994370, 31528792],[32212726, 32882258]]},
 'ASW': {'mhc': [[29720403, 29913914],
   [29939668, 30120966],
   [31052133, 31528792],
   [32212726, 32882258]]},
 'BEB': {'mhc': [[29720403, 30048796],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'CDX': {'mhc': [[30994370, 31528792], [32212726, 32923168]]},
 'CEU': {'mhc': [[30994370, 31528792], [32212726, 32882258]]},
 'CHB': {'mhc': [[29720403, 29913914],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'CHS': {'mhc': [[30994370, 31528792], [32397207, 32882258]]},
 'CLM': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'ESN': {'mhc': [[29720403, 30120966],
   [30994370, 31528792],
   [32397207, 32882258]]},
 'FIN': {'mhc': [[30994370, 31528792], [32212726, 32882258]]},
 'GBR': {'mhc': [[30994370, 31528792], [32212726, 32882258]]},
 'GIH': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'GWD': {'mhc': [[29720403, 30120966],
   [31052133, 31528792],
   [32212726, 32882258]]},
 'IBS': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'ITU': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32923168]]},
 'JPT': {'mhc': [[29720403, 29913914],
   [29939668, 30120966],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'KHV': {'mhc': [[29720403, 30120966],
   [30994370, 31528792],
   [32397207, 32923168]]},
 'LWK': {'mhc': [[29720403, 30011739],
   [31052133, 31528792],
   [32212726, 32923168]]},
 'MSL': {'mhc': [[29720403, 30011739],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'MXL': {'mhc': [[29720403, 29913914],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'PEL': {'mhc': [[29720403, 29896285],
   [30959575, 31528792],
   [32288923, 32882258]]},
 'PJL': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32923168]]},
 'PUR': {'mhc': [[29720403, 30011739],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'STU': {'mhc': [[29720403, 30011739],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'TSI': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'YRI': {'mhc': [[29720403, 30120966],
   [30994370, 31528792],
   [32212726, 32882258]]}}

In [39]:
temp_complementary_region = mhc_complementary_region.copy()
temp_similarity_region=mhc_similarity_region.copy()

In [40]:
for race, region_dict in temp_complementary_region.items():
    if region_dict=={}:
        del mhc_similarity_region[race]
        del mhc_complementary_region[race]
for race, region_dict in temp_similarity_region.items():
    if region_dict=={}:
        del mhc_complementary_region[race]
        del mhc_similarity_region[race]

In [41]:
mhc_complementary_region

{'ACB': {'mhc': [[32589647, 32805573]]},
 'All': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'ASW': {'mhc': [[29753369, 29913914],
   [29939668, 30120966],
   [32627859, 32843772]]},
 'CDX': {'mhc': [[31295439, 31432528]]},
 'CEU': {'mhc': [[32627859, 32776791]]},
 'CHS': {'mhc': [[32589647, 32716541]]},
 'CLM': {'mhc': [[32453110, 32577355], [32589647, 32732048]]},
 'ESN': {'mhc': [[29720403, 29896285], [32644320, 32843772]]},
 'GBR': {'mhc': [[32453110, 32577355]]},
 'GIH': {'mhc': [[32589647, 32698571]]},
 'GWD': {'mhc': [[29720403, 29913914],
   [29939668, 30085606],
   [32627859, 32732048]]},
 'IBS': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'ITU': {'mhc': [[32423532, 32554290]]},
 'JPT': {'mhc': [[32589647, 32732048]]},
 'KHV': {'mhc': [[32453110, 32577355], [32589647, 32716541]]},
 'LWK': {'mhc': [[32627859, 32805573]]},
 'MSL': {'mhc': [[29720403, 29913914],
   [31317765, 31528792],
   [32554291, 32776791]]},
 'MXL': {'mhc': [[32589647, 32716541]]},
 '

In [42]:
mhc_similarity_region

{'ACB': {'mhc': [[29720403, 30011739],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'All': {'mhc': [[29720403, 30048796],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'ASW': {'mhc': [[29720403, 29913914],
   [29939668, 30120966],
   [31052133, 31528792],
   [32212726, 32882258]]},
 'CDX': {'mhc': [[30994370, 31528792], [32212726, 32923168]]},
 'CEU': {'mhc': [[30994370, 31528792], [32212726, 32882258]]},
 'CHS': {'mhc': [[30994370, 31528792], [32397207, 32882258]]},
 'CLM': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'ESN': {'mhc': [[29720403, 30120966],
   [30994370, 31528792],
   [32397207, 32882258]]},
 'GBR': {'mhc': [[30994370, 31528792], [32212726, 32882258]]},
 'GIH': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32882258]]},
 'GWD': {'mhc': [[29720403, 30120966],
   [31052133, 31528792],
   [32212726, 32882258]]},
 'IBS': {'mhc': [[29720403, 29896285],
   [30994370, 31528792],
   [32212726, 32882258

In [43]:
# def find_overlap(arr1, arr2):
#     overlap = []
#     for interval1 in arr1:
#         for interval2 in arr2:
#             start = max(interval1[0], interval2[0])
#             end = min(interval1[1], interval2[1])
#             if start <= end:
#                 overlap.append([start, end])
#     return overlap

# arr1 = [[2, 4], [3, 5], [5, 7]]
# arr2 = [[2, 6], [3, 7], [4, 8]]

# result = find_overlap(arr1, arr2)
# print(result)


[[2, 4], [3, 4], [4, 4], [3, 5], [3, 5], [4, 5], [5, 6], [5, 7], [5, 7]]


In [44]:
def find_overlap(arr1, arr2):
    overlap = set()  
    for interval1 in arr1:
        for interval2 in arr2:
            start = max(interval1[0], interval2[0])
            end = min(interval1[1], interval2[1])
            if start <= end:
                overlap.add((start, end))  

    return list(overlap)  # 

arr1 = [[32453110, 32577355], [32589647, 32716541]]
arr2 = [[29720403, 30048796],
   [30994370, 31528792],
   [32212726, 32882258]]

result = find_overlap(arr1, arr2)
print(result)


[(32453110, 32577355), (32589647, 32716541)]


In [45]:
mhc_repeat_region=dict()#{race:repeat region}
for race, region in mhc_complementary_region.items():
    mhc_repeat_region[race]=find_overlap(region["mhc"],mhc_similarity_region[race]["mhc"])


In [46]:
mhc_repeat_region

{'ACB': [(32589647, 32805573)],
 'All': [(32453110, 32577355), (32589647, 32716541)],
 'ASW': [(32627859, 32843772), (29939668, 30120966), (29753369, 29913914)],
 'CDX': [(31295439, 31432528)],
 'CEU': [(32627859, 32776791)],
 'CHS': [(32589647, 32716541)],
 'CLM': [(32589647, 32732048), (32453110, 32577355)],
 'ESN': [(29720403, 29896285), (32644320, 32843772)],
 'GBR': [(32453110, 32577355)],
 'GIH': [(32589647, 32698571)],
 'GWD': [(29939668, 30085606), (32627859, 32732048), (29720403, 29913914)],
 'IBS': [(32453110, 32577355), (32589647, 32716541)],
 'ITU': [(32423532, 32554290)],
 'JPT': [(32589647, 32732048)],
 'KHV': [(32453110, 32577355), (32589647, 32716541)],
 'LWK': [(32627859, 32805573)],
 'MSL': [(31317765, 31528792), (29720403, 29913914), (32554291, 32776791)],
 'MXL': [(32589647, 32716541)],
 'PEL': [(32473902, 32616414)],
 'PUR': [(32589647, 32683157)],
 'STU': [(32453110, 32577355), (32589647, 32716541)],
 'TSI': [(32589647, 32698571)],
 'YRI': [(29939668, 30120966), (

In [58]:
#non-mhc complementary region and non-mhc similarity region
non_mhc_complementary_region={'ACB': {'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr7/xan': [[56467013, 56713542]],
  'split_chr11/xaw': [[103206633, 103451621]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr21/xaa': [[10605321, 10733724]]},
  'All':{'split_chr14_xaa': [[19806333, 19976578]],
 'split_chr7_xbc': [[124810550, 125070920]],
 'split_chr1_xak': [[45762038, 46100296]],
 'split_chr1_xbk': [[188243362, 188513984]]},
 'ASW': {'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr9/xak': [[42796452, 42969270]],
  'split_chr10/xaj': [[38491573, 38687800], [38725237, 38832458]],
  'split_chr11/xaf': [[23272931, 23495433]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr14/xak': [[66346187, 66752333]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'BEB': {'split_chr1/xbx': [[245939605, 246162003]],
  'split_chr2/xan': [[56405225, 56644106]],
  'split_chr3/xav': [[98046342, 98273693]],
  'split_chr3/xbf': [[145365797, 145612467]],
  'split_chr4/xaz': [[115247002, 115485969]],
  'split_chr13/xai': [[55102733, 55443491]],
  'split_chr14/xaa': [[19806333, 19976578]]},
 'CDX': {'split_chr1/xay': [[114816228, 115116610]],
  'split_chr2/xbd': [[130125757, 130339027]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr6/xao': [[64160994, 64523871]],
  'split_chr7/xaf': [[19734421, 19953255]],
  'split_chr9/xau': [[105652970, 105876427]],
  'split_chr10/xan': [[55664607, 55894887]],
  'split_chr12/xbc': [[131390693, 131619328]],
  'split_chr13/xai': [[55102733, 55354096]],
  'split_chr17/xag': [[31068982, 31384157]],
  'split_chr18/xan': [[65996302, 66233011]]},
 'CEU': {'split_chr1/xbe': [[158399387, 158641706]],
  'split_chr6/xac': [[8413699, 8649798]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr11/xaf': [[24934206, 25144712]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr17/xaf': [[21588491, 21823237]],
  'split_chr17/xaj': [[45620566, 46418024]]},
 'CHB': {'split_chr2/xbd': [[130125757, 130339027]],
  'split_chr4/xba': [[119293026, 119587470]],
  'split_chr4/xaz': [[115247002, 115485969]],
  'split_chr6/xao': [[64202650, 64523871]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr7/xaf': [[19775051, 19953255]],
  'split_chr10/xar': [[73060579, 73337762]],
  'split_chr11/xav': [[98072700, 98330585]],
  'split_chr22/xaa': [[16175432, 16345129]]},
 'CHS': {'split_chr3/xav': [[98046342, 98245327]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr6/xao': [[64202650, 64523871]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr8/xaz': [[109058766, 109345554]],
  'split_chr14/xat': [[105509278, 105695533]]},
 'CLM': {'split_chr1/xbd': [[152162701, 152471006]],
  'split_chr3/xav': [[95059735, 95322404], [98046342, 98273693]],
  'split_chr3/xbj': [[163862679, 164224214]],
  'split_chr4/xba': [[119293026, 119587470]],
  'split_chr6/xab': [[7345120, 7558507]],
  'split_chr6/xar': [[76169005, 76460170]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr12/xan': [[61013177, 61213757]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr14/xat': [[106520496, 106674883]]},
 'ESN': {'split_chr2/xau': [[89581761, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr5/xai': [[34311870, 34562837]],
  'split_chr6/xbf': [[145443018, 145762353]],
  'split_chr7/xap': [[67044394, 67267582]],
  'split_chr14/xan': [[80889782, 81152442]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'FIN': {'split_chr3/xbj': [[163862679, 164147845]],
  'split_chr4/xaw': [[97892649, 98142264]],
  'split_chr4/xbh': [[151414350, 151728888]],
  'split_chr6/xat': [[85153819, 85387984]],
  'split_chr6/xbf': [[145485121, 145762353]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr8/xaz': [[109010721, 109345554]],
  'split_chr9/xav': [[112161299, 112408537]],
  'split_chr11/xbb': [[124171937, 124450010]],
  'split_chr18/xac': [[12017790, 12244103]]},
 'GBR': {'split_chr1/xak': [[45628366, 46147211]],
  'split_chr2/xan': [[56405225, 56644106]],
  'split_chr2/xas': [[78325225, 78577282]],
  'split_chr3/xbf': [[145365797, 145612467]],
  'split_chr3/xbi': [[158122703, 158435520]],
  'split_chr4/xba': [[119243365, 119539168]],
  'split_chr6/xac': [[8413699, 8649798]],
  'split_chr7/xbc': [[124760998, 125070920]],
  'split_chr11/xbb': [[124171937, 124450010]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr17/xaf': [[21654230, 21823237]],
  'split_chr17/xaj': [[45587933, 46418024]]},
 'GIH': {'split_chr3/xav': [[98046342, 98276757]],
  'split_chr4/xbd': [[132580391, 132816794]],
  'split_chr6/xac': [[8413699, 8649798]],
  'split_chr11/xah': [[34650776, 34905225]],
  'split_chr14/xaa': [[19806333, 20015421]],
  'split_chr18/xac': [[12017790, 12244103]]},
 'GWD': {'split_chr1/xak': [[45803471, 46100296]],
  'split_chr2/xai': [[38264369, 38464894]],
  'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr21/xaa': [[10511846, 10763989]]},
 'IBS': {'split_chr3/xbf': [[145365797, 145612467]],
  'split_chr6/xbf': [[145485121, 145762353]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr12/xas': [[83579779, 83843102]],
  'split_chr17/xaf': [[21619971, 21823237]],
  'split_chr17/xaj': [[45836662, 46418024]],
  'split_chr18/xac': [[12017790, 12244103]],
  'split_chr22/xaa': [[16131708, 16345129]]},
 'ITU': {'split_chr5/xas': [[84862293, 85266031]],
  'split_chr10/xan': [[55664607, 55894887]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr14/xaa': [[19806333, 20047342]],
  'split_chr17/xak': [[52575391, 52889013]]},
 'JPT': {'split_chr3/xav': [[98046342, 98245327]],
  'split_chr5/xbc': [[131688028, 132052794]],
  'split_chr5/xaa': [[676694, 825167]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr13/xap': [[85638733, 85884384]],
  'split_chr14/xad': [[31681845, 31909184]],
  'split_chr22/xaa': [[16175432, 16345129]]},
 'KHV': {'split_chr2/xbq': [[192771748, 193054217]],
  'split_chr3/xav': [[98046342, 98245327]],
  'split_chr3/xbj': [[163946649, 164185047]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr14/xak': [[66298204, 66793088]]},
 'LWK': {'split_chr2/xai': [[38235897, 38464894]],
  'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr14/xak': [[66346187, 66752333]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'MSL': {'split_chr1/xak': [[45803471, 46100296]],
  'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xay': [[113365150, 113639918]],
  'split_chr7/xaf': [[19775051, 19953255]],
  'split_chr7/xap': [[67044394, 67267582]],
  'split_chr9/xak': [[42796452, 42969270]],
  'split_chr11/xaf': [[23272931, 23495433]],
  'split_chr11/xaw': [[103206633, 103451621]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr14/xat': [[105569851, 105695533]],
  'split_chr21/xaa': [[10605321, 10733724]]},
 'MXL': {'split_chr3/xav': [[98046342, 98217489]],
  'split_chr3/xaq': [[74901608, 75192541]],
  'split_chr5/xad': [[11999758, 12263260]],
  'split_chr6/xao': [[64160994, 64404669]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr11/xaf': [[23200593, 23461651]],
  'split_chr12/xad': [[14846240, 15093187]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr12/xap': [[73338587, 73586502]],
  'split_chr12/xas': [[83579779, 83843102]],
  'split_chr17/xaf': [[21654230, 21823237]]},
 'PEL': {'split_chr1/xak': [[45628366, 46147211]],
  'split_chr2/xbo': [[186062200, 186331668]],
  'split_chr3/xav': [[98046342, 98217489]],
  'split_chr3/xba': [[120872663, 121116904]],
  'split_chr4/xaq': [[69225334, 69589122]],
  'split_chr6/xab': [[7345120, 7695065]],
  'split_chr8/xba': [[112137591, 112486758]],
  'split_chr10/xaq': [[68153912, 68481883]],
  'split_chr11/xaf': [[23168664, 23461651]],
  'split_chr11/xam': [[58630320, 58868016]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr12/xas': [[83579779, 83843102]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr16/xab': [[5659954, 5828695]]},
 'PJL': {'split_chr12/xac': [[10962097, 11162840]],
  'split_chr14/xac': [[28519266, 28761965]],
  'split_chr14/xaa': [[19806333, 20078222]],
  'split_chr17/xak': [[52575391, 52861780]]},
 'PUR': {'split_chr3/xav': [[98003499, 98273693]],
  'split_chr3/xbj': [[163819938, 164063747]],
  'split_chr12/xac': [[10962097, 11205540]],
  'split_chr12/xan': [[61013177, 61213757]],
  'split_chr12/xas': [[83689105, 83928389]],
  'split_chr17/xaf': [[21654230, 21823237]]},
 'STU': {'split_chr3/xav': [[98046342, 98273693]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr13/xai': [[55102733, 55443491]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr16/xaj': [[48512974, 48799209]],
  'split_chr19/xam': [[51801039, 52098595]]},
 'TSI': {'split_chr4/xaw': [[97892649, 98189302]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr7/xag': [[23951217, 24204307]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr9/xai': [[31957257, 32172669]],
  'split_chr12/xac': [[10962097, 11162840]],
  'split_chr14/xaa': [[19806333, 19976578]],
  'split_chr17/xaf': [[21654230, 21823237]],
  'split_chr17/xaj': [[45836662, 46544765]]},
 'YRI': {'split_chr2/xau': [[89762579, 89852967]],
  'split_chr5/xaa': [[676694, 846549]],
  'split_chr7/xan': [[56467013, 56713542]],
  'split_chr7/xap': [[67044394, 67267582]],
  'split_chr9/xak': [[42796452, 42969270]],
  'split_chr13/xaa': [[18347994, 18534139]],
  'split_chr16/xao': [[72288822, 72684953]],
  'split_chr21/xaa': [[10605321, 10733724]]}}


non_mhc_similarity_region={'ACB': {},
 'All': {},
 'ASW': {},
 'BEB': {'split_chr3/xav': [[98046342, 98273693]]},
 'CDX': {'split_chr13/xai': [[55102733, 55443491]]},
 'CEU': {'split_chr17/xaj': [[45670177, 46418024]]},
 'CHB': {},
 'CHS': {'split_chr3/xav': [[98046342, 98273693]]},
 'CLM': {'split_chr17/xaj': [[45836662, 46224960]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'ESN': {'split_chr5/xai': [[34264948, 34562837]]},
 'FIN': {},
 'GBR': {'split_chr1/xbt': [[226978066, 227352188]],
  'split_chr17/xaj': [[45670177, 46418024]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'GIH': {'split_chr3/xav': [[98046342, 98276757]]},
 'GWD': {},
 'IBS': {'split_chr17/xaj': [[45670177, 46418024]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'ITU': {'split_chr17/xak': [[52575391, 52889013]]},
 'JPT': {},
 'KHV': {},
 'LWK': {},
 'MSL': {'split_chr5/xai': [[34264948, 34562837]]},
 'MXL': {'split_chr1/xbt': [[227016487, 227352188]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'PEL': {'split_chr11/xaf': [[23168664, 23564494]],
  'split_chr3/xav': [[98046342, 98276757]]},
 'PJL': {},
 'PUR': {'split_chr17/xaj': [[45836662, 46418024]],
  'split_chr3/xav': [[98046342, 98276757]]},
 'STU': {'split_chr13/xai': [[55102733, 55443491]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'TSI': {'split_chr17/xaj': [[45670177, 46418024]]},
 'YRI': {}}

In [59]:
temp_non_mhc_complementary_region = non_mhc_complementary_region.copy()
temp_non_mhc_similarity_region=non_mhc_similarity_region.copy()

In [60]:
for race, region_dict in temp_non_mhc_complementary_region.items():
    if region_dict=={}:
        del non_mhc_similarity_region[race]
        del non_mhc_complementary_region[race]
for race, region_dict in temp_non_mhc_similarity_region.items():
    if region_dict=={}:
        del non_mhc_complementary_region[race]
        del non_mhc_similarity_region[race]

In [63]:
non_mhc_complementary_region

{'BEB': {'split_chr1/xbx': [[245939605, 246162003]],
  'split_chr2/xan': [[56405225, 56644106]],
  'split_chr3/xav': [[98046342, 98273693]],
  'split_chr3/xbf': [[145365797, 145612467]],
  'split_chr4/xaz': [[115247002, 115485969]],
  'split_chr13/xai': [[55102733, 55443491]],
  'split_chr14/xaa': [[19806333, 19976578]]},
 'CDX': {'split_chr1/xay': [[114816228, 115116610]],
  'split_chr2/xbd': [[130125757, 130339027]],
  'split_chr4/xba': [[119293026, 119539168]],
  'split_chr6/xao': [[64160994, 64523871]],
  'split_chr7/xaf': [[19734421, 19953255]],
  'split_chr9/xau': [[105652970, 105876427]],
  'split_chr10/xan': [[55664607, 55894887]],
  'split_chr12/xbc': [[131390693, 131619328]],
  'split_chr13/xai': [[55102733, 55354096]],
  'split_chr17/xag': [[31068982, 31384157]],
  'split_chr18/xan': [[65996302, 66233011]]},
 'CEU': {'split_chr1/xbe': [[158399387, 158641706]],
  'split_chr6/xac': [[8413699, 8649798]],
  'split_chr7/xbc': [[124810550, 125070920]],
  'split_chr11/xaf': [[24934

In [62]:
non_mhc_similarity_region

{'BEB': {'split_chr3/xav': [[98046342, 98273693]]},
 'CDX': {'split_chr13/xai': [[55102733, 55443491]]},
 'CEU': {'split_chr17/xaj': [[45670177, 46418024]]},
 'CHS': {'split_chr3/xav': [[98046342, 98273693]]},
 'CLM': {'split_chr17/xaj': [[45836662, 46224960]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'ESN': {'split_chr5/xai': [[34264948, 34562837]]},
 'GBR': {'split_chr1/xbt': [[226978066, 227352188]],
  'split_chr17/xaj': [[45670177, 46418024]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'GIH': {'split_chr3/xav': [[98046342, 98276757]]},
 'IBS': {'split_chr17/xaj': [[45670177, 46418024]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'ITU': {'split_chr17/xak': [[52575391, 52889013]]},
 'MSL': {'split_chr5/xai': [[34264948, 34562837]]},
 'MXL': {'split_chr1/xbt': [[227016487, 227352188]],
  'split_chr3/xav': [[98046342, 98273693]]},
 'PEL': {'split_chr11/xaf': [[23168664, 23564494]],
  'split_chr3/xav': [[98046342, 98276757]]},
 'PUR': {'split_chr17/xaj': [[45836662, 46418024]

In [64]:
fileName = '../1000_population.tsv'


lines = []
categories = []
samplenames=[]

with open(fileName, 'r') as f:
    text = f.read()

lines = text.split('\n')
for l in lines:
    samplenames.append(l.split('\t')[0])
    categories.append(l.split('\t')[-1])
allcategories=sorted(list(set(categories)))


In [65]:
split_repeat_region=dict()
for a in allcategories:
    split_repeat_region[a]=dict()

In [70]:

for race, region_dict in non_mhc_similarity_region.items():
    print("race",race)
    for split_sim,region_arrs_sim in region_dict.items():        
        if split_sim in non_mhc_complementary_region[race]:
            print("split",split_sim)
            region_arrs_com=non_mhc_complementary_region[race][split_sim]
            split_repeat_region[race][split_sim]=find_overlap(region_arrs_sim,region_arrs_com)
        else:
            continue



race BEB
split split_chr3/xav
race CDX
split split_chr13/xai
race CEU
split split_chr17/xaj
race CHS
split split_chr3/xav
race CLM
split split_chr3/xav
race ESN
split split_chr5/xai
race GBR
split split_chr17/xaj
race GIH
split split_chr3/xav
race IBS
split split_chr17/xaj
race ITU
split split_chr17/xak
race MSL
race MXL
split split_chr3/xav
race PEL
split split_chr11/xaf
split split_chr3/xav
race PUR
split split_chr3/xav
race STU
split split_chr13/xai
split split_chr3/xav
race TSI
split split_chr17/xaj


In [71]:
split_repeat_region

{'ACB': {},
 'ASW': {},
 'BEB': {'split_chr3/xav': [(98046342, 98273693)]},
 'CDX': {'split_chr13/xai': [(55102733, 55354096)]},
 'CEU': {'split_chr17/xaj': [(45670177, 46418024)]},
 'CHB': {},
 'CHS': {'split_chr3/xav': [(98046342, 98245327)]},
 'CLM': {'split_chr3/xav': [(98046342, 98273693)]},
 'ESN': {'split_chr5/xai': [(34311870, 34562837)]},
 'FIN': {},
 'GBR': {'split_chr17/xaj': [(45670177, 46418024)]},
 'GIH': {'split_chr3/xav': [(98046342, 98276757)]},
 'GWD': {},
 'IBS': {'split_chr17/xaj': [(45836662, 46418024)]},
 'ITU': {'split_chr17/xak': [(52575391, 52889013)]},
 'JPT': {},
 'KHV': {},
 'LWK': {},
 'MSL': {},
 'MXL': {'split_chr3/xav': [(98046342, 98217489)]},
 'PEL': {'split_chr11/xaf': [(23168664, 23461651)],
  'split_chr3/xav': [(98046342, 98217489)]},
 'PJL': {},
 'PUR': {'split_chr3/xav': [(98046342, 98273693)]},
 'STU': {'split_chr13/xai': [(55102733, 55443491)],
  'split_chr3/xav': [(98046342, 98273693)]},
 'TSI': {'split_chr17/xaj': [(45836662, 46418024)]},
 'YR