-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
28 changed files
with
2,263 additions
and
1,810 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,21 +1,21 @@ | ||
Name Xref NumVars Chrom OffsetsHg37 OffsetsHg38 VarRef | ||
mh02ZHA-012 3 chr2 "rs949778,rs867005,rs952210" | ||
mh03ZHA-001 4 chr3 "rs4858685,rs4858686,rs75773180,rs9838878" | ||
mh04ZHA-001 4 chr4 "rs6830692,rs9714725,rs12501341,rs10939388" | ||
mh04ZHA-002 4 chr4 "rs10939597,rs79276692,rs62409414,rs62409415" | ||
mh04ZHA-004 4 chr4 "rs10049992,rs1914740,rs1714017,rs6835177" | ||
mh04ZHA-007 3 chr4 "rs6819048,rs62308082,rs74383997" | ||
mh05ZHA-004 3 chr5 "rs2457087,rs2644662,rs2662178" | ||
mh07ZHA-003 4 chr7 "rs4724041,rs378367,rs433709,rs404569" | ||
mh07ZHA-004 3 chr7 "rs6971410,rs2971679,rs3808323" | ||
mh07ZHA-009 4 chr7 "rs144858626,rs149890778,rs11773043,rs7792859" | ||
mh08ZHA-011 4 chr8 "rs4831247,rs13265601,rs4831248,rs13268053" | ||
mh09ZHA-008 3 chr9 "rs11506774,rs10981667,rs10739387" | ||
mh10ZHA-002 4 chr10 "rs10764175,rs148665640,rs10827896,rs10827897" | ||
mh11ZHA-006a 4 chr11 "rs3809057,rs3809056,rs3809055,rs3809054" | ||
mh14ZHA-003 3 chr14 "rs4902946,rs8012670,rs4902947" | ||
mh16ZHA-009 4 chr16 "rs76047588,rs11641186,rs11641193,rs80213582" | ||
mh17ZHA-001 3 chr17 "rs56023444,rs4131415,rs4260117" | ||
mh19ZHA-007 4 chr19 "rs8106726,rs8102417,rs59490836,rs10406130" | ||
mh19ZHA-009 5 chr19 "rs74178308,rs8108729,rs8107824,rs8108835,rs2560950" | ||
mh22ZHA-008 3 chr22 "rs11568183,rs8142282,rs8136173" | ||
Name Xref NumVars Chrom OffsetsHg37 OffsetsHg38 VarRef | ||
mh02ZHA-012 3 chr2 "rs949778,rs867005,rs952210" | ||
mh03ZHA-001 4 chr3 "rs4858685,rs4858686,rs75773180,rs9838878" | ||
mh04ZHA-001 4 chr4 "rs6830692,rs9714725,rs12501341,rs10939388" | ||
mh04ZHA-002 4 chr4 "rs10939597,rs79276692,rs62409414,rs62409415" | ||
mh04ZHA-004 4 chr4 "rs10049992,rs1914740,rs1714017,rs6835177" | ||
mh04ZHA-007 3 chr4 "rs6819048,rs62308082,rs74383997" | ||
mh05ZHA-004 3 chr5 "rs2457087,rs2644662,rs2662178" | ||
mh07ZHA-003 4 chr7 "rs4724041,rs378367,rs433709,rs404569" | ||
mh07ZHA-004 3 chr7 "rs6971410,rs2971679,rs3808323" | ||
mh07ZHA-009 4 chr7 "rs144858626,rs149890778,rs11773043,rs7792859" | ||
mh08ZHA-011 4 chr8 "rs4831247,rs13265601,rs4831248,rs13268053" | ||
mh09ZHA-008 3 chr9 "rs11506774,rs10981667,rs10739387" | ||
mh10ZHA-002 4 chr10 "rs10764175,rs148665640,rs10827896,rs10827897" | ||
mh11ZHA-006a 4 chr11 "rs3809057,rs3809056,rs3809055,rs3809054" | ||
mh14ZHA-003 3 chr14 "rs4902946,rs8012670,rs4902947" | ||
mh16ZHA-009 4 chr16 "rs76047588,rs11641186,rs11641193,rs80213582" | ||
mh17ZHA-001 3 chr17 "rs56023444,rs4131415,rs4260117" | ||
mh19ZHA-007 4 chr19 "rs8106726,rs8102417,rs59490836,rs10406130" | ||
mh19ZHA-009 5 chr19 "rs74178308,rs8108729,rs8107824,rs8108835,rs2560950" | ||
mh22ZHA-008 3 chr22 "rs11568183,rs8142282,rs8136173" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,58 +1,102 @@ | ||
#!/usr/bin/env python3 | ||
# ------------------------------------------------------------------------------------------------- | ||
# Copyright (c) 2018, DHS. | ||
# | ||
# ----------------------------------------------------------------------------- | ||
# Copyright (c) 2018, Battelle National Biodefense Institute. | ||
# This file is part of MicroHapDB (http://github.com/bioforensics/MicroHapDB) and is licensed under | ||
# the BSD license: see LICENSE.txt. | ||
# | ||
# This file is part of MicroHapDB (http://github.com/bioforensics/microhapdb) | ||
# and is licensed under the BSD license: see LICENSE.txt. | ||
# ----------------------------------------------------------------------------- | ||
# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National | ||
# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the | ||
# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and | ||
# Development Center. | ||
# ------------------------------------------------------------------------------------------------- | ||
|
||
|
||
from microhapdb.util import data_file | ||
from .tables import markers, populations, frequencies, variantmap, idmap, sequences, indels | ||
from .population import Population | ||
from .marker import Marker | ||
from microhapdb import cli | ||
from microhapdb import retrieve | ||
from microhapdb import marker | ||
from microhapdb import panel | ||
from microhapdb import population | ||
import os | ||
import pandas as pd | ||
from pkg_resources import resource_filename | ||
import pandas | ||
from ._version import get_versions | ||
__version__ = get_versions()['version'] | ||
|
||
__version__ = get_versions()["version"] | ||
del get_versions | ||
|
||
|
||
def data_file(path):
    """Return the location of a data file bundled with the microhapdb package.

    `path` is appended to the ``data/`` prefix and the result is resolved
    against the ``microhapdb`` package with `resource_filename`.
    """
    relative_path = f"data/{path}"
    return resource_filename("microhapdb", relative_path)
|
||
|
||
def set_ae_population(popid=None):
    """Replace the ``Ae`` column of the module-level `markers` table.

    The current ``Ae`` column is dropped and a replacement column is joined
    back in on the marker name; `markers` is then rebound (not mutated in
    place) with the columns in their canonical order.

    Parameters
    ----------
    popid : str or None
        Value to look up in the ``Population`` column of ``marker-aes.tsv``.
        When None, the ``Ae`` values stored in ``marker.tsv`` are used instead.

    Raises
    ------
    ValueError
        If `popid` is given but does not occur in ``marker-aes.tsv``.
    """
    global markers
    columns = ["Name", "PermID", "Reference", "Chrom", "Offsets", "Ae", "In", "Fst", "Source"]
    if popid is None:
        defaults = pd.read_csv(data_file("marker.tsv"), sep="\t")
        replacement = defaults[["Name", "Ae"]].set_index("Name")
    else:
        aes = pd.read_csv(data_file("marker-aes.tsv"), sep="\t")
        if popid not in aes.Population.unique():
            raise ValueError(f'no Ae data for population "{popid}"')
        # Keep only this population's rows; the Population column itself must
        # not leak into the joined marker table.
        popaes = aes[aes.Population == popid].drop(columns=["Population"])
        replacement = popaes.set_index("Marker")
    # Both branches finish the same way: swap the old Ae column for the new one.
    markers = markers.drop(columns=["Ae"]).join(replacement, on="Name")[columns]
|
||
|
||
def set_reference(refr):
    """Replace the ``Reference`` and ``Offsets`` columns of the module-level `markers` table.

    The two columns are dropped and re-joined on the marker name from the file
    that matches the requested reference; `markers` is then rebound with the
    columns in their canonical order.

    Parameters
    ----------
    refr : int
        Either 37 or 38. With 38 the columns are taken from ``marker.tsv``;
        with 37 they are taken from ``marker-offsets-GRCh37.tsv``.

    Raises
    ------
    AssertionError
        If `refr` is neither 37 nor 38.
    """
    global markers
    # Kept as an assert so the exception type seen by existing callers does not change.
    assert refr in (37, 38), f"unsupported reference {refr!r}: expected 37 or 38"
    columns = ["Name", "PermID", "Reference", "Chrom", "Offsets", "Ae", "In", "Fst", "Source"]
    if refr == 38:
        defaults = pd.read_csv(data_file("marker.tsv"), sep="\t")
        coordinates = defaults[["Name", "Reference", "Offsets"]].set_index("Name")
    else:
        o37 = pd.read_csv(data_file("marker-offsets-GRCh37.tsv"), sep="\t")
        coordinates = o37.set_index("Marker")
    # Both branches finish the same way: swap the old coordinate columns for the new ones.
    markers = markers.drop(columns=["Reference", "Offsets"]).join(coordinates, on="Name")[columns]
|
||
|
||
# Module-level tables: each name is bound when this module is imported, to a
# DataFrame read from a tab-separated file located via data_file().
# NOTE(review): the same seven names are also imported from .tables earlier in
# this file; presumably these assignments are the pre-change side of the diff
# and are superseded by that import -- confirm before relying on them.
markers = pandas.read_csv(data_file('marker.tsv'), sep='\t') | ||
populations = pandas.read_csv(data_file('population.tsv'), sep='\t') | ||
frequencies = pandas.read_csv(data_file('frequency.tsv'), sep='\t') | ||
variantmap = pandas.read_csv(data_file('variantmap.tsv'), sep='\t') | ||
idmap = pandas.read_csv(data_file('idmap.tsv'), sep='\t') | ||
sequences = pandas.read_csv(data_file('sequences.tsv'), sep='\t') | ||
indels = pandas.read_csv(data_file('indels.tsv'), sep='\t') | ||
def retrieve_by_id(ident):
    """Retrieve records by name or identifier

    A cross-reference listed in `idmap.Xref` is first translated to its ID.
    The identifier is then looked for among population IDs and names, and
    after that among variants, marker names and marker PermIDs; the matching
    table is built by `Population.table_from_ids` or `Marker.table_from_ids`.

    Raises ValueError when the identifier is found in none of those columns.

    >>> retrieve_by_id("mh17KK-014")
    Name PermID Reference Chrom Offsets Ae In Fst Source
    510 mh17KK-014 MHDBM-83a239de GRCh38 chr17 4497060,4497088,4497096 2.0215 0.6423 0.3014 ALFRED
    >>> retrieve_by_id("SI664726F")
    Name PermID Reference Chrom Offsets Ae In Fst Source
    510 mh17KK-014 MHDBM-83a239de GRCh38 chr17 4497060,4497088,4497096 2.0215 0.6423 0.3014 ALFRED
    >>> retrieve_by_id("MHDBM-ea520d26")
    Name PermID Reference Chrom Offsets Ae In Fst Source
    539 mh18KK-285 MHDBM-ea520d26 GRCh38 chr18 24557354,24557431,24557447,24557489 2.7524 0.1721 0.0836 ALFRED
    >>> retrieve_by_id("PJL")
    ID Name Source
    82 PJL Punjabi from Lahore, Pakistan 1KGP
    >>> retrieve_by_id("Asia")
    ID Name Source
    7 MHDBP-936bc36f79 Asia 10.1016/j.fsigen.2018.05.008
    >>> retrieve_by_id("Japanese")
    ID Name Source
    45 MHDBP-63967b883e Japanese 10.1016/j.legalmed.2015.06.003
    46 SA000010B Japanese ALFRED
    """

    def id_in_series(ident, series):
        # Literal (non-regex) containment: identifiers are plain text, so
        # characters such as "." or "(" must not be interpreted as a pattern.
        return series.str.contains(ident, regex=False, na=False).any()

    # Translate a cross-reference only on an exact match. Gating this on a
    # substring test would let a partial Xref through to an equality lookup
    # that finds nothing, failing the assertion instead of reporting
    # "not found" below.
    xref_hits = idmap[idmap.Xref == ident]
    if len(xref_hits) > 0:
        assert len(xref_hits) == 1
        ident = xref_hits.ID.iloc[0]

    if id_in_series(ident, populations.ID) or id_in_series(ident, populations.Name):
        return Population.table_from_ids([ident])

    marker_columns = (variantmap.Variant, markers.Name, markers.PermID)
    if any(id_in_series(ident, column) for column in marker_columns):
        return Marker.table_from_ids([ident])

    raise ValueError(f'identifier "{ident}" not found in MicroHapDB')
Oops, something went wrong.