Skip to content

Commit

Permalink
Merge pull request #171 from arvkevi/update_ezancestry
Browse files: browse the repository at this point in the history
Update ezancestry, works local
  • Loading branch information
apriha committed Mar 5, 2024
2 parents a385888 + 75f9a90 commit 1d4bbd6
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-latest]
python-version: ['3.8']
python-version: ['3.8', '3.9', '3.10', '3.11']

steps:
- uses: actions/checkout@v4
Expand Down
29 changes: 5 additions & 24 deletions src/snps/snps.py
Original file line number Diff line number Diff line change
Expand Up @@ -1848,11 +1848,6 @@ def predict_ancestry(
write_predictions=False,
models_directory=None,
aisnps_directory=None,
n_components=None,
k=None,
thousand_genomes_directory=None,
samples_directory=None,
algorithm=None,
aisnps_set=None,
):
"""Predict genetic ancestry for SNPs.
Expand All @@ -1875,14 +1870,10 @@ def predict_ancestry(
`population_code` (str)
max predicted population for the sample
`population_description` (str)
descriptive name of the population
`population_percent` (float)
predicted probability for the max predicted population
`superpopulation_code` (str)
max predicted super population (continental) for the sample
`superpopulation_description` (str)
descriptive name of the super population
`superpopulation_percent` (float)
predicted probability for the max predicted super population
`ezancestry_df` (pandas.DataFrame)
Expand All @@ -1891,16 +1882,15 @@ def predict_ancestry(
`component1`, `component2`, `component3`
The coordinates of the sample in the dimensionality-reduced component space. Can be
used as (x, y, z,) coordinates for plotting in a 3d scatter plot.
`predicted_population_population`
`predicted_ancestry_population`
The max predicted population for the sample.
`ACB`, `ASW`, `BEB`, `CDX`, `CEU`, `CHB`, `CHS`, `CLM`, `ESN`, `FIN`, `GBR`, `GIH`, `GWD`, `IBS`, `ITU`, `JPT`, `KHV`, `LWK`, `MSL`, `MXL`, `PEL`, `PJL`, `PUR`, `STU`, `TSI`, `YRI`
Predicted probabilities for each of the populations. These sum to 1.0.
`predicted_population_superpopulation`
`predicted_ancestry_superpopulation`
The max predicted super population (continental) for the sample.
`AFR`, `AMR`, `EAS`, `EUR`, `SAS`
Predicted probabilities for each of the super populations. These sum to 1.0.
`population_description`, `superpopulation_name`
Descriptive names of the population and super population.
"""
if not self.valid:
Expand All @@ -1914,19 +1904,15 @@ def predict_ancestry(
)

def max_pop(row):
popcode = row["predicted_population_population"]
popdesc = row["population_description"]
popcode = row["predicted_ancestry_population"]
poppct = row[popcode]
superpopcode = row["predicted_population_superpopulation"]
superpopdesc = row["superpopulation_name"]
superpopcode = row["predicted_ancestry_superpopulation"]
superpoppct = row[superpopcode]

return {
"population_code": popcode,
"population_description": popdesc,
"population_percent": poppct,
"superpopulation_code": superpopcode,
"superpopulation_description": superpopdesc,
"superpopulation_percent": superpoppct,
}

Expand All @@ -1936,11 +1922,6 @@ def max_pop(row):
write_predictions,
models_directory,
aisnps_directory,
n_components,
k,
thousand_genomes_directory,
samples_directory,
algorithm,
aisnps_set,
)

Expand Down
27 changes: 13 additions & 14 deletions tests/test_snps.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,15 +462,13 @@ def test_count_no_snps(self):
self.assertTrue(snps.snps.empty)

def _make_ancestry_assertions(self, d):
self.assertEqual(d["population_code"], "ITU")
self.assertEqual(d["population_description"], "Indian Telugu in the UK")
self.assertAlmostEqual(d["population_percent"], 0.2992757864426246)
self.assertEqual(d["superpopulation_code"], "SAS")
self.assertEqual(d["superpopulation_description"], "South Asian Ancestry")
self.assertAlmostEqual(d["superpopulation_percent"], 0.827977563875996)
self.assertTrue("predicted_population_population" in d["ezancestry_df"].keys())
self.assertEqual(d["population_code"], "PUR")
self.assertAlmostEqual(d["population_percent"], 0.2661437765657918)
self.assertEqual(d["superpopulation_code"], "AMR")
self.assertAlmostEqual(d["superpopulation_percent"], 0.9642857142857143)
self.assertTrue("predicted_ancestry_population" in d["ezancestry_df"].keys())
self.assertTrue(
"predicted_population_superpopulation" in d["ezancestry_df"].keys()
"predicted_ancestry_superpopulation" in d["ezancestry_df"].keys()
)

def test_ancestry(self):
Expand All @@ -496,12 +494,10 @@ def pop_modules(modules):
sys.modules["ezancestry.commands"].predict = Mock(
return_value=pd.DataFrame(
{
"predicted_population_population": ["ITU"],
"population_description": ["Indian Telugu in the UK"],
"ITU": [0.2992757864426246],
"predicted_population_superpopulation": ["SAS"],
"superpopulation_name": ["South Asian Ancestry"],
"SAS": [0.827977563875996],
"predicted_ancestry_population": ["PUR"],
"PUR": [0.2661437765657918],
"predicted_ancestry_superpopulation": ["AMR"],
"AMR": [0.9642857142857143],
}
)
)
Expand Down Expand Up @@ -1080,6 +1076,9 @@ def test_merge_chrom(self):

class TestDeprecatedMethods(TestSnps):
def run_deprecated_test(self, f, msg):
if pd.__version__ == "1.5.3":
self.skipTest("Skipping test for pandas version 1.5.3")

with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
f()
Expand Down

0 comments on commit 1d4bbd6

Please sign in to comment.