Merge pull request #171 from arvkevi/update_ezancestry

Update ezancestry; works locally
apriha · Mar 5, 2024 · 1d4bbd6
2 parents a385888 + 75f9a90
commit 1d4bbd6
Show file tree

Hide file tree

Showing 3 changed files with 19 additions and 39 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -111,7 +111,7 @@ jobs:
     strategy:
       matrix:
         os: [ubuntu-latest]
-        python-version: ['3.8']
+        python-version: ['3.8', '3.9', '3.10', '3.11']
 
     steps:
     - uses: actions/checkout@v4

diff --git a/src/snps/snps.py b/src/snps/snps.py
@@ -1848,11 +1848,6 @@ def predict_ancestry(
         write_predictions=False,
         models_directory=None,
         aisnps_directory=None,
-        n_components=None,
-        k=None,
-        thousand_genomes_directory=None,
-        samples_directory=None,
-        algorithm=None,
         aisnps_set=None,
     ):
         """Predict genetic ancestry for SNPs.
@@ -1875,14 +1870,10 @@ def predict_ancestry(
 
             `population_code` (str)
               max predicted population for the sample
-            `population_description` (str)
-              descriptive name of the population
             `population_percent` (float)
               predicted probability for the max predicted population
             `superpopulation_code` (str)
               max predicted super population (continental) for the sample
-            `superpopulation_description` (str)
-              descriptive name of the super population
             `superpopulation_percent` (float)
               predicted probability for the max predicted super population
             `ezancestry_df` (pandas.DataFrame)
@@ -1891,16 +1882,15 @@ def predict_ancestry(
               `component1`, `component2`, `component3`
                 The coordinates of the sample in the dimensionality-reduced component space. Can be
                 used as (x, y, z,) coordinates for plotting in a 3d scatter plot.
-              `predicted_population_population`
+              `predicted_ancestry_population`
                 The max predicted population for the sample.
               `ACB`, `ASW`, `BEB`, `CDX`, `CEU`, `CHB`, `CHS`, `CLM`, `ESN`, `FIN`, `GBR`, `GIH`, `GWD`, `IBS`, `ITU`, `JPT`, `KHV`, `LWK`, `MSL`, `MXL`, `PEL`, `PJL`, `PUR`, `STU`, `TSI`, `YRI`
                 Predicted probabilities for each of the populations. These sum to 1.0.
-              `predicted_population_superpopulation`
+              `predicted_ancestry_superpopulation`
                 The max predicted super population (continental) for the sample.
               `AFR`, `AMR`, `EAS`, `EUR`, `SAS`
                 Predicted probabilities for each of the super populations. These sum to 1.0.
-              `population_description`, `superpopulation_name`
-                Descriptive names of the population and super population.
+
 
         """
         if not self.valid:
@@ -1914,19 +1904,15 @@ def predict_ancestry(
             )
 
         def max_pop(row):
-            popcode = row["predicted_population_population"]
-            popdesc = row["population_description"]
+            popcode = row["predicted_ancestry_population"]
             poppct = row[popcode]
-            superpopcode = row["predicted_population_superpopulation"]
-            superpopdesc = row["superpopulation_name"]
+            superpopcode = row["predicted_ancestry_superpopulation"]
             superpoppct = row[superpopcode]
 
             return {
                 "population_code": popcode,
-                "population_description": popdesc,
                 "population_percent": poppct,
                 "superpopulation_code": superpopcode,
-                "superpopulation_description": superpopdesc,
                 "superpopulation_percent": superpoppct,
             }
 
@@ -1936,11 +1922,6 @@ def max_pop(row):
             write_predictions,
             models_directory,
             aisnps_directory,
-            n_components,
-            k,
-            thousand_genomes_directory,
-            samples_directory,
-            algorithm,
             aisnps_set,
         )
 

diff --git a/tests/test_snps.py b/tests/test_snps.py
@@ -462,15 +462,13 @@ def test_count_no_snps(self):
             self.assertTrue(snps.snps.empty)
 
     def _make_ancestry_assertions(self, d):
-        self.assertEqual(d["population_code"], "ITU")
-        self.assertEqual(d["population_description"], "Indian Telugu in the UK")
-        self.assertAlmostEqual(d["population_percent"], 0.2992757864426246)
-        self.assertEqual(d["superpopulation_code"], "SAS")
-        self.assertEqual(d["superpopulation_description"], "South Asian Ancestry")
-        self.assertAlmostEqual(d["superpopulation_percent"], 0.827977563875996)
-        self.assertTrue("predicted_population_population" in d["ezancestry_df"].keys())
+        self.assertEqual(d["population_code"], "PUR")
+        self.assertAlmostEqual(d["population_percent"], 0.2661437765657918)
+        self.assertEqual(d["superpopulation_code"], "AMR")
+        self.assertAlmostEqual(d["superpopulation_percent"], 0.9642857142857143)
+        self.assertTrue("predicted_ancestry_population" in d["ezancestry_df"].keys())
         self.assertTrue(
-            "predicted_population_superpopulation" in d["ezancestry_df"].keys()
+            "predicted_ancestry_superpopulation" in d["ezancestry_df"].keys()
         )
 
     def test_ancestry(self):
@@ -496,12 +494,10 @@ def pop_modules(modules):
         sys.modules["ezancestry.commands"].predict = Mock(
             return_value=pd.DataFrame(
                 {
-                    "predicted_population_population": ["ITU"],
-                    "population_description": ["Indian Telugu in the UK"],
-                    "ITU": [0.2992757864426246],
-                    "predicted_population_superpopulation": ["SAS"],
-                    "superpopulation_name": ["South Asian Ancestry"],
-                    "SAS": [0.827977563875996],
+                    "predicted_ancestry_population": ["PUR"],
+                    "PUR": [0.2661437765657918],
+                    "predicted_ancestry_superpopulation": ["AMR"],
+                    "AMR": [0.9642857142857143],
                 }
             )
         )
@@ -1080,6 +1076,9 @@ def test_merge_chrom(self):
 
 class TestDeprecatedMethods(TestSnps):
     def run_deprecated_test(self, f, msg):
+        if pd.__version__ == "1.5.3":
+            self.skipTest("Skipping test for pandas version 1.5.3")
+
         with warnings.catch_warnings(record=True) as w:
             warnings.simplefilter("always")
             f()