From f98387dc2224a2e413254f8eb18376d8dbc125c6 Mon Sep 17 00:00:00 2001 From: Chris Little Date: Mon, 20 May 2019 21:43:29 -0700 Subject: [PATCH] formatting --- tests/distance/test_distance_dennis.py | 44 ++++-- tests/tokenizer/test_tokenizer__tokenizer.py | 139 ++++++++++--------- tests/tokenizer/test_tokenizer_q_grams.py | 28 ++-- 3 files changed, 118 insertions(+), 93 deletions(-) diff --git a/tests/distance/test_distance_dennis.py b/tests/distance/test_distance_dennis.py index 62ce2313a..ff352c586 100644 --- a/tests/distance/test_distance_dennis.py +++ b/tests/distance/test_distance_dennis.py @@ -51,7 +51,9 @@ def test_dennis_sim(self): self.assertAlmostEqual(self.cmp.sim('abc', ''), 0.3333333333333333) self.assertAlmostEqual(self.cmp.sim('', 'abc'), 0.3333333333333333) self.assertAlmostEqual(self.cmp.sim('abc', 'abc'), 0.9965986394557826) - self.assertAlmostEqual(self.cmp.sim('abcd', 'efgh'), 0.32908163265306134) + self.assertAlmostEqual( + self.cmp.sim('abcd', 'efgh'), 0.32908163265306134 + ) self.assertAlmostEqual(self.cmp.sim('Nigel', 'Niall'), 0.6615646259) self.assertAlmostEqual(self.cmp.sim('Niall', 'Nigel'), 0.6615646259) @@ -65,9 +67,15 @@ def test_dennis_sim(self): self.assertAlmostEqual(self.cmp_no_d.sim('', ''), 0.3333333333333333) self.assertAlmostEqual(self.cmp_no_d.sim('a', ''), 0.3333333333333333) self.assertAlmostEqual(self.cmp_no_d.sim('', 'a'), 0.3333333333333333) - self.assertAlmostEqual(self.cmp_no_d.sim('abc', ''), 0.3333333333333333) - self.assertAlmostEqual(self.cmp_no_d.sim('', 'abc'), 0.3333333333333333) - self.assertAlmostEqual(self.cmp_no_d.sim('abc', 'abc'), 0.3333333333333333) + self.assertAlmostEqual( + self.cmp_no_d.sim('abc', ''), 0.3333333333333333 + ) + self.assertAlmostEqual( + self.cmp_no_d.sim('', 'abc'), 0.3333333333333333 + ) + self.assertAlmostEqual( + self.cmp_no_d.sim('abc', 'abc'), 0.3333333333333333 + ) self.assertAlmostEqual(self.cmp_no_d.sim('abcd', 'efgh'), 0.0) self.assertAlmostEqual( @@ -94,8 +102,12 @@ def test_dennis_dist(self): self.assertAlmostEqual(self.cmp.dist('', 'a'), 0.6666666666666667) self.assertAlmostEqual(self.cmp.dist('abc', ''), 0.6666666666666667) self.assertAlmostEqual(self.cmp.dist('', 'abc'), 0.6666666666666667) - self.assertAlmostEqual(self.cmp.dist('abc', 'abc'), 0.003401360544217358) - self.assertAlmostEqual(self.cmp.dist('abcd', 'efgh'), 0.6709183673469387) + self.assertAlmostEqual( + self.cmp.dist('abc', 'abc'), 0.003401360544217358 + ) + self.assertAlmostEqual( + self.cmp.dist('abcd', 'efgh'), 0.6709183673469387 + ) self.assertAlmostEqual(self.cmp.dist('Nigel', 'Niall'), 0.3384353741) self.assertAlmostEqual(self.cmp.dist('Niall', 'Nigel'), 0.3384353741) @@ -109,9 +121,15 @@ def test_dennis_dist(self): self.assertAlmostEqual(self.cmp_no_d.dist('', ''), 0.6666666666666667) self.assertAlmostEqual(self.cmp_no_d.dist('a', ''), 0.6666666666666667) self.assertAlmostEqual(self.cmp_no_d.dist('', 'a'), 0.6666666666666667) - self.assertAlmostEqual(self.cmp_no_d.dist('abc', ''), 0.6666666666666667) - self.assertAlmostEqual(self.cmp_no_d.dist('', 'abc'), 0.6666666666666667) - self.assertAlmostEqual(self.cmp_no_d.dist('abc', 'abc'), 0.6666666666666667) + self.assertAlmostEqual( + self.cmp_no_d.dist('abc', ''), 0.6666666666666667 + ) + self.assertAlmostEqual( + self.cmp_no_d.dist('', 'abc'), 0.6666666666666667 + ) + self.assertAlmostEqual( + self.cmp_no_d.dist('abc', 'abc'), 0.6666666666666667 + ) self.assertAlmostEqual(self.cmp_no_d.dist('abcd', 'efgh'), 1.0) self.assertAlmostEqual( @@ -138,7 +156,9 @@ def test_dennis_sim_score(self): self.assertEqual(self.cmp.sim_score('', 'a'), 0.0) self.assertEqual(self.cmp.sim_score('abc', ''), 0.0) self.assertEqual(self.cmp.sim_score('', 'abc'), 0.0) - self.assertAlmostEqual(self.cmp.sim_score('abc', 'abc'), 27.85714285714286) + self.assertAlmostEqual( + self.cmp.sim_score('abc', 'abc'), 27.85714285714286 + ) self.assertAlmostEqual( self.cmp.sim_score('abcd', 'efgh'), -0.17857142857142858 ) @@ -187,7 +207,9 @@ def test_dennis_corr(self): self.assertEqual(self.cmp.corr('abc', ''), 0.0) self.assertEqual(self.cmp.corr('', 'abc'), 0.0) self.assertAlmostEqual(self.cmp.corr('abc', 'abc'), 0.994897959183674) - self.assertAlmostEqual(self.cmp.corr('abcd', 'efgh'), -0.006377551020408) + self.assertAlmostEqual( + self.cmp.corr('abcd', 'efgh'), -0.006377551020408 + ) self.assertAlmostEqual(self.cmp.corr('Nigel', 'Niall'), 0.4923469388) self.assertAlmostEqual(self.cmp.corr('Niall', 'Nigel'), 0.4923469388) diff --git a/tests/tokenizer/test_tokenizer__tokenizer.py b/tests/tokenizer/test_tokenizer__tokenizer.py index c2ae01d92..3403f2b08 100644 --- a/tests/tokenizer/test_tokenizer__tokenizer.py +++ b/tests/tokenizer/test_tokenizer__tokenizer.py @@ -112,75 +112,76 @@ def test__tokenizer(self): nelson_log = QSkipgrams(qval=3, scaler=log1p).tokenize('NELSON') gold_standard = Counter( - { - '$$N': 1.0986122886681096, - '$$E': 0.6931471805599453, - '$$L': 0.6931471805599453, - '$$S': 0.6931471805599453, - '$$O': 0.6931471805599453, - '$$#': 1.0986122886681096, - '$NE': 1.0986122886681096, - '$NL': 1.0986122886681096, - '$NS': 1.0986122886681096, - '$NO': 1.0986122886681096, - '$NN': 1.0986122886681096, - '$N#': 2.1972245773362196, - '$EL': 1.0986122886681096, - '$ES': 1.0986122886681096, - '$EO': 1.0986122886681096, - '$EN': 1.0986122886681096, - '$E#': 1.6094379124341003, - '$LS': 1.0986122886681096, - '$LO': 1.0986122886681096, - '$LN': 1.0986122886681096, - '$L#': 1.6094379124341003, - '$SO': 1.0986122886681096, - '$SN': 1.0986122886681096, - '$S#': 1.6094379124341003, - '$ON': 1.0986122886681096, - '$O#': 1.6094379124341003, - '$##': 1.0986122886681096, - 'NEL': 0.6931471805599453, - 'NES': 0.6931471805599453, - 'NEO': 0.6931471805599453, - 'NEN': 0.6931471805599453, - 'NE#': 1.0986122886681096, - 'NLS': 0.6931471805599453, - 'NLO': 0.6931471805599453, - 'NLN': 0.6931471805599453, - 'NL#': 1.0986122886681096, - 'NSO': 0.6931471805599453, - 'NSN': 0.6931471805599453, - 'NS#': 1.0986122886681096, - 'NON': 0.6931471805599453, - 'NO#': 1.0986122886681096, - 'NN#': 1.0986122886681096, - 'N##': 1.0986122886681096, - 'ELS': 0.6931471805599453, - 'ELO': 0.6931471805599453, - 'ELN': 0.6931471805599453, - 'EL#': 1.0986122886681096, - 'ESO': 0.6931471805599453, - 'ESN': 0.6931471805599453, - 'ES#': 1.0986122886681096, - 'EON': 0.6931471805599453, - 'EO#': 1.0986122886681096, - 'EN#': 1.0986122886681096, - 'E##': 0.6931471805599453, - 'LSO': 0.6931471805599453, - 'LSN': 0.6931471805599453, - 'LS#': 1.0986122886681096, - 'LON': 0.6931471805599453, - 'LO#': 1.0986122886681096, - 'LN#': 1.0986122886681096, - 'L##': 0.6931471805599453, - 'SON': 0.6931471805599453, - 'SO#': 1.0986122886681096, - 'SN#': 1.0986122886681096, - 'S##': 0.6931471805599453, - 'ON#': 1.0986122886681096, - 'O##': 0.6931471805599453, - }) + { + '$$N': 1.0986122886681096, + '$$E': 0.6931471805599453, + '$$L': 0.6931471805599453, + '$$S': 0.6931471805599453, + '$$O': 0.6931471805599453, + '$$#': 1.0986122886681096, + '$NE': 1.0986122886681096, + '$NL': 1.0986122886681096, + '$NS': 1.0986122886681096, + '$NO': 1.0986122886681096, + '$NN': 1.0986122886681096, + '$N#': 2.1972245773362196, + '$EL': 1.0986122886681096, + '$ES': 1.0986122886681096, + '$EO': 1.0986122886681096, + '$EN': 1.0986122886681096, + '$E#': 1.6094379124341003, + '$LS': 1.0986122886681096, + '$LO': 1.0986122886681096, + '$LN': 1.0986122886681096, + '$L#': 1.6094379124341003, + '$SO': 1.0986122886681096, + '$SN': 1.0986122886681096, + '$S#': 1.6094379124341003, + '$ON': 1.0986122886681096, + '$O#': 1.6094379124341003, + '$##': 1.0986122886681096, + 'NEL': 0.6931471805599453, + 'NES': 0.6931471805599453, + 'NEO': 0.6931471805599453, + 'NEN': 0.6931471805599453, + 'NE#': 1.0986122886681096, + 'NLS': 0.6931471805599453, + 'NLO': 0.6931471805599453, + 'NLN': 0.6931471805599453, + 'NL#': 1.0986122886681096, + 'NSO': 0.6931471805599453, + 'NSN': 0.6931471805599453, + 'NS#': 1.0986122886681096, + 'NON': 0.6931471805599453, + 'NO#': 1.0986122886681096, + 'NN#': 1.0986122886681096, + 'N##': 1.0986122886681096, + 'ELS': 0.6931471805599453, + 'ELO': 0.6931471805599453, + 'ELN': 0.6931471805599453, + 'EL#': 1.0986122886681096, + 'ESO': 0.6931471805599453, + 'ESN': 0.6931471805599453, + 'ES#': 1.0986122886681096, + 'EON': 0.6931471805599453, + 'EO#': 1.0986122886681096, + 'EN#': 1.0986122886681096, + 'E##': 0.6931471805599453, + 'LSO': 0.6931471805599453, + 'LSN': 0.6931471805599453, + 'LS#': 1.0986122886681096, + 'LON': 0.6931471805599453, + 'LO#': 1.0986122886681096, + 'LN#': 1.0986122886681096, + 'L##': 0.6931471805599453, + 'SON': 0.6931471805599453, + 'SO#': 1.0986122886681096, + 'SN#': 1.0986122886681096, + 'S##': 0.6931471805599453, + 'ON#': 1.0986122886681096, + 'O##': 0.6931471805599453, + } + ) test_counter = nelson_log.get_counter() for key in test_counter: self.assertAlmostEqual(test_counter[key], gold_standard[key]) diff --git a/tests/tokenizer/test_tokenizer_q_grams.py b/tests/tokenizer/test_tokenizer_q_grams.py index 83e0ecac1..af5995563 100644 --- a/tests/tokenizer/test_tokenizer_q_grams.py +++ b/tests/tokenizer/test_tokenizer_q_grams.py @@ -338,19 +338,21 @@ def test_qgrams_counts(self): ) gold_standard = Counter( - { - '$A': 0.6931471805599453, - 'AC': 1.3862943611198906, - 'CA': 1.0986122886681096, - 'AA': 0.6931471805599453, - 'CC': 0.6931471805599453, - 'CT': 0.6931471805599453, - 'TA': 0.6931471805599453, - 'AG': 0.6931471805599453, - 'G#': 0.6931471805599453, - } - ) - test_counter = QGrams(scaler=log1p).tokenize('ACAACACCTAG').get_counter() + { + '$A': 0.6931471805599453, + 'AC': 1.3862943611198906, + 'CA': 1.0986122886681096, + 'AA': 0.6931471805599453, + 'CC': 0.6931471805599453, + 'CT': 0.6931471805599453, + 'TA': 0.6931471805599453, + 'AG': 0.6931471805599453, + 'G#': 0.6931471805599453, + } + ) + test_counter = ( + QGrams(scaler=log1p).tokenize('ACAACACCTAG').get_counter() + ) for key in test_counter: self.assertAlmostEqual(test_counter[key], gold_standard[key])