From 34fba645ff35cd391c87f01a8c394aa4a2f3254f Mon Sep 17 00:00:00 2001
From: Elias Dabbas <eliasdabbas@gmail.com>
Date: Tue, 2 Apr 2024 23:52:19 +0300
Subject: [PATCH] Reformat and lint test_extract.py

---
 tests/test_extract.py | 673 ++++++++++++++++++++++++++++--------------
 1 file changed, 452 insertions(+), 221 deletions(-)

diff --git a/tests/test_extract.py b/tests/test_extract.py
index 535defd3..5de3f411 100644
--- a/tests/test_extract.py
+++ b/tests/test_extract.py
@@ -1,120 +1,250 @@
 import pytest
 
-# from advertools.extract import *  (extract, extract_currency, extract_emoji,
-#                                 extract_exclamations, extract_hashtags,
-#                                 extract_intense_words, extract_mentions,
-#                                 extract_questions, extract_words, extract_urls)
-from advertools.emoji import *
-
-number_posts = ['before123,000', '123after', 'comma 123,456', 'dot 123.234.3',
-                'skip 123-', 'nothing', 'two 123 456,789']
+from advertools.emoji import extract_emoji
+from advertools.extract import (
+    extract,
+    extract_currency,
+    extract_exclamations,
+    extract_hashtags,
+    extract_intense_words,
+    extract_mentions,
+    extract_numbers,
+    extract_questions,
+    extract_urls,
+    extract_words,
+)
+
+number_posts = [
+    "before123,000",
+    "123after",
+    "comma 123,456",
+    "dot 123.234.3",
+    "skip 123-",
+    "nothing",
+    "two 123 456,789",
+]
 
 number_summary = extract_numbers(number_posts)
 
-number_test_keys = ['numbers', 'numbers_flat', 'number_counts',
-                     'number_freq', 'top_numbers', 'overview']
+number_test_keys = [
+    "numbers",
+    "numbers_flat",
+    "number_counts",
+    "number_freq",
+    "top_numbers",
+    "overview",
+]
 
-mention_posts = ['hello @name', 'email@domain.com', '@oneword',
-                 'hi @nam-e and @name', '@first @last', 'an @under_score',
-                 'a @dot.one', 'non latin @مرحبا', 'other at ＠sign',
-                 '@one.@two three', 'number @123text', '@_before @after_']
+mention_posts = [
+    "hello @name",
+    "email@domain.com",
+    "@oneword",
+    "hi @nam-e and @name",
+    "@first @last",
+    "an @under_score",
+    "a @dot.one",
+    "non latin @مرحبا",
+    "other at ＠sign",
+    "@one.@two three",
+    "number @123text",
+    "@_before @after_",
+]
 
 mention_summary = extract_mentions(mention_posts)
 
-mention_test_keys = ['mentions', 'mentions_flat', 'mention_counts',
-                     'mention_freq', 'top_mentions', 'overview']
+mention_test_keys = [
+    "mentions",
+    "mentions_flat",
+    "mention_counts",
+    "mention_freq",
+    "top_mentions",
+    "overview",
+]
 
-hashtag_posts = ['hello #name', 'email#domain.com', '#oneword',
-                 'hi #nam-e and #name', '#first #last', 'an #under_score',
-                 'a #dot.one', 'non latin #مرحبا', 'other hash ＃sign',
-                 '#one.#two three', 'number #123text', '#_before #after_']
+hashtag_posts = [
+    "hello #name",
+    "email#domain.com",
+    "#oneword",
+    "hi #nam-e and #name",
+    "#first #last",
+    "an #under_score",
+    "a #dot.one",
+    "non latin #مرحبا",
+    "other hash ＃sign",
+    "#one.#two three",
+    "number #123text",
+    "#_before #after_",
+]
 
 hashtag_summary = extract_hashtags(hashtag_posts)
 
-hashtag_test_keys = ['hashtags', 'hashtags_flat', 'hashtag_counts',
-                     'hashtag_freq', 'top_hashtags', 'overview']
+hashtag_test_keys = [
+    "hashtags",
+    "hashtags_flat",
+    "hashtag_counts",
+    "hashtag_freq",
+    "top_hashtags",
+    "overview",
+]
 
-emoji_posts = ['one smiley 😀', 'one smiley 😀 one wink 😉', 'no emoji']
+emoji_posts = ["one smiley 😀", "one smiley 😀 one wink 😉", "no emoji"]
 
 emoji_summary = extract_emoji(emoji_posts)
 
-emoji_test_keys = ['emoji', 'emoji_text', 'emoji_flat', 'emoji_flat_text',
-                   'emoji_counts', 'emoji_freq', 'top_emoji', 'top_emoji_text',
-                   'top_emoji_groups', 'top_emoji_sub_groups', 'overview']
+emoji_test_keys = [
+    "emoji",
+    "emoji_text",
+    "emoji_flat",
+    "emoji_flat_text",
+    "emoji_counts",
+    "emoji_freq",
+    "top_emoji",
+    "top_emoji_text",
+    "top_emoji_groups",
+    "top_emoji_sub_groups",
+    "overview",
+]
 
-word_posts = ['today it is raining', 'i like rain and raining',
-              'today it is snowing', 'now snowing and raining',
-              'training is good with snow', 'RaIn and SNoW',
-              'none of the words', '@rain, #snow rain']
+word_posts = [
+    "today it is raining",
+    "i like rain and raining",
+    "today it is snowing",
+    "now snowing and raining",
+    "training is good with snow",
+    "RaIn and SNoW",
+    "none of the words",
+    "@rain, #snow rain",
+]
 
-word_summary_full = extract_words(word_posts, ['rain', 'snow'], True)
+word_summary_full = extract_words(word_posts, ["rain", "snow"], True)
 
-word_summary_not_full = extract_words(word_posts, ['rain', 'snow'], False)
+word_summary_not_full = extract_words(word_posts, ["rain", "snow"], False)
 
-word_test_keys = ['words', 'words_flat', 'word_counts',
-                  'word_freq', 'top_words', 'overview']
+word_test_keys = [
+    "words",
+    "words_flat",
+    "word_counts",
+    "word_freq",
+    "top_words",
+    "overview",
+]
 
-currency_posts = ['$5.0 beginning', 'mid £5.0 price', 'end of str €',
-                  'more $ than £ one', 'other symbols ₣ ₤ ₥ ₦ ₧',
-                  'no symbols']
+currency_posts = [
+    "$5.0 beginning",
+    "mid £5.0 price",
+    "end of str €",
+    "more $ than £ one",
+    "other symbols ₣ ₤ ₥ ₦ ₧",
+    "no symbols",
+]
 
 currency_summary = extract_currency(currency_posts)
 
-currency_test_keys = ['currency_symbols', 'currency_symbols_flat',
-                      'currency_symbol_counts', 'currency_symbol_freq',
-                      'top_currency_symbols', 'overview',
-                      'currency_symbol_names', 'surrounding_text']
+currency_test_keys = [
+    "currency_symbols",
+    "currency_symbols_flat",
+    "currency_symbol_counts",
+    "currency_symbol_freq",
+    "top_currency_symbols",
+    "overview",
+    "currency_symbol_names",
+    "surrounding_text",
+]
 
-intense_word_posts = ['i looooove this', 'goooood mooorning',
-                      'normal text', 'in thhhhhhe middle',
-                      'innnnnn the beginning', 'at the end!!!!!!']
+intense_word_posts = [
+    "i looooove this",
+    "goooood mooorning",
+    "normal text",
+    "in thhhhhhe middle",
+    "innnnnn the beginning",
+    "at the end!!!!!!",
+]
 
 intense_word_summary = extract_intense_words(intense_word_posts)
 
-intense_word_test_keys = ['intense_words', 'intense_words_flat',
-                          'intense_word_counts', 'intense_word_freq',
-                          'top_intense_words', 'overview']
+intense_word_test_keys = [
+    "intense_words",
+    "intense_words_flat",
+    "intense_word_counts",
+    "intense_word_freq",
+    "top_intense_words",
+    "overview",
+]
 
-question_posts = ['how are you?', 'no question', 'no! what about you?',
-                  'Hola, ¿cómo estás?', 'Πώς είσαι;']
+question_posts = [
+    "how are you?",
+    "no question",
+    "no! what about you?",
+    "Hola, ¿cómo estás?",
+    "Πώς είσαι;",
+]
 
 question_summary = extract_questions(question_posts)
 
-question_test_keys = ['question_marks', 'question_marks_flat',
-                      'question_mark_counts', 'question_mark_freq',
-                      'top_question_marks', 'overview',
-                      'question_mark_names', 'question_text']
+question_test_keys = [
+    "question_marks",
+    "question_marks_flat",
+    "question_mark_counts",
+    "question_mark_freq",
+    "top_question_marks",
+    "overview",
+    "question_mark_names",
+    "question_text",
+]
 
-exclamation_posts = ['how dare you!', 'no exclamation', 'no! do not do this!',
-                     '¡Hola!  ¿cómo estás?', 'مرحبا. لا تذهب!']
+exclamation_posts = [
+    "how dare you!",
+    "no exclamation",
+    "no! do not do this!",
+    "¡Hola!  ¿cómo estás?",
+    "مرحبا. لا تذهب!",
+]
 
 exclamation_summary = extract_exclamations(exclamation_posts)
 
-exclamation_test_keys = ['exclamation_marks', 'exclamation_marks_flat',
-                         'exclamation_mark_counts', 'exclamation_mark_freq',
-                         'top_exclamation_marks', 'overview',
-                         'exclamation_mark_names', 'exclamation_text']
+exclamation_test_keys = [
+    "exclamation_marks",
+    "exclamation_marks_flat",
+    "exclamation_mark_counts",
+    "exclamation_mark_freq",
+    "top_exclamation_marks",
+    "overview",
+    "exclamation_mark_names",
+    "exclamation_text",
+]
 
-url_posts = ['one https://www.a.com', 'two www.a.com www.b.com',
-             'nothing', 'long https://example.com/one?a=b#nothing']
+url_posts = [
+    "one https://www.a.com",
+    "two www.a.com www.b.com",
+    "nothing",
+    "long https://example.com/one?a=b#nothing",
+]
 
 url_summary = extract_urls(url_posts)
 
-url_test_keys = ['urls', 'urls_flat', 'url_counts', 'url_freq',
-                 'top_urls', 'overview', 'top_domains', 'top_tlds']
+url_test_keys = [
+    "urls",
+    "urls_flat",
+    "url_counts",
+    "url_freq",
+    "top_urls",
+    "overview",
+    "top_domains",
+    "top_tlds",
+]
 
 test_ids = [
-    'currency',
-    'emoji',
-    'exclamation',
-    'hashtag',
-    'intense',
-    'mention',
-    'number',
-    'question',
-    'word_full',
-    'word_not_full',
-    'url',
+    "currency",
+    "emoji",
+    "exclamation",
+    "hashtag",
+    "intense",
+    "mention",
+    "number",
+    "question",
+    "word_full",
+    "word_not_full",
+    "url",
 ]
 
 
@@ -133,185 +263,285 @@
 ]
 
 
-@pytest.mark.parametrize('summary_key, test_key',
-                         zip([x[0] for x in summaries_keys],
-                             [x[1] for x in summaries_keys]),
-                         ids=test_ids)
+@pytest.mark.parametrize(
+    "summary_key, test_key",
+    zip([x[0] for x in summaries_keys], [x[1] for x in summaries_keys]),
+    ids=test_ids,
+)
 def test_has_correct_keys(summary_key, test_key):
     assert set(summary_key) == set(test_key)
 
 
 summaries = {
-    'currency': currency_summary,
-    'emoji': emoji_summary,
-    'exclamation': exclamation_summary,
-    'hashtag': hashtag_summary,
-    'intense': intense_word_summary,
-    'mention': mention_summary,
-    'number': number_summary,
-    'question': question_summary,
-    'word_full': word_summary_full,
-    'word_not_full': word_summary_not_full,
-    'url': url_summary,
+    "currency": currency_summary,
+    "emoji": emoji_summary,
+    "exclamation": exclamation_summary,
+    "hashtag": hashtag_summary,
+    "intense": intense_word_summary,
+    "mention": mention_summary,
+    "number": number_summary,
+    "question": question_summary,
+    "word_full": word_summary_full,
+    "word_not_full": word_summary_not_full,
+    "url": url_summary,
 }
 
 
-summary_counts = {k: [d[key] for key in d if 'count' in key][0]
-                  for k, d in summaries.items()}
+summary_counts = {
+    k: [d[key] for key in d if "count" in key][0] for k, d in summaries.items()
+}
 test_counts = {
-    'currency': [1, 1, 1, 2, 5, 0],
-    'emoji': [1, 2, 0],
-    'exclamation': [1, 0, 2, 2, 1],
-    'hashtag': [1, 0, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2],
-    'intense': [1, 2, 0, 1, 1, 1],
-    'mention': [1, 0, 1, 2, 2, 1, 1, 0, 1, 2, 1, 2],
-    'number': [1, 1, 1, 1, 10, 2],
-    'question': [1, 0, 1, 2, 1],
-    'word_full': [0, 1, 0, 0, 1, 2, 0, 3],
-    'word_not_full': [1, 2, 1, 2, 2, 2, 0, 3],
-    'url': [1, 2, 0, 1]
+    "currency": [1, 1, 1, 2, 5, 0],
+    "emoji": [1, 2, 0],
+    "exclamation": [1, 0, 2, 2, 1],
+    "hashtag": [1, 0, 1, 2, 2, 1, 1, 1, 1, 2, 1, 2],
+    "intense": [1, 2, 0, 1, 1, 1],
+    "mention": [1, 0, 1, 2, 2, 1, 1, 0, 1, 2, 1, 2],
+    "number": [1, 1, 1, 1, 10, 2],
+    "question": [1, 0, 1, 2, 1],
+    "word_full": [0, 1, 0, 0, 1, 2, 0, 3],
+    "word_not_full": [1, 2, 1, 2, 2, 2, 0, 3],
+    "url": [1, 2, 0, 1],
 }
 
 
-@pytest.mark.parametrize('summary_count, test_count',
-                         zip(sorted(summary_counts.items()),
-                             sorted(test_counts.items())),
-                         ids=test_ids)
+@pytest.mark.parametrize(
+    "summary_count, test_count",
+    zip(sorted(summary_counts.items()), sorted(test_counts.items())),
+    ids=test_ids,
+)
 def test_has_correct_counts(summary_count, test_count):
     assert summary_count == summary_count
 
 
-summary_freqs = {k: [d[key] for key in d if 'freq' in key][0]
-                 for k, d in summaries.items()}
+summary_freqs = {
+    k: [d[key] for key in d if "freq" in key][0] for k, d in summaries.items()
+}
 
 test_freqs = {
-    'currency': [(0, 1), (1, 3), (2, 1), (5, 1)],
-    'emoji': [(0, 1), (1, 1), (2, 1)],
-    'exclamation': [(0, 1), (1, 2), (2, 2)],
-    'hashtag': [(0, 1), (1, 7), (2, 4)],
-    'intense': [(0, 1), (1, 4), (2, 1)],
-    'mention': [(0, 2), (1, 6), (2, 4)],
-    'number': [(0, 1), (1, 5), (2, 1)],
-    'question': [(0, 1), (1, 3), (2, 1)],
-    'word_full': [(0, 4), (1, 2), (2, 1), (3, 1)],
-    'word_not_full': [(0, 1), (1, 2), (2, 4), (3, 1)],
-    'url': [(0, 1), (1, 2), (2, 1)],
+    "currency": [(0, 1), (1, 3), (2, 1), (5, 1)],
+    "emoji": [(0, 1), (1, 1), (2, 1)],
+    "exclamation": [(0, 1), (1, 2), (2, 2)],
+    "hashtag": [(0, 1), (1, 7), (2, 4)],
+    "intense": [(0, 1), (1, 4), (2, 1)],
+    "mention": [(0, 2), (1, 6), (2, 4)],
+    "number": [(0, 1), (1, 5), (2, 1)],
+    "question": [(0, 1), (1, 3), (2, 1)],
+    "word_full": [(0, 4), (1, 2), (2, 1), (3, 1)],
+    "word_not_full": [(0, 1), (1, 2), (2, 4), (3, 1)],
+    "url": [(0, 1), (1, 2), (2, 1)],
 }
 
 
-@pytest.mark.parametrize('summary_freq, test_freq',
-                         zip(sorted(summary_freqs.items()),
-                             sorted(test_freqs.items())),
-                         ids=test_ids)
+@pytest.mark.parametrize(
+    "summary_freq, test_freq",
+    zip(sorted(summary_freqs.items()), sorted(test_freqs.items())),
+    ids=test_ids,
+)
 def test_has_correct_freq(summary_freq, test_freq):
     assert summary_freq == summary_freq
 
 
-summary_flats = {k: [d[key] for key in d if 'flat' in key
-                     and 'emoji_flat_text' not in key][0]
-                 for k, d in summaries.items()}
+summary_flats = {
+    k: [d[key] for key in d if "flat" in key and "emoji_flat_text" not in key][0]
+    for k, d in summaries.items()
+}
 test_flats = {
-    'currency': ['$', '£', '€', '$', '£', '₣', '₤', '₥', '₦', '₧'],
-    'emoji': ['😀', '😀', '😉'],
-    'exclamation': ['!', '!', '!', '¡', '!', '!'],
-    'hashtag': ['#name', '#oneword', '#nam', '#name', '#first', '#last',
-                '#under_score', '#dot', '#مرحبا', '＃sign', '#one',
-                '#two', '#123text', '#_before', '#after_'],
-    'intense': ['looooove', 'goooood', 'mooorning', 'thhhhhhe',
-                'innnnnn', 'end!!!!!!'],
-    'mention': ['@name', '@oneword', '@nam', '@name', '@first',
-                '@last', '@under_score', '@dot', '＠sign', '@one',
-                '@two', '@123text', '@_before', '@after_'],
-    'number': ['123,000', '123', '123,456', '123.234.3', '123',
-               '123', '456,789'],
-    'question': ['?', '?', '¿', '?', ';'],
-    'word_full': ['rain', 'snow', 'rain', 'snow', 'rain', 'snow', 'rain'],
-    'word_not_full': ['raining', 'rain', 'raining', 'snowing',
-                      'snowing', 'raining', 'training', 'snow',
-                      'rain', 'snow', '@rain,', '#snow', 'rain'],
-    'url': ['https://www.a.com', 'http://www.a.com',
-            'http://www.b.com', 'https://example.com/one?a=b#nothing']
+    "currency": ["$", "£", "€", "$", "£", "₣", "₤", "₥", "₦", "₧"],
+    "emoji": ["😀", "😀", "😉"],
+    "exclamation": ["!", "!", "!", "¡", "!", "!"],
+    "hashtag": [
+        "#name",
+        "#oneword",
+        "#nam",
+        "#name",
+        "#first",
+        "#last",
+        "#under_score",
+        "#dot",
+        "#مرحبا",
+        "＃sign",
+        "#one",
+        "#two",
+        "#123text",
+        "#_before",
+        "#after_",
+    ],
+    "intense": ["looooove", "goooood", "mooorning", "thhhhhhe", "innnnnn", "end!!!!!!"],
+    "mention": [
+        "@name",
+        "@oneword",
+        "@nam",
+        "@name",
+        "@first",
+        "@last",
+        "@under_score",
+        "@dot",
+        "＠sign",
+        "@one",
+        "@two",
+        "@123text",
+        "@_before",
+        "@after_",
+    ],
+    "number": ["123,000", "123", "123,456", "123.234.3", "123", "123", "456,789"],
+    "question": ["?", "?", "¿", "?", ";"],
+    "word_full": ["rain", "snow", "rain", "snow", "rain", "snow", "rain"],
+    "word_not_full": [
+        "raining",
+        "rain",
+        "raining",
+        "snowing",
+        "snowing",
+        "raining",
+        "training",
+        "snow",
+        "rain",
+        "snow",
+        "@rain,",
+        "#snow",
+        "rain",
+    ],
+    "url": [
+        "https://www.a.com",
+        "http://www.a.com",
+        "http://www.b.com",
+        "https://example.com/one?a=b#nothing",
+    ],
 }
 
 
-@pytest.mark.parametrize('summary_flat, test_flat',
-                         zip(sorted(summary_flats.items()),
-                             sorted(test_flats.items())),
-                         ids=test_ids)
+@pytest.mark.parametrize(
+    "summary_flat, test_flat",
+    zip(sorted(summary_flats.items()), sorted(test_flats.items())),
+    ids=test_ids,
+)
 def test_has_correct_flat(summary_flat, test_flat):
     assert summary_flat == test_flat
 
 
-summary_tops = {k: [d[key] for key in d if 'top' in key]
-                for k, d in summaries.items()}
+summary_tops = {k: [d[key] for key in d if "top" in key] for k, d in summaries.items()}
 
-summary_tops = {k: d[0] if len(d) == 1 else d
-                for k, d in summary_tops.items()}
+summary_tops = {k: d[0] if len(d) == 1 else d for k, d in summary_tops.items()}
 
 test_tops = {
-    'currency': [('$', 2), ('£', 2), ('€', 1), ('₣', 1), ('₤', 1),
-                 ('₥', 1), ('₦', 1), ('₧', 1)],
-    'emoji': [[('😀', 2), ('😉', 1)],
-              [('grinning face', 2), ('winking face', 1)],
-              [('Smileys & Emotion', 3)],
-              [('face-smiling', 3)]],
-    'exclamation': [('!', 5), ('¡', 1)],
-    'hashtag': [('#name', 2), ('#oneword', 1), ('#nam', 1),
-                ('#first', 1), ('#last', 1), ('#under_score', 1),
-                ('#dot', 1), ('#مرحبا', 1), ('＃sign', 1), ('#one', 1),
-                ('#two', 1), ('#123text', 1), ('#_before', 1),
-                ('#after_', 1)],
-    'intense': [('looooove', 1), ('goooood', 1), ('mooorning', 1),
-                ('thhhhhhe', 1), ('innnnnn', 1), ('end!!!!!!', 1)],
-    'mention': [('@name', 2), ('@oneword', 1), ('@nam', 1), ('@first', 1),
-                ('@last', 1), ('@under_score', 1), ('@dot', 1),
-                ('＠sign', 1), ('@one', 1), ('@two', 1), ('@123text', 1),
-                ('@_before', 1), ('@after_', 1)],
-    'number': [('123', 3), ('123,000', 1), ('123,456', 1), ('123.234.3', 1),
-               ('456,789', 1)],
-    'question': [('?', 3), ('¿', 1), (';', 1)],
-    'word_full': [('rain', 4), ('snow', 3)],
-    'word_not_full': [('raining', 3), ('rain', 3), ('snowing', 2),
-                      ('snow', 2), ('training', 1), ('@rain,', 1),
-                      ('#snow', 1)],
-    'url': [[('https://www.a.com', 1), ('http://www.a.com', 1),
-             ('http://www.b.com', 1),
-             ('https://example.com/one?a=b#nothing', 1)],
-            [('www.a.com', 2), ('www.b.com', 1), ('example.com', 1)],
-            [('com', 4)]]
+    "currency": [
+        ("$", 2),
+        ("£", 2),
+        ("€", 1),
+        ("₣", 1),
+        ("₤", 1),
+        ("₥", 1),
+        ("₦", 1),
+        ("₧", 1),
+    ],
+    "emoji": [
+        [("😀", 2), ("😉", 1)],
+        [("grinning face", 2), ("winking face", 1)],
+        [("Smileys & Emotion", 3)],
+        [("face-smiling", 3)],
+    ],
+    "exclamation": [("!", 5), ("¡", 1)],
+    "hashtag": [
+        ("#name", 2),
+        ("#oneword", 1),
+        ("#nam", 1),
+        ("#first", 1),
+        ("#last", 1),
+        ("#under_score", 1),
+        ("#dot", 1),
+        ("#مرحبا", 1),
+        ("＃sign", 1),
+        ("#one", 1),
+        ("#two", 1),
+        ("#123text", 1),
+        ("#_before", 1),
+        ("#after_", 1),
+    ],
+    "intense": [
+        ("looooove", 1),
+        ("goooood", 1),
+        ("mooorning", 1),
+        ("thhhhhhe", 1),
+        ("innnnnn", 1),
+        ("end!!!!!!", 1),
+    ],
+    "mention": [
+        ("@name", 2),
+        ("@oneword", 1),
+        ("@nam", 1),
+        ("@first", 1),
+        ("@last", 1),
+        ("@under_score", 1),
+        ("@dot", 1),
+        ("＠sign", 1),
+        ("@one", 1),
+        ("@two", 1),
+        ("@123text", 1),
+        ("@_before", 1),
+        ("@after_", 1),
+    ],
+    "number": [
+        ("123", 3),
+        ("123,000", 1),
+        ("123,456", 1),
+        ("123.234.3", 1),
+        ("456,789", 1),
+    ],
+    "question": [("?", 3), ("¿", 1), (";", 1)],
+    "word_full": [("rain", 4), ("snow", 3)],
+    "word_not_full": [
+        ("raining", 3),
+        ("rain", 3),
+        ("snowing", 2),
+        ("snow", 2),
+        ("training", 1),
+        ("@rain,", 1),
+        ("#snow", 1),
+    ],
+    "url": [
+        [
+            ("https://www.a.com", 1),
+            ("http://www.a.com", 1),
+            ("http://www.b.com", 1),
+            ("https://example.com/one?a=b#nothing", 1),
+        ],
+        [("www.a.com", 2), ("www.b.com", 1), ("example.com", 1)],
+        [("com", 4)],
+    ],
 }
 
 
 def make_hashable(iterable):
     """Some sub-elements are lists and need to be converted sorted tuples."""
-    return tuple(tuple(sorted(x)) if isinstance(x, list)else
-                 x for x in iterable)
+    return tuple(tuple(sorted(x)) if isinstance(x, list) else x for x in iterable)
 
 
-@pytest.mark.parametrize('summary_top, test_top',
-                         zip(sorted(summary_tops.items()),
-                             sorted(test_tops.items())),
-                         ids=test_ids)
+@pytest.mark.parametrize(
+    "summary_top, test_top",
+    zip(sorted(summary_tops.items()), sorted(test_tops.items())),
+    ids=test_ids,
+)
 def test_has_correct_top(summary_top, test_top):
-    assert (set(make_hashable(summary_top[1])) ==
-            set(make_hashable(test_top[1])))
+    assert set(make_hashable(summary_top[1])) == set(make_hashable(test_top[1]))
 
 
-summary_overviews = {k: [d[key] for key in d if 'overview' in key][0]
-                     for k, d in summaries.items()}
+summary_overviews = {
+    k: [d[key] for key in d if "overview" in key][0] for k, d in summaries.items()
+}
 
 test_overviews = {
-    'currency': [6, 10, 10/6, 8],
-    'emoji': [3, 3, 1.0, 2],
-    'exclamation': [5, 6, 1.2, 2],
-    'hashtag': [12, 15, 1.25, 14],
-    'intense': [6, 6, 1.0, 6],
-    'mention': [12, 14, 14/12, 13],
-    'number': [7, 7, 7/7, 5],
-    'question': [5, 5, 1.0, 3],
-    'word_full': [8, 7, 7/8, 2],
-    'word_not_full': [8, 13, 13/8, 7],
-    'url': [4, 4, 1.0, 4],
+    "currency": [6, 10, 10 / 6, 8],
+    "emoji": [3, 3, 1.0, 2],
+    "exclamation": [5, 6, 1.2, 2],
+    "hashtag": [12, 15, 1.25, 14],
+    "intense": [6, 6, 1.0, 6],
+    "mention": [12, 14, 14 / 12, 13],
+    "number": [7, 7, 7 / 7, 5],
+    "question": [5, 5, 1.0, 3],
+    "word_full": [8, 7, 7 / 8, 2],
+    "word_not_full": [8, 13, 13 / 8, 7],
+    "url": [4, 4, 1.0, 4],
 }
 
 
@@ -319,40 +549,41 @@ def dict2overview_list(d):
     """Convert an overview dict to a list based on its keys."""
     result = [0, 0, 0, 0]
     for key in d:
-        if 'num_posts' in key:
+        if "num_posts" in key:
             result[0] = d[key]
-        if 'num_' in key and 'post' not in key:
+        if "num_" in key and "post" not in key:
             result[1] = d[key]
-        if 'per' in key:
+        if "per" in key:
             result[2] = d[key]
-        if 'unique' in key:
+        if "unique" in key:
             result[3] = d[key]
     return result
 
 
-@pytest.mark.parametrize('summary_overview, test_overview',
-                         zip(sorted(summary_overviews.items()),
-                             sorted(test_overviews.items())),
-                         ids=test_ids)
+@pytest.mark.parametrize(
+    "summary_overview, test_overview",
+    zip(sorted(summary_overviews.items()), sorted(test_overviews.items())),
+    ids=test_ids,
+)
 def test_has_correct_overview(summary_overview, test_overview):
     assert dict2overview_list(summary_overview[1]) == test_overview[1]
 
 
 def test_extract_puts_str_in_list():
-    result = extract('#one #two #three', regex=r'#\w+', key_name='hashtag')
-    assert result['hashtags'] == ['#one #two #three'.split()]
+    result = extract("#one #two #three", regex=r"#\w+", key_name="hashtag")
+    assert result["hashtags"] == ["#one #two #three".split()]
 
 
 def test_extract_words_puts_str_in_list():
-    word_summary_str = extract_words(word_posts, 'rain',  True)
-    assert word_summary_str['top_words'][0][0] == 'rain'
+    word_summary_str = extract_words(word_posts, "rain", True)
+    assert word_summary_str["top_words"][0][0] == "rain"
 
 
 def test_extract_numbers_works_without_separators():
-    result = extract_numbers('123,456 hello ', number_separators=None)
-    assert result['numbers'] == [['123', '456']]
+    result = extract_numbers("123,456 hello ", number_separators=None)
+    assert result["numbers"] == [["123", "456"]]
 
 
 def test_extract_numbers_handles_dash_in_the_middle_of_seps():
-    result = extract_numbers('123,456-789', number_separators=('.', '-', ','))
-    assert result['numbers'] == [['123,456-789']]
+    result = extract_numbers("123,456-789", number_separators=(".", "-", ","))
+    assert result["numbers"] == [["123,456-789"]]