In [1]:
import os
os.environ["OPENBLAS_NUM_THREADS"] = "1"
os.environ["OMP_NUM_THREADS"] = "1"
from bertopic import BERTopic
from hdbscan import HDBSCAN
from umap import UMAP
from sentence_transformers import SentenceTransformer
from sklearn.feature_extraction.text import CountVectorizer
import plotly.express as px
import torch
import pandas as pd
import time
from tqdm import tqdm
from glob import glob
import numpy as np
import sqlite3
from glob import glob
import matplotlib.pyplot as plt
from tqdm import tqdm



  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the extracted per-channel message archives.
#
# Instead of commenting/uncommenting lines when switching machines, export
# TELEGRAM_EXTRACTED_DIR before starting the kernel, e.g. on a laptop:
#     TELEGRAM_EXTRACTED_DIR=../material/extracted
# (the equivalent of os.path.join("..", "material", "extracted")).
#
# When the variable is unset or empty, the Jupyter-cluster path is used,
# exactly as before.
extracted_dir = os.environ.get("TELEGRAM_EXTRACTED_DIR") or os.path.expanduser(
    "~/telegram_2024/usc-tg-24-us-election/extracted")

In [3]:
# ========================
# 1. Read chats.db (SQLite)
# ========================
chats_path = '../material/chats.db'
conn = sqlite3.connect(chats_path)
try:
    # List the tables first so the output documents what the database contains.
    cursor = conn.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    tables = cursor.fetchall()

    print("Tables in DB:", tables)
    try:
        df_chats = pd.read_sql_query("SELECT * FROM chats", conn)
        # Keep one row per chat identifier.
        df_chats = df_chats.drop_duplicates(subset='type_and_id')
        print("number of unique chats", len(df_chats))
        print("chats.db - Tabella 'chats'")
        print(df_chats.head())
    except Exception as e:
        # Best effort: report the problem and keep going.
        # NOTE: df_chats stays undefined when this branch is taken.
        print("Errore nel leggere la tabella:", e)
finally:
    # Always release the connection, even if one of the queries above raised.
    conn.close()

# ========================
# 2. Read discovery_edges.csv.gz
# ========================
try:
    df_edges = pd.read_csv('../material/discovery_edges.csv.gz')
    df_edges = df_edges.drop_duplicates(subset='type_and_id')
    print("✅ discovery_edges.csv.gz, \n"
          "Il timestamp da l'ultima volta che hanno visitato quel gruppo ma questo significa che non è davvero indicativo di una timeline \n")
    print(df_edges.head())
except Exception as e:
    # NOTE: df_edges stays undefined when this branch is taken.
    print("Errore nel leggere discovery_edges:", e)

# ========================
# 3. Read first_nodes.csv.gz
# ========================
try:
    df_first_nodes = pd.read_csv('../material/first_nodes.csv.gz')
    print("number of non unique first nodes", len(df_first_nodes))
    df_first_nodes = df_first_nodes.drop_duplicates(subset='type_and_id')
    print("✅ first_nodes.csv.gz")
    print(df_first_nodes.head())
    print("number of unique first nodes", len(df_first_nodes))
except Exception as e:
    # NOTE: df_first_nodes stays undefined when this branch is taken.
    print("Errore nel leggere first_nodes:", e)


Tables in DB: [('chats',)]
number of unique chats 127141
chats.db - Tabella 'chats'
           type_and_id                   token                  parent  \
0                 None  [keyword] thedemocrats                    None   
40  channel_1889806290         thedemocratskmf  [keyword] thedemocrats   
41  channel_1413288788       thedemocratsindia  [keyword] thedemocrats   
42  channel_1709284265           thedemocratsa  [keyword] thedemocrats   
43  channel_1807294270             democratsmv  [keyword] thedemocrats   

       timestamp  
0   1.722583e+09  
40  1.728093e+09  
41  1.728093e+09  
42  1.728093e+09  
43  1.728093e+09  
✅ discovery_edges.csv.gz, 
Il timestamp da l'ultima volta che hanno visitato quel gruppo ma questo significa che non è davvero indicativo di una timeline 

          type_and_id              parent     timestamp
0  channel_1306559115  channel_1840578235  1.722586e+09
1  channel_2036850729  channel_1840578235  1.722586e+09
2  channel_1941222046  channel_18

In [4]:
# Sanity check on the identifier column of df_first_nodes: number of distinct
# ids, number of rows, and number of missing ids.
# sep="" reproduces the original concatenated output (no space before the number).
print("type_and_id unique in df_first_nodes", df_first_nodes['type_and_id'].nunique(), sep="")
print("type_and_id in df_first_nodes", len(df_first_nodes), sep="")
print("type_and_id NaN in df_first_nodes ", df_first_nodes['type_and_id'].isna().sum(), sep="")

type_and_id unique in df_first_nodes247
type_and_id in df_first_nodes247
type_and_id NaN in df_first_nodes 0


In [5]:
# Text preprocessing utilities:
import os
import re
from typing import Callable, Union

# import spacy
# from sklearn.feature_extraction.text import TfidfVectorizer
# from tqdm import tqdm
from unidecode import unidecode
import langdetect


class PreProcessing:
    """Regex-based text cleaning helpers plus a language-detection wrapper.

    Every text-transforming method accepts either a single string or a list of
    strings and returns an object of the same kind; any other input type yields
    an empty string (see ``_process_text``).

    Args:
        noadverbs (bool, optional): Stored on the instance; no method of this class reads it. Defaults to False.
        noadjectives (bool, optional): Stored on the instance; no method of this class reads it. Defaults to False.
        noverbs (bool, optional): Stored on the instance; no method of this class reads it. Defaults to False.
        noentities (bool, optional): Stored on the instance; no method of this class reads it. Defaults to False.
        language (str, optional): Language code, stored on the instance. Defaults to 'en'.
        remove_list (bool, optional): Stored on the instance; no method of this class reads it. Defaults to False.
        stopwords (list, optional): Initial stopwords. The list is copied, so
            ``append_stopwords_list`` never modifies the caller's list.
            Defaults to None (no stopwords).

    Attributes:
        noadverbs (bool): Value passed to the constructor.
        noadjectives (bool): Value passed to the constructor.
        noverbs (bool): Value passed to the constructor.
        noentities (bool): Value passed to the constructor.
        language (str): Value passed to the constructor.
        remove_list (bool): Value passed to the constructor.
        punctuation (str): Regular expression pattern used by ``remove_punctuation``.
        stopwords (list): Stopwords used by ``remove_stopwords``.

    Methods:
        lowercase_unidecode: Converts text to lowercase and removes diacritics.
        remove_urls: Removes URLs from the text.
        remove_tweet_marking: Removes @mentions and #hashtags from the text.
        remove_html_tags: Removes HTML tags from the text.
        remove_punctuation: Replaces punctuation and line breaks with spaces.
        remove_repetition: Collapses an immediately repeated word.
        append_stopwords_list: Appends additional stopwords to the existing list.
        remove_stopwords: Removes stopwords from the text.
        remove_n: Removes words with length less than or equal to n from the text.
        remove_numbers: Removes digits, or blanks out texts containing digits.
        remove_gerund: Removes a word-final 'ndo'.
        remove_infinitive: Removes a word-final 'r'.
        detect_language: Returns the most probable language code of a text.

    """

    def __init__(self, noadverbs: bool = False, noadjectives: bool = False, noverbs: bool = False,
                 noentities: bool = False, language: str = 'en', remove_list: bool = False, stopwords=None):
        """Initialize the PreProcessing object.

        Args:
            noadverbs (bool, optional): Flag stored as ``self.noadverbs``. Defaults to False.
            noadjectives (bool, optional): Flag stored as ``self.noadjectives``. Defaults to False.
            noverbs (bool, optional): Flag stored as ``self.noverbs``. Defaults to False.
            noentities (bool, optional): Flag stored as ``self.noentities``. Defaults to False.
            language (str, optional): Language code stored as ``self.language``. Defaults to 'en'.
            remove_list (bool, optional): Flag stored as ``self.remove_list``. Defaults to False.
            stopwords (list, optional): Initial stopwords; copied into a new list.
                Defaults to None, meaning an empty list.
        """
        self.noadverbs = noadverbs
        self.noadjectives = noadjectives
        self.noverbs = noverbs
        self.noentities = noentities
        self.language = language
        self.remove_list = remove_list
        # NOTE(review): the final alternative `\\\)` only matches a backslash
        # immediately followed by ')', so a lone backslash is not treated as
        # punctuation. Left unchanged; confirm whether `\\` was intended.
        self.punctuation = (
                r'\(|!|"|#|\$|%|&|\'|\(|\)|\*|\+|,|-|\.|\/|'
                r':|;|<|=|>|\?|\@|\[|\]|\^|_|`|\{|\}|~|\||'
                r'\r\n|\n|\r|\\\)'
        )
        # A fresh list per instance: a mutable default (or the caller's own
        # list) would otherwise be shared and mutated by append_stopwords_list.
        self.stopwords = list(stopwords) if stopwords is not None else []

    def _process_text(self, text: Union[str, list], function: Callable) -> Union[str, list]:
        """Apply ``function`` to a string, or to every element of a list.

        Args:
            text (Union[str, list]): A string or a list of strings.
            function (Callable): Callable taking one string and returning the processed value.

        Returns:
            Union[str, list]: ``function(text)`` for a string, a list of results
            for a list, and an empty string for any other input type.
        """
        if isinstance(text, str):
            return function(text)
        elif isinstance(text, list):
            return [function(x) for x in text]
        return ''

    def lowercase_unidecode(self, text: Union[str, list]) -> Union[str, list]:
        """Convert the given text to lowercase and remove any diacritical marks (accents).

        Args:
            text (Union[str, list]): The text to be processed. It can be either a string or a list of strings.

        Returns:
            Union[str, list]: The processed text. If the input is a string, the output will be a string. If the input is a list,
            the output will be a list of strings.

        Example:
            >>> pre_processor = PreProcessing()
            >>> text = "Café"
            >>> pre_processor.lowercase_unidecode(text)
            'cafe'
        """
        from unidecode import unidecode
        text = self._process_text(text, lambda value: value.lower())
        text = self._process_text(text, unidecode)
        return text

    def remove_urls(self, text: Union[str, list]) -> Union[str, list]:
        """Removes URLs from the given text or list of texts.

        Anything starting with ``http`` up to the next whitespace is removed,
        together with the spaces that follow it; the result is stripped.

        Args:
            text (Union[str, list]): The text or list of texts from which to remove URLs.

        Returns:
            Union[str, list]: The text or list of texts with URLs removed.

        """
        return self._process_text(text, lambda value: re.sub(r'http\S+ *', '', value).strip())

    def remove_tweet_marking(self, text: Union[str, list]) -> Union[str, list]:
        """Removes tweet markings (e.g., @mentions and #hashtags) from the given text.

        The whole token starting with ``@`` or ``#`` is removed, together with
        the spaces that follow it; the result is stripped.

        Args:
            text (Union[str, list]): The text or list of texts to process.

        Returns:
            Union[str, list]: The processed text or list of processed texts with tweet markings removed.
        """
        return self._process_text(text, lambda value: re.sub(r'(@|#)\S+ *', '', value).strip())

    def remove_html_tags(self, text: Union[str, list]) -> Union[str, list]:
        """Removes HTML tags from the given text.

        Args:
            text (Union[str, list]): The text or list of texts to process.

        Returns:
            Union[str, list]: The processed text or list of processed texts with HTML tags removed.
        """
        return self._process_text(text, lambda value: re.sub(r'<.*?> *', '', value).strip())

    def remove_punctuation(self, text: Union[str, list]) -> Union[str, list]:
        """Replaces punctuation and line breaks with spaces, then collapses runs of spaces.

        Args:
            text (Union[str, list]): The text from which punctuation needs to be removed.

        Returns:
            Union[str, list]: The text with punctuation removed.
        """
        text = self._process_text(text, lambda value: re.sub(self.punctuation, ' ', value))
        text = self._process_text(text, lambda value: re.sub(' {2,}', ' ', value).strip())
        return text

    def remove_repetition(self, text: Union[str, list]) -> Union[str, list]:
        """Collapses a word that is immediately repeated into a single occurrence.

        Args:
            text (Union[str, list]): The input text or list of texts.

        Returns:
            Union[str, list]: The processed text with repeated words removed.

        """
        return self._process_text(text, lambda value: re.sub(r'\b(\w+)\s+\1\b', r'\1', value))

    def append_stopwords_list(self, stopwords: list) -> None:
        """Appends additional stopwords to this instance's list of stopwords.

        Parameters:
        stopwords (list): A list of stopwords to be appended.

        """
        self.stopwords.extend(stopwords)

    def remove_stopwords(self, text: Union[str, list]) -> Union[str, list]:
        """Removes stopwords (whole-word matches) from the given text.

        Each stopword is matched literally; the spaces following a removed
        stopword are removed too, and the result is stripped. With an empty
        stopword list the text is only stripped.

        Args:
            text (Union[str, list]): The input text from which stopwords need to be removed.

        Returns:
            Union[str, list]: The processed text with stopwords removed.

        """
        # Escape every word so regex metacharacters cannot break or widen the
        # pattern, and skip empty entries.
        escaped = [re.escape(word) for word in self.stopwords if word]
        if not escaped:
            # An empty alternation `\b()\b *` would match at every word
            # boundary and delete the spaces between words.
            return self._process_text(text, lambda value: value.strip())
        pattern = re.compile(rf'\b({"|".join(escaped)})\b *')
        return self._process_text(text, lambda value: pattern.sub('', value).strip())

    def remove_n(self, text: Union[str, list], n: int) -> Union[str, list]:
        """Removes words of length 1 to n (and one following space) from the given text.

        Args:
            text (Union[str, list]): The input text or list of texts to process.
            n (int): The maximum length of words to remove.

        Returns:
            Union[str, list]: The processed text or list of processed texts.

        """
        return self._process_text(text, lambda value: re.sub(rf'(\b|^)\w{{1,{n}}}(\b|$) ?', '', value).strip())

    def remove_numbers(self, text: Union[str, list], mode: str = 'replace') -> Union[str, list]:
        """Removes digits from the text, or blanks out texts that contain digits.

        Args:
            text (Union[str, list]): The input text or list of texts.
            mode (str, optional): The mode of operation. Defaults to 'replace'.
                - 'filter': A text containing at least one digit becomes an empty string;
                  other texts are returned unchanged.
                - 'replace': Every digit, plus the spaces right after it, is removed.

        Returns:
            Union[str, list]: The processed text or list of processed texts.

        Raises:
            ValueError: If ``mode`` is neither 'filter' nor 'replace'.
        """
        if mode == "filter":
            return self._process_text(text, lambda value: '' if re.search('[0-9]', value) else value)
        if mode == "replace":
            return self._process_text(text, lambda value: re.sub('[0-9] *', '', value))
        # Previously an unknown mode fell through and silently returned None.
        raise ValueError(f"Unknown mode {mode!r}; expected 'filter' or 'replace'.")

    def remove_gerund(self, text: Union[str, list]) -> Union[str, list]:
        """Removes the ending 'ndo' from every word that ends with it.

        Args:
            text (Union[str, list]): The input text or list of texts to process.

        Returns:
            Union[str, list]: The processed text with the 'ndo' endings removed.

        """
        return self._process_text(text, lambda value: re.sub(r'ndo\b', '', value))

    def remove_infinitive(self, text: Union[str, list]) -> Union[str, list]:
        """Removes the final 'r' from every word that ends with it.

        Args:
            text (Union[str, list]): The input text or list of texts to process.

        Returns:
            Union[str, list]: The processed text with the final 'r' removed.

        """
        return self._process_text(text, lambda value: re.sub(r'r\b', '', value))

    def detect_language(self, text, min_prob: float = 0.7):
        """Returns the most probable language of ``text`` according to langdetect.

        Args:
            text: The text to analyse.
            min_prob (float, optional): Minimum probability the best candidate
                needs in order to be accepted. Defaults to 0.7.

        Returns:
            str: The language code of the best candidate, or 'unk' when its
            probability is below ``min_prob`` or langdetect raises
            ``LangDetectException``.
        """
        import langdetect
        try:
            candidates = langdetect.detect_langs(text)
        except langdetect.LangDetectException:
            return 'unk'
        # Map language code -> probability and keep the most probable one.
        langs_dict = {lang.lang: lang.prob for lang in candidates}
        best_lang = max(langs_dict, key=langs_dict.get)
        return best_lang if langs_dict[best_lang] >= min_prob else 'unk'


from spacy.lang.en.stop_words import STOP_WORDS
# Module-level list built from spaCy's STOP_WORDS; it is the default value of
# the `stopwords` parameter of preprocess_text below.
stopwords = list(STOP_WORDS)

# Entry point to call in order to preprocess a single text.
def preprocess_text(text, stopwords=stopwords):
    """Clean one text and detect its language.

    Steps: lowercase + remove diacritics, detect the language, then remove
    URLs, @mentions/#hashtags, stopwords, immediately repeated words,
    punctuation, digits and words of at most 3 characters.

    Args:
        text: The raw text.
        stopwords (list, optional): Stopwords to remove. Defaults to the
            module-level ``stopwords`` list.

    Returns:
        tuple: ``(cleaned_text, language_code)``. ``("", "unk")`` is returned
        when the language cannot be determined or when any step raises.
    """
    try:
        pp = PreProcessing(language='en', stopwords=stopwords)

        text_low = pp.lowercase_unidecode(text)

        lang = pp.detect_language(text_low)
        if lang in ('unk', None):
            return ("", "unk")

        # URLs and @mentions/#hashtags are removed BEFORE stopwords: the
        # stopword pattern also swallows the spaces after a match, so removing
        # stopwords first turns "@all hello" into "@hello" and the real word
        # "hello" is then deleted together with the marker.
        text_clean = pp.remove_urls(text_low)
        text_clean = pp.remove_tweet_marking(text_clean)
        text_clean = pp.remove_stopwords(text_clean)
        text_clean = pp.remove_repetition(text_clean)
        text_clean = pp.remove_punctuation(text_clean)
        text_clean = pp.remove_numbers(text_clean)
        text_clean = pp.remove_n(text_clean, n=3)
        return (text_clean, lang)
    except Exception:
        # Best effort: a text that cannot be processed is reported as unknown
        # instead of aborting the whole batch.
        return ("", "unk")


In [None]:
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import os
import pandas as pd
from glob import glob

os.environ["TOKENIZERS_PARALLELISM"] = "false"

pp2 = PreProcessing(language='en', stopwords=stopwords)

# Cache files: when they exist they are loaded instead of recomputing.
output_path_preprocessed_messages = "../material/preprocessed_messages.tsv.gz"
output_path_channels_without_message = "../material/channels_without_message.tsv.gz"

channels_without_message = []
# True only when this run actually scanned `extracted_dir` for channel files.
channels_scanned = False
# True only when the messages were computed (not loaded from cache) in this run.
messages_computed = False


def process_file(args):
    """Preprocess every message of one channel file.

    Args:
        args (tuple): ``(file, channel_id, token)`` where ``file`` is the path
            of a gzip-compressed TSV with at least the columns 'text' and
            'timestamp'.

    Returns:
        pandas.DataFrame or None: The rows whose preprocessed text is not
        empty, with the extra columns 'text_preprocessed', 'language',
        'channel_id' and 'token'; None when no row survives or the file
        cannot be processed (the error is printed).
    """
    file, channel_id, token = args
    try:
        df = pd.read_csv(file, sep='\t', compression='gzip', usecols=['text', 'timestamp'])
        df = df.dropna(subset=['text'])
        df['text'] = df['text'].astype(str)
        # preprocess_text returns a (clean_text, language) tuple per message.
        pairs = df['text'].apply(preprocess_text)
        df['text_preprocessed'] = [p[0] for p in pairs]
        df['language'] = [p[1] for p in pairs]

        # Keep only messages with some text left; copy so the assignments
        # below act on an independent frame, not on a slice of the original.
        df = df[df['text_preprocessed'] != ""].copy()
        if df.empty:
            return None

        df['channel_id'] = channel_id
        df['token'] = token
        return df
    except Exception as e:
        print(f"--- Error in file {file}: {type(e).__name__}: {e}")
        return None


# If the file exists, load it directly and skip the rest
if os.path.exists(output_path_preprocessed_messages):
    print("--- File already exists: {}".format(output_path_preprocessed_messages))
    df_preprocessed_non_empty_channels = pd.read_csv(output_path_preprocessed_messages, sep='\t', compression='gzip')
    print("--- File loaded with {} preprocessed messages.".format(len(df_preprocessed_non_empty_channels)))
else:
    print("--- File not found, proceeding with preprocessing...")

    count_first_nodes = 0
    count_channels_without_message = 0
    file_args = []

    # Compute file_args and channels_without_message
    for _, row in df_first_nodes.iterrows():
        count_first_nodes += 1
        channel_id = row['type_and_id']
        token = row['token']
        channel_path = os.path.join(extracted_dir, channel_id)
        if not os.path.isdir(channel_path):
            channels_without_message.append(channel_id)
            count_channels_without_message += 1
            continue

        # File names look like YYYY-MM.tsv.gz (presumably one file per month).
        files = glob(os.path.join(channel_path, '[0-9][0-9][0-9][0-9]-[0-1][0-9].tsv.gz'))
        if not files:
            count_channels_without_message += 1
            channels_without_message.append(channel_id)
            continue

        file_args.extend([(file, channel_id, token) for file in files])
    channels_scanned = True

    # NOTE: the first figure is the number of files queued, one entry per file.
    print("--- Number of messages in file_args:", str(len(file_args)))
    print("--- Channels without messages count:", count_channels_without_message)
    print("--- First nodes count:", count_first_nodes)
    print("--- Number of distinct channel_ids in file_args:(subtraction of the above two)", len({entry[1] for entry in file_args}))

    # Multiprocessing; the context managers close both the pool and the bar.
    results = []
    with Pool(cpu_count()) as pool, tqdm(total=len(file_args)) as pbar:
        for res in pool.imap_unordered(process_file, file_args):
            pbar.update(1)
            results.append(res)

    all_english_messages = [df for df in results if df is not None]
    if all_english_messages:
        df_preprocessed_non_empty_channels = pd.concat(all_english_messages, ignore_index=True)
        messages_computed = True
    else:
        # pd.concat raises on an empty list; fall back to an empty frame with
        # the expected columns and do not cache it.
        print("--- No non-empty preprocessed messages were produced; nothing will be cached.")
        df_preprocessed_non_empty_channels = pd.DataFrame(
            columns=['text', 'timestamp', 'text_preprocessed', 'language', 'channel_id', 'token'])

channels_cache_found = os.path.exists(output_path_channels_without_message)
if channels_cache_found:
    print("--- File already exists: {}".format(output_path_channels_without_message))
    df_channels_without_message = pd.read_csv(output_path_channels_without_message, sep='\t', compression='gzip')
    print("--- File channels_without_messages loaded with length = {}".format(len(df_channels_without_message)))
else:
    df_channels_without_message = pd.DataFrame({'channel_id': channels_without_message})
    if not channels_scanned:
        print("--- Channels were not scanned in this run (messages loaded from cache): "
              "the list of channels without messages is empty and will not be saved.")

# Clean-up and filtering
df_channels_without_message = df_channels_without_message.dropna(subset=['channel_id'])
df_channels_without_message = df_channels_without_message.drop_duplicates(subset=['channel_id'])
df_channels_without_message = df_channels_without_message[
    ~df_channels_without_message['channel_id'].isin(df_preprocessed_non_empty_channels['channel_id'])]
# Never persist the placeholder list built when no scan took place: it would
# be picked up as a valid cache by every later run.
if channels_cache_found or channels_scanned:
    df_channels_without_message.to_csv(output_path_channels_without_message, sep='\t', index=False, compression='gzip')

# Clean-up and filtering
df_preprocessed_non_empty_channels = df_preprocessed_non_empty_channels.dropna(subset=['channel_id', 'text_preprocessed'])
df_preprocessed_non_empty_channels = df_preprocessed_non_empty_channels.astype(str)
# Every column is a string at this point, so convert the timestamps back to
# numbers explicitly before interpreting them as seconds since the epoch.
df_preprocessed_non_empty_channels['date'] = pd.to_datetime(
    pd.to_numeric(df_preprocessed_non_empty_channels['timestamp']), unit='s')
# Write the cache only when it was computed in this run; rewriting a file that
# was just loaded only costs time.
if messages_computed:
    df_preprocessed_non_empty_channels.to_csv(output_path_preprocessed_messages, sep='\t', index=False, compression='gzip')

# Filter English-only messages
df_english_preprocessed_non_empty_channels = df_preprocessed_non_empty_channels.copy()
df_english_preprocessed_non_empty_channels = df_english_preprocessed_non_empty_channels[
    df_english_preprocessed_non_empty_channels['language'] == 'en']



--- File not found, proceeding with preprocessing...
--- Number of messages in file_args: 1071
--- Channels without messages count: 67
--- First nodes count: 247
--- Number of distinct channel_ids in file_args:(subtraction of the above two) 180


  0%|          | 0/1071 [00:00<?, ?it/s]

--- valid pairs: 2, invalid pairs: 0
pairs 0    (dear holders problems distribution tokens sta...
1    (hello friends happy inform extended raffle ab...
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0--- valid pairs: 2, invalid pairs: 0

  0%|          | 1/1071 [00:01<28:56,  1.62s/it]


pairs--- valid pairs: 2, invalid pairs: 0 --- valid pairs: 1, invalid pairs: 0

pairs0    (, unk)
Name: text, dtype: object
pairs pairs 
 0    (, unk)
Name: text, dtype: object0                                         (think, en)
1    (biden announces dropping presidential race, en)
Name: text, dtype: object

0    (instagram install follow photos videos, en)
1                                         (, unk)
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0--- valid pairs: 0, invalid pairs: 0--- valid pairs: 1, invalid pairs: 0


pairspairspairs Series([], Name: text, dtype: object)--- valid pairs: 7, invalid pairs: 0
--- valid pairs: 2, invalid pairs: 0pairs
 
pairs 

  1%|          | 6/1071 [00:01<03:52,  4.58it/s]

0    (welcome republican party proudly loudly suppo...
Name: text, dtype: object
0     (multiple impacts reported iran fires missiles...
5     (famous celebrities named diddy case names sho...
6     (president jimmy carter celebrated birthday ye...
7     (uses mirror attention helicopter pilot mile a...
8     (weather anchor starts crying live mother desc...
9     (country music star garth brooks accused ping ...
10    (texas father bawls cleared starving year desp...
Name: text, dtype: object  0    (dimokretsge eeskamash inthikhaabuvi hasan lat...
2                                              (, unk)
Name: text, dtype: object0    (batter days, id)
Name: text, dtype: object--- valid pairs: 1, invalid pairs: 0--- valid pairs: 5, invalid pairs: 0



pairs
--- valid pairs: 0, invalid pairs: 0pairs 
0    (channel auctioned, en)
Name: text, dtype: object pairs0                                    (plays chess, en)
1                                               (, tr)
2    (bangladesh prote

  1%|▏         | 15/1071 [00:01<01:20, 13.18it/s]


--- valid pairs: 10, invalid pairs: 0
--- valid pairs: 6, invalid pairs: 0
pairspairs--- valid pairs: 1, invalid pairs: 0 
0    (, unk)
1    (, unk)
2    (, unk)
3    (, unk)
4    (, unk)
5    (, unk)
6    (, unk)
7    (, unk)
8    (, unk)
9    (, unk)
Name: text, dtype: object--- valid pairs: 4, invalid pairs: 0pairs 

0    (, unk)
1    (, unk)
2    (, unk)
3    (, unk)
4    (, unk)
5    (, unk)
Name: text, dtype: object 
pairs0    (means term dehumanisation dehumanization deni...
Name: text, dtype: object
 --- valid pairs: 2, invalid pairs: 0--- valid pairs: 1, invalid pairs: 0
0    (, unk)
1    (, unk)
2    (, unk)
3    (, unk)
Name: text, dtype: object

pairs 0    (seen, en)
1       (, unk)
Name: text, dtype: object pairs
0    (, unk)
Name: text, dtype: object

  2%|▏         | 21/1071 [00:02<01:06, 15.91it/s]


--- valid pairs: 31, invalid pairs: 0--- valid pairs: 2, invalid pairs: 0

--- valid pairs: 1, invalid pairs: 0pairs
 pairspairs0                                       (revshare, sq)
1    (announcement weekly revenue share update atte...
Name: text, dtype: object  
1    (START FREE CHAT, en)
Name: text, dtype: object0     (donald trump took bullet country voting fuck,...
1     (fully endorse president trump hope rapid reco...
2                        (trump assassin need find, en)
3                (stop fighting save america trump, en)
4     (trump election trump hype video cent hits har...
5     (trump yelling fight fight getting grazed bull...
6     (words donald trump president united states el...
8     (time fight hold mainstream media accountable ...
9     (normal election trump stopped trump president...
12    (tried silence donald trump tried jail donald ...
13    (center times square single video monitor repo...
14    (iconic photographs american history suspect p...
15    (bi

  2%|▏         | 26/1071 [00:02<00:53, 19.70it/s]

0             (ioprst, da)
1             (takker, no)
2     (smotret zvukom, sl)
3    (menia dementsii, ca)
Name: text, dtype: object--- valid pairs: 12, invalid pairs: 0--- valid pairs: 5, invalid pairs: 0


pairspairs --- valid pairs: 20, invalid pairs: 0 
0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3                                               (, unk)
4     (sovuk kunlar boshlandi kasallikning oiok lard...
5     (kaifiiatni kutaramiz uiidagi sovg adan bittas...
6                                               (, unk)
7     (slim body ozdiruvchi aiollar uchun uramimiz u...
8     (guruhda odam paytirishga yordam beruchi haqid...
9     (guruhda sizga yordam beradi kinish xabarni ch...
10    (xodamlar javobni topa olishmayapti topa olasi...
11    (qaysi oyda ilgansiz ulgan oyingiz ustiga bosi...
Name: text, dtype: object0                  (, unk)
1    (poraduite m

  3%|▎         | 34/1071 [00:02<00:35, 28.88it/s]

1                             (bukval, lt)
3                                  (, unk)
4                                  (, unk)
5                                  (, unk)
7                                  (, unk)
8    (nochnoe psygansu pachka sigaret, sv)
9                                  (, unk)
Name: text, dtype: object


pairs
pairs
 --- valid pairs: 10, invalid pairs: 00    (, unk)
Name: text, dtype: object pairs--- valid pairs: 0, invalid pairs: 0
--- valid pairs: 5, invalid pairs: 0pairs
pairs --- valid pairs: 3, invalid pairs: 0
 0    (, unk)
1     (, en)
2     (, en)
3    (, unk)
4    (, unk)
Name: text, dtype: object
 Series([], Name: text, dtype: object)

pairs 0                     (, unk)
1    (chuvstvuesh zapakh, cs)
2        (tualet nashiol, sq)
3                     (, unk)
4                     (, unk)
Name: text, dtype: object--- valid pairs: 5, invalid pairs: 00    (kazhetsia chto poslednie srednii vyros, sk)
1                                         (, unk)
2     

  4%|▍         | 43/1071 [00:02<00:25, 39.63it/s]

--- valid pairs: 11, invalid pairs: 0 
--- valid pairs: 11, invalid pairs: 0
--- valid pairs: 2, invalid pairs: 0

pairs pairs 
0     (breaking news obtained previously hidden vide...
1                                       (boom said, en)
3     (trump room unforgivable fact biden families m...
4     (vivek ramaswamy says trump told time gave hea...
5     (relevant data fintel showing astonishingly hu...
6     (company shorted million shares attempted assa...
7     (african children play assassination attempt t...
8     (gaza seen disasters witnessed wonderful ameri...
9     (shocking evidence bidendeath covered white ho...
10           (people killed wounded israelwar gaza, en)
11    (according philippe lazzarini commissioner gen...
Name: text, dtype: objectpairs--- valid pairs: 2, invalid pairs: 0
 
0                     (, unk)
1           (niggas know, en)
2                 (sizhu, sq)
3                     (, unk)
4        (liubimyi kanal, sw)
5     (kvantovaia fizika, lt)
6      

  5%|▌         | 54/1071 [00:02<00:20, 49.37it/s]

 0              (podkrovatnyi chikiriau ddddddddddd, cy)
1     (vsem priiatnogo appetita khoroshego dnia robe...
2                                               (, unk)
6     (esli umyshlennoe ubiistvo degree murder schit...
7                                               (, unk)
8                                      (tupo menia, sk)
9                                               (, unk)
10                                              (, unk)
11                                    (open geimer, no)
Name: text, dtype: object
--- valid pairs: 8, invalid pairs: 0
pairs 0                                               (, unk)
10    (lakonizm faktury odnom predmete inter foto me...
11    (utonchennyi svetlyi inter kvartiry dukhe pari...
12    (utonchennyi svetlyi inter kvartiry dukhe pari...
13    (inter prirodnoi gamme iarkikh ottenkov kvarti...
23                                              (, unk)
24                                              (, unk)
25                            (stu

  6%|▌         | 61/1071 [00:03<00:52, 19.26it/s]

--- valid pairs: 21, invalid pairs: 0
pairs --- valid pairs: 125, invalid pairs: 00                                               (, unk)
1                                               (, unk)
2     (zagotovit podosinoviki zimu prostykh sposobov...
3     (spravliat materinskoi ustalost sovety mnogode...
4                                               (, unk)
5     (nosit futbolku dzhinsami sovetov chtoby osvez...
6                                               (, unk)
12    (chemodan ruchki glavnykh priznaka chto otnosh...
13                                              (, unk)
23    (trend estestvennost pereiti clean beauty soit...
24    (ostorozhno trendy poiavliaiutsia pochemu nimi...
25    (endokrinolog raskryla sposoby izbezhat pereed...
26                                              (, unk)
27    (chemodan ruchki glavnykh priznaka chto otnosh...
28    (modnye tsveta oseni odezhde pokoriaiut voobra...
34                                              (, unk)
35    (vremia snizheni

  6%|▌         | 66/1071 [00:04<01:11, 14.14it/s]


pairs 0     (sizlar uchun ajoyib lgan trenddagi toplam end...
1     (xodamlar javobni topa olishmayapti topa olasi...
2     (orinning appaiib turishi zhoningizga tegdimi ...
3                                               (, unk)
4     (uiingizda svet unda muammo uchun azhoiib fona...
                            ...                        
61    (slim body ozdiruvchi aiollar uchun uramimiz u...
62                                              (, unk)
63                                              (, unk)
64    (sovuk kunlar boshlandi kasallikning oiok lard...
65                                              (, unk)
Name: text, Length: 66, dtype: object
--- valid pairs: 53, invalid pairs: 0
pairs 0                                               (, unk)
1     (Gnts ljmy khdhw dwran khdm ldwl bltwfq wltfhm...
2                (ntnyhw blzWkm sytGyWb Gdan lHkwm, cy)
3     (nshr lnwb lTyby wywsf ltqw rdny lmlk lthny wn...
5     (tDmn yDan tshylt llHjj tmryn lmmlk rdny wtHsy...
6     (rdny khl

  7%|▋         | 70/1071 [00:04<01:10, 14.22it/s]

--- valid pairs: 118, invalid pairs: 0
pairs 0                                                (, unk)
1      (harris whale alert value bought xdtxn holder ...
2                                                (, unk)
3      (harris whale alert value bought xbatxn positi...
4      (harris whale alert value bought xbatxn positi...
                             ...                        
113    (harris kamala xbatxn position market chart tr...
114    (harris whale alert value bought xbatxn positi...
115    (harris whale alert value bought ftxn position...
116    (harris kamala holder market chart trade trend...
117    (announcement distribution tokens holders reve...
Name: text, Length: 118, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs 0    (poll showing america doesn want grumpy trump ...
Name: text, dtype: object--- valid pairs: 120, invalid pairs: 0

pairs 0      (bHth jdyd lbrwfyswr kyrf lrbT lsry lsysy yuZh...
1      (bldy lmGr lrdyw nfSl bldy lmGr mntd lslTt ldr...
2     

  7%|▋         | 73/1071 [00:04<01:10, 14.11it/s]

--- valid pairs: 1, invalid pairs: 0
--- valid pairs: 100, invalid pairs: 0pairs
 pairs0    (, unk)
Name: text, dtype: object 
0                (lbyt skry lsry lmHdwdfy jnwb lbnn, cy)
1      (ljysh lsry ynshr mshhd tHDyr lqwt lljtyH lbry...
2                      (lSbH lmjtm Hyan ldhkr ktwbr, cy)
3                          (Sfrt ndhr mnTqty lshrwn, cy)
4                                                (, unk)
                             ...                        
96                                                (, no)
97     (ljysh lsry mqtl jndyyn lshml hjwm msyWr akhry...
98              (kbyr lmwTnyn wSlthm hdhh lrsy khyr, cy)
99     (bydn ynSH lnan mhjm lmnshat lnfTy lyrnywqblh ...
100    (wlwn mrykywn bydn tHSl rGbth mhjm lmnshat lnw...
Name: text, Length: 100, dtype: object
--- valid pairs: 93, invalid pairs: 0--- valid pairs: 2, invalid pairs: 0
pairs --- valid pairs: 4, invalid pairs: 00               (, sw)
1    (nikki haley, sw)
Name: text, dtype: object

pairs 
pairs0    (

  7%|▋         | 78/1071 [00:04<00:55, 17.85it/s]

--- valid pairs: 224, invalid pairs: 0
pairs 0                   (visit biden trump texas border, en)
1      (tucker carlson worst people military sense pr...
2      (police find starved bitten year girl initiall...
3      (transgender pentagon official share pronouns ...
4        (going viral avoiding lgbtq painted stairs, en)
                             ...                        
299                            (funniest video seen, en)
300    (oregon gets sawed half foot blade comes spinn...
301    (palestine protesters easter vigil mass patric...
302                                          (medal, en)
303    (bibles burned easter sunday tennessee trailer...
Name: text, Length: 224, dtype: object
--- valid pairs: 4, invalid pairs: 0
pairs --- valid pairs: 5, invalid pairs: 00                                               (, sw)
1                                     (california, en)
2                                               (, en)
3    (holland verry supporting israel honest 

  8%|▊         | 83/1071 [00:04<00:44, 22.00it/s]

pairs 0                           (welcome erik, nl)
1    (nikki haley great leader whitehouse, en)
2                       (talk nikki haley, so)
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs 1    (models videos, ca)
Name: text, dtype: object
--- valid pairs: 4, invalid pairs: 0
pairs 0    (quiz maga right winger penned following quote...
1                                              (, unk)
2    (think deserve president united states going p...
3    (zealand high income countries allow unrestric...
Name: text, dtype: object
--- valid pairs: 15, invalid pairs: 0
pairs 0     (sure recipe years financial moral bankruptcy ...
1     (mark zuckerberg pressure censor leak theory c...
2     (republican senators wrote president biden dem...
3     (republican senator signed letter democrats ca...
4     (storm clouds gathering american economy small...
5     (franklin roosevelt famously said thing fear r...
6     (abortion notoriously divisive issue america a...
7      (

  8%|▊         | 87/1071 [00:05<00:47, 20.85it/s]

--- valid pairs: 66, invalid pairs: 0
pairs 0     (tqdym mdyn bthm ltkhTyT lDrm lnyrn shrT wmSn ...
1     (mnSwr llsfyr mryky khll lywm lwsy lHrb bldwl ...
2      (ldqyq khyrtyn tnfy rsmyan ltwSl tbdl Hlyan, cy)
3                             (lbyd ySwt lSlH ksyf, cy)
4                                               (, unk)
                            ...                        
64    (lstntjt lntkhbt Glby lsysy fqdt qdrth Hshd lm...
65    (ydan lnjHt lsysy lntkhbt lmHlyhnk yDan njHt l...
66                 (ntnyhw yrfD lqbwl bntkhbt mbkr, cy)
67    (mnSwr khll zyrth llthny skhnyn lmwHd trshWH G...
68                                 (wtsl yDan bydn, cy)
Name: text, Length: 66, dtype: object
--- valid pairs: 5, invalid pairs: 0
pairs 0    (nikki haley haley ccyn qjutxn holder market c...
1                                              (, unk)
2                                              (, unk)
3    (nikki haley haley hknfgq ygoc position market...
4                                      

  8%|▊         | 91/1071 [00:05<00:43, 22.51it/s]

pairs 0     (recovering america groundbreaking addiction r...
1     (facing time history vitriolic poisonous ameri...
2     (father robert kennedy assassinated years toda...
3     (officials profiting policies pushes scientist...
4     (ranks conspiracy theorists include ninth circ...
5     (national true cost living coalition study mid...
6     (huge news caught president biden utah second ...
7     (breaking kansas sues pfizer misrepresentation...
8     (says ukraine inside russia forces attack bord...
9     (week senatehomeland security committee finall...
10    (word secrecy repugnant free open society peop...
Name: text, dtype: object
--- valid pairs: 241, invalid pairs: 0
pairs 3      (goodie bags folks advantage price price whale...
6      (thoughts highly recommend buying thoughts tim...
7      (found group trying find migrate trust wallet ...
10     (people selling sell regret sure said brought ...
14                      (airdrop rewards going guys, en)
                      

  9%|▉         | 97/1071 [00:05<00:34, 28.62it/s]

pairs --- valid pairs: 97, invalid pairs: 0
0     (president east palestine ohio toxic train der...
1     (clear americans want break free party system ...
2     (breaking members named committee advises cent...
3     (president biden says wants dark money politic...
4     (president clean cesspool corruption force pub...
5     (support successful independent campaign decad...
6     (breaking brazilex president bolsonaro indicte...
7     (works military contractors providing steady p...
8                                               (, unk)
9     (breaking kennedy welcomes nicole shanahan run...
10    (mass poisoned food bill gatesplan typical ped...
Name: text, dtype: object--- valid pairs: 264, invalid pairs: 0
pairspairs  
--- valid pairs: 215, invalid pairs: 0
pairs 0     (aiolini adrlaidigan erkaklar uchun makhsus ch...
1     (endi ming funktsiialik braslet ozish uchun or...
2     (sizlar uchun azhoiib bulgan khil trenddagi to...
3     (qaysi oyda ilgansiz ulgan oyingiz ustiga bo

 10%|▉         | 102/1071 [00:05<00:30, 32.06it/s]

 pairs
  pairs 0     (radical shutting country process pushing oper...
1     (attorney representing east palestine victims ...
2     (senate bill disaster spent proxy ukraine requ...
3     (seek republican answer democratic answer righ...
4     (president bidencognitive competence comes att...
5     (mass poisoned americans didn suddenly lazy sa...
6     (congressional advisory committee voted favor ...
7     (leave thoughts power government takes people ...
8     (vaccine gold rush pharma giving babies hepati...
9     (warns global health takeover says pandemic tr...
10    (biden administration claim julian assange con...
11    (grew white house definitely career politician...
12    (building bases ukraine russian border startin...
Name: text, dtype: object
--- valid pairs: 90, invalid pairs: 0pairs 
pairs --- valid pairs: 40, invalid pairs: 0
0      (ldkhly lbrlmny Sdqt ntkhbt lslTt lmHly ltrykh...
1      (wlyd lnqsh lHlwl lmqtrH llslTt lmHly wlty tsh...
2                          (G

 11%|█         | 113/1071 [00:05<00:19, 48.13it/s]

--- valid pairs: 10, invalid pairs: 0--- valid pairs: 94, invalid pairs: 0
pairs
pairs
 pairs 0    (important payment ready received payment bank...
1    (live stream boom alina habba donald trump fac...
2    (president east palestine ohio toxic train der...
3    (clear americans want break free party system ...
4    (president biden says wants dark money politic...
5    (president clean cesspool corruption force pub...
6    (breaking brazilex president bolsonaro indicte...
7                                 (robert kennedy, da)
8    (gibson released documentary diddy black epste...
9    (breaking kennedy welcomes nicole shanahan run...
Name: text, dtype: object 
0     (kipriklarni maksimal darazhada ustirib beruvc...
1                                               (, unk)
2     (abar millioner bulishingiz mumkin odamlar not...
3                                               (, unk)
4                                               (, unk)
                            ...                  

 11%|█▏        | 122/1071 [00:05<00:16, 57.48it/s]

 
pairs0     (world economic forum billionaires boys club a...
1     (people renounced racial hatred gender hatred ...
2     (fact vaccines recommends routine injection ch...
3     (media openly discussing idea banning social m...
4     (breaking robert kennedy canceled iowa campaig...
5     (found price wheat right bushel loaf bread cos...
6     (corporate media branded conspiracy theorist s...
7     (fauci says covid infection vaccinated boosted...
8     (harrisdemocratic party unrecognizable father ...
9     (astrazeneca vaccine contains monkey channel, en)
10    (imagine mexico invaded texas support russian ...
11    (clever real estate survey americans skipped m...
12    (watch brought insane lady said quote vaccine ...
13    (immunization agenda immunization agenda aims ...
14    (covid pandemic fundamentally change relations...
15    (going address nation friday share livestream ...
16    (trump rally tomorrow arizona dropping endorsi...
17    (robert kennedy caption unite ameri

 13%|█▎        | 135/1071 [00:05<00:12, 75.49it/s]

pairs --- valid pairs: 16, invalid pairs: 0--- valid pairs: 2, invalid pairs: 0

pairs0                                                (, unk)
1      (rasskazyvaiut dizainery nestandartnykh priemo...
2      (ochen krasivaia kollektsiia stekla peizazhi a...
4      (kollektsiia bokalov glass sonny seletti vyduv...
5      (roskosh zakaz mebel marketri solomki novaia k...
                             ...                        
162                                              (, unk)
163    (inter vdokhnovlennyi tvorchestvom kloda mone ...
173    (uiutnyi osobniak podmoskov dlia gostepriimnoi...
174    (ispanskii dizainerskii brend barcelona sotrud...
176        (etnika deluxe foto mebel outdoor baxter, no)
Name: text, Length: 74, dtype: object
 pairs--- valid pairs: 298, invalid pairs: 0 --- valid pairs: 6, invalid pairs: 0--- valid pairs: 4, invalid pairs: 0
--- valid pairs: 6, invalid pairs: 0pairs


pairs  pairspairs0    (leave thoughts power government takes people ...
1    (vaccine g

 14%|█▎        | 145/1071 [00:05<00:11, 81.71it/s]


--- valid pairs: 3, invalid pairs: 0--- valid pairs: 10, invalid pairs: 0
0     (planned long game weaponization amendment app...
1     (happy year grateful journey channel think rig...
2     (official utahballot huge milestone campaign w...
3     (close nuclear today cuban missile crisis lead...
4     (thanks phoenix packing house feel love join, en)
5     (handle immigrants compassion humanity favor i...
6     (letdifferences aside unite protect natural wo...
7     (time chart course future nationmilitary forei...
8     (matters worse virus leaked product funded bio...
9                            (support channel join, fr)
10    (uncle john kennedy ardent supporter arts famo...
Name: text, dtype: object

pairs pairs 
0    (important payment ready received payment bank...
1    (live stream boom alina habba donald trump fac...
2    (imperative people aware happen entering final...
Name: text, dtype: object0    (epstein list public releasing massive reports...
1    (international fede

 15%|█▌        | 164/1071 [00:06<00:08, 111.12it/s]

--- valid pairs: 19, invalid pairs: 0
pairs pairs0     (shocking revelation exposes pharmasecret moti...
1     (unfortunately announce today million livpure ...
2     (recovering america groundbreaking addiction r...
3     (sorry chance livpure company stock able produ...
4     (facing time history vitriolic poisonous ameri...
5     (attention melania trumpweight loss secret rev...
6     (father robert kennedy assassinated years toda...
7     (bombshell read carefully focus survive know w...
8     (today memorial americans honor soldiers falle...
9     (biggest pharmaceutical scandal alert mainstre...
10    (national true cost living coalition study mid...
11    (unlock secret optimal health livpure ultimate...
12    (breaking kansas sues pfizer misrepresentation...
13    (embark transformative journey dream body pure...
14    (week senatehomeland security committee finall...
15    (genius powers right fact nasa study found cas...
16    (important healthy person means easy fact acce...

 16%|█▋        | 176/1071 [00:06<00:09, 95.28it/s] 

0    (cornel west sean hannity heated debate oustin...
Name: text, dtype: object--- valid pairs: 22, invalid pairs: 0


--- valid pairs: 16, invalid pairs: 0pairs
0     (shocking expose judge arthur engoron caught m...
1     (president trump president jail cell people el...
2     (payment notification dear patriots received r...
3     (final patriots message holders received payou...
4     (congratulations paid dear patriot cashout com...
5     (huge donald trump tucker carlson going live h...
6     (payment ready received payment bank america h...
7     (salvadorpresident nayib bukele joined telegra...
8     (payment arrived large recorded bank account a...
9     (today truth social voters trust trump biden b...
11    (gibson leaks ritual room hidden camera hillar...
12    (storm century coming people need know coming ...
13    (dear patriots light recent events compelled w...
14    (payment ready received payment bank america h...
15    (dear friends writing immense gratitude heart .

 18%|█▊        | 194/1071 [00:06<00:07, 114.32it/s]

 --- valid pairs: 41, invalid pairs: 0pairs --- valid pairs: 3, invalid pairs: 0pairs0     (happy year thanks support beating presidents ...
1     (interview inside texas politics explain vital...
2     (patrick david asks candidacy tipping scales b...
3     (real income years real wages means collecting...
4     (official utahballot huge milestone campaign w...
5                                                (, no)
6                                               (, unk)
7     (people understand ukraine means business weap...
8     (race need little votes achieve currently need...
9     (president biden doesn debating governing coun...
10    (joseph mercola discuss canadian banks collude...
11    (president trump running american people deser...
12         (thanks phoenix packing house feel love, en)
13    (lists powerful healing substance schedule mea...
14    (george magazine asked think need aligned uncl...
15    (decline life expectancy symptom thing poverty...
16    (wonderful ta

 20%|██        | 218/1071 [00:06<00:05, 144.23it/s]

 --- valid pairs: 3, invalid pairs: 00    (scam project, hr)
1    (starts fraud, en)
2               (, unk)
3               (, unk)
Name: text, dtype: object0                                              (, unk)
1    (menia zaebalo vremia meshat govnozhuia govnom...
3                                     (niro party, lt)
4                  (tozhe prikolu koff prikhodiat, sl)
5                                              (, unk)
7                                              (, unk)
8                    (reaktsii nakidaite pidarmot, et)
Name: text, dtype: object--- valid pairs: 14, invalid pairs: 0



pairs--- valid pairs: 39, invalid pairs: 0 
pairs --- valid pairs: 336, invalid pairs: 0pairs0                                         (hcoming, en)
2                                          (fizike, sl)
3                                        (actually, en)
6                                      (means kill, fi)
9                                               (, unk)
10                

 22%|██▏       | 234/1071 [00:06<00:07, 108.43it/s]

--- valid pairs: 56, invalid pairs: 0--- valid pairs: 54, invalid pairs: 0
pairs--- valid pairs: 214, invalid pairs: 0
--- valid pairs: 7, invalid pairs: 0 
pairs
 pairs --- valid pairs: 8, invalid pairs: 0
pairs0                                           (news, en)
1    (welcome bidencash warmly welcome bidencash fi...
2    (replenished cards cheap price section remind ...
3    (replenished cards cheap price section remind ...
4    (replenished cards cheap price section remind ...
5                          (work domains restored, en)
6    (replenished cards cheap price section remind ...
Name: text, dtype: object 0     (icymi maricopa county board supervisors publi...
1     (resign days presented writ warrento waiver to...
2     (notice agent notice principal formally served...
3     (fuck live thursday night march live response ...
4     (sweden ends centuries neutrality officially j...
5     (dollarization progressing forward rapidly rus...
6     (icymi ntsb chair jennifer homendy 

 23%|██▎       | 247/1071 [00:06<00:08, 91.69it/s] 


0     (bidenweth price price weth uniswap change vol...
1     (great waiting information start token distrib...
2     (looking forward giveaway understand correctly...
3               (guys good news distribution today, en)
4                             (rumor allocate away, en)
7                  (guys tell participate giveaway, en)
9     (balance vote token welcome participate giveaw...
12    (know giveaways biggest token holders money sm...
13                   (know event starts sure taken, en)
14                                              (, unk)
15                                              (, unk)
18                          (dont twitter telegram, en)
19     (announcement posted twitter telegram admin, en)
20    (wonder giveaway affect course sponsored inves...
21                                              (, unk)
24    (great news bidencoin community project listed...
25                         (thats work vote reward, en)
28                  (crazy rewards definitely m

 24%|██▍       | 258/1071 [00:07<00:14, 55.50it/s]

--- valid pairs: 92, invalid pairs: 0
pairs 0                                                (, unk)
3      (washington post confirmed fact assisting ukra...
5      (therepublicansvoice soross billion woke empir...
6      (patriots drunk hymns drums gaza palestinian c...
8      (debt clock project money creation scam wizard...
                             ...                        
101                         (interesting realsganon, en)
102                                              (, unk)
103    (love elonattitude freakin deviant elitist rob...
104    (icymi elon musk tells advertisers specificall...
105    (normies ridiculous harness control weather de...
Name: text, Length: 92, dtype: object--- valid pairs: 88, invalid pairs: 0
pairs 0      (congratulations anons called according plan f...
1      (hello crown corporate judiciary insurrectioni...
5      (military bases bulgaria italy romania germany...
7      (nature presidential power entitles president ...
8      (biden promin

 25%|██▍       | 267/1071 [00:08<00:25, 31.88it/s]

--- valid pairs: 1, invalid pairs: 0
pairs --- valid pairs: 122, invalid pairs: 00    (dating dating telegram find crush places love...
Name: text, dtype: object

pairs 0                           (folks happy year potus, en)
1      (congresswoman eddie bernice johnson dedicated...
2      (administration cancelled student debt million...
3      (administration leading delivering ambitious c...
4      (prices america declined average peak decrease...
                             ...                        
117    (billion infrastructure funds help rebuild bri...
118    (chris thanks young folks engaged government p...
119    (time hardworking americans little breathing r...
120                                 (optimism potus, lt)
121    (jean carnahan devoted public servant woman re...
Name: text, Length: 122, dtype: object
--- valid pairs: 169, invalid pairs: 0
pairs 0      (today welcomed prime minister croo belgium wh...
1      (generations lgbtqi americans summoned courage...
2     

 26%|██▌       | 274/1071 [00:08<00:35, 22.32it/s]

--- valid pairs: 183, invalid pairs: 0
pairs --- valid pairs: 49, invalid pairs: 0
0      (today learned annual inflation fell october l...
1      (today president biden president joao manuel g...
2       (rockin national christmas tree white house, en)
3      (world aids biden harris administration recomm...
4      (capping insulin costs month seniors medicare ...
                             ...                        
178    (favorite photos year white house white house,...
179                                    (white house, en)
180                                    (white house, en)
181                   (year white house white house, en)
182    (year cranes going shovels hitting ground amer...
Name: text, Length: 183, dtype: object
pairs --- valid pairs: 200, invalid pairs: 0
pairs 0      (week proud unveil visitor center stonewall na...
1      (today administration extending overtime prote...
2      (reports shown progress lowering inflation kno...
3      (time america better d

 26%|██▌       | 279/1071 [00:09<00:37, 20.94it/s]

--- valid pairs: 210, invalid pairs: 0--- valid pairs: 164, invalid pairs: 0
pairs
 0      (worked tirelessly bring hersh home jill heart...
1         (build economy works working people potus, en)
2      (americasupport unions higher today time nearl...
3      (administration delivered significant investme...
4      (took office recovered jobs lost pandemic adde...
                             ...                        
205    (families southeast appalachia left reeling he...
206    (directed team provide available federal resou...
207    (committed traveling communities impacted hurr...
208    (directed team provide available resource fast...
209    (wednesday travel north carolina briefing stat...
Name: text, Length: 210, dtype: object
pairs --- valid pairs: 220, invalid pairs: 0
0      (project ahead schedule coming soon whistleblo...
1      (reports major european airlines actively canc...
2      (late spring year nancy hanks mcadden family v...
3      (file trump assassination f

 26%|██▋       | 283/1071 [00:09<00:37, 21.15it/s]

--- valid pairs: 131, invalid pairs: 0
pairs 0                                                (, unk)
1      (dorogaia chitatel nitsa chemodannoe nastroeni...
2                                                (, unk)
3      (deti vzgliady glazok boiazlivykh nozhek shalo...
4      (zashchity detei laifkhaki vospitaniiu zvezd s...
                             ...                        
220    (dorogaia chitatel nitsa etot sovershenno letn...
221    (ezhevika zhimolost kakaia letniaia iagoda zna...
222    (prokhodili vypusknye zvezd samye nezabyvaemye...
223                                              (, unk)
225    (tekh vechno shoke chto takoe affektatsiia pro...
Name: text, Length: 131, dtype: object
--- valid pairs: 142, invalid pairs: 0
pairs 0      (anastasiia nemoliaeva biografiia lichnaia zhi...
1                                                (, unk)
2      (sdelat krasivuiu letniuiu prichesku prostoi l...
3                                                (, unk)
4      (slozhila

 27%|██▋       | 287/1071 [00:09<00:44, 17.44it/s]

--- valid pairs: 609, invalid pairs: 0
pairs 0      (design biden admin flying migrants deported t...
2      (trump senior advisor good supreme court recog...
3      (biden influencer fantasizes murdering supreme...
4      (trump attorney think best decision expected s...
5      (democrats blown meltdown president trumpdomin...
                             ...                        
715    (november american people going reject kamala ...
716    (astoundingly phony moments speech night kamal...
717    (president trump press corrupt gave kamala tra...
718    (kamala harris sponsored bernie sanders trilli...
719    (letexamine record border czar harris inherite...
Name: text, Length: 609, dtype: object
--- valid pairs: 68, invalid pairs: 0
pairs --- valid pairs: 150, invalid pairs: 00                      (years delta november storm, en)
1       (heroes know country free mind satan world, en)
2     (regardless says publicly trump affirmed worki...
3     (corrupt powers governments bear 

 27%|██▋       | 290/1071 [00:09<00:49, 15.77it/s]

--- valid pairs: 152, invalid pairs: 0
pairs 0                                                (, unk)
1      (segodnia otmechaetsia koshek prisylaite komme...
2      (vesti sebia rabote chtoby tebia uvolili vredn...
3                                                (, unk)
4                                                (, unk)
                             ...                        
259    (turtsii snimut adaptatsiiu kholopa zvezdoi se...
260    (turetskikh akterov kotorye prikhodiatsia drug...
261                                              (, unk)
262    (pevitsa natali seichas chem zanimaetsia vygli...
263    (massivnye krossovki vesnu trendam goda prishl...
Name: text, Length: 152, dtype: object
--- valid pairs: 156, invalid pairs: 0
pairs 0                                                (, unk)
1      (paskhal desert gnezda ingredienty solomka sho...
2      (vesiolyi prazdnik pervomaiskii selen zaglianu...
3      (dlia ideal raboty kishechnika produktov kotor...
4               

 27%|██▋       | 293/1071 [00:10<01:14, 10.41it/s]

--- valid pairs: 73, invalid pairs: 0
pairs 0     (action posse earlyvoteaction christians hunte...
1     (breaking president trump announced military m...
2                            (dash dash continuous, en)
3     (biden harris admin caught handed turning migr...
4                             (boss president live, en)
                            ...                        
68                                              (, unk)
69    (wake wake wake recent concert texas creedscot...
70                           (boss subscribe share, en)
71                                               (, af)
72                                     (jesus amen, af)
Name: text, Length: 73, dtype: object
--- valid pairs: 547, invalid pairs: 0
pairs 0      (khyr ljysh lsry ntshl jthth lmkhtTfyn ylyyn u...
1      (lkbnyt syjtm Gfyr yTlb jylh llmshrk bjnzt lsh...
2               (uthr jthth rfDt stqbl mklmt ntnyhw, cy)
3      (ntnyhw skrtyrh skry rwmn Gwfmn mwskw jhwd lmk...
4      (Gdan lDrb lmrfq lqtSd

 28%|██▊       | 295/1071 [00:11<01:25,  9.05it/s]

--- valid pairs: 173, invalid pairs: 0
pairs 0      (ostromodnykh trendov manikiura pedikiura laif...
1                                                (, unk)
2                                                (, unk)
3      (turetskie serialy luchshie novinki oseni lisa...
4      (kakie pravila nuzhno sobliudat posle ukolov k...
                             ...                        
227                                              (, unk)
228    (pogone pamiat zvezdy kotorye stradali dements...
229                                              (, unk)
230    (modnyi goroskop chem vstrechat novyi kazhdomu...
231    (zaliubili smerti zvezd kotorykh ubili sobstve...
Name: text, Length: 173, dtype: object--- valid pairs: 163, invalid pairs: 0

pairs 0      (knew warned silenced join share codezxvii pri...
1      (bless letwin state share join share codezxvii...
2      (scotland knows join share codezxvii private, en)
3      (president trump announced campaign leading hi...
4      (friend p

 28%|██▊       | 297/1071 [00:11<01:18,  9.83it/s]

--- valid pairs: 3, invalid pairs: 0--- valid pairs: 1, invalid pairs: 0
pairs
 pairs 0    (safu long term project diamond hands, en)
Name: text, dtype: object0    (trumpcto protected click verify human, en)
1                            (bullish trump, sq)
2                  (website live trumponsol, en)
Name: text, dtype: object
--- valid pairs: 286, invalid pairs: 0
pairs 
0      (sounds years late administration screwed thin...
1      (administration naloxone life saving medicatio...
2      (national security memorandum enable governmen...
3      (ought able child flight paying today administ...
4                                                (, unk)
                             ...                        
284    (today grieve families friends lost overdose t...
285    (watch responsibly increased production meet i...
286    (gaslight lied america years goodwill trust sc...
287                (time years happened watch comes, en)
288                                           (luis,

 28%|██▊       | 301/1071 [00:11<01:03, 12.05it/s]

--- valid pairs: 472, invalid pairs: 0
pairs 0      (today welcomed prime minister croo belgium wh...
1                                                (, unk)
2      (dear biden clear hamas government palestine d...
3      (generations lgbtqi americans summoned courage...
4      (entire lgbtqi community especially young tran...
                             ...                        
485                                              (, unk)
486    (republican elected officials fought block ori...
487                                              (, unk)
488    (dominos pizzahut papajohns dutch bros orders ...
489         (thank helping ukraine taiwan palestine, en)
Name: text, Length: 472, dtype: object
--- valid pairs: 197, invalid pairs: 0
pairs --- valid pairs: 450, invalid pairs: 00      (good morning patriots bless america world req...
1      (ridiculous jury found president trump guilty ...
2                (guilty counts surprised game join, en)
3      (devices today received pres

 28%|██▊       | 303/1071 [00:11<01:03, 12.11it/s]

pairs 0      (tired announcement liar trump fake news presi...
1      (generations asian americans native hawaiians ...
2                                     (simple potus, fr)
3                                                (, unk)
4                                                 (, sw)
                             ...                        
455    (israel offered roadmap enduring ceasefire rel...
456    (proposal gaza begins complete ceasefire withd...
457    (decisive moment israel proposal hamas says wa...
458    (tune deliver remarks super bowl champion kans...
459    (decisive moment time raise voices demand hama...
Name: text, Length: 450, dtype: object
--- valid pairs: 310, invalid pairs: 0
pairs 0      (experience intersection politics crypto biden...
1      (bites dust banned berny reason automated bloc...
2                                       (rainy days, cy)
3      (welcome coinmarketcap price enter contract ad...
4      (bites dust banned benuel reason automated blo..

 28%|██▊       | 305/1071 [00:11<00:58, 13.06it/s]

--- valid pairs: 116, invalid pairs: 0
pairs --- valid pairs: 393, invalid pairs: 0
0                   (tell real reason ports closing, en)
1                                                 (, af)
2      (chair lara trump says volunteers poll watcher...
3      (breaking russian president vladimir putin ret...
4                                           (genius, af)
                             ...                        
111    (fema blocking starlink shipments people offli...
112    (allowing years impeachable treason join teleg...
113    (romans invisible attributes eternal power div...
114                                     (share gabi, en)
115    (veterans love trump powerful display respect ...
Name: text, Length: 116, dtype: objectpairs
 --- valid pairs: 611, invalid pairs: 00      (company known company stock market years join...
1      (house representatives traitor supposed ruled ...
2      (join lady south lawn white house easter roll ...
3      (jill spoke friend morning m

 29%|██▊       | 307/1071 [00:11<00:57, 13.22it/s]

 0      (rinoplastika veshchei kotorye nuzhno znat per...
1                                                (, unk)
2                                                (, unk)
3      (lunnyi goroskop dlia vsekh znakov zodiaka ian...
4                                                (, unk)
                             ...                        
306    (novoe testirovanie proekta probu priglashaem ...
307                                              (, unk)
314                                              (, unk)
315    (chto budete zakazyvat selebriti kotorye svoeg...
316    (godu zhizni umerla brazil skaia aktrisa zhand...
Name: text, Length: 185, dtype: object
0      (revenue share active claimed, en)
1      (revenue share active claimed, en)
12     (revenue share active claimed, en)
13     (revenue share active claimed, en)
14     (revenue share active claimed, en)
                      ...                
893                      (going moon, en)
898        (revenue share live claim, e

 29%|██▉       | 310/1071 [00:12<01:17,  9.85it/s]

--- valid pairs: 395, invalid pairs: 0
pairs 0      (opener launch know rugged unknown team solana...
1      (ready launch coming drop launch seconds token...
2      (experience intersection politics crypto biden...
3      (experience intersection politics crypto biden...
4      (experience intersection politics crypto biden...
                             ...                        
429    (experience intersection politics crypto biden...
430    (service list twitter follow twitter like twit...
431    (service list twitter follow twitter like twit...
432                                              (, unk)
433    (service list twitter follow twitter like twit...
Name: text, Length: 395, dtype: object
--- valid pairs: 12, invalid pairs: 0
pairs 0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3                                               (, unk)
5     (Mang Guang Xia

 29%|██▉       | 312/1071 [00:12<01:27,  8.66it/s]

--- valid pairs: 341, invalid pairs: 0
pairs 0      (kennedy trump ticket future proves past trust...
1            (remember remember november join storm, en)
2                                        (subscribe, ro)
3      (happy year rise unite fight bless america wwg...
4              (happy year everybody wish best love, en)
                             ...                        
374    (remember eric trump posted message giant wwgw...
375    (pizza instagram pedophile code words identifi...
376    (mjmind control break matrix hopelessly unawar...
377    (world shifting multipolarity adapt thrive par...
378    (Obama False Flag event sinister disrupt elect...
Name: text, Length: 341, dtype: object
--- valid pairs: 336, invalid pairs: 0
pairs --- valid pairs: 189, invalid pairs: 0
0      (narrow ways narrow paths narrow alleyways waz...
1      (money power control people simply slaves shee...
2      (urgent urgent urgent warrior share list chann...
3          (need patriots witnesse

 29%|██▉       | 314/1071 [00:12<01:44,  7.21it/s]



--- valid pairs: 451, invalid pairs: 0
pairs 0      (location found numbers located marks spot hid...
1                (bqqm bqqm know ncswic pain wwgwga, en)
2      (traitors careful storm awake fellow americans...
3           (channel need request follow today jqin, en)
4         (president trump cover time magazine join, en)
                             ...                        
467    (owing allegiance united states levies adheres...
468    (remember today tomorrow remember tomorrow tod...
469    (alive darkness falls hours join subscribe sha...
470    (return request jqin share channel better tomo...
471    (start creating enforcement local neighborhood...
Name: text, Length: 451, dtype: object
--- valid pairs: 516, invalid pairs: 0
pairs 0      (week proud unveil visitor center stonewall na...
1                                                (, unk)
3      (today administration extending overtime prote...
4                                         (standard, id)
5      (report

 30%|██▉       | 318/1071 [00:13<01:17,  9.73it/s]

--- valid pairs: 136, invalid pairs: 0
pairs 0                                                (, unk)
1                                                (, unk)
2                                                (, unk)
3      (Sheng Zong Tong Chuan Jian Shou Deng Shang Sh...
9      (Xiao Fang Sheng kamalaDa Dang walzZhe Zhong G...
                             ...                        
184    (karine jean pierre said categorically false f...
187    (Tong Xing morgan chase Dong Chang jamie dimon...
189    (Liao Zong Tong Guan Fang Xing Mian Quan dsHe ...
194                                              (, unk)
195                                              (, unk)
Name: text, Length: 136, dtype: object
--- valid pairs: 216, invalid pairs: 0
pairs 0      (trumpbuy spent trumpbuyer holder market dext ...
1      (trumpbuy trumphpwstv ewre holder trumpprice m...
2      (trumpbuy spent trumpbuyer holder market dext ...
3      (trumpbuy trumpajxgyfvb holder trumpprice mark...
4      (trumpbuy

 30%|██▉       | 320/1071 [00:13<01:42,  7.31it/s]

--- valid pairs: 106, invalid pairs: 0
pairs 0                                                (, unk)
1      (Guang Dong Qiao Duan Diao Xiong Ling Chen Lia...
3      (Cheng Gong Yong Yuan Liao Fang Shang Tian Jin...
4       (Zhou Lian Bang Zhan Jiang Zhong Geng Zhong, id)
5                                                (, unk)
                             ...                        
165    (Rong Jian Chang Tong Xiang Gang Jiang Ying To...
166                                          (Zhong, id)
167                    (Zhong Zhou Zheng Sheng Xiao, de)
170                                              (, unk)
172                                              (, unk)
Name: text, Length: 106, dtype: object


 30%|██▉       | 321/1071 [00:13<02:02,  6.15it/s]

--- valid pairs: 459, invalid pairs: 0
pairs 0      (attention warriors important thing today join...
1      (action green light president evidence posted,...
2          (position available places final mission, fr)
3      (automatic message attention esteemed members ...
4      (unbelievable happen confidental classified cr...
                             ...                        
458    (executive orders effect national emergencies ...
459    (money power control people simply slaves shee...
460    (federal define individual alive dead define f...
461    (save phone quickly banned internet soon sense...
462    (truth fully exposed channels fallout occur de...
Name: text, Length: 459, dtype: object


 30%|███       | 322/1071 [00:14<01:56,  6.41it/s]

--- valid pairs: 482, invalid pairs: 0
pairs 0      (optics people believe court trust plan wwgwga...
1      (forget digital information talking selected h...
2      (children tortured john podesta comet ping pon...
3                                (think guess share, en)
4                        (look holy follow punisher, en)
                             ...                        
500    (thought dead alive thought alive dead dark li...
501    (truth fully exposed channels fallout occur de...
502                               (christ ascension, en)
503               (enou conf cjfz owls good hunting, en)
504    (world health strips stops plandemic trying le...
Name: text, Length: 482, dtype: object
--- valid pairs: 7, invalid pairs: 0
pairs 0    (alyorbek masodiqov introducing alyorbek masod...
1    (bekmurod aniboyev turn attention bekmurod ani...
2    (sardor anvarov introducing sardor anvarov fel...
3    (abdurahmon boboyev presenting abdurahmon bobo...
4    (kamola jalolova intr

 30%|███       | 324/1071 [00:14<01:34,  7.93it/s]


--- valid pairs: 94, invalid pairs: 0pairs
 pairs 0                                                (, unk)
3                                                (, unk)
5      (Sheng Nian Guan Qing Chang Fang Peng Jing Kan...
6      (Gong Chan Dang Qing Zheng Zhong Gong Chan Dan...
7                                   (Huan Ying Dong, id)
                             ...                        
136                                              (, unk)
138    (Chuan Xian Sheng Gong zhenhua Jing Chuan Chan...
139                                              (, unk)
140    (Zhong Quan Zhong Gong Dong Zheng Bian Deng Sh...
141                                              (, unk)
Name: text, Length: 102, dtype: object
1                                          (, unk)
2                                          (, unk)
3                                          (, unk)
4                                          (, unk)
6                                          (, unk)
                          ..

 31%|███       | 329/1071 [00:14<00:51, 14.46it/s]

--- valid pairs: 8, invalid pairs: 0
pairs pairs 0    (statement donald trump president united state...
1    (president donald trump visits trustee offices...
2    (urgent patriotic alert biden kamalas leaked p...
3    (congratulations paid dear patriot cashout tra...
4                                              (, unk)
5    (breaking news trump badges unprecedented dema...
6    (rogan uncovers secret millions trump gold bad...
7    (want strike rich like craig invest trump gold...
Name: text, dtype: object0      (experience intersection politics crypto biden...
1      (experience intersection politics crypto biden...
2                                                (, unk)
3      (biden spent weth bidenbuyer position uniswap ...
4                                                (, unk)
                             ...                        
532    (experience intersection politics crypto biden...
533    (experience intersection politics crypto biden...
534    (kalian pegawai indones

 31%|███       | 333/1071 [00:14<00:46, 15.74it/s]

--- valid pairs: 16, invalid pairs: 0
pairs 0     (biden knew coming point believed come lifetim...
1     (breaking news exposed documents kept secret w...
2     (shocking documentary released biden filmed hi...
3     (coming soon biggest victory prepare minds blo...
4     (biggest scandal american history treason year...
5     (tons photo video evidence proving hunter bide...
6     (find documentvideos channel videodocuments ep...
7     (information public demonstrate criminal corru...
8     (grandparents know happening organized child s...
9     (latest news happening scenes masterful plan h...
10    (footage security cameras building hillary cli...
11    (secret leaked huge needs found bidenoffice hi...
12    (support confidential secret cases exposed pri...
13    (critical alert immediate action required atte...
14    (shocking news releqsed documents trump releas...
15    (slow burn bore internal coup happening ranks ...
Name: text, dtype: object
--- valid pairs: 19, invalid pairs

 31%|███▏      | 336/1071 [00:14<01:02, 11.84it/s]


 0      (knqw reasons disinformation necessary strateg...
1                            (know quiet join storm, en)
2      (year world watched formidable personalities d...
3      (think coincidence anniversary john kennedyass...
5      (truth transparency equal justice drain swamp ...
                             ...                        
491        (code announcement storm coming prepared, en)
492    (promises promises soon kept potus statement a...
493    (stay patriots crowded race higher chance pres...
494    (clash civilizations thesis peoplecultural rel...
495          (president trump gonna stop boss share, en)
Name: text, Length: 470, dtype: object--- valid pairs: 2, invalid pairs: 0
pairs 0    (breaking kamala harris refuses lift trump lib...
1    (hurry sold inside scoop click grab late turn ...
Name: text, dtype: object

--- valid pairs: 6, invalid pairs: 0--- valid pairs: 4, invalid pairs: 0

pairs pairs0    (breaking trumprally shooter allegedly identif...
1    (major f

 32%|███▏      | 340/1071 [00:15<00:54, 13.39it/s]

0                                  (letend friend, en)
2    (news appeals court georgia stayed proceedings...
3    (People Aided Anthony Fauci Coordination Biolo...
Name: text, dtype: object

pairs --- valid pairs: 121, invalid pairs: 0
0     (julian assange started live stream incredible...
2     (wikileaks releases audio recording julian ass...
3     (breaking tucker carlson released video exposi...
4     (read rave review steve bannon insightful powe...
5     (bombshell kamala harris mobile photos leaked ...
                            ...                        
74           (anybody want president united states, en)
75    (victim scam hope scammed despair dedicated ag...
76    (scammed attention scammed lost money trump li...
77           (kamala radical defund police liberal, en)
78    (government gangsters explosive documentary ba...
Name: text, Length: 68, dtype: objectpairs 
1                                                (, unk)
2                                             

 32%|███▏      | 344/1071 [00:15<00:45, 15.89it/s]

--- valid pairs: 9, invalid pairs: 0
pairs 0    (noticed song shall sleep fears trump played s...
1    (notice kamala harris doesn interviews dumb un...
2    (ivanka trump urgent life changing opportunity...
3    (trump liberty badges aren badges game changin...
4               (worth alpha team loves sooo join, en)
5    (breaking news trump badges selling fast secur...
6    (price trump liberty badges soar following tru...
7    (urgent time running trump liberty badges repu...
8    (urgent hours left final opportunity invest fu...
Name: text, dtype: object
--- valid pairs: 173, invalid pairs: 0
pairs 0      (waiting crusades practicing archery sword fig...
1      (Zhou Nian Gang Zhong Zhui Jian Zhen Xiang Xia...
2      (Song Zhong Zhou Nian Peng Ding Kang Xiang Gan...
3      (Zhou Nian temu Gang Cheng Xing Zhong Liao Min...
4      (facebook years sent facebook group hello hong...
                             ...                        
240    (Zhong Gong Quan Wang Pian Shang Gong Zhon

 32%|███▏      | 346/1071 [00:15<00:52, 13.73it/s]

--- valid pairs: 147, invalid pairs: 0
pairs 0                 (Guang Ming Shuo Zhong Zhong Liao, de)
1                                                (, unk)
3      (Guan Lian Guan Lian Huan Ying Dian Guan Chuan...
4      (Ping Xian Cheng Cheng Xiang Dang Feng Jian Li...
7      (Shan Yang Liao Chao Sang Quan Kong Zhou Zhou ...
                             ...                        
198                                              (, unk)
199       (Shan Shan Lian Huan Ying Dian Guan Chuan, id)
200                                              (, unk)
201    (Wang Chuan Dong Qing Yuan Ming Sheng Zhou Zho...
202                                 (Duan Zhua Shen, sw)
Name: text, Length: 147, dtype: object
--- valid pairs: 83, invalid pairs: 0
pairs --- valid pairs: 597, invalid pairs: 0
pairs 0     (huge event shake earth world going messy safe...
1              (best come active miss chance share, en)
2     (automatic message attention esteemed members ...
3     (urgent launched telegram 

 32%|███▏      | 348/1071 [00:15<00:56, 12.90it/s]

--- valid pairs: 507, invalid pairs: 0
pairs 0      (plandemic confirmed soon know approximate dat...
1           (public service announcement join share, en)
2      (turn notifications coming ready storm join sh...
3      (attention patriots important thing today ensu...
4      (showtime things going ugly remember taught co...
                             ...                        
528    (walz finished stolen valor scandal overcome d...
529    (transition rainbow treasury notes financial p...
530    (times forced racist anti american communist s...
531    (mark zuckerberg admitted things biden harris ...
533    (major alert night evil defeated ready whats c...
Name: text, Length: 507, dtype: object
--- valid pairs: 97, invalid pairs: 0
pairs 0      (limited time offer trump liberty badges left ...
1      (exclusive alert rogan revealed podcast drinks...
2      (insane prizes grabs cold hard cash luxury apa...
3      (huge donald trump tucker carlson going live h...
4      (attention

 33%|███▎      | 351/1071 [00:15<00:52, 13.73it/s]

--- valid pairs: 751, invalid pairs: 0
pairs 0              (adorable roxy toddlers slip sandals, en)
1                                                (, unk)
2      (reduced price wireless outdoor security camer...
3                                  (radiator heater, en)
4                (wooden high chair baby high chair, en)
                             ...                        
751                                     (price drop, ro)
752                                     (price drop, ro)
753                                     (price drop, ro)
754                                     (price drop, ro)
755                                          (deals, ca)
Name: text, Length: 751, dtype: object
--- valid pairs: 106, invalid pairs: 0
pairs 0      (challenge accepted president trump vows compa...
2              (president trump care harris selects, en)
3      (ready global currency reset quantum financial...
4      (president trump declares social security simp...
5      (history 

 33%|███▎      | 353/1071 [00:16<01:20,  8.88it/s]

--- valid pairs: 1, invalid pairs: 0
pairs 0    (join live seanhannity radio repthomasmassie c...
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs 0    (aclu wrote bankrolled sweeping abortion amend...
Name: text, dtype: object


 33%|███▎      | 355/1071 [00:16<01:09, 10.29it/s]

--- valid pairs: 1, invalid pairs: 0
pairs 0    (thanks ashleymoodyfl jimmypatronis wiltonsimp...
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs 0    (helping award million county infrastructure i...
Name: text, dtype: object
--- valid pairs: 635, invalid pairs: 0--- valid pairs: 708, invalid pairs: 0

pairs 0                                                (, unk)
1                                      (fuck shivam, en)
2                                 (hello biden sexy, no)
3                               (allah torture hell, en)
4                            (bonjour bonne journee, fr)
                             ...                        
659                                              (, unk)
660    (bipartisan border security deal nation safer ...
661    (inflation lowest nearly years wages wealth jo...
662    (join brownsville texas deliver remarks need p...
663                                              (, unk)
Name: text, Length: 635, dtype: object
pa

 33%|███▎      | 358/1071 [00:16<00:58, 12.10it/s]

--- valid pairs: 6, invalid pairs: 0 

pairs0      (spoke amir thani qatar president sisi egypt c...
1      (battle soul america fronts freedom vote funda...
2                                                (, unk)
3      (border patrol agents enforcement officers asy...
4      (today process decision asylum claim years lon...
                             ...                        
707                                     (think mind, en)
708                  (people world want militarists, en)
709    (today transgender visibility simple message t...
710              (explosion govern mental baby boom, en)
711    (today honor cesar chavez carrying cause dedic...
Name: text, Length: 708, dtype: object
pairs  0    (amendment florida come defeat november elimin...
1    (biden tells trust bureaucracy investigate ass...
2    (happy endorse slate school board candidates p...
3    (bidenflation holding gold americans protect r...
4    (kamala harris complicit massive coverup hide ...
5    (pu

 34%|███▍      | 362/1071 [00:16<00:46, 15.18it/s]


--- valid pairs: 19, invalid pairs: 0--- valid pairs: 1, invalid pairs: 0

pairs 0    (biden puts elderly life woman prison years pr...
Name: text, dtype: objectpairs
 0     (watch live desantis reynolds south carolina s...
1     (defeat elites candidate actually defeated pre...
2     (border persistent problem entire adult life u...
3                     (looks like core convictions, en)
4     (people iowa hardworking fearing patriotic bac...
5     (watch live grimes meet greet chip thomas mass...
6     (instead building wall like promised donald tr...
7             (nikki haley wants believe lyin eyes, en)
8         (agree christie nikki haley going smoked, en)
9     (watch live desantis reynolds deliver remarks ...
10    (donald trump running pursue issues nikki hale...
11    (donald trump gaslight covid lockdowns avoid a...
12    (watch live desantis delivers remarks northsid...
13    (nationrevival starts returning government peo...
14    (thank supporters volunteering iowa caucu

 34%|███▍      | 366/1071 [00:17<00:42, 16.61it/s]

--- valid pairs: 268, invalid pairs: 0
pairs 0      (Shui Feng Shen Zhou Zhou Zong Shen Zhou Zhou ...
1                                                (, unk)
2                                                (, unk)
3                                                (, unk)
4                   (Dong Fang Sheng Zhen Shen Zhou, de)
                             ...                        
307                                              (, unk)
308                                              (, unk)
309                                              (, unk)
310                                              (, unk)
312                                      (Ping Liao, sw)
Name: text, Length: 268, dtype: object
--- valid pairs: 29, invalid pairs: 0


 34%|███▍      | 368/1071 [00:17<00:41, 16.80it/s]

pairs --- valid pairs: 956, invalid pairs: 0
1     (vice president mike pence principled faith wo...
2     (anthony fauci needs held accountable reckonin...
3            (president fight lead americacomeback, en)
4     (biden admin shamefully pressuring israel perm...
5     (hawkeye state enjoyed meeting folks pennydine...
7                                               (, unk)
8                                               (, unk)
9     (thank kimreynoldsia reverse decline country s...
10    (watch live desantis reynolds speak meet greet...
11    (florida culture winning leading delivering re...
12    (race future president fight lead countryreviv...
13    (scott strong conservative bold ideas country ...
14    (founders understood religious liberty given g...
15    (casey stand america attended todayhistoric ma...
16                                              (, unk)
17    (biden content manage americadecline confront ...
18    (heard kimreynoldsia iowans follow lead caucus...
19 

 35%|███▍      | 371/1071 [00:17<00:47, 14.88it/s]

--- valid pairs: 886, invalid pairs: 0
pairs 0       (round french parliamentary elections right wi...
1       (national rally reform fake nationalist partie...
2       (quick google search david rachline vice presi...
3                                    (wwallies china, en)
4                                 (nationalists ally, en)
                              ...                        
1012    (algerian tranny banned boxing world champions...
1014                              (karma corner guys, en)
1016       (news pushing military brought psyop play, en)
1017                                          (photo, sk)
1018         (plans largest building aucklandskyline, en)
Name: text, Length: 886, dtype: object
--- valid pairs: 2, invalid pairs: 0
pairs 0    (york city mayor eric adams indicted federal c...
1    (alert trump released documentation assassinat...
Name: text, dtype: object


 35%|███▍      | 373/1071 [00:17<01:09, 10.06it/s]

--- valid pairs: 56, invalid pairs: 0--- valid pairs: 67, invalid pairs: 0
pairs 0                                               (, unk)
1          (nice president doesn need teleprompter, en)
2     (november kamala harris held accountable crime...
3     (donald trump reveals trump silver coins lifet...
4      (knew lose doesn matter bigger calling damn, en)
                            ...                        
62                                   (melania join, fi)
63                                   (melania join, fi)
64                                   (melania join, fi)
65                                   (melania join, fi)
66    (invest americafuture trump silver coin melani...
Name: text, Length: 67, dtype: object

pairs 0     (tucker russell brand sure describing exactly ...
1     (cruz confronted zuckerberg instagram wall str...
2     (setting climate john kerry speaking wefannual...
3     (geologist professor plimer utterly demolishes...
4     (voice skull nonlethal weapo

 35%|███▌      | 375/1071 [00:18<01:07, 10.30it/s]

--- valid pairs: 96, invalid pairs: 0
pairs 0     (poll reveals leaning conservative previous ge...
1                                (fight fight join, en)
2     (kash patel lays epic monologue morning join, en)
3     (trump silver coins unmissable opportunity hug...
4                                            (join, fi)
                            ...                        
91    (smart turn weeks donald trump guaranteed safe...
92    (trump room president trump illegal migrants c...
93    (president trump north carolina georgia commun...
94    (donald trump press briefing hurricane respons...
95    (urgent trump silver coins worth rising news p...
Name: text, Length: 96, dtype: object
--- valid pairs: 119, invalid pairs: 0
pairs 0      (director christopher wray told congress yeste...
1      (crooked bidenperformance night dismal network...
2                                    (join official, en)
3      (meet francisco radical kamala harris join off...
4      (know seconds america 

 35%|███▌      | 377/1071 [00:18<01:20,  8.66it/s]

--- valid pairs: 21, invalid pairs: 0
pairs 0     (step away goal today afraid changing today pu...
1     (following videos faint hearted compilation bi...
2     (george soros security time false flags time h...
3     (breaking huge washington names deleted jeffre...
4     (urgent alert biden exposed explicit adult vid...
5     (world know week going change cdename truth ex...
6     (forget donald trump posted video january tell...
7     (urgent tucker released video epstain cell dea...
8     (mission completed thought seen world shaken s...
9     (urgent urgent patriot share list channels ext...
10    (waiting long time come know know prepare back...
11    (footage security cameras building hillary cli...
12    (opportunity bright future trumpteam exclusive...
13    (action create financial stability today takin...
14    (important message found original channel kash...
15    (final list real channels telegram soon import...
16    (years mainstream media said crazy conspiracy ...
17  

 35%|███▌      | 379/1071 [00:18<01:30,  7.68it/s]

0     (attention public calling patriots freedom fig...
1     (news guess real conspiracy video posted priva...
2     (message emphasizes importance reading list es...
3     (reminder following channel qawakening exposin...
4     (breaking news happened shock world kill ronal...
5     (dems unsuccessful removing trump ballot weapo...
6     (people need know coming entering final phase ...
7     (force unleashed important channel post extrem...
8     (slow burn bore internal coup happening ranks ...
9     (alert tucker aired tape president whistleblow...
10    (leaked bombs seen seen world shaken hour leak...
11    (people need know coming entering final phase ...
12    (hollywood chaos gibson unleashes explosive tr...
13    (shocking video killed president john kennedy ...
14    (biden knew coming point believed come lifetim...
15    (message emphasizes importance reading list es...
16    (today urgent brink making history adversaries...
17    (urgent livestream announcement incoming u

 36%|███▌      | 381/1071 [00:19<01:35,  7.22it/s]

--- valid pairs: 40, invalid pairs: 0
pairs--- valid pairs: 188, invalid pairs: 0 
pairs 0                               (fallen angels join, en)
1      (dualism addendum follow popular video dualism...
2      (satanic origins sexual revolution exposes dis...
3      (word faith movement exposed depth video uncov...
4      (alina habba lawfare real thing seeing stop vo...
                             ...                        
187    (houthi strikes costing companies francetotale...
188    (taylor swift admits george soros owners music...
189    (force islamic sharia america flag replace ame...
190    (kiev responds reports general sacked zelensky...
Name: text, Length: 188, dtype: object
0     (shocking news releqsed documents alina releas...
1     (presenting list real channels telegram prepar...
2     (alert tucker realeasd tapes tein database bid...
3     (opportunity brighter future president donald ...
4     (crash aeroplane retrieved ocean picture know ...
5     (jeffrey epstein

 36%|███▌      | 382/1071 [00:19<01:46,  6.45it/s]

--- valid pairs: 1, invalid pairs: 0
pairs 0    (, unk)
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs 0    (information visit send message, en)
Name: text, dtype: object
--- valid pairs: 194, invalid pairs: 0
pairs 0      (breaking illegal aliens dropped skis coast oc...
1      (breaking venezuelan president nicolas maduro ...
2      (breaking swiss parents child taken authoritie...
3      (thank father supporting unconditionally donal...
4      (police secret service plenty time notice null...
                             ...                        
189    (major alert white house taken offline ready c...
190    (donald trump unveils ultimate health secret c...
191    (trump threat guys kamala wonder enemy united ...
192    (breaking kamala harris campaign account caugh...
193    (rule simple survive destroy right eyes choose...
Name: text, Length: 194, dtype: object


 36%|███▌      | 386/1071 [00:19<01:08, 10.02it/s]

--- valid pairs: 123, invalid pairs: 0
pairs 0      (breaking news alert unveiling tangled covert ...
1        (today ocean city maryland follow telegram, en)
2      (wins drop believe president trump info breaki...
3      (incriminating evidence trump released novembe...
4      (expect allot children rescued human child tra...
                             ...                        
118    (know ultimately wasn government deal making h...
119    (video trump shuttle pilot revealing trump sav...
120    (attention juicy details revealed donald trump...
121    (trump jumped huge betting favorite election d...
123    (watch okeefemediagroup exposes blatantly raci...
Name: text, Length: 123, dtype: object
--- valid pairs: 82, invalid pairs: 0
pairs--- valid pairs: 295, invalid pairs: 0 
0     (ladies gentlemen breaking news donald trump a...
1     (calling patriots happening live broadcast rem...
2         (need patriots witnesses prepared coming, en)
3                (react love trump fol

 36%|███▋      | 389/1071 [00:19<00:54, 12.59it/s]

--- valid pairs: 155, invalid pairs: 0
pairs 0      (emergency broadcast system activated believe ...
1      (stand united trump trumpshare join trump expl...
2      (told truth covid told truth masks told truth ...
3      (breaking rfkrunning mate nicole shanahan post...
4      (banned video death easy justice served stop c...
                             ...                        
156    (trump supporters good channel consider checki...
157    (breaking news ladies gentlemen donald trump a...
158    (massive trump petition goes viral hours patri...
159    (trump officially announced added banks adapti...
160    (breaking news reported trump exonerated janua...
Name: text, Length: 155, dtype: object
--- valid pairs: 350, invalid pairs: 0pairs
 --- valid pairs: 45, invalid pairs: 00                                                (, unk)
2      (Nian Dong Qing Ling Jian Tiao Xian Fang Liao ...
3                                                (, unk)
4      (Zhong Gong Nian Luan Xiang C

 37%|███▋      | 391/1071 [00:19<01:12,  9.33it/s]

--- valid pairs: 162, invalid pairs: 0
--- valid pairs: 6, invalid pairs: 0pairs
 pairs 0      (video rightful president trumpheart gold scru...
1            (fight began join share julian assange, en)
2      (breaking news putin gives tucker carlson secr...
3      (breaking classified documents asaange publish...
                             ...                        
159    (government control weather happy congressiona...
160    (people need know coming entering final phase ...
161    (best president american history trump soon sh...
162    (human clones highlights gene decode nicholas ...
163    (journalist gets close secret state dept spoke...
Name: text, Length: 162, dtype: object2       (hillary tape, en)
3                  (, unk)
6                  (, unk)
7        (media dying, en)
8      (border crisis, en)
9    (political chats, en)
Name: text, dtype: object



 37%|███▋      | 394/1071 [00:20<00:57, 11.83it/s]

--- valid pairs: 1, invalid pairs: 0
pairs 0    (welcome communities haven advertised congratu...
Name: text, dtype: object
--- valid pairs: 661, invalid pairs: 0
pairs 1      (forever remember witnessed inside courthouse ...
2      (winred overwhelming support america stay stro...
3                                             (Liao, id)
5                                                (, unk)
6      (Zhou Chuang Xing Chuan Cheng Zhong Qian Zong ...
                             ...                        
888                           (current state things, en)
889    (atlanta news reports owner black barbershop b...
890                           (Hold IComing scavino, en)
891    (Ying Shou Xiang sunakHuo Cheng Cheng Shang Xi...
894    (Bian Tian Shang Liang Zhou Qian Meng Xuan Pan...
Name: text, Length: 661, dtype: object


 37%|███▋      | 396/1071 [00:20<00:57, 11.72it/s]

--- valid pairs: 3, invalid pairs: 0
pairs 0    (tommy tuberville making wonder told honest ha...
1    (dedicated correcting post users sharing stati...
2               (brace coming victor davis hanson, en)
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs --- valid pairs: 1695, invalid pairs: 0
0    (, unk)
Name: text, dtype: objectpairs
 0       (president trump comrade kamala radical left m...
1       (president trump wants defund police years sai...
2       (president trump attack israel happened presid...
3       (president trump million people country years ...
4       (happened president trump talks time spent gol...
                              ...                        
1819    (trump senior adviser absolutely disgusted kam...
1820    (michigan voter touted wages going wages going...
1821    (ruhle watch didn clear direct answer okay tal...
1822    (reacts zelensky criticizing radical yorker ap...
1823    (trump senior adviser reacts kamalaspeech path...


 37%|███▋      | 399/1071 [00:20<00:46, 14.43it/s]

--- valid pairs: 89, invalid pairs: 0
pairs 0     (truth went minutes prior showtime government ...
1                   (similar events happened prior, en)
2     (virginia governor glenn youngkin signed bill ...
3                      (state union biden powerful, en)
4                                     (biden great, ca)
                            ...                        
84    (intelligence community narrative going bloodb...
86                                               (, en)
87    (group presidential election english hindi spe...
88                                   (lion jackals, en)
89                    (global security risk looses, en)
Name: text, Length: 89, dtype: object
--- valid pairs: 1705, invalid pairs: 0
pairs --- valid pairs: 655, invalid pairs: 0
0       (venezueladeadliest gang green light fire amer...
1       (unlike kamala harris president trump radical ...
2       (yesterday reported members savage venezuelan ...
3       (addition atrocious record border s

 37%|███▋      | 401/1071 [00:20<00:51, 13.04it/s]

 0                                                (, unk)
1      (year popularly elected prosecutors campaigned...
3      (Chuan Zong Tong djtDe Cong Zhang Shang Zhou C...
5      (nightmare look alan kelly briefly addresses c...
6                                                (, unk)
                             ...                        
913                                              (, unk)
914    (Chuan Ming Ping xJiang Chuan Zheng Tian Bang ...
917                                              (, unk)
918    (trump raised sham charge record setting milli...
921    (gateway pundit telegram manchin officially le...
Name: text, Length: 655, dtype: object
--- valid pairs: 58, invalid pairs: 0
pairs 0     (follow channel curiosity baffled clarity achi...
1     (latest post states giving people money alcoho...
2                   (surrounding deep state people, en)
3                         (opinion indian election, en)
4                                      (goto thank, en)
5        

 38%|███▊      | 404/1071 [00:20<00:42, 15.87it/s]

--- valid pairs: 896, invalid pairs: 0
--- valid pairs: 95, invalid pairs: 0pairs 
pairs0      (biden bidenxtxn position market chart trade t...
1                                                 (, de)
2                                                (, unk)
3      (biden spent weth bidenbuyer position uniswap ...
4      (biden bidenxfcfad holder market chart trade t...
                             ...                        
920                                              (, unk)
921               (trump trump messiah donald trump, en)
922    (service wallet registering bbns tech customiz...
923                                              (, unk)
924                                              (, unk)
Name: text, Length: 896, dtype: object
 0                                                (, unk)
2                                    (devils kamala, en)
6                                                 (, en)
7                                     (find youtube, en)
10               

 38%|███▊      | 406/1071 [00:20<00:43, 15.29it/s]

--- valid pairs: 100, invalid pairs: 0
pairs 0                                                (, unk)
2      (mean posting biased media sources election lm...
3                         (prediction markets agree, en)
5                        (post wide variety sources, en)
6                                                (, unk)
                             ...                        
98                                            (lost, en)
99                                            (true, fr)
100    (info racist bigot homophobe reeeeeeeee useful...
101    (vote biden trump threat illegal immigrant sta...
102                            (trump wins michigan, en)
Name: text, Length: 100, dtype: object
--- valid pairs: 651, invalid pairs: 0
pairs 0                                                (, unk)
1                                                (, unk)
2                                                (, unk)
3                                                (, unk)
4               

 38%|███▊      | 408/1071 [00:21<01:17,  8.51it/s]

--- valid pairs: 28, invalid pairs: 0
pairs 2     (korpus strazhei islamskoi revoliutsii ukrains...
4                                               (, unk)
9                                               (, unk)
10    (spooky season begun halloweentide black sentr...
11    (komanda roven sdelali nomer raketnye ataki ir...
12                                     (rasforsite, no)
14    (podumal ukraintsy izobreli apostrof budu iuza...
15         (slava bogu chto ochko barana borodatoe, hr)
16                                              (, unk)
19    (ocotillo flowers cactus ocotillo somewhat com...
21                                          (dobro, pt)
22    (sozhaleniiu pervyi chelovek mire ostavil komm...
23    (uvidet nifiodov librarium obsuzhdaiut blizhni...
25                                              (, unk)
26                                              (, unk)
27             (parni chto diddi ebal ochko bibera, sl)
28                                              (, unk)
29  

 38%|███▊      | 410/1071 [00:21<01:22,  8.04it/s]

--- valid pairs: 3, invalid pairs: 0pairs
 0                                              (, unk)
3                                              (, unk)
4    (interpretiroval popravku imenno chasti person...
Name: text, dtype: object
--- valid pairs: 13, invalid pairs: 0--- valid pairs: 1292, invalid pairs: 0
pairs 0                                                 (, unk)
1                                                 (, unk)
2       (comedian isaac butterfield massive taken aust...
3       (police deserve minor injuries protecting gove...
4                                            (policy, pl)
                              ...                        
1525                       (politicians judges cloth, en)
1531    (outside smoking news minimum pricing announce...
1532    (outvote millions dependent government sustena...
1533                     (skydiver jumping parachute, en)
1534                                              (, unk)
Name: text, Length: 1292, dtype: object

pair

 38%|███▊      | 412/1071 [00:22<02:03,  5.32it/s]

0     (dnia vyborov verkhovnyi zaslushaet delo fevra...
1     (dnia vyborov segodnia proidut pervye respubli...
3     (aiove segodnia rossiiskaia pogoda chto budet ...
4                                               (, unk)
5     (triumfal nachalo prezidentskoi kampanii ekhal...
6                                               (, unk)
7                                               (, unk)
8     (pokhozhe tramp vyigraet shtaty etikh praimeri...
9                                               (, unk)
10    (khot vystupiv chut luchshe oprosov nikki khei...
12    (takoi rezul gempshira obuslovlen faktom chto ...
13                                              (, unk)
14    (nekie vooruzhennye formirovaniia udarili voen...
Name: text, dtype: object
--- valid pairs: 450, invalid pairs: 0
pairs 0      (Juan Mian Chou Jiang Zhou Xiao Yuan Juan Diao...
1                                                (, unk)
2                                                (, unk)
3                             

 39%|███▊      | 414/1071 [00:22<01:47,  6.10it/s]

--- valid pairs: 11, invalid pairs: 0
pairs 0     (chlen palaty predstavitelei respublikanets dz...
1                                     (foto geniia, ro)
3                                               (, unk)
5                                               (, unk)
6     (nekotorye aziatskie strany rezul tate etogo p...
7     (vchera byli veselye debaty moderatsii vzaimny...
8                                               (, unk)
9                                               (, unk)
11                          (demokratiia opasnosti, sl)
12    (dnei vyborov vchera verkhovnyi shtata kolorad...
15    (dnei vyborov respublikanskaia partiia shtata ...
Name: text, dtype: object


 39%|███▊      | 415/1071 [00:22<02:02,  5.35it/s]

--- valid pairs: 2, invalid pairs: 0
pairs 0    (want purchase nickname contact contract teleg...
1                                              (, unk)
Name: text, dtype: object
--- valid pairs: 0, invalid pairs: 0
pairs Series([], Name: text, dtype: object)
--- valid pairs: 2, invalid pairs: 0
pairs 0         (, sw)
1    (admin, so)
Name: text, dtype: object


 39%|███▉      | 418/1071 [00:22<01:23,  7.85it/s]

--- valid pairs: 1, invalid pairs: 0
pairs 0    (sell nickname, ca)
Name: text, dtype: object
--- valid pairs: 10, invalid pairs: 0
pairs 0     (poka otdykhe ssha osobo pishu khochetsia napi...
1     (dnei vyborov rovno prezidentskikh vyborov ssh...
2     (vremenem york times vykatil opros vyborov she...
3     (esli sledovat etomu oprosu kollegiia vyborshc...
4                                 (prishel podarok, sl)
5     (rezul taty bobbi kennedi poka rano chem govor...
6     (dnia vyborov podvedeny itogi vcherashnikh vyb...
7                                               (, unk)
8                                               (, unk)
10                                              (, unk)
Name: text, dtype: object
--- valid pairs: 349, invalid pairs: 0
pairs 

 39%|███▉      | 420/1071 [00:23<01:09,  9.39it/s]

0                                                 (, en)
1                                    (election sure, en)
2                                       (thank mate, en)
3      (voting live wanna date politics running bigge...
4                                            (trump, en)
                             ...                        
370                                   (donald trump, ro)
372                   (reality looks facts emotions, en)
373       (american people aren racial demographics, en)
374    (south speak gone nonwhite immigration georgia...
375    (trying black pill remedies saying need race c...
Name: text, Length: 349, dtype: object
--- valid pairs: 2, invalid pairs: 0
pairs 0    (witch hunt letitia james york city state comp...
3    (great news badly needed north carolina virtua...
Name: text, dtype: object--- valid pairs: 16, invalid pairs: 0

pairs 0         (zhdem interv vladimirom vladimirovichem, sl)
1                                               (, unk)

 39%|███▉      | 423/1071 [00:23<00:52, 12.27it/s]

--- valid pairs: 686, invalid pairs: 0
pairs 1             (breaking says voting president trump, en)
2                                                (, unk)
3      (washington state sixth jurisdiction reject am...
5      (tucker carlson active trump supporter raided ...
6                                                 (, fr)
                             ...                        
970                                              (, unk)
971                                              (, unk)
972                                              (, unk)
974    (Yong Dian Huan Diao Dian telegram Ying Yong Z...
975    (Xing jeffrey epstein Guan Feng Ting Jian Jian...
Name: text, Length: 686, dtype: object
--- valid pairs: 759, invalid pairs: 0
pairs 0                                   (real president, en)
1                                                (, unk)
2      (trump biden laken riley remember going forget...
3                                                 (, fr)
4               

 40%|███▉      | 425/1071 [00:23<01:25,  7.55it/s]

--- valid pairs: 65, invalid pairs: 0
--- valid pairs: 1247, invalid pairs: 0--- valid pairs: 79, invalid pairs: 0
pairs pairs--- valid pairs: 1422, invalid pairs: 0 
0     (knows respected order president history stree...
2     (comrade kamala harris lied working mcdonalds ...
6     (hostage crisis israel taking place comrade ka...
7                                               (, unk)
8     (grieve senseless death israeli hostages horri...
                            ...                        
74    (comrade kamala harris country border allowing...
76      (wrong kamala know missing everybody knows, en)
77    (republican conservative msdnc constantly putt...
78    (kamala sleepy going difficult campaign bigges...
79    (fake staged photo clue plug cord phone work b...
Name: text, Length: 65, dtype: objectpairs
0                       (bullish marketing investors, en)
1       (dream come true double savings night happen, en)
2       (experience intersection politics crypto biden...


 40%|███▉      | 427/1071 [00:23<01:19,  8.06it/s]

0       (kamala harris whale alert value bought xbatxn...
1       (migration update harris migration improved sm...
2       (kamala harris whale alert value bought xbatxn...
3       (kamala harris whale alert value bought xbatxn...
4       (migrate following pinned message instructions...
                              ...                        
1730    (reward claiming live read pinned message clai...
1731    (kamala harris whale alert value bought ftxn p...
1732    (migration update harris migration improved sm...
1733    (kamala harris whale alert value bought ftxn p...
1734    (kamala harris whale alert value bought ftxn p...
Name: text, Length: 1422, dtype: object 0     (acting director homan worked presidents seen ...
1                                (eagle pass texas, en)
2     (GREAT SILENT MAJORITY RISING LIKE NEVER BEFOR...
3     (middle election super tuesday coming shortly ...
4     (supreme court address historic question presi...
                            ...           

 40%|████      | 431/1071 [00:24<01:01, 10.33it/s]

--- valid pairs: 115, invalid pairs: 0
pairs 0      (judge taken away constitutional right free sp...
1                                                (, unk)
2      (israel find sadly fewer hostages currently th...
3      (think radical left lunatics causing chaos col...
4      (great wisconsin michigan tremendous crowds en...
                             ...                        
145                  (victory november save america, en)
146    (press conference tomorrow morning trump tower...
147    (bookkeeper called legal expense tiny descript...
148    (real verdict november people know happened do...
149    (senator vance hearing constituents ohio sayin...
Name: text, Length: 115, dtype: object
--- valid pairs: 2, invalid pairs: 0
pairs 0                                     (going live, en)
1    (breaking news videos receipts uncovered white...
Name: text, dtype: object


 40%|████      | 433/1071 [00:24<00:55, 11.50it/s]

--- valid pairs: 109, invalid pairs: 0
pairs 1                                                (, unk)
2                                                (, unk)
5                         (blin gimn takoi klassnyi, fi)
6                                                (, unk)
8                                                (, unk)
                             ...                        
259                                               (, tl)
260                                              (, unk)
262                               (vstrechaem novyi, cs)
265    (spisok novogodnikh prikolov dlia togo chto ra...
266                                              (, unk)
Name: text, Length: 109, dtype: object
--- valid pairs: 53, invalid pairs: 0
pairs --- valid pairs: 19, invalid pairs: 0
pairs 0     (nikki haley ballot indiana didn petition sign...
2     (fani willis fulton county admitted having sex...
3     (years people saying elvis look alike going pl...
4     (immunity granted president

 41%|████      | 435/1071 [00:24<00:58, 10.82it/s]

0     (espn recently fired woman publicly said belon...
1     (movies mentioned mainstream media city dreams...
2        (book alert save america copy today books, en)
3     (episode triggered starts rumble going questio...
4                               (episode triggered, da)
5                                               (, unk)
6     (triggered tonight heck happening springfield ...
7                                 (going live tune, en)
8                                    (live tonight, en)
9                                               (, unk)
10    (year jocelyn nungaray murdered illegal aliens...
12                                     (going live, en)
14                                     (going live, en)
15                                              (, unk)
16                                    (live rumble, en)
17    (tonight triggered ground pulse people going c...
18    (family fought american freedom includes takin...
19                                     (going li

 41%|████      | 438/1071 [00:24<00:46, 13.71it/s]

0                           (biden face corruption, en)
1             (stand trump winningpublishing today, en)
2              (love trump winningpublishing today, en)
3     (corrupt left wing cabal wants scare submissio...
4     (victory ukraine look like perpetual death ukr...
5                                               (, unk)
6                                 (going live tune, en)
7     (tonight russell brand live blast continue con...
8     (excited join buddy tucker carlson tour fall, en)
9                                      (going live, en)
10                                              (, unk)
11                                              (, unk)
12                       (real hunter story hiding, en)
13                                   (live tonight, en)
14                                              (, unk)
15                                        (tonight, en)
16    (charlie kirkgreat book right wing revolution ...
17                                (going live tu

 41%|████      | 440/1071 [00:24<00:47, 13.37it/s]

--- valid pairs: 22, invalid pairs: 0
pairs --- valid pairs: 34, invalid pairs: 0--- valid pairs: 26, invalid pairs: 0

0     (ready extra special years episode triggered b...
1                                 (going live tune, en)
2     (record year school choice expansion letcontin...
3                  (going live rumble eastern tune, en)
4                                               (, unk)
5                                     (live rumble, fr)
6                                               (, unk)
7     (biden wants forget borders gateways drug cart...
9     (triggered tonight matt whitaker breaks basele...
10    (iowa maga patriots leave chance need turn cau...
11    (maybe weird want people severe intellectual p...
12    (live coverage iowa caucuses beginning rumble ...
13        (vivek triggered tonight going live tune, en)
14    (hampshire maga patriots come join monday afte...
15    (happy endorse father unite entire defeat rino...
16    (parent prioritize kidsafety educa

 42%|████▏     | 445/1071 [00:24<00:32, 19.55it/s]

--- valid pairs: 796, invalid pairs: 0--- valid pairs: 21, invalid pairs: 0

pairs 0     (whistleblower live tonight tune check lies de...
1                                               (, unk)
2                                 (going live tune, en)
3     (prison meme want silenced locked douglass mac...
4     (douglas mackey fighting twitter wars remember...
5                                 (going live tune, en)
6                                      (click link, en)
7                                               (, unk)
8     (face rabid antisemitism biased anti israel me...
9     (live tune time major episode triggered revolv...
10    (live tonight definitely going want miss going...
11       (guys episode tonight tucker carlson miss, en)
12                                              (, unk)
13    (know post office giving personal data labor u...
14             (chance free deal winningpublishing, en)
15      (going live check latest episode triggered, en)
16                   

 42%|████▏     | 449/1071 [00:25<00:28, 21.55it/s]

--- valid pairs: 29, invalid pairs: 0
pairs --- valid pairs: 125, invalid pairs: 0
0     (extra special episode triggered dana white bu...
1     (extra special episode triggered town hall liv...
2     (cart checkout massive game changer psqh patri...
3     (fight school choice happening doesn affect te...
4     (special coverage tonight fatherflorida rally ...
5     (guys excited tonightepisode triggered exclusi...
6     (questions ready going live minutes tune trump...
7                                     (squad goals, ca)
8     (ready awesome episode triggered national puls...
9     (going live triggered great raheem kassam join...
10    (tens millions dollars chinese government fund...
11    (guys ready awesome episode triggered notoriou...
12                                              (, unk)
13    (lifts curtain swamp plus hamas rioters storm ...
14    (rumble right major interview awesome book lif...
15                                              (, unk)
16                   

 42%|████▏     | 452/1071 [00:25<00:31, 19.67it/s]

--- valid pairs: 1714, invalid pairs: 0
pairs 0       (looks like white americanseriously starting d...
1       (russia orthodox russian teen nikita zhuravel ...
2       (muslims majority russia moscow filled chechen...
3       (know chechyan commanderface online media past...
4                                   (merry christmas, en)
                              ...                        
1867                                  (facebook link, af)
1869                                              (, unk)
1870    (year massive victory dealt fatal blows enemy ...
1871                                     (happy year, en)
1872                                     (cool guess, pt)
Name: text, Length: 1714, dtype: object
--- valid pairs: 112, invalid pairs: 0
pairs 0                                                (, unk)
1                          (harrisburg donaldjtrump, pt)
2                                                (, unk)
6      (word refuse sleepy biden democratic national ...
7  

 42%|████▏     | 455/1071 [00:25<00:50, 12.32it/s]

--- valid pairs: 696, invalid pairs: 0
pairs 3       (Zheng Qian Ming Zhou Peng Chao Chuan Zong Ton...
5       (Chuan Jian Ting Ting Neng Sheng Neng Rang She...
6       (Zhuan Xiong Chuan iwHe nhDe Sheng Liao magaZh...
11                                                (, unk)
12                                                (, unk)
                              ...                        
1004                                              (, unk)
1005    (Xiao Shang Liao terrence bradley Duan Zhou fa...
1006    (drop bombshell fani willis nathan wade trump ...
1007                                     (Ding Chuan, id)
1008    (Kang Biao Jiang Nian Ling Huan Cheng Jiang Ni...
Name: text, Length: 696, dtype: object
--- valid pairs: 1695, invalid pairs: 0
pairs 0       (nazi accounts going mental spazzing having po...
1                                                 (\, es)
2                         (gabs users retard boomers, en)
3                      (covid russia putin demonrats, en)


 43%|████▎     | 457/1071 [00:26<00:58, 10.43it/s]

--- valid pairs: 1399, invalid pairs: 0
pairs 0                                                  (, ro)
1                                          (revshare, sq)
5                        (coming buying rolling chad, en)
7                                          (incoming, en)
8                                          (revshare, sq)
                              ...                        
1734    (harris whale alert value bought xdtxn positio...
1735    (represent mundi ventures looking partnership ...
1736    (harris whale alert value bought xdtxn positio...
1737    (harris kamala xfef holder market chart trade ...
1738    (harris whale alert value bought xfef holder c...
Name: text, Length: 1399, dtype: object
--- valid pairs: 38, invalid pairs: 0
pairs 0              (join trump office telegram channel, en)
1     (peace peace peace president trump recalls rec...
2     (crooked hillary suffers case trump derangemen...
3              (join trump office telegram channel, en)
4       

 43%|████▎     | 459/1071 [00:26<01:00, 10.18it/s]

--- valid pairs: 198, invalid pairs: 0
pairs 5                                                (, unk)
7                                                (, unk)
9      (reka tisa iavila okonchanii formirovaniia per...
10                     (nastol plokho chto khorosho, sk)
11                                               (, unk)
                             ...                        
549                       (popadaem prosto sdokhnut, sk)
552                                              (, unk)
553                                              (, unk)
558    (tranny seethe white people having romantic am...
559    (forget columbia mother america america people...
Name: text, Length: 198, dtype: object
--- valid pairs: 1254, invalid pairs: 0
pairs 0                                           (morning, en)
1                                            (kahani, sw)
2       (americans maryland conducted rally support pr...
4       (message dear customer xxxxxxxxcredited lacs i...
5          

 43%|████▎     | 461/1071 [00:26<01:37,  6.27it/s]

--- valid pairs: 183, invalid pairs: 0
pairs --- valid pairs: 186, invalid pairs: 0
2      (robert rait micro speedy charm louis vuitton,...
9      (kazhdym dniom kachestvo postov vsio khuzhe ch...
10     (prikol iavlenie kartinka menia smeshila poka ...
14                        (tebia nikogda obmeniaiut, et)
17                                               (, unk)
                             ...                        
493                                              (, unk)
495    (peredaiu motuznoi estafetu bukake chellendzhe...
498                                              (, unk)
508                           (nochnoi tortoposting, it)
517                                           (post, en)
Name: text, Length: 183, dtype: objectpairs 
2                                                (, unk)
3                                                (, unk)
4                                  (alga kazakhstan, sw)
5      (chioto vspomnil chto svinogitler okna vybrosi...
6               

 43%|████▎     | 462/1071 [00:27<01:52,  5.40it/s]


--- valid pairs: 1010, invalid pairs: 0--- valid pairs: 772, invalid pairs: 0
pairs 0                                                 (, unk)
1        (coming maduro carry gitmo donkey elon musk, en)
2       (breaking venezuela opposition clearly beat ni...
3       (Shou Ying Liao Rang Liao Mian Song Xing Xuan ...
4                                                 (, unk)
                              ...                        
1469                                              (, unk)
1472               (Chuan Ming Shang Sheng Chai Xiao, id)
1473                  (Dang Ming Zhou Ying Hong Liao, tl)
1474                                              (, unk)
1475                                              (, unk)
Name: text, Length: 1010, dtype: object

pairs 
0      (follow alert terryhfollowed trumponsol fastes...
1      (trumpbuy spent trumpbuyer holder market dext ...
2      (trumpbuy trumpsjsb vbtxn holder trumpprice ma...
3      (trumpbuy spent trumpbuyer position market dex...
4

 43%|████▎     | 464/1071 [00:27<02:00,  5.04it/s]

--- valid pairs: 200, invalid pairs: 0
pairs 2                                                (, unk)
4      (vyshel treiler komedii zhdun kino glavnyi ger...
7      (rozygrysh vorovanikh mobilok dlia uchastiia p...
8                                                (, unk)
9                                                (, unk)
                             ...                        
397    (boiat chto novyi smartfn vivo iasnite poka no...
398                                              (, unk)
399                                    (gitler umer, no)
400                               (dobroe utro saby, pt)
402                                  (happens peter, no)
Name: text, Length: 200, dtype: object
--- valid pairs: 649, invalid pairs: 0
pairs 0                                    (admin wallrus, so)
1                                                 (, fi)
2                                             (time, en)
3                                     (holders able, en)
4               

 44%|████▎     | 467/1071 [00:27<01:21,  7.41it/s]

--- valid pairs: 190, invalid pairs: 0
pairs 0      (prior florida texas join trump office telegra...
1               (join trump office telegram channel, en)
2               (join trump office telegram channel, en)
3      (poilievre received documents trudeau covering...
4      (massive rows trump supporters giant trump fla...
                             ...                        
198             (join trump office telegram channel, en)
199    (president trump remember suffering death gods...
200             (join trump office telegram channel, en)
201    (biden white house released statement proclaim...
202             (join trump office telegram channel, en)
Name: text, Length: 190, dtype: object
--- valid pairs: 211, invalid pairs: 0
pairs 0                                                (, unk)
1                                                (, unk)
3      (zelenskii zaiavil zhal chto zhena nedavno pon...
4                                                (, unk)
5               

 44%|████▍     | 469/1071 [00:28<01:29,  6.76it/s]

--- valid pairs: 212, invalid pairs: 0
pairs--- valid pairs: 196, invalid pairs: 0 4                             (izrayil khai, so)
5                                        (, unk)
8      (podpischik danil podelilsia mneniem, sl)
11                                       (, unk)
12                              (zaporozhbe, sk)
                         ...                    
564                                 (adminy, pl)
575                                      (, unk)
580                                      (, unk)
584                                       (, pl)
585        (uest kstati ideologicheski nash, et)
Name: text, Length: 212, dtype: object

pairs 0      (exclusive donald trump tucker carlson react t...
1      (palm beach wake rigged biden trial york team ...
2               (join trump office telegram channel, en)
3      (senior legal analyst describes trump convicti...
4      (thousands londoners chanting support presiden...
                             ...               

 44%|████▍     | 472/1071 [00:28<01:05,  9.21it/s]

--- valid pairs: 234, invalid pairs: 0
pairs 0      (urgent launched telegram bringing explosive p...
1      (mark levin interview president trump night jo...
2      (mark levin interview president trump night jo...
3      (mark levin interview president trump night jo...
4      (kicked candidate supposedly million votes cla...
                             ...                        
249    (trump site valdosta help affected biden kamal...
250    (latino voter educates msnbc voting trump year...
251    (breaking biden announced additional resources...
252             (join trump office telegram channel, en)
253        (hero join trump office telegram channel, en)
Name: text, Length: 234, dtype: object
--- valid pairs: 25, invalid pairs: 0
pairs 0     (elon interesting satellite contained drops ti...
1     (gender transformation untold realities docudr...
2     (tucker carlson fleet shares story cultural re...
3     (humans required restaurant california robotic...
4     (jerry nadler d

 44%|████▍     | 474/1071 [00:28<01:03,  9.37it/s]

--- valid pairs: 218, invalid pairs: 0
pairs 4                                                (, unk)
6                                                (, unk)
7                                         (pidorasy, lt)
8      (psikhicheski zdorovye liudi posle okonchaniia...
9      (uzbeki priekhali tadzhiki iami priekhali zase...
                             ...                        
522                                              (, unk)
523                             (dokhlaia palestnia, sk)
524        (bazuki strelial palestne chto zdokhnuli, sk)
525    (obstanovka tsakhale proekt razgrom podpisat, sl)
526    (natives england started race riot blackpillin...
Name: text, Length: 218, dtype: object
--- valid pairs: 820, invalid pairs: 0
pairs--- valid pairs: 0, invalid pairs: 0 
0      (trumpbuy trumpecyox qidv holder trumpprice ma...
1      (trumpbuy spent trumpbuyer position market dex...
2      (trumpbuy trumpgfpzer vthtxn position trumppri...
3      (trumpbuy spent trumpbuyer po

 44%|████▍     | 476/1071 [00:28<01:02,  9.50it/s]

0                                                 (, no)
3                   (khuiovyi admin postit prostite, ro)
8                                (genii geopolitiki, et)
10            (bliat nado post vtoroe maia pridumat, pt)
11     (menia chestno govoria zaebali prikoly shashly...
                             ...                        
707    (interesnyi fakt obez iany zabrat banan poobes...
711                                              (, unk)
718                                              (, unk)
724                                              (, unk)
726    (rebiat menia plokhie novosti golosuem ternati...
Name: text, Length: 255, dtype: objectSeries([], Name: text, dtype: object)

--- valid pairs: 27, invalid pairs: 0
pairs 0              (channels join telegram share share, en)
1     (trump biden probably manchurian candidate pre...
2            (good falling covid kings queens join, en)
3     (james comer found biden atleast fake names go...
4     (questioning bidenunit

 45%|████▍     | 480/1071 [00:28<00:52, 11.15it/s]

--- valid pairs: 67, invalid pairs: 0
pairs 0                                            (told, en)
1          (nice president doesn need teleprompter, en)
2     (november kamala harris held accountable crime...
3     (donald trump reveals trump silver coins lifet...
4      (knew lose doesn matter bigger calling damn, en)
                            ...                        
62                                   (melania join, fi)
63                                   (melania join, fi)
64                                   (melania join, fi)
65                                   (melania join, fi)
66    (invest americafuture trump silver coin melani...
Name: text, Length: 67, dtype: object
--- valid pairs: 96, invalid pairs: 0--- valid pairs: 196, invalid pairs: 0

pairs 0               (join trump office telegram channel, en)
1               (join trump office telegram channel, en)
2        (people join trump office telegram channel, en)
3      (capitol police announced charge democrat

 45%|████▌     | 482/1071 [00:29<00:54, 10.73it/s]

 0     (poll reveals leaning conservative previous ge...
1                                (fight fight join, en)
2     (kash patel lays epic monologue morning join, en)
3     (trump silver coins unmissable opportunity hug...
4                                            (join, fi)
                            ...                        
91    (smart turn weeks donald trump guaranteed safe...
92    (trump room president trump illegal migrants c...
93    (president trump north carolina georgia commun...
94    (donald trump press briefing hurricane respons...
95    (urgent trump silver coins worth rising news p...
Name: text, Length: 96, dtype: object
--- valid pairs: 27, invalid pairs: 0
pairs 0     (pretty obvious united states doesn strong con...
1           (american people angry realdonaldtrump, en)
2       (pertinent political today realdonaldtrump, en)
3     (knows support federal abortion circumstances ...
4     (democrats women letting play womensports walz...
5     (lyin kamala re

 45%|████▌     | 484/1071 [00:29<00:52, 11.20it/s]

--- valid pairs: 245, invalid pairs: 0
pairs 0               (join trump office telegram channel, en)
1      (news alert koch backed open borders fiend use...
2               (join trump office telegram channel, en)
3               (join trump office telegram channel, en)
4      (president trump steve gruber morning going de...
                             ...                        
257    (world watching join trump office telegram cha...
258      (people join trump office telegram channel, en)
259    (president trumpmotorcade left courthouse join...
260    (trump house judiciary investigates bragg pros...
261             (join trump office telegram channel, en)
Name: text, Length: 245, dtype: object
--- valid pairs: 122, invalid pairs: 0
pairs 0      (director christopher wray told congress yeste...
1      (crooked bidenperformance night dismal network...
2                                    (join official, en)
3      (meet francisco radical kamala harris join off...
4      (know sec

 45%|████▌     | 486/1071 [00:29<00:58, 10.03it/s]

--- valid pairs: 54, invalid pairs: 0
pairs 0                                 (realdonaldtrump, ro)
1      (bidenpolitical prosecution realdonaldtrump, ro)
2                      (year today realdonaldtrump, en)
3                    (voting biden realdonaldtrump, en)
4     (thank boris johnson machine style trump naked...
5     (record raised president trumpcampaign million...
6                           (thank realdonaldtrump, en)
7     (civil rights totally violated highly politica...
8     (large scale mounting evidence unprecedented n...
9     (crooked biden saying medicare cover americans...
10    (purpose hurting political opponent biden tryi...
11    (president like destroying country border allo...
12                                              (, unk)
13    (today honor immortal heroes landed normandy y...
14    (flashback march crooked biden called american...
15          (thank california maga realdonaldtrump, en)
16    (insulin pricing gotten millions americans tru...
17  

 46%|████▌     | 488/1071 [00:29<01:06,  8.73it/s]

 0               (join trump office telegram channel, en)
1      (police initiate emergency response reports bo...
2               (join trump office telegram channel, en)
3        (action join trump office telegram channel, en)
4      (harrington american people know president tru...
                             ...                        
282             (join trump office telegram channel, en)
283             (join trump office telegram channel, en)
284    (happening join trump office telegram channel,...
285    (president trump right close border need bills...
286    (president trump leaving join trump office tel...
Name: text, Length: 274, dtype: object--- valid pairs: 256, invalid pairs: 0

pairs 1           (zakhodil pravye prikoly proshlogo goda, sl)
4                                                (, unk)
9                                             (anal, it)
12                                               (, unk)
13                                 (dlia tekh znaet, sk)
   

 46%|████▌     | 491/1071 [00:30<00:49, 11.60it/s]

--- valid pairs: 85, invalid pairs: 00     (happy easter including crooked corrupt prosec...
1     (congressman carlos gimenez florida great morn...
2                                               (, unk)
3     (going country worst corrupt president history...
4     (want illegal alien criminals crawling windows...
5     (rebecca lavrez known jpraying grandma unfairl...
6     (crooked judge juan merchan allowing talk taki...
7                                 (realdonaldtrump, da)
8                   (america great realdonaldtrump, ro)
9                                               (, unk)
10                                (realdonaldtrump, ro)
11    (look found fake news report realdonaldtrump, en)
12    (biden totally lost control israel situation a...
13      (better years donaldjtrump realdonaldtrump, en)
14    (jerrod sessler fantastic candidate great cong...
15    (thank america great donaldjtrump realdonaldtr...
16    (judge juan merchan highly conflicted judge yo...
17    (want

 46%|████▌     | 495/1071 [00:30<00:38, 14.91it/s]

--- valid pairs: 80, invalid pairs: 0
1                                                 (, unk)
3                                                  (, fr)
4                                                  (, fr)
7                                                  (, ro)
8                                           (scavino, it)
                              ...                        
1393    (Yuan cruzHuo Yuan Zhou Xiao meta ceoZha Tian ...
1395                                              (, unk)
1396    (Qing Niao fani willisYou Tian Chou Duan amand...
1398                                              (, unk)
1401                                              (, unk)
Name: text, Length: 1017, dtype: objectpairs 
0     (comrade kamala harris lied working mcdonalds ...
1                                 (realdonaldtrump, da)
2     (christy shamblin mother sergeant nicole reald...
3     (mccollum father lance corporal rylee mccollum...
4     (want thank families great warriors lost came ..

 46%|████▋     | 498/1071 [00:30<00:37, 15.19it/s]

--- valid pairs: 2, invalid pairs: 0
pairs 0    (vullnet shyti welcome donald trump supporters...
1    (ottor welcome donald trump supporters group n...
Name: text, dtype: object
--- valid pairs: 3, invalid pairs: 0
pairs--- valid pairs: 67, invalid pairs: 0 
pairs 0    (julie madeline welcome donald trump supporter...
1    (godwin apedo welcome donald trump supporters ...
2    (valentine nise welcome donald trump supporter...
Name: text, dtype: object


 47%|████▋     | 501/1071 [00:30<00:33, 17.22it/s]

0     (world blowing middle east fire biden finally ...
1                                 (realdonaldtrump, ro)
2                                               (, unk)
3     (nikki haley ballot indiana didn petition sign...
4                                               (, unk)
                            ...                        
66    (strict rules regulations department injustice...
67                                              (, unk)
68    (fact fani willis lover wade unquestionably st...
69    (legal scholars extremely thankful supreme cou...
70    (wrong build successful liquid company owning ...
Name: text, Length: 67, dtype: object--- valid pairs: 286, invalid pairs: 0

pairs --- valid pairs: 47, invalid pairs: 00      (followed money identified biden received laun...
1      (president trump trying illegally remove ballo...
2               (join trump office telegram channel, en)
3      (biden pulls agents child trafficking focus ma...
4               (join trump office t

 47%|████▋     | 505/1071 [00:30<00:26, 21.38it/s]

pairs0     (acting director homan worked presidents seen ...
1                                (eagle pass texas, en)
2                          (radical left democrats, en)
3     (president trump slams fani willis nathan wade...
4     (trump said north carolina saying virginia pap...
5     (middle election super tuesday coming shortly ...
6     (supreme court address historic question presi...
7                            (america donaldjtrump, en)
8     (nikki haley trounced night record setting fas...
9     (sure coincidence bush senior director factual...
10    (tiktok facebook zuckerschmuck double business...
11    (trump liberty coins sure miss post right mome...
12    (sure coincidence bush senior director factual...
13    (bidenspeech night getting panned world radica...
14                      (great awakening pill wins, en)
15    (tucker carlson responds bidenstate union addr...
16             (join trump office telegram channel, en)
17             (join trump office telegram 

 48%|████▊     | 513/1071 [00:30<00:16, 34.11it/s]

 0    (trump nation dreams, en)
Name: text, dtype: object
0      (breaking illegal aliens dropped skis coast oc...
1      (breaking venezuelan president nicolas maduro ...
2      (breaking swiss parents child taken authoritie...
3      (thank father supporting unconditionally donal...
4      (police secret service plenty time notice null...
                             ...                        
192    (donald trump unveils ultimate health secret c...
193    (breaking kamala harris campaign account caugh...
194    (share share high crimes kamala harris article...
195    (trump threat guys kamala wonder enemy united ...
196    (rule simple survive destroy right eyes choose...
Name: text, Length: 197, dtype: objectpairs
 0    (julian welcome donald trump supporters group ...
1    (john kennedy welcome donald trump supporters ...
2    (donald trump welcome donald trump supporters ...
3    (khloe philips welcome donald trump supporters...
Name: text, dtype: object
--- valid pairs: 2, inva

 48%|████▊     | 518/1071 [00:30<00:15, 35.96it/s]

--- valid pairs: 404, invalid pairs: 0--- valid pairs: 17, invalid pairs: 0
pairs 
0     (garrett ventry unlike biden president trump a...
1     (failing york times prosecutors appeals court ...
2     (peru alien attack expedition report face peel...
3                            (donald trump leading, en)
4                                      (eric trump, ro)
5                                               (, unk)
6                                                (, pt)
7     (failed secretary defense lloyd austin fired i...
8     (experts legal analysts stated litigation judg...
9                   (difference crowds trump biden, en)
10                                              (, unk)
11                                              (, unk)
12                                              (, unk)
13                                              (, unk)
14    (jordan think clear president trump right grea...
15    (tell desperate backed corner know absolutely ...
16    (donald trump d

 49%|████▉     | 523/1071 [00:31<00:19, 28.10it/s]

pairs0     (elon musk corporations pulling advertise goin...
1                                                (, pt)
2                                 (trump christmas, pt)
3     (donald trump hope watching kangaroo court wit...
4     (donald trump people asking thought historylow...
5              (biden skipped army navy game today, en)
6                                                (, pt)
7      (watch mark levin congressman michael waltz, en)
8     (donald trump great poll numbers iowa thank ma...
9                                                (, pt)
10    (donald trump didn deranged jack smith team fi...
11    (trump national doral miami signed golf host c...
12    (crowd president trump hampshire trump energy,...
13                       (myth legend donald trump, en)
14     (donald trump thank dana white vegas nevada, en)
15    (usual completely biased democrat judge arthur...
16                   (trump biden threat democracy, en)
17    (biden drop fake political indictment

 50%|████▉     | 531/1071 [00:31<00:14, 37.87it/s]

pairs--- valid pairs: 9, invalid pairs: 0pairs  0               (join trump office telegram channel, en)
1      (boomeven mainstream media approved romanstory...
2      (chemical medications joint pain discover natu...
3      (president donald trump walking fpit lane join...
4      (finally trump liberty coin trade value stack ...
                             ...                        
107    (alert trump released documentation assassinat...
108                             (muted ringing head, en)
109    (human body water means frequencies form sound...
110      (beautiful humans feel happen banned video, en)
111    (tinnitus wipes memories month discover winnin...
Name: text, Length: 110, dtype: object0                                               (, unk)
1                                               (, unk)
2     (world better trump president bring sanity com...
3     (trump proven peace prosperity biden proven ch...
4                                                (, pt)
5      

 50%|█████     | 538/1071 [00:31<00:15, 34.03it/s]

--- valid pairs: 399, invalid pairs: 0
--- valid pairs: 455, invalid pairs: 0
pairs pairs 0                                  (getting started, en)
1                               (america knows time, en)
2                                    (bless america, ca)
3                                   (best days come, en)
4      (real president stood east palestine mattered,...
                             ...                        
407                            (mexicans love trump, en)
408                          (perfect biden welcome, nl)
409                                     (love texas, en)
410                                           (boss, en)
411                                               (, en)
Name: text, Length: 399, dtype: object0                                                (, unk)
1                                (deport bare hands, en)
2      (illegal immigration want vote opposite illega...
3                                                 (, tl)
4                

 51%|█████     | 543/1071 [00:31<00:20, 25.88it/s]

--- valid pairs: 2, invalid pairs: 0
pairs 0    (warm welcome tampon follow, en)
1                   (boss follow, en)
Name: text, dtype: object
--- valid pairs: 115, invalid pairs: 0--- valid pairs: 65, invalid pairs: 0

pairspairs  0                                       (spam links, en)
1                            (stop coming join link, en)
2                                       (spam links, en)
3                                             (guys, en)
4      (guys greetings serbia support fight proportio...
                             ...                        
110                                              (, unk)
111                                              (, unk)
112    (hearts broken people supporting donald trump ...
113                                    (privet vsem, sl)
114                                       (language, tl)
Name: text, Length: 115, dtype: object
0                     (biden oligarchy decide fate, en)
1           (steve bannon remarks reporting 

 51%|█████     | 547/1071 [00:31<00:20, 25.89it/s]

pairs 0                 (peoplepresident, en)
1                     (april fools, en)
2                  (love maga nuns, tl)
4               (nice warm welcome, en)
5                    (easter great, en)
                     ...               
464                         (robot, en)
465                              (, en)
466             (judge stick order, en)
467    (going beautiful graduation, en)
468                  (final battle, en)
Name: text, Length: 456, dtype: object--- valid pairs: 68, invalid pairs: 0

pairs--- valid pairs: 64, invalid pairs: 0
pairs 0                                                (, id)
1                                               (, unk)
2                      (unconstitutional raid lago, en)
3     (senator kennedy questions wray epstein invest...
4                     (biden bogus executive order, da)
                            ...                        
60               (people york thought debate night, en)
61                               (th

 51%|█████▏    | 551/1071 [00:32<00:20, 24.86it/s]


--- valid pairs: 31, invalid pairs: 0--- valid pairs: 709, invalid pairs: 0
--- valid pairs: 21, invalid pairs: 0

pairspairs  pairs0                               (year today follow, en)
1     (worldgreatest fighters respect greatest fight...
2                                          (follow, en)
3                     (real commander chief follow, en)
4                                               (, unk)
5                             (kingbuilding follow, en)
6                          (indicted bullsh follow, en)
7                     (real commander chief follow, en)
8                       (love newport beach follow, en)
9                          (peoplepresident follow, en)
10                                          (bless, en)
11                         (bring greatness follow, en)
12                           (deal zelensky follow, en)
13    (beneficial cold showers cold showers useful c...
14                           (nice birthday follow, en)
15    (gibson right need st

 52%|█████▏    | 556/1071 [00:32<00:17, 28.67it/s]


0      (, unk)
1      (, unk)
2      (, unk)
3      (, unk)
4      (, unk)
        ...   
128    (, unk)
129    (, unk)
130    (, unk)
131    (, unk)
132    (, unk)
Name: text, Length: 133, dtype: object
0      (, unk)
1      (, unk)
2      (, unk)
3      (, unk)
4      (, unk)
        ...   
118    (, unk)
119    (, unk)
120    (, unk)
121    (, unk)
122    (, unk)
Name: text, Length: 123, dtype: object0                                           (, unk)
1                                  (inevitable, en)
2      (real criminals ones destroying country, en)
3                                (indict unite, en)
4                                 (stop coming, en)
                           ...                     
526                                 (real boss, en)
527                                         (, unk)
528                                      (wait, en)
529                                         (, unk)
530                           (bring greatness, en)
Name: text, Length: 

 53%|█████▎    | 568/1071 [00:32<00:10, 46.85it/s]




 54%|█████▎    | 574/1071 [00:32<00:20, 24.82it/s]


0     (, unk)
1     (, unk)
2     (, unk)
3     (, unk)
4     (, unk)
5     (, unk)
6     (, unk)
7     (, unk)
8     (, unk)
9     (, unk)
10    (, unk)
11    (, unk)
12    (, unk)
13    (, unk)
14    (, unk)
15    (, unk)
16    (, unk)
17    (, unk)
18    (, unk)
19    (, unk)
20    (, unk)
21    (, unk)
22    (, unk)
23    (, unk)
24    (, unk)
25    (, unk)
26    (, unk)
27    (, unk)
28    (, unk)
29    (, unk)
30    (, unk)
31    (, unk)
32    (, unk)
33    (, unk)
34    (, unk)
35    (, unk)
36    (, unk)
37    (, unk)
38    (, unk)
39    (, unk)
40    (, unk)
41    (, unk)
42    (, unk)
43    (, unk)
44    (, unk)
45    (, unk)
Name: text, dtype: objectpairs--- valid pairs: 2, invalid pairs: 0
 pairs
0    (completely agree, en)
Name: text, dtype: object
 --- valid pairs: 23, invalid pairs: 0
pairs--- valid pairs: 2, invalid pairs: 00    (exactly know said think said hots things dest...
1    (movie natural selection higher consciousness ...
Name: text, dtype: object
 --- valid 

 58%|█████▊    | 616/1071 [00:32<00:05, 78.34it/s]

0                                 (, sw)
1    (trump news cost sell username, en)
Name: text, dtype: object
pairs

--- valid pairs: 29, invalid pairs: 0pairs--- valid pairs: 28, invalid pairs: 0
  
--- valid pairs: 129, invalid pairs: 0 pairspairspairs   0      (, unk)
1      (, unk)
2      (, unk)
3      (, unk)
4      (, unk)
        ...   
127    (, unk)
128    (, unk)
129    (, unk)
130    (, unk)
131    (, unk)
Name: text, Length: 132, dtype: object--- valid pairs: 6, invalid pairs: 0
0            (trump train wooo, af)
1    (trump taken ballot write, en)
Name: text, dtype: object
0      (, unk)
1      (, unk)
2      (, unk)
3      (, unk)
4      (, unk)
        ...   
115    (, unk)
116    (, unk)
117    (, unk)
118    (, unk)
119    (, unk)
Name: text, Length: 120, dtype: object--- valid pairs: 119, invalid pairs: 0

pairs0     (chemtrail haarp scalar waves haarp emits supe...
1     (camera paid crisis actor john sullivan confes...
2     (doctor receives years prison deliberatel

 59%|█████▉    | 632/1071 [00:33<00:04, 90.16it/s]



0    (funds available immediately delighted announc...
1    (bank sending message dear customer funds fail...
2    (watch patriot michael brown shares card exper...
3    (card cash loyal patriot michael brown shared ...
4    (donald trump gave discount card today holiday...
5    (order card discount system finance team great...
6        (biggest secrets deep state exposed join, en)
7    (time agree biden share post fellow patriots w...
8    (leavitt democrats blown freakout mode biden w...
9    (bars stop winning letsupport trump chance wan...
Name: text, dtype: objectpairs
 --- valid pairs: 1, invalid pairs: 0--- valid pairs: 21, invalid pairs: 0
pairs 0      (wonder diddy long time posting alex jones, en)
1    (beta cringe videos long time posting alex jon...
2                   (long time posting alex jones, en)
3              (nice long time posting alex jones, en)
4    (vance gave permission vote trump long time po...
5             (weird long time posting alex jones, en)
6    (

 61%|██████    | 648/1071 [00:33<00:06, 62.95it/s]

pairs  --- valid pairs: 30, invalid pairs: 0--- valid pairs: 46, invalid pairs: 0
pairs --- valid pairs: 12, invalid pairs: 0
pairs0     (trump media stock surges percent week trading...
1     (donald trump instant lottery incredible oppor...
2     (breaking appalling insulting bidenwhite house...
3     (burnett people talk threat democracy trump po...
4     (winner trump lottery dear delighted announce ...
5     (hello spring president trump kicks april stop...
6                                               (, unk)
7     (voters registering photo texas pennsylvania a...
8     (emergency migrant crime wave seized america t...
9     (judge issues ruling illegal immigrant flights...
10         (republicans rename major airport trump, en)
11    (exciting news alert donald trump finally reve...
12    (transgender defense attorney flaunts giant br...
13    (time georgia days away general election presi...
14    (welcome continuous wins lottery keystroke win...
15    (biden suffers total ec

 62%|██████▏   | 660/1071 [00:33<00:07, 53.13it/s]

--- valid pairs: 35, invalid pairs: 0
--- valid pairs: 37, invalid pairs: 0
pairspairs  2     (breaking russia electroizolit plant fire mosc...
3     (apparently clue ukrainians hate russia holodo...
4                    (sounds like problem superman, no)
7     (roadside assistance insurance told cancel cov...
8                                            (woke, nl)
9     (reached critical mass took hours middle town ...
10                                              (, unk)
12    (stfu click bait ripoff scams people like stra...
13                                         (banned, no)
16    (pedo priest truth wasn earth bible finally wr...
17                                          (whale, en)
22                                               (, fr)
25    (prediction solar eclipse going solar eclipse ...
26                                      (happening, en)
27                                     (looks like, et)
28                                           (eyes, tr)
29    (freak nat

 63%|██████▎   | 670/1071 [00:34<00:09, 42.67it/s]


--- valid pairs: 282, invalid pairs: 0
pairs 0                     (time save america join enjoy, sq)
1                                                (, unk)
2                (real president america great love, en)
3      (aproved loyal supporters believed president t...
4      (patriots electrifying news melania trump prom...
                             ...                        
277    (president trump like trump support ridersonth...
278    (come stronger save america patriots follow da...
279                                    (think ready, en)
280    (welcome qintelltelegram channel gateway advan...
281    (california crisis moment thief brazenly steal...
Name: text, Length: 282, dtype: object
--- valid pairs: 4, invalid pairs: 0
pairs--- valid pairs: 124, invalid pairs: 0
pairs 1      (worship satan cleaning actors rally olympic c...
2      (paying anti trump agents trump chicago venezu...
3      (drop kamala exposure continues olympic evil p...
4                             

 63%|██████▎   | 678/1071 [00:34<00:10, 37.20it/s]

pairs 0      (honey badger fights leopards walks away join ...
1      (thermal blankets help illustrate massive inva...
2      (christians massacred christmas extremists nig...
3      (population control watch interesting pandemic...
4      (operation mockingbird operation infiltrate co...
                             ...                        
156    (wooz news superstar rafael jdefendant eric cl...
157        (subscribe trump office telegram channel, en)
159    (homeland security committee votes impeach may...
160    (original trust science good fluoride actually...
161                                               (, id)
Name: text, Length: 132, dtype: object
--- valid pairs: 347, invalid pairs: 0
pairs 0                                                (, unk)
1                                                (, unk)
2                                                (, unk)
3                                                (, unk)
4                                                (, unk

 64%|██████▍   | 684/1071 [00:34<00:11, 33.11it/s]

pairs 
2    (cristy talked cole kelley sunday education ch...
Name: text, dtype: objectpairs 
3    (case interested outreach groups holding train...
Name: text, dtype: object
--- valid pairs: 7, invalid pairs: 0--- valid pairs: 0, invalid pairs: 0
pairs--- valid pairs: 0, invalid pairs: 0
 Series([], Name: text, dtype: object)

pairs --- valid pairs: 38, invalid pairs: 0pairs
 pairs3    (realized haven posted podcast latest youtube,...
4                                              (, unk)
5    (interviews tina cannon rachel terry heidi bal...
6                                   (list youtube, en)
7                                              (, unk)
8                                (podcast spotify, en)
9                              (interviews coming, en)
Name: text, dtype: objectSeries([], Name: text, dtype: object) 
0                                               (, unk)
1     (insulted people odisha lord jagannath rahul g...
2     (insulted people odisha lord jagannath rahul g..

 65%|██████▍   | 691/1071 [00:34<00:10, 37.26it/s]

--- valid pairs: 1, invalid pairs: 09    (flyers ucrp caucus night additional resources...
Name: text, dtype: object
0    (reply email looks like hacker unfortunately p...
1    (making updates hello check working ucrp want ...
2                   (utah county republican party, en)
9    (utah county republican party send email purch...
Name: text, dtype: object

pairs 
pairs0                                               (, unk)
1                                               (, unk)
2                  (monica wilburepisode came week, en)
3                                               (, unk)
4                    (tina cannonepisode came week, en)
5                                               (, unk)
10    (sign pledge ucrp list caucus convention suppo...
11                                              (, unk)
12                                              (, unk)
Name: text, dtype: object
--- valid pairs: 2, invalid pairs: 0
 pairs--- valid pairs: 5, invalid pairs: 018    (willing 

 65%|██████▌   | 698/1071 [00:35<00:10, 36.82it/s]

 --- valid pairs: 308, invalid pairs: 0
1                                    (intentional, it)
2    (important update effective immediately mrpdel...
3     (update documentary whitmer kidnapping hoax, en)
4    (stop friends friday commit sending personal t...
Name: text, dtype: objectpairs --- valid pairs: 20, invalid pairs: 0
0                   (israel biggest terrorist state, en)
1      (defending life different creating false flag ...
2      (zionists true jews claim speaks anti semites ...
3      (true jews palestinians semitic people cousins...
4                                       (spam links, en)
                             ...                        
303                                          (hello, fi)
304                                              (, unk)
305                                               (, sw)
306                                              (, unk)
307                                               (, en)
Name: text, Length: 308, dtype: object
--- v

 66%|██████▌   | 703/1071 [00:35<00:09, 38.07it/s]

--- valid pairs: 1763, invalid pairs: 0
pairs 0       (altcoins flexing outperforming having identit...
1       (west bankgot drama telenovela israelis killed...
2                   (good morning happy month season, en)
3       (coinbase flexing muscles crypto bitcoin atms ...
4                                                  (, pl)
                              ...                        
2086    (goldman sachs thinks goldgoing shine israelte...
2087    (farting frog splash launched pump paid koth b...
2088          (reaczhedfbmmfheevusrnhwmxsxjxepjmzpbw, nl)
2089    (farting frog splash hits seen comments reply ...
2090    (israeli militaryplaying lebanon japanunemploy...
Name: text, Length: 1763, dtype: object
--- valid pairs: 25, invalid pairs: 0
pairs 0     (check upcoming conference learn turning point...
1     (tyler bowyer action appeared explain efforts ...
2     (forget vote nonpartisan section ballot michig...
5     (newaygo county conservative voter guide presi...
6     (m

 66%|██████▌   | 708/1071 [00:35<00:11, 30.29it/s]

 0                                          (fooled, en)
1           (july cadillac bring trump flags uncle, en)
2     (credit warren carpenter excellent timeline im...
4     (plan like typical labadie colbeck voter suppr...
5     (trained poll workers kicked building detroit ...
6     (bring blanket cozy evening stars enjoy nostal...
7     (referring poll challengers poll whiners poll ...
8     (effect hint august think november stop suppre...
9                                           (march, en)
10    (gary anderson interested sponsorship opportun...
11    (appeared roger stonediscussed push globalist ...
12    (biden survive doug burgum right stonezone liv...
13    (crave freedom refilled yearning burst garrett...
14            (push representatives senators voted, en)
15    (breaking michigan governor gretchen whitmer o...
16    (discussed sbpassed couple weeks doesn effect ...
17    (suppressing vote liddle clubs talking point s...
19                                          (ca

 67%|██████▋   | 713/1071 [00:35<00:10, 33.44it/s]

 0     (need trump trump needs trump force training c...
1     (check precinct activist training easier invol...
2                                 (signs left text, en)
3           (returns party compliments mrpdelegate, en)
4     (reported million early imply early voters kno...
5     (great tonight newaygo county thank speakers v...
13    (knocking doors making phone calls sending tex...
14    (insights information state budget committee a...
15                (bunch thieves liars thank fired, en)
16    (needed remove glad right thing right thing ea...
17                                        (exactly, en)
18    (approved malfeasance immediately removed stat...
19                                              (, unk)
20    (super saturday team rogers week fremont rsvp ...
21    (hard working republicans newaygo county atten...
24    (weekprimary election republicans sold constit...
25                       (green township wednesday, en)
26    (want know frustrated excel spreadsheet a

 67%|██████▋   | 718/1071 [00:35<00:10, 34.91it/s]


--- valid pairs: 3, invalid pairs: 0
pairs 0                                              (, unk)
1    (today october come texians gonzales refused r...
2    (amazing debate vance doubt donald trump chose...
Name: text, dtype: object
--- valid pairs: 62, invalid pairs: 0
pairs 1     (know state gina johnsen voted gretchen whitme...
2     (executive committee meeting scheduled thursda...
3     (kendall beyer ballot precinct delegate wasn c...
4                    (prophet known fill affidavit, en)
5     (trump needs michigan michigan needs macomb co...
                            ...                        
78    (debate presence policy exceeded wildest fever...
79                                         (xsigns, en)
80                (xsigns available letground text, en)
81    (wish wasn city ordinances pretty sign confisc...
82                                         (xsigns, en)
Name: text, Length: 62, dtype: object
--- valid pairs: 20, invalid pairs: 0
--- valid pairs: 14, invalid 

 68%|██████▊   | 723/1071 [00:36<00:11, 30.75it/s]


--- valid pairs: 69, invalid pairs: 00     (week away lincoln reagan dinner featured spea...
1     (tickets texas youth summit today tickets avai...
2                                                (, da)
3     (donald trump perfectly summed election closin...
4     (voter registration challenge simple steps hel...
5     (bless president trump praying safety safety f...
6     (today national voter registration visit links...
7     (united states constitution document continue ...
8     (texas youth summit starts tomorrow sept time ...
11    (important reminders november election confirm...
12    (wish metcalf happy birthday rally ignite wave...
14    (join tuesday meeting montgomery county republ...
15    (tomorrow attend montgomery county faith coali...
16    (tony buzbee served lead counsel defending gre...
Name: text, dtype: object
pairs 
0     (kristina believes require million dollars bea...
1     (great question question people stated support...
2     (pass anti life bills tenta

 68%|██████▊   | 728/1071 [00:36<00:10, 33.74it/s]

 --- valid pairs: 76, invalid pairs: 00     (montgomery county showed republican party tex...
1     (tomorrow june montgomery county trump train i...
2     (tomorrow join montgomery county party update ...
3     (involved local republican party meetings open...
4     (early voting city conroe runoff river plantat...
5     (want houston runaway developers real estate b...
6     (tonight tomorrow rallies support president tr...
8     (attention state delegates republican party te...
9     (save date thurs june election integrity townh...
10    (tomorrow early voting uniform election runoff...
11    (early voting conroe runoffs ends tonight conr...
12    (remember glory today happy birthday president...
13    (conroe residents vote tomorrow critical runof...
14    (vote conroe runoff election polls close vote ...
15    (thank voted conroe runoff election volunteers...
16    (thank amazing dads impossible reachable going...
17    (tuesday montgomery county republican party sw...
18    (ton

 68%|██████▊   | 732/1071 [00:36<00:09, 35.06it/s]



--- valid pairs: 106, invalid pairs: 00     (mike johnson ukraine agenda shows different m...
1                         (join sorry short notice, en)
2     (john uniparty moolenaar terrible voting recor...
3     (come receipts outlandish accusations american...
4     (saying bolder secure sounds little understand...
                            ...                        
76                                              (, unk)
77    (delegates trump organizers alexa otte shane t...
79                               (spoken presented, en)
80                              (brave strong true, en)
81              (bought tickets come join saturday, en)
Name: text, Length: 76, dtype: object
pairs--- valid pairs: 2, invalid pairs: 0
pairs--- valid pairs: 23, invalid pairs: 0pairs
 pairs  0                                     (tickets sale, sv)
1                                                (, unk)
2      (forget elected copas threatened state committ...
3                                  (h

 69%|██████▉   | 741/1071 [00:36<00:07, 46.62it/s]


pairs 
--- valid pairs: 19, invalid pairs: 0--- valid pairs: 6, invalid pairs: 0
0     (early voting ends tomorrow visit website vote...
1     (conroe billion bond necessary texas scorecard...
2     (support local republican party monday drive v...
3     (friday chairman bryan christ sent press relea...
4     (message provided behalf perry woodlands towns...
5     (party voted unanimously endorse shelley sekul...
6     (today election polls open election determine ...
7     (join jameson ellis wheels warriors battle bra...
8     (jameson ellis watching weather wheels warrior...
9     (tomorrowpm conroe board meeting includes agen...
10    (special legislative session underway border s...
11    (report released congressional homeland securi...
12    (speaker mike johnson today keeping promise am...
13                (visit house view january videos, en)
14    (grassroots conservative republicans montgomer...
21    (drive vote golf tournament great success cong...
29    (republican prec

 70%|██████▉   | 747/1071 [00:36<00:06, 49.61it/s]

--- valid pairs: 33, invalid pairs: 0
pairs 0     (additional july parade details uploaded calen...
1     (happy independence time country registered vo...
2     (independence willing pledge sake freedom carr...
3     (south county july parade thank volunteers pat...
4     (time volunteer help cruz senate float tomorro...
5     (party share bobbi bodenhamer precinct chair p...
7     (thoughts prayers aftermath storm cooling cent...
8     (county judge mark keough county libraries pow...
10    (county judge mark keough supply distribution ...
11    (montgomery county chair gwen withrow experien...
12    (clear democrats want illegals voting hell ben...
13    (saturday distribution site water mres bull sa...
14    (nice serfs effective citizen leaders smile pl...
15    (find date information power restoration cooli...
16    (stop join praying president donald trump word...
17    (gwen withrow chairman montgomery county repub...
18    (action july deadline urge elected trustees se...
19  

 70%|███████   | 753/1071 [00:36<00:11, 28.09it/s]

pairs--- valid pairs: 1, invalid pairs: 0 0               (whoa guess migration missed date, en)
1                            (miss migration mate, en)
2            (migration live check pinned message, en)
3            (migration live check pinned message, en)
4            (migration live check pinned message, en)
5    (follow instruction pinned message migrate tok...
6            (migration live check pinned message, en)
7            (migration live check pinned message, en)
Name: text, dtype: object

pairs --- valid pairs: 3, invalid pairs: 0
0    (laura welcome letsgobrandontoken main chat, en)
Name: text, dtype: objectpairs 0                             (migrating deadline, en)
1                                              (, unk)
2    (migration letsgobrandontoken main chat improv...
Name: text, dtype: object

--- valid pairs: 79, invalid pairs: 0--- valid pairs: 1365, invalid pairs: 0

pairspairs  0                             (Shang Mian Qing Kuang, tl)
1                      

 71%|███████   | 758/1071 [00:37<00:19, 16.33it/s]

--- valid pairs: 407, invalid pairs: 0
pairs 0                             (welcome chris webber, nl)
1                                          (uniswap, sw)
2                                          (raydium, so)
3                     (optimistic passionate strong, en)
4                     (revshare interested claiming, en)
                             ...                        
610                                           (hodl, cy)
616    (reward claiming live read pinned message clai...
617    (reward claiming live read pinned message clai...
618    (reward claiming live read pinned message clai...
619    (claimed rewards thank admin guys suggest yall...
Name: text, Length: 407, dtype: object
--- valid pairs: 79, invalid pairs: 0
pairs 0     (migration update migration improved smartdefi...
1             (migration live check pinned message, en)
2             (migration live check pinned message, en)
3             (migration live check pinned message, en)
4     (believe token 

 71%|███████   | 762/1071 [00:38<00:29, 10.55it/s]

pairs 1      (gonna real diamond hands true potential quick...
7      (feels good work team putting project lettry s...
8      (believe token good future proper planning goi...
9      (welcome kama official telegram group able cla...
10                                    (welcome paul, en)
                             ...                        
594    (read pinned message carefully follow pinned m...
597                                (keeping bullish, en)
598                           (confirmed millioners, en)
599              (letsupporting token super bullish, en)
600    (wish coins claim airdrops fill wallet coins, en)
Name: text, Length: 426, dtype: object
--- valid pairs: 1651, invalid pairs: 0
pairs 0                                   (nice knowing, en)
1                                          (hello, en)
2                          (welcome message saved, en)
3       (welcome trump community millionare trump, en)
4       (welcome trump community millionare trump, en)
       

 71%|███████▏  | 765/1071 [00:38<00:32,  9.31it/s]

--- valid pairs: 511, invalid pairs: 0
pairs 0      (happy able work beautiful project like projec...
4      (admit addicted shilling addicted getting peop...
5      (claimed holders reward thanx participants fee...
6      (cool amazing long claimed actually going hehe...
7                                           (coming, en)
                             ...                        
821    (imagine billions rewards earned according siz...
822    (feel sorry doubting opportunity happy claimed...
823    (received rewards fast stressful truly great p...
826    (feels like people going jump community regret...
829                                        (welcome, nl)
Name: text, Length: 511, dtype: object
--- valid pairs: 562, invalid pairs: 0pairs
 0                          (going moon lambo season, en)
1          (bought tokens like claim holders reward, en)
2                                                 (, en)
3                        (pinned message claim mate, en)
6            (re

 72%|███████▏  | 767/1071 [00:39<00:53,  5.65it/s]

--- valid pairs: 527, invalid pairs: 0
pairs 0      (india general election live updates voting se...
1      (sabha election analysis candidates contesting...
2      (congressdecision skip exit poll debates shows...
3      (high stakes battle odisha elected supremo nav...
4      (phase polls loots reserve evms throws vvpat m...
                             ...                        
522    (haryana assembly polls expose falsehood misru...
523    (mumbai congress leaders demand change helm ah...
524    (cibil scores farmers face fadnavis warns bank...
525    (people india given clear mandate modi governm...
526    (leader marathwada blames ajit pawarncp poll s...
Name: text, Length: 527, dtype: object
--- valid pairs: 622, invalid pairs: 0
pairs 0                      (takes mins staking ethereum, en)
1      (shouldn miss project golden opportunity earn ...
2                         (revenue share live claim, en)
5      (days able mooooooreeeeeeeeeeee ready enjoy ro...
6      (volume s

 72%|███████▏  | 769/1071 [00:40<00:58,  5.18it/s]

--- valid pairs: 94, invalid pairs: 0
pairs 0                                                (, unk)
1      (officials announced stay home order rockdale ...
3      (years hurricane helene landfall date category...
4                                                (, unk)
5      (conscious free people walking planet actually...
                             ...                        
110    (surrender children care guard custodial custo...
111    (basics ensure children care wander dangerous ...
112    (report suspicious activity happening affected...
113    (police vegas told prepare meet president bide...
114    (kamala harris admin making people apply permi...
Name: text, Length: 94, dtype: object
--- valid pairs: 118, invalid pairs: 0
pairs 0      (watching meteorologist tiktok covering tornad...
1          (dutch interesting nexrad radar evidence, en)
2      (wichita falls texas oklahoma border nexrad ra...
3      (astounding mention weather modification label...
4      (file jail 

 72%|███████▏  | 771/1071 [00:41<01:26,  3.46it/s]

--- valid pairs: 815, invalid pairs: 0
pairs --- valid pairs: 406, invalid pairs: 0
0                                              (hold, en)
1                                              (hold, en)
2                                              (hold, en)
3       (count whales completely token today larger on...
4                                                 (, unk)
                              ...                        
1174    (time rush forget airdrop wallet kind magic re...
1178                    (hold guys hold mooning soon, en)
1179    (keeps putting huge smiles faces huge bonuses,...
1180                          (definitely grow xsure, en)
1183     (bold opportunities like afraid rich thanks, en)
Name: text, Length: 815, dtype: objectpairs
 0      (cold reunion prince harry prince william kept...
1      (elon musk picked notorious date officially sh...
2      (nasty surprise male migrant center open feet ...
3      (watch college football team sets celebratory ...
4    

 72%|███████▏  | 772/1071 [00:42<01:31,  3.25it/s]


--- valid pairs: 558, invalid pairs: 0
pairs 0       (whistleblower reveals information walzties, en)
1                      (windy wind weather forecast, en)
2                                                (, unk)
3                                                (, unk)
4                                                (, unk)
                             ...                        
708    (fema calling liar claiming reports giving mon...
711    (trump mentioned world fairs tennessee held mo...
712    (fema blocking starlink shipments people offli...
713    (watch look inside furnished maine apartments ...
714    (holy crap president gone level authority conf...
Name: text, Length: 558, dtype: object


 72%|███████▏  | 774/1071 [00:42<01:10,  4.20it/s]

--- valid pairs: 1799, invalid pairs: 0
pairs 0       (voting person votes counted mandatory vote br...
1       (city county valid required glad common sense,...
2              (staged trumps biggest miistake debat, en)
3                                                  (, en)
4                                     (jewish colony, en)
                              ...                        
2035                                               (, en)
2036                                               (, sq)
2037                                   (area coumtry, en)
2038                                           (good, sl)
2039    (literally like choices decent restaurant groc...
Name: text, Length: 1799, dtype: object
--- valid pairs: 75, invalid pairs: 0
pairs 0      (breaking federal government allegedly process...
1                                            (bingo, tl)
2      (truth download share spread word victim ignor...
3      (examples world proves ancestors utilized elec...
13  

 72%|███████▏  | 776/1071 [00:42<01:03,  4.62it/s]

--- valid pairs: 419, invalid pairs: 0--- valid pairs: 709, invalid pairs: 0

pairspairs  --- valid pairs: 409, invalid pairs: 00      (biden slapped massive backlash trying replace...
1      (police officers help homeless boyvan defund p...
2      (heartbreaking photos nypd officer jonathan di...
3      (decade years nearly half people german town h...
4      (university picked chief accused felony strang...
                             ...                        
414    (taylor swiftboyfriend travis kelce gets exten...
415    (country star helps clean damage nebraska torn...
416    (senators issue ultimatum ethical failure high...
417    (watch jamal murray dunks lebron james scores ...
418    (trump shows love desantis great meeting miami...
Name: text, Length: 419, dtype: object0      (kamala harris spent harris buyer position mar...
1      (kamala harris spent harris buyer holder marke...
2      (kamala harris spent harris buyer position mar...
3      (kamala harris whale spent ha

 73%|███████▎  | 780/1071 [00:42<00:41,  7.00it/s]

--- valid pairs: 436, invalid pairs: 0
pairs 0      (francisco voters implement drug testing welfa...
1      (republican senator jumps race replace mitch m...
2      (lgbt indoctrination push suffer blow state ad...
3      (hackers seize trump court docs release shake ...
4      (trump celebrates gets major supreme court rea...
                             ...                        
431    (bidennew crackdown diesel trucks buses slamme...
432    (american social media star kidnapped haiti st...
433         (view audience applaud took racism read, en)
434    (brutal news migrant influencer coached illega...
435    (state republicans governor checkmate pass bil...
Name: text, Length: 436, dtype: object
--- valid pairs: 3, invalid pairs: 0
--- valid pairs: 11, invalid pairs: 0pairs 
pairs 0    (bible mass migration foreigners gaining power...
1    (immigrants taken germany eliminate german civ...
3    (bernie sanders called satanic ritual abuse ch...
Name: text, dtype: object0           

 73%|███████▎  | 782/1071 [00:43<00:37,  7.74it/s]

--- valid pairs: 437, invalid pairs: 0
pairs--- valid pairs: 4, invalid pairs: 0
pairs 0                                              (, unk)
1                                              (, unk)
2    (official evil presidents executed crimes huma...
3                                 (yeeeaaa buddyy, so)
Name: text, dtype: object
 0      (biden gone calls trump congressman nearest nu...
1                                                (, unk)
2      (facility removes bible table honors imprisone...
3             (collapse global american empire read, en)
4      (deadly terror attack hits jerusalem suspects ...
                             ...                        
432    (families idaho college murder victims point m...
433    (routine patrol goes rails officers encounter ...
434    (americasodom gomorrah dystopia rookie speaks ...
435    (jack smith claims granting trump immunity pav...
436    (officers respond home maine official removed ...
Name: text, Length: 437, dtype: object


 73%|███████▎  | 786/1071 [00:43<00:26, 10.72it/s]

--- valid pairs: 465, invalid pairs: 0
pairs 0      (politicoattempt brand having babies right end...
1                           (ship sliding away read, en)
2      (columbia chaos escalates overnight hostage si...
3      (zelenskyy says working washington future pres...
4      (university florida puts columbia shame perfec...
                             ...                        
460    (ivanka breaks silence posts emotional word re...
461    (tech mogul stuns figure trump donation guilty...
462    (cracker barrel makes significant changes rais...
463    (bidenattempt cash minutes trumpguilty verdict...
464    (trump agree peopletown hall biden missing rep...
Name: text, Length: 465, dtype: object--- valid pairs: 9, invalid pairs: 0

pairs 0                                              (, unk)
1                                              (, unk)
2    (leftist upset triggered missed trump believe ...
3    (breaking jacqueline marsaw fired according bi...
4    (media turns shooting

 74%|███████▍  | 790/1071 [00:43<00:20, 13.77it/s]

--- valid pairs: 78, invalid pairs: 0
pairs  0      (watch weather channel covered team scientists...
1      (study published christmas found children rece...
2                                                (, unk)
4      (holding official narrative events lend moment...
5                                                (, unk)
                             ...                        
97                  (absolutely love jordanresponse, en)
98     (collision coming tucker carlson jordan peters...
100    (drone attack military installation jordan kil...
101    (pure unadulterated greed convince simply naiv...
103    (people familiar tomorrowrelease texting askin...
Name: text, Length: 78, dtype: object
--- valid pairs: 8, invalid pairs: 03      (podmoskov vyboram gotovo segodnia sostoialas ...
10                                               (, unk)
14     (pervyi brifing proshel tsentre obshchestvenno...
15     (podmoskov otkrylis izbiratel nykh uchastkov o...
16     (tsentr obshchestve

 75%|███████▍  | 798/1071 [00:43<00:11, 23.88it/s]

--- valid pairs: 63, invalid pairs: 0pairs--- valid pairs: 140, invalid pairs: 0
0    (, unk)
1    (, unk)
Name: text, dtype: object
pairs--- valid pairs: 3, invalid pairs: 0pairs
pairs    

0    (, unk)
1    (, unk)
2    (, unk)
Name: text, dtype: object
1     (stitched image shooting suspect maxwell yeari...
2                                               (, unk)
5                                                (, so)
6     (currently kamala rally atlanta mind people sa...
7     (dozen individuals including journalist evan g...
                            ...                        
68    (tricky left people searching answer think sol...
69    (good morning makes wonder lies history easter...
70                                        (eastern, en)
71                                        (eastern, en)
72    (shared past great awakening thought share cat...
Name: text, Length: 63, dtype: object0    (, unk)
1    (, unk)
2    (, unk)
3    (, unk)
Name: text, dtype: object
0      (nazi 

 76%|███████▌  | 809/1071 [00:43<00:06, 38.30it/s]

--- valid pairs: 84, invalid pairs: 0--- valid pairs: 5, invalid pairs: 0

pairs pairs 0                                              (, unk)
1    (trumpbuy spent trumpbuyer position market dex...
2    (trumpbuy spent trumpbuyer position market dex...
3    (trumpbuy spent trumpbuyer holder market dext ...
4    (trumpbuy spent trumpbuyer position market dex...
Name: text, dtype: object0                                                (, nl)
1     (sidney powell revealed discovered department ...
2     (montana medically kidnapped teen wyoming gend...
3     (watch entirety relieved hear bret speaking pl...
4     (tucker carlson bret weinstein traveled darien...
                            ...                        
89    (tucker poisoning food wasn diabetes years say...
90                                              (, unk)
91                                         (asking, af)
92                                       (happened, en)
93    (sure praying land owners texas horrible come .

 76%|███████▌  | 815/1071 [00:43<00:07, 35.86it/s]

--- valid pairs: 422, invalid pairs: 0pairs
 0      (trial remove trump ballot begins guess appoin...
1      (year shot dead apparently trying intimidate f...
2      (green agenda fail biden counter billions wind...
3      (zelenskyy adviser admits truth corruption ukr...
4                                 (trick treat read, en)
                             ...                        
417        (blue city mayor stolen second time read, en)
418    (exorcist warns attending taylor swift concert...
419    (arizonademocratic attorney general announces ...
420    (huge plot twist leader blasts democrats endor...
421    (general motors pulls funding foundering elect...
Name: text, Length: 422, dtype: object
--- valid pairs: 10, invalid pairs: 0
pairs 0                                               (, unk)
5     (letie dzhimmi kartera oktiabria budet shiroko...
6     (ministr oborony ssha lloid ostin napisal tvit...
7                                               (, unk)
11    (edinstvennyi o

 77%|███████▋  | 820/1071 [00:44<00:17, 14.61it/s]

pairs 0     (dmitrii bykov rasskazal svoikh literaturnykh ...
1                                               (, unk)
2     (dlia amerikantsev simvol vtoroi mirovoi voiny...
3                                               (, unk)
4     (schitat chto vsio zavisit prezidenta travma v...
5     (prezidenta baidena priznan vinovnym triom ugo...
6                                               (, unk)
11                                              (, unk)
14                                              (, unk)
15    (dzhulian assanzh osnovatel wikileaks kotorogo...
16    (kogda prokurory ssha nachali rassledovanie vy...
17    (pravitel stvo ssha obnarodovalo obvinitel zak...
19    (gosudarstvennyi departament ssha opublikoval ...
21    (griadushchie utra niusu debaty vpervye sostoi...
22    (iroi babloian obsudim detali debatov piatnits...
23    (baidenu bylo vazhno vygliadet energichnym sto...
24    (zamene kandidata pobeda porazhenie debatakh e...
25    (khozhu uverenno govoriu bystro khor

 77%|███████▋  | 824/1071 [00:45<00:18, 13.70it/s]

--- valid pairs: 558, invalid pairs: 0
pairs 0      (state supreme court rejects abortion challeng...
1      (researchers find single surprising source dis...
2      (christian fire department employee files suit...
3      (trump campaign announces record shattering fu...
4      (band louisiana catholics shield community dia...
                             ...                        
553    (white house aide tries gaslighting america sa...
554    (biden refuses drop remains committed second d...
555    (watch bill maher calls biden hard look debate...
556    (uvalde police chief indicted school shooting ...
557    (question left mind debate important history r...
Name: text, Length: 558, dtype: object
--- valid pairs: 75, invalid pairs: 0--- valid pairs: 581, invalid pairs: 0

pairspairs  0     (trumpbuy spent trumpbuyer position market dex...
1     (trumpbuy spent trumpbuyer holder market dext ...
2     (trumpbuy spent trumpbuyer position market dex...
3     (trumpbuy spent trumpbuyer

 77%|███████▋  | 827/1071 [00:45<00:20, 12.14it/s]

--- valid pairs: 1, invalid pairs: 0
--- valid pairs: 1, invalid pairs: 0pairs
 pairs 0    (fight fight, en)
Name: text, dtype: object
0    (breaking donald trump sentenced july found gu...
Name: text, dtype: object--- valid pairs: 205, invalid pairs: 0

pairs--- valid pairs: 1, invalid pairs: 0 
pairs 0    (breaking french prosecutor indicts telegram p...
Name: text, dtype: object
1                                                (, unk)
2          (join netflix redpills goodlion telegram, en)
3      (egyptpm says country prepared sacrifice milli...
4            (fuck lockdown maskup assholes forever, en)
5                                        (holy crap, en)
                             ...                        
265    (space years went legalizing marriage convinci...
266                                          (facts, en)
267    (remember days elon went israel media talking ...
268    (think lied confederate army slaves fighting r...
269    (elon fire zionist companies america b

 78%|███████▊  | 831/1071 [00:45<00:16, 14.41it/s]

--- valid pairs: 41, invalid pairs: 0
pairs 0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
4                                               (, unk)
5                                               (, unk)
6     (uchityvaia chto osobennost predvaritel nykh g...
7     (proshlykh respublikanskikh praimeriz godu nev...
8     (prezidentskikh vyborakh goda tramp ustupil ba...
9     (vprochem dzho baiden proshlykh demokratichesk...
10                                              (, unk)
11    (obrashchaet sebia vnimanie kraine nizkaia iav...
12    (uvazhaemyi kollega igor slabykh peredache tri...
13                                              (, unk)
14                              (zhdiom gotovimsia, hr)
15       (konechno ochen umestny analogii diktator, sk)
16    (poka zamiraniem serdtsa zhdut interv skromno ...
17                                              (, unk)
18  

 78%|███████▊  | 834/1071 [00:45<00:16, 14.01it/s]

--- valid pairs: 2449, invalid pairs: 0
pairs 0       (spreading unchecked lies rooted racism justif...
1       (spreading unchecked lies rooted racism justif...
4       (biden harris administration committed helping...
5       (national security depends supporting israel c...
6       (hySHArv yhvdym TSbAv mvHmd bvvdAy yHzvr plsTy...
                              ...                        
2761                              (speak phone biden, en)
2762                                 (telegram unban, en)
2763    (happening president biden lady participate na...
2764                                              (, unk)
2765    (todayannouncement builds administrationwork e...
Name: text, Length: 2449, dtype: object
--- valid pairs: 246, invalid pairs: 0
pairs 0      (bolee podpisei podderzhku samovydvizheniia pu...
1      (naruzhnaia bannernaia agitatsiia kandidata pu...
2      (kandidatov prezidenty vydvinuli neparlamentsk...
3      (putin vzial kontrol situatsiiu otopleniem pod...
4  

 78%|███████▊  | 837/1071 [00:46<00:23, 10.09it/s]

--- valid pairs: 89, invalid pairs: 0
pairs 0     (breaking biden says hurricane helene devastat...
1     (breaking stratocumulus cloud bands captured a...
2     (breaking arizona democrat secretary state adr...
3     (breaking bongino hints diddy tapes cause prob...
4     (breaking dockworkers major ports east gulf co...
                            ...                        
85    (breaking keith olbermann demands biden arrest...
86    (breaking york post officially endorses donald...
87    (breaking biden administration granted approva...
88    (breaking iransupreme leader orders military p...
89    (breaking lancaster county pennsylvania announ...
Name: text, Length: 89, dtype: object
--- valid pairs: 1074, invalid pairs: 0
pairs 0                          (revenue share live claim, en)
1                                         (sell news, en)
2                                         (sell news, en)
3                          (revenue share live claim, en)
4                       

 78%|███████▊  | 839/1071 [00:46<00:22, 10.44it/s]

--- valid pairs: 1025, invalid pairs: 0
pairs 0                                         (sell news, en)
1                                         (sell news, en)
2                                         (sell news, en)
3                        (lambo lambo lambo letsgooo, sw)
4                              (load your ready bull, en)
                              ...                        
1259                       (revenue share live claim, en)
1260                       (revenue share live claim, en)
1261    (people earnn better know potential holding st...
1262    (letcapacity push project forward beneficial s...
1263                     (lambo lambo lambo letsgooo, sw)
Name: text, Length: 1025, dtype: object
--- valid pairs: 58, invalid pairs: 0
pairs 0     (breaking donald trump confirms melaniamother ...
1     (breaking bill johnson submitted letter resign...
2     (breaking classified briefing tuesday join con...
3     (epstein list public team releasing massive re...
4     (b

 79%|███████▊  | 841/1071 [00:46<00:19, 11.55it/s]

--- valid pairs: 215, invalid pairs: 0
pairs 0      (trump appointed federal judge stuns biden har...
1      (democrats panic pleas kamala harris step asid...
2      (supreme court delivers game changing election...
3      (shock report completely blindsides harris ahe...
4      (harris running mate walz busted scandal piece...
                             ...                        
210    (massive update vance walz debate game changer...
211    (difficult time desantis makes heartbreaking a...
212     (trump elon musk surprise announcement read, en)
213    (trump makes amazing help americans hurricane ...
214    (trump announces added huge star added campaig...
Name: text, Length: 215, dtype: object
--- valid pairs: 73, invalid pairs: 0
pairs 0     (watch donalds clashes host kristen welker rep...
1     (press secretary lays biden memory loss bigges...
2     (watch libs tiktok creator spars wapotaylor lo...
3     (report assassination attempt tucker carlson p...
4     (biden white ho

 79%|███████▊  | 843/1071 [00:46<00:22, 10.17it/s]

 0      (news california adam schiff news california r...
1      (vance makes surprise announcement hours debat...
2      (emergency kamala harris announcement time wor...
3      (news host jeanine pirro reveals fans unexpect...
4      (jesse watters makes stunning announcement dev...
                             ...                        
255    (ballots burned mailbox state police arrest sc...
256    (desantis makes heartbreaking announcement ahe...
257    (said stunned trump responds live kamala harri...
258    (supreme court issues massive mail ballot ruli...
259    (johnson mcconnell release rare statement vice...
Name: text, Length: 260, dtype: object
--- valid pairs: 68, invalid pairs: 0
pairs 0     (breaking benjamin netanyahu diagnosed severe ...
1     (breaking russia officially accuses ukraine in...
2     (breaking turkeymain opposition republican peo...
3     (breaking dangerous solar flares earth hours s...
4     (breaking russia arrests managers lgbtq nightc...
         

 79%|███████▉  | 845/1071 [00:46<00:19, 11.37it/s]

--- valid pairs: 50, invalid pairs: 0
pairs 0     (breaking multiple people missing avalanche ca...
1     (breaking large number israeli citizens perman...
2     (breaking poland preparing russia join conserv...
3     (breaking tucker carlson interviewed president...
4     (watch obamahouse chicago property largest jew...
5     (king charles diagnosed cancer postpone public...
6     (breaking president trump predicts biden publi...
7     (breaking ukrainians placed tucker carlson kil...
8     (breaking tucker carlsoninterview putin publis...
9     (tucker carlson afraid speak truth waiting new...
10    (breaking marines killed helicopter crash sout...
11    (breaking brazilian police investigating allie...
12    (breaking jetblue planes collide tarmac boston...
13    (breaking president zelensky fires commander z...
14    (good morning thank waiting patiently team cur...
15    (tucker blew nord stream putin sure tucker bus...
16    (vladimir putin interview join share tucker ca...
17  

 79%|███████▉  | 847/1071 [00:47<00:20, 11.16it/s]

--- valid pairs: 159, invalid pairs: 0
pairs 0      (breaking lightning strike hits water tower st...
1      (breaking parade democrats lined partynational...
2      (breaking rapper fatman scoop dead collapsing ...
3      (breaking biden kamala harris resumed secret f...
4      (breaking soros linked utah gill typically cha...
                             ...                        
155    (breaking united states deploy thousands troop...
156    (breaking biden announced additional resources...
157    (breaking georgia emergency management homelan...
158    (breaking media matters america funded george ...
159    (breaking russian president vladimir putin ret...
Name: text, Length: 159, dtype: object
--- valid pairs: 147, invalid pairs: 0
pairs 0      (breaking trump campaign annoucned raised tota...
1                                                (, unk)
2      (breaking texas paxton says dissolved filled c...
3      (breaking donald trump banned countries includ...
4      (breaking

 79%|███████▉  | 849/1071 [00:47<00:20, 10.73it/s]

--- valid pairs: 321, invalid pairs: 0
pairs 0      (trump sends shockwaves bidencampaign announce...
1      (marjorie taylor greene catches hosts guard an...
2      (huge federal judge rules mail ballots heats r...
3      (supreme court halt texas verification online ...
4      (judge delivers ruling million lawsuit huge re...
                             ...                        
316    (federal constitutional violations trump verdi...
317    (buckle alvin bragg makes huge announcement tr...
318    (breaking democrat senator leaves party keeps ...
319    (trump verdict backfires dems bombshell droppe...
320    (revenge biden horrified development latest tr...
Name: text, Length: 321, dtype: object
--- valid pairs: 1296, invalid pairs: 0
pairs 0       (revshare holders sent wallets claimed manuall...
1                           (claim manually revshare, en)
2                                         (sell news, en)
3                                         (sell news, en)
4          

 79%|███████▉  | 851/1071 [00:47<00:24,  9.13it/s]

--- valid pairs: 281, invalid pairs: 0
pairs 0      (excited speaker johnson shakes race announcem...
1      (taylor greene drops news primary election cal...
2      (foxharris faulkner major life change personal...
3      (trump makes historical announcement following...
4      (bombshell development trump gets support majo...
                             ...                        
276    (trumpstunning right debate proves deserves te...
277    (state supreme court issues massive ruling mai...
278    (calls biden step aside grow louder ally wants...
279    (news gets worse biden disastrous debate perfo...
280    (obama official drops biden bombshell tells am...
Name: text, Length: 281, dtype: object
--- valid pairs: 3137, invalid pairs: 0
pairs 0        (migration live read pinned message migrate, en)
1        (migration live read pinned message migrate, en)
2        (migration live read pinned message migrate, en)
3        (migration live read pinned message migrate, en)
4        (m

 80%|███████▉  | 853/1071 [00:47<00:26,  8.24it/s]

--- valid pairs: 328, invalid pairs: 0
pairs 0      (house leaders drop hammer biden phase impeach...
1      (romney makes staggering announcement shakes p...
2      (carlson drops shocking detail learned biden p...
3      (cruz drops bombshell revelation nightmare bid...
4      (trump smiling election analysis best news cam...
                             ...                        
323    (jack smithdocuments case trump takes huge tur...
324    (ashamed embarrassed letitia james devastated ...
325    (letitia james water shocking video outrageous...
326    (bidencamoaign gets devastating news damning r...
327    (tiered system bidendepartment justice water s...
Name: text, Length: 328, dtype: object


 80%|███████▉  | 854/1071 [00:48<00:27,  7.93it/s]

--- valid pairs: 34, invalid pairs: 0 
pairs--- valid pairs: 9, invalid pairs: 00                                               (, unk)
1     (live todayprophetic report good saying find t...
2          (questions ardis interview answer watch, en)
3     (business producer colton joined guest talk en...
4                                               (, unk)
5                         (florida state fake meat, en)
6     (italy similar nation implement positive measu...
7                                               (, unk)
8                           (prophetic report live, en)
9     (tonightbo polny excite blown mind wealth tran...
10            (issue live stream started feed live, en)
11                                              (, unk)
12    (latest episode alpha fitness build great life...
13                                              (, unk)
14                          (prophetic report live, en)
15                                              (, unk)
16    (truth hunter bide

 80%|████████  | 858/1071 [00:48<00:20, 10.19it/s]

0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3                                               (, unk)
4                                               (, unk)
5                                               (, unk)
6       (detroit stop reawaken tour hour road trip, en)
8                                               (, unk)
9     (hours team flyover stay tuned exclusive updat...
10                          (ready weekend letroll, en)
11                     (boom complete lights think, en)
12    (years spend time love getting work kids save ...
17                                              (, unk)
18                                              (, unk)
19    (clementprophecies fulfilled stay tuned specia...
23    (amazing start general michael flynn stay tune...
28    (tell based legacy hunter trial eric trump giv...
29    (reawaken america tour detroit recorded in

 80%|████████  | 860/1071 [00:48<00:18, 11.13it/s]

 0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3                                               (, unk)
4                                               (, unk)
5                                               (, unk)
6                                               (, unk)
7     (aaron prager dennis pragerson joins tonight d...
8                                               (, unk)
9                                               (, unk)
10                                              (, unk)
11    (stacy joins julie green morning break saying ...
12                                              (, unk)
13                        (conspiracy conversation, en)
14                                              (, unk)
15                                              (, unk)
16                                              (, unk)
17                                             

 81%|████████  | 863/1071 [00:48<00:14, 14.18it/s]

  0      (bidenfamily urges stay race huddling camp dav...
1      (grimacing biden grabs jillarm touching ritzy ...
2      (buckle elon musk makes announcement race turn...
3                                                (, unk)
4      (breaking supreme court rules trump immunity c...
                             ...                        
288    (kari lake incredible news arizona happy read,...
289       (michelle obama video people talking read, en)
290    (shares shocking detail trump attacker known c...
291    (harris makes statement upcoming debate trump ...
292    (news federal judge delivers ruling absentee b...
Name: text, Length: 289, dtype: object
0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3                                               (, unk)
4     (introducing alpha podcast redefines means ble...
                            ...                     

 81%|████████  | 868/1071 [00:48<00:10, 19.78it/s]

0                                               (, unk)
1                                    (live tonight, en)
2                                               (, unk)
3                                        (surprise, en)
4                                               (, unk)
5                              (going live central, en)
6     (tonightopen eyes educate people global elites...
7                                               (, unk)
8                                               (, unk)
9                                               (, unk)
10                                        (painful, it)
11                                          (irony, en)
12    (chance west coast week till reawaken america ...
13    (billionaire classsecret deals globalist plot ...
14    (loved interview seamus flyover worth listenin...
15                                              (, unk)
16                                              (, unk)
17                                              

 81%|████████▏ | 872/1071 [00:48<00:08, 22.43it/s]


pairs 0      (sickening jean carroll makes shocking confess...
1      (tragic news democratic senator passed away fa...
2      (inappropriate touching republican lauren boeb...
3      (nightmare nikki haley report finds struggling...
4      (massive update trump case special counsel jac...
                             ...                        
311    (damning fani willis text messages revealed ra...
312    (fani willis nathan wade referred georgia alle...
313    (said months nikki haley announces news haley ...
314    (happy hunting megyn kelly reveals texts willi...
315    (time foxjesse watters announces news host rea...
Name: text, Length: 316, dtype: object
--- valid pairs: 4, invalid pairs: 0
pairs 0    (diakuiemo khorunzhii sluzhbi brigadi azov uch...
1    (shanovni druzi kolegi prikhil niki diial nost...
2    (neosiazhna vtrata dlia turi movi ukrayini kol...
3    (ogorichnii samit nato vashingtonti stav vazhl...
Name: text, dtype: object
--- valid pairs: 243, invalid pairs: 0

 82%|████████▏ | 875/1071 [00:49<00:11, 16.79it/s]

--- valid pairs: 362, invalid pairs: 0
pairs 0      (swamp panics jordan makes blockbuster announc...
1      (trumplawyer drops bombshell donald trump goin...
2      (house speaker mike johnson makes power democr...
3      (breaking worst news history horrendous news r...
4      (swamp panics jordan makes blockbuster announc...
                             ...                        
357    (chaos house stuns lawmakers announcement fant...
358    (trump gets blockbuster election news celebrat...
359    (speaker johnson sends left democrats tizzy la...
360    (house speaker kevin mccarthy drops bombshell ...
361    (jack smith days numbered gets career news sca...
Name: text, Length: 362, dtype: object
--- valid pairs: 11, invalid pairs: 0
--- valid pairs: 11, invalid pairs: 0pairs
 0                                               (, unk)
1     (eduard iurchenko ekspert uikdi kandidat filos...
2     (veresnia roku rivnenshchini vnaslidok tragich...
3                                      

 82%|████████▏ | 878/1071 [00:49<00:12, 15.92it/s]

--- valid pairs: 10, invalid pairs: 0
pairs 0                                               (, unk)
1     (druzi chervnia roku vidbudet golosuvannia kan...
2     (druzi vzhe rozpochalos golosuvannia kandidati...
3                                               (, unk)
4     (oleksandr barvins odnim providnikh politikiv ...
11                                              (, unk)
13                                              (, unk)
18    (vsia dopomoga pidtrimka iaka nadaiet ogodni u...
22                                              (, unk)
23    (ukrayins komu instituti konservativnikh dosli...
Name: text, dtype: object
--- valid pairs: 18, invalid pairs: 0
pairs 0     (kvitnia richnitsia dnia progoloshennia manom ...
9                                               (, unk)
10                                              (, unk)
11                                              (, unk)
12    (uikdi proviv ekspertne obgovorennia temu tura...
19    (vidminu liberalizmu sotsializmu konserv

 82%|████████▏ | 880/1071 [00:49<00:19,  9.90it/s]

--- valid pairs: 11, invalid pairs: 0
pairs 0                                               (, unk)
1     (statti analizuiemo vazhlivii superechlivii pr...
2                                               (, unk)
3     (konstiantin kolesnikov ekspert uikdi doktor i...
4     (shcho potribno shchob natsiia stala dorosloiu...
5                                               (, unk)
6     (konservativni chaiuvannia nezalezhnosti ukray...
7                                               (, unk)
8                                               (, unk)
9     (chomu soldati tikaiut polia boiu tsilii kompl...
10    (oleksii samoilov ekspert uikdi kandidat istor...
Name: text, dtype: object
--- valid pairs: 299, invalid pairs: 0
pairs 0      (target responds criticism black friday sale j...
1      (watch wild video shows star sean strickland h...
2      (federal judge tosses trump ballot lawsuit rho...
3      (author discovered eerie incident occurred yea...
4      (students trump founder ryan fourn

 82%|████████▏ | 882/1071 [00:50<00:20,  9.10it/s]

--- valid pairs: 1222, invalid pairs: 0pairs
 0       (lambo lambo lambo letsgooo, sw)
1       (lambo lambo lambo letsgooo, sw)
2       (lambo lambo lambo letsgooo, sw)
3         (revenue share live claim, en)
4       (lambo lambo lambo letsgooo, sw)
                      ...               
1514      (revenue share live claim, en)
1515                     (sell news, en)
1516                     (sell news, en)
1517                     (sell news, en)
1518      (revenue share live claim, en)
Name: text, Length: 1222, dtype: object
--- valid pairs: 27, invalid pairs: 0
pairs 0                                               (, unk)
1                                               (, unk)
2     (partiia zminiuie strategiiu ievropeis komu so...
3     (konservator dzhamil dzhivani peremagaie dodat...
4                                               (, unk)
8     (naspravdi vdiagalisia zamozhni ukrayintsi nai...
18                                              (, unk)
19    (ogodni vidznachaiemo

 83%|████████▎ | 884/1071 [00:50<00:20,  8.92it/s]

--- valid pairs: 90, invalid pairs: 0
pairs 0                                                 (, pl)
1                                                (, unk)
2      (launched ballistic missiles israel today resp...
3                                       (interested, en)
4                                          (welcome, nl)
                             ...                        
105    (band korn know corny story band chose truth m...
106    (reports foreign meddling claim interference o...
107    (dealt tons drug users coke users tell crimina...
108    (watch documentary examines mediacomplicity at...
109    (kalergi plan germany syrian refugee stabbed y...
Name: text, Length: 90, dtype: object
--- valid pairs: 1370, invalid pairs: 0
pairs 0                   (time kill zero, it)
1                   (hold milestone, en)
2       (lambo lambo lambo letsgooo, sw)
3         (revenue share live claim, en)
4       (lambo lambo lambo letsgooo, sw)
                      ...               

 83%|████████▎ | 886/1071 [00:50<00:19,  9.57it/s]

 0     (privit ukrayintsi zapuskaiemo pravii intelekt...
1                                               (, unk)
2     (voni borot namagaiut zrobiti shliakhetnoiu ub...
3     (natsiia zakhidnii traditsiyi gromadians etnic...
4     (spravzhni plodi liuds koyi naturi mistetstva ...
6     (koli narod nash priblizit nareshti vivtaria d...
7     (iakshcho rozdilennia svitu bulo oporoiu ideol...
8                                               (, unk)
9     (movu povagu zakhisnikiv pershe ukrayinizatsii...
10    (dnia rotsi pishov zhittia fransisko franko za...
11    (ogodni gidnosti svobodi zgaduiemo maidan bulo...
16    (khochut vimagaiut svobodi patrioti populisti ...
17    (vistupaiesh frivol aborti znachit sovok dnia ...
18    (kolis ssha vmili kompromisi zlom dnia rotsi r...
19                                              (, unk)
20                                              (, unk)
21    (vibir mizh svobodoiu gnitom znaiu shcho sviti...
22    (khto ukrayintsiv spravdi vmiv tvoriti de

 83%|████████▎ | 889/1071 [00:50<00:16, 10.82it/s]

0      (crap portuguese restaurant avoid energy drink...
1      (waiting crusades practicing archery sword fig...
2      (congratulations successful withdrawal great p...
7      (thanks platform approving successful withdraw...
9      (teacher enoch burke arrested wilsonhospital s...
                             ...                        
131                                              (, unk)
132    (better murder avicii swedish musician produce...
133    (activism exposing child trafficking worked ma...
141    (month completely year started investing sterl...
142    (tension rises record breaking storm surges re...
Name: text, Length: 112, dtype: object--- valid pairs: 128, invalid pairs: 0

pairs 1                       (coming video knew planning, en)
2      (kalergi plan germany know saying germany germ...
3      (blind items happens vegas happens vegas doesn...
7      (little tyrant exposing justin trudeau justin ...
9      (military plans deliver gaza floating pier adm...
    

 83%|████████▎ | 892/1071 [00:51<00:16, 10.60it/s]

--- valid pairs: 360, invalid pairs: 0
pairs 0      (watch teens parents supposedly killed hamas u...
1      (breking yemen officially declared israel repu...
2      (breaking israeli military vehicles destroyed ...
3      (breaking anthony fauci state montana payer mo...
4      (watch antisemitism origin hate speech join co...
                             ...                        
356    (breaking argentina rejects join brics javier ...
357    (floodgates open americans covid vaccine manuf...
358    (charlie munger warren buffettright hand dead ...
359      (letdig flat earth join conservativenation, en)
360              (infamous congressmen booted house, en)
Name: text, Length: 360, dtype: object
--- valid pairs: 36, invalid pairs: 0
pairs 0     (stavlennia rozvitku natsional nogo viis kovo ...
1     (zimovi vikhidni iakraz chas dlia zatishnoyi s...
2     (dnia rotsi vidbuvsia pershii yizd organizatsi...
3                                               (, unk)
4     (dnem vsikh pri

 83%|████████▎ | 894/1071 [00:51<00:19,  9.02it/s]

--- valid pairs: 6, invalid pairs: 0
pairs 0                                              (, unk)
2    (tsitata vsia korolevskaia konnitsa vsia korol...
4                                              (, unk)
5                                              (, unk)
6                                              (, unk)
7                                              (, unk)
Name: text, dtype: object
--- valid pairs: 1532, invalid pairs: 0
pairs

 84%|████████▎ | 895/1071 [00:51<00:23,  7.56it/s]

--- valid pairs: 129, invalid pairs: 0 
1       (shouldn miss project golden opportunity earn ...
2                                         (sell news, en)
3       (best projects believe successful future know ...
4       (trend negative upset calm persevere rainbow s...
5       (months extraordinary patience rewarded extrem...
                              ...                        
2001    (kamala harris spent harris buyer position mar...
2002    (kamala harris spent harris buyer holder marke...
2003    (kamala harris spent harris xceptit holder mar...
2004    (kamala harris spent harris buyer position mar...
2005    (kamala harris whale spent harris buyer wallet...
Name: text, Length: 1532, dtype: object
pairs 0      (recent withdrawal rapid capital market reason...
1      (totaldisclosure hundreds documentaries join t...
3      (great catch famu basketball player legend sta...
4      (vaccine genocide pharma exposed genocide tabo...
5      (recent withdrawal rapid capital market r

 84%|████████▍ | 898/1071 [00:52<00:21,  8.17it/s]

--- valid pairs: 5, invalid pairs: 0
pairs 0                                              (, unk)
1    (obrashchenie pervuiu ochered vsemi pravdami n...
2                                              (, unk)
3                                              (, unk)
5                                              (, unk)
Name: text, dtype: object
--- valid pairs: 50, invalid pairs: 0
pairs --- valid pairs: 1, invalid pairs: 0
pairs 0    (scenes story ching governmenthuge propaganda ...
Name: text, dtype: object
0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3                                               (, unk)
4                                               (, unk)
5                                               (, unk)
6                                               (, unk)
7     (pered nashoiu moloddiu stoyit bliskuche zavda...
8     (shcho prigaduie nashikh predkiv y

 84%|████████▍ | 900/1071 [00:52<00:17,  9.73it/s]


--- valid pairs: 2338, invalid pairs: 0
pairs 0                                           (friends, da)
1       (biden spent weth bidenbuyer position trungduo...
2                                                 (, unk)
3                                                 (, unk)
4       (experience intersection politics crypto biden...
                              ...                        
2401    (dream come true double savings night happen, en)
2402    (bites dust banned dickson reason automated bl...
2403    (experience intersection politics crypto biden...
2404    (experience intersection politics crypto biden...
2405    (experience intersection politics crypto biden...
Name: text, Length: 2338, dtype: object


 84%|████████▍ | 902/1071 [00:52<00:19,  8.82it/s]

--- valid pairs: 40, invalid pairs: 0
pairs 0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3                                               (, unk)
5              (povtor proshlogodnego press reliza, sl)
6                            (tekst usama baisaeva, et)
8     (prosto posmotrite reaktsiiu vazhno pomnit pos...
9     (grazhdanskoi voiny chri rossii vidat budet zd...
10    (otryvok odnogo efirov otnositel prognoza pogo...
11                                              (, unk)
14         (voprosy teme pishite etoi publikatsiei, hr)
17                                              (, unk)
18                                  (odna istoriia, hr)
19                                               (, no)
20         (voprosy teme pishite etoi publikatsiei, sl)
21            (khot pozdnovato maem vsekh russiian, sl)
22                     (zdangalla ravno nokhchalla, sl)
23  

 85%|████████▍ | 905/1071 [00:52<00:15, 10.53it/s]

 0      (pervywood creepiest case hollywood shirley te...
1      (ends session bang ruling important cases incl...
2                                                (, unk)
3      (ruling presidential immunity affect legal cha...
4      (totaldisclosure hundreds documentaries join t...
                             ...                        
235    (rich mantrick watch rumble subscribe authorit...
236    (honestly place invest withdraw profits succes...
237                            (check video channel, en)
238    (sacrificial virgins watch rumble subscribe th...
239    (happy successfully receive withdrawal binance...
Name: text, Length: 205, dtype: object
--- valid pairs: 82, invalid pairs: 0
pairs 0                                               (, unk)
1     (turtsii mestnye zhiteli nachali pogromy migra...
8                                               (, unk)
9               (nemnogo dovoennogo groznogo lentu, sl)
10                                              (, unk)
         

 85%|████████▍ | 907/1071 [00:52<00:13, 11.95it/s]

--- valid pairs: 37, invalid pairs: 0
pairs 0                                               (, unk)
2                                               (, unk)
3                                               (, unk)
4                                               (, unk)
6     (nash geroi muchenik iusup temirkhanov umer ru...
7                                               (, unk)
8                                               (, unk)
10    (proshlo dnei svezho vypushchenno vykormlennye...
13                                              (, unk)
15    (ssha napravliaiut bespretsedentnye sily blizh...
16                                              (, unk)
17                                              (, unk)
20                                              (, unk)
21           (prazdnikom vsekh chechenskoi pobedoi, sk)
22    (etot znamenatel dlia chechenskogo naroda avgu...
24                                              (, unk)
25                         (dzhokhar dudaev fevral, sl)
26  

 85%|████████▍ | 909/1071 [00:52<00:14, 11.36it/s]

--- valid pairs: 2575, invalid pairs: 0
pairs 0       (kaunse nashe behen tune bill gates asked said...
1       (kahan uthayi pics whatsapp university crop we...
2       (congress khilaaf bolo proof dikhaao cell jata...
3       (aage modi baat uski iska jawab diya lekin shu...
4                                                 (, unk)
                              ...                        
2729                                               (, cy)
2730                                    (ireda sakta, et)
2731                                 (thanks brother, en)
2732                                (bhai ireda bare, id)
2733                                    (baata sakta, id)
Name: text, Length: 2575, dtype: object
--- valid pairs: 59, invalid pairs: 0
pairs 0                                               (, unk)
2                                        (fleshmob, sq)
3     (chechenskaia respublika ichkeriia russkie okk...
4                                               (, unk)
5     (v

 85%|████████▌ | 911/1071 [00:53<00:12, 12.60it/s]

--- valid pairs: 64, invalid pairs: 0
pairs 0                                               (, unk)
1                                               (, unk)
2                                               (, unk)
3     (presleduiut kefiry khotiat zabrat vashikh det...
4     (zamestitel zakaeva abubakar muradov utverzhda...
                            ...                        
72                                   (radio marsho, so)
73    (podpisyvaites mnogo interesnogo teme nokhchii...
74                                              (, unk)
75                        (sila semeinogo edinstva, sl)
76        (answer maria zakharovaquestion russians, en)
Name: text, Length: 64, dtype: object
--- valid pairs: 376, invalid pairs: 0
pairs 0                             (welcome jamie oliver, nl)
1      (trumpbuy spent trumpbuyer holder market dext ...
2      (trumpbuy spent trumpbuyer position market dex...
3      (trumpbuy spent trumpbuyer holder market dext ...
4      (trumpbuy spent trumpb

 85%|████████▌ | 913/1071 [00:54<00:37,  4.26it/s]

--- valid pairs: 1924, invalid pairs: 0
pairs 0            (received trustwallet took airdrop time, en)
5       (nice meetings check chats kind passive income...
6       (seen making giveaways appreciate entertainmen...
7       (bull markets born pessimism grow skepticism m...
8                        (lambo lambo lambo letsgooo, sw)
                              ...                        
2302                                   (claimed long, fr)
2303                                              (, unk)
2304                             (thanks want assure, en)
2305                                          (chart, en)
2306    (bull markets born pessimism grow skepticism m...
Name: text, Length: 1924, dtype: object
--- valid pairs: 145, invalid pairs: 0
pairs 0                                                (, unk)
1      (konstitutsiia chechenskoi respubliki ichkerii...
2                                       (ustav chdp, sk)
8      (prikhodite svobodnymi druz gosti chata pros s...
9  

 85%|████████▌ | 915/1071 [00:56<01:26,  1.80it/s]

--- valid pairs: 2301, invalid pairs: 0
pairs 0                                       (outstanding, af)
1                                    (trooper strong, en)
2                                                 (, unk)
3                                     (panic detroit, ro)
4                                                  (, tr)
                              ...                        
2891                                       (disagree, af)
2892                                              (, unk)
2893    (body power heal turn negative positive believ...
2894                                              (, unk)
2897                                              (, unk)
Name: text, Length: 2301, dtype: object


 86%|████████▌ | 916/1071 [00:58<01:53,  1.36it/s]

--- valid pairs: 3, invalid pairs: 0
pairs 0            (kapets podpischiki uzhe liubliuiuiu, lt)
1    (khotel predupredit chto menia skoro stupitel ...
2    (pokazyvaiu skechik sozdaiu illiuziiu raboty c...
Name: text, dtype: object


 86%|████████▌ | 917/1071 [00:58<01:33,  1.64it/s]

--- valid pairs: 1, invalid pairs: 0
pairs 0    (vsem privet segodnia administratora rozhdenii...
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs 0    (prazdnuem segodnia vzial ruki stilus, sl)
Name: text, dtype: object


 86%|████████▌ | 919/1071 [00:58<01:03,  2.40it/s]

--- valid pairs: 8, invalid pairs: 0
pairs 0                            (postavit laik korol, sk)
1                                  (skechik magmy, sk)
2    (dorogie damy kotorye slediat moim kanalom poz...
4                                              (, unk)
5                                              (, unk)
6                                              (, unk)
8                     (tozhe reshil sdelat aeakha, en)
9                            (podpischiki liubliu, lt)
Name: text, dtype: object


 86%|████████▌ | 920/1071 [00:59<00:58,  2.59it/s]

--- valid pairs: 475, invalid pairs: 0
pairs 0      (therapy weak parenting broken children abigai...
1      (dkhln jyzh bhtryn bzykhn lndn mrsm football l...
5                                        (hfth brtr, en)
7                                                (, unk)
8      (khnl mytwnyd chnl tymy bzykhny khwstyd khnyd,...
                             ...                        
560    (cern large hadron collider large hadron colli...
561    (totaldisclosure hundreds documentaries join t...
563                                              (, unk)
564    (hero july year bridger walker saved little si...
565    (knows jesus saves little lives ranch whitman ...
Name: text, Length: 475, dtype: object
--- valid pairs: 4, invalid pairs: 0
pairs 

 86%|████████▌ | 921/1071 [00:59<00:50,  2.97it/s]

0                          (admin pravda rabotaet, sl)
1    (kartinke predstavleny geldva napoleon odin sk...
2    (segodnia bakhnulo luchshii podarok obeshchaiu...
9                 (davaite podpisyvat stavit like, hr)
Name: text, dtype: object
--- valid pairs: 2154, invalid pairs: 0
--- valid pairs: 2, invalid pairs: 0
pairs pairs 0                  (novye podpischiki oldy privet, sl)
1    (kollab pryvet podpischiki izvinite dolgoe ots...
Name: text, dtype: object0                                                 (, unk)
1                                                 (, unk)
2                        (wishing happy easter today, en)
3                                                 (, unk)
8       (vegetable seed oils unhealthy ingredient kids...
                              ...                        
2698                                              (, unk)
2699    (motherwomb twin babies asked believe life del...
2701    (think potential voters forget biden funding d...
2702    

 86%|████████▌ | 923/1071 [00:59<00:34,  4.35it/s]


--- valid pairs: 5, invalid pairs: 0
pairs 0    (novye podpischiki znaiu menia nashli uzhe liu...
1                                              (, unk)
2                                              (, unk)
3    (komfortiki klassnye daniki liubimye koroliii ...
5    (skechi poslednie tapaite ekranchiku zabyvaite...
Name: text, dtype: object
--- valid pairs: 3, invalid pairs: 0
pairs 0    (poshli otpiski poetomu podaiu priznaki zhizni...
1    (osnovnuiu rabotu zakonchil budet glavnoi troi...
2                  (predlagaiu vzglianut poblizhe, sl)
Name: text, dtype: object


 86%|████████▋ | 926/1071 [00:59<00:23,  6.08it/s]

--- valid pairs: 13, invalid pairs: 0
pairs 0     (samopiar saiklo skreym gravstain usloviia rep...
1                        (novye podpischiki privet, sl)
2                                               (, unk)
5     (vsem privet dorogie podpischiki pokazyvaiu no...
6                                               (, unk)
7                             (postav laiki skoree, sl)
8       (skechi kotorye nakopilis poslednie nedeli, sl)
12    (speshu soobshchit chto polnotsenki obiazatel ...
13                                              (, unk)
14    (uzhe obozhaiu vsei dushoi proshu obozhaite to...
16    (spasibo reaktsii liubimye podpischiki kazhdog...
17                                              (, unk)
18    (dnem rozhdeniia danik poslednikh risoval klia...
Name: text, dtype: object
--- valid pairs: 21, invalid pairs: 0
pairs 0                                               (, unk)
3                                     (zavtra drop, sl)
4     (podprygivaiu esli postavite million lai

 87%|████████▋ | 928/1071 [01:00<00:29,  4.86it/s]

--- valid pairs: 46, invalid pairs: 0
pairs 0     (trump campaign calling kamalasupport taxpayer...
1     (look forward senator vance speaking truthfull...
2                                         (amazing, en)
3                                               (, unk)
4     (walz refuses supports limits abortion demand ...
5                (vivek watch planes head michigan, en)
6     (night remember kamala harris power years open...
7     (mamaw forgo buying medicine feed family ameri...
8     (lawyer president donald trump managing partne...
9     (vance tours hurricane damage community long l...
10                                              (, unk)
14           (proud stand butler ready white house, en)
15         (scare hell cheney lining kamala harris, en)
16    (heard shots blood feared worst knew okay pres...
17    (prayers allies israel morning remember americ...
18    (view asked kamala harris thing differently bi...
19    (president trump smart president knows deal go...
20  

 87%|████████▋ | 930/1071 [01:00<00:22,  6.27it/s]

 0      (predictive programming lady gaga portrayed el...
1                                                (, unk)
2                                                (, unk)
3      (vaccine tyler christopherdeath american actor...
4      (israeli plot force million palestinians gaza ...
                             ...                        
623    (rykh Hmyt andrh drwzh khwd khwhd hmchnn ntkhb...
624                                              (, unk)
625    (ireland limerick city west councillor abul ka...
626    (louder love tragic year music fans worldwide ...
627    (pneumonia outbreak warren county seeing alarm...
Name: text, Length: 573, dtype: object
--- valid pairs: 216, invalid pairs: 0
pairs 0                                                (, unk)
1      (samaia liga arabskikh gosudarstv mantru povto...
2      (lozung riuzgie zdaiutsia riuzgie idut dokonts...
3                                                (, unk)
4      (kommentarii odnomu video moem kanale iutube c...
   

 87%|████████▋ | 932/1071 [01:00<00:20,  6.90it/s]

--- valid pairs: 44, invalid pairs: 0
pairs 0     (overwhelmed gratitude honor alongside preside...
1     (greatest threat american people overseas russ...
2     (hello official telegram channel post activiti...
3     (donald trumpvp nominee vance effectively coun...
4     (fight fight fight guess maga unity looks like...
5     (hanging backstage getting ready tonightfestiv...
6     (biden want iran putin instead pennsylvania co...
7     (elon great example american entrepreneur buil...
8     (trumprunning mate vance people social securit...
9     (worth watching marco incredibly talented unde...
10                     (letwin thing country great, en)
11                             (come america great, en)
13    (biden doesn cognitive function election certa...
14    (eric trump mentioned guardian angels saved ri...
15    (calling biden stop running calling resign pre...
16                                               (, en)
17                                              (, unk)
18  

 87%|████████▋ | 934/1071 [01:00<00:17,  7.73it/s]

--- valid pairs: 64, invalid pairs: 0
pairs 0     (president trump walked right nabj conference ...
1     (vance speaks southern border cochise county a...
2     (president trump answers tough questions time ...
3     (days kamala harris presumptive nominee democr...
4     (kamala harris want face disloyalty look damn ...
                            ...                        
59    (incredible interview critical points kamala h...
60    (massive story kamala harrisadministration enc...
61    (going defend right free speech including righ...
62    (breaking gotten ahold kamala harris interview...
63    (kamala harris changed mind american energy po...
Name: text, Length: 64, dtype: object
--- valid pairs: 1149, invalid pairs: 0
pairs 0       (kamala harris spent harris buyer holder marke...
1       (kamala harris spent harris buyer position mar...
2       (kamala harris harris dpshnz qytxn position ma...
3       (kamala harris harris skcmnatxn holder market ...
4                       

 87%|████████▋ | 936/1071 [01:01<00:38,  3.54it/s]

--- valid pairs: 158, invalid pairs: 0
pairs 1                                         (revshare, sq)
2      (positive thinker recent market news making ha...
3        (moon chart healthy bearish market bullish, en)
4                       (lambo lambo lambo letsgooo, sw)
5                       (lambo lambo lambo letsgooo, sw)
                             ...                        
191                                              (, unk)
192                             (thanks want assure, en)
193                                          (chart, en)
194    (share feedback possibilities revenue share st...
195    (bull markets born pessimism grow skepticism m...
Name: text, Length: 158, dtype: object


 87%|████████▋ | 937/1071 [01:02<00:34,  3.92it/s]

--- valid pairs: 666, invalid pairs: 0
pairs 0      (evidence foreign meddling election board illi...
1      (signs demonstrate truly bizarre society wizar...
2               (nwnyz wlyn bzykhn brtr drwzh myznh, cy)
3                    (plystry bSwrt qrDy grnd pywst, en)
4                                                (, unk)
                             ...                        
729                                              (, unk)
730                                              (, unk)
731    (pedophile diddy poor year justin bieber start...
732                                               (, en)
733    (agrees hear arguments trumpimmunity gets part...
Name: text, Length: 666, dtype: object


 88%|████████▊ | 938/1071 [01:02<00:31,  4.24it/s]

--- valid pairs: 2406, invalid pairs: 0
pairs 0                                                 (, unk)
1                                                 (, unk)
2                            (time podcast wednesday, en)
3                                        (post kelli, en)
4                                                 (, unk)
                              ...                        
3016                       (mins long hopefully fake, en)
3017                                              (, unk)
3018    (song truly amazingly beautiful enjoy listenin...
3020                                              (, unk)
3021                                              (, unk)
Name: text, Length: 2406, dtype: object


 88%|████████▊ | 939/1071 [01:02<00:44,  2.95it/s]

--- valid pairs: 5, invalid pairs: 0
pairs 0    (vance token community airdrop alert introduci...
1    (vance token community airdrop alert introduci...
2                            (welcome adam sandra, cy)
3                          (welcome debie gregory, af)
4    (vance token community airdrop alert introduci...
Name: text, dtype: object


 88%|████████▊ | 940/1071 [01:03<00:37,  3.53it/s]

--- valid pairs: 1576, invalid pairs: 0
pairs 0       (breaking senator dick durbin blocked senator ...
1                       (trump epic biden impression, en)
2                   (follow numbers telegram channel, en)
3                                              (spam, lv)
4                                                  (, en)
                              ...                        
2027                                           (hire, sq)
2029                                              (, unk)
2031    (period look starting remember things better c...
2032                                               (, en)
2033                   (happened weeks said happened, en)
Name: text, Length: 1576, dtype: object


 88%|████████▊ | 941/1071 [01:04<00:58,  2.23it/s]

--- valid pairs: 3458, invalid pairs: 0
pairs 0                              (Tian Shang Shou Jian, id)
1                                                 (, unk)
2                              (Tian Shang Shou Jian, id)
3                                                 (, unk)
4                              (Tian Shang Shou Jian, id)
                              ...                        
3454    (Bian Liao Tian Shuang Xiao Jing Shuang Xiang ...
3455    (breaking news donald trump unveils trump libe...
3456    (Bian Duan Duan Shuang Xiang Ping Yuan Feng Bi...
3457    (Bian Liao Tian Xiao Jing Shuang Xiang Xiao Pi...
3458                 (Hong Yuan Chang changsongshiwo, vi)
Name: text, Length: 3458, dtype: object
--- valid pairs: 4, invalid pairs: 0
pairs 0                 (entourage reboot going awesome, en)
1    (long kamala harris known biden mentally unfit...
2    (president trump taking questions reporters ka...
3    (convince kamala harris illegal aliens actuall...
Name: text, d

 88%|████████▊ | 943/1071 [01:04<00:36,  3.47it/s]

--- valid pairs: 8, invalid pairs: 0
pairs 0           (courageous united defiant leadership, en)
1                                              (, unk)
2    (overwhelmed gratitude honor alongside preside...
3    (hanging backstage getting ready tonightfestiv...
4    (worth watching marco incredibly talented unde...
5                             (come america great, en)
7    (biden doesn cognitive function election certa...
8    (biden worst president lifetime kamala harris ...
Name: text, dtype: object
--- valid pairs: 1527, invalid pairs: 0
pairs 0                                                  (, en)
1                                                  (, en)
3       (hope pray close year start journey chapter st...
4                                          (jealousy, cs)
5       (vale janet ossebaard honour memory sharing le...
                              ...                        
1929                                              (, unk)
1930    (hunter biden facing legal acti

 88%|████████▊ | 945/1071 [01:04<00:28,  4.38it/s]

--- valid pairs: 330, invalid pairs: 0
pairs 0                             (load your ready bull, en)
1                       (lambo lambo lambo letsgooo, sw)
2                             (load your ready bull, en)
3                       (lambo lambo lambo letsgooo, sw)
4                             (load your ready bull, en)
                             ...                        
394    (reward claiming live read pinned message clai...
395                                              (, unk)
396    (esta plataforma acabou lancada taxa implosao ...
397                 (wouldn able receive help admin, en)
399                                       (revshare, sq)
Name: text, Length: 330, dtype: object


 88%|████████▊ | 946/1071 [01:04<00:25,  4.84it/s]

--- valid pairs: 2423, invalid pairs: 0
pairs 1                                                  (, pt)
2                                           (dilemma, en)
3       (trump raised whopping million hours verdict c...
5       (listen carefully speak refer environment toxi...
6                                                  (, da)
                              ...                        
3145                                              (, unk)
3146                               (evil demons stop, en)
3147                           (national bank outage, en)
3148                                              (, unk)
3149                         (china floods chongqing, en)
Name: text, Length: 2423, dtype: object


 88%|████████▊ | 947/1071 [01:04<00:27,  4.57it/s]

--- valid pairs: 2849, invalid pairs: 0
pairs 0                                           (morning, en)
1                                                  (, en)
2               (kick trust plan country goes toilet, en)
3                                        (spam links, en)
4                                    (problem solved, en)
                              ...                        
2844                                          (ditto, it)
2845                                      (post pics, en)
2846                                     (room sucks, en)
2847                                (room came china, en)
2848    (putin joke putin dies goes hell years behaves...
Name: text, Length: 2849, dtype: object


 89%|████████▊ | 948/1071 [01:05<00:27,  4.52it/s]

--- valid pairs: 15, invalid pairs: 0
pairs 0                       (cryptozombi welcome vance, en)
1                              (ivan welcome vance, en)
2                     (limbless baby welcome vance, en)
3                              (liam welcome vance, en)
4                                               (, unk)
5                             (token welcome vance, en)
6                              (rich welcome vance, en)
7                                       (adminlist, et)
8             (admins vance owner note date values, en)
9     (cincinnati zoobaby rhino spent buyer position...
10    (cincinnati zoobaby rhino spent buyer holder m...
11                                              (, unk)
12                            (ariwa welcome vance, en)
13                    (krypto junkie welcome vance, en)
14                         (kramhole welcome vance, en)
Name: text, dtype: object


 89%|████████▊ | 949/1071 [01:05<00:31,  3.83it/s]

--- valid pairs: 2642, invalid pairs: 0
pairs--- valid pairs: 58, invalid pairs: 0
pairs 1       (watch trump courtroom sketch artist light smi...
5       (massive billion foreign bill passed speaker j...
6                                        (incredible, en)
7       (live london thoughts tactical voting thursday...
8                                                 (, unk)
                              ...                        
3336    (alex soros urges democrats trump convicted fe...
3337    (bank america lost case scotus decision confir...
3338    (satanist gives hand account significance hall...
3339    (thursday senator johnson held sovereignty sum...
3342                                           (maga, so)
Name: text, Length: 2642, dtype: object 
0                           (sanders welcome vance, en)
1                     (roaring kitty welcome vance, en)
2                        (rootriches welcome vance, en)
3                          (patricia welcome vance, en)
4       

 89%|████████▊ | 950/1071 [01:05<00:30,  4.02it/s]


--- valid pairs: 406, invalid pairs: 0
pairs 0                                                (, unk)
1      (takoi pozvolit sozdat konkurentnosposobnuiu e...
5                                                (, unk)
6      (nastoiashchii drug veren vezde schast bede tr...
7                (kakie protivorechiia antiterrorom, sk)
                             ...                        
539    (russkie zaskulili posle osoznaniia togo chto ...
540    (rossii nachale shirokomasshtabnoi voiny ukrai...
541                                              (, unk)
542                 (matil leon chut pozzhe zaidiot, it)
543                                     (dzurzuketi, lv)
Name: text, Length: 406, dtype: object


 89%|████████▉ | 952/1071 [01:06<00:34,  3.45it/s]

--- valid pairs: 3045, invalid pairs: 0
pairs 1                                                 (, unk)
2                                     (truth painful, en)
3                                                 (, unk)
4                                                 (, unk)
5                                                 (, unk)
                              ...                        
4177                                              (, unk)
4194    (madison marsh male tranny fraud rough male fe...
4195                                              (, unk)
4196    (charlie ward admits procuring underage girls ...
4197    (democrat jerry nadler says needs illegal immi...
Name: text, Length: 3045, dtype: object
--- valid pairs: 2803, invalid pairs: 0
pairs 0                                          (category, en)
1                         (taken cocoa beach florida, en)
7       (york midget dressed chucky tries woman york t...
8       (japansenior oncologist prof fukushima genetic..

 89%|████████▉ | 953/1071 [01:06<00:41,  2.86it/s]


--- valid pairs: 811, invalid pairs: 0
pairs 0      (hold good newly released documents supported ...
1      (intel massive heads huge preparation underway...
2                                       (bkhyr bzrg, da)
3               (dymwn wrtwn mZlwmh jdwl khly khwry, cy)
4                                                (, unk)
                             ...                        
905                                     (jdwl prymr, no)
907                                              (, unk)
908    (khylyn mbph jnwyh qrrddy mdryd nkhnd mdrydyh ...
909    (wlyn chnl lndn khwsh mdyd pwshsh tmmy khbr ln...
910    (happy year want year peace warmth love single...
Name: text, Length: 811, dtype: object


 89%|████████▉ | 955/1071 [01:07<00:31,  3.70it/s]

--- valid pairs: 132, invalid pairs: 0
pairs 0      (announcement time change rules airdrop vance ...
1                                                (, unk)
2      (announcement time change rules airdrop vance ...
3      (announcement time change rules airdrop vance ...
4                                                (, unk)
                             ...                        
127                                              (, unk)
128                                              (, unk)
129    (vance spent vance buyer holder price market d...
130    (vance spent vance buyer holder price market d...
131    (announcement time change rules airdrop vance ...
Name: text, Length: 132, dtype: object


 89%|████████▉ | 956/1071 [01:07<00:27,  4.11it/s]

--- valid pairs: 1682, invalid pairs: 0
pairs 0                                                 (, unk)
2       (biden family occupying white house real hallo...
3       (things getting juicy snow york countries turn...
4                                                 (, unk)
7       (breathing energy registration confirmation en...
                              ...                        
2054                                     (time light, en)
2055    (pizzagate associate james alefantis owner com...
2056                               (good observation, en)
2057           (video disappeared couple months days, en)
2058                                               (, cy)
Name: text, Length: 1682, dtype: object


 89%|████████▉ | 957/1071 [01:07<00:24,  4.71it/s]

--- valid pairs: 894, invalid pairs: 0
pairs --- valid pairs: 152, invalid pairs: 0
0       (yearlive yorktimes square year celebrations w...
1                            (bkhyr myldy mbrkh bzrg, no)
2                                              (mrwz, en)
3                                        (happy year, en)
4                                                 (, unk)
                              ...                        
1028                                          (ttnhm, cy)
1029                           (ggllll lywrpwl brdly, cy)
1030                             (nymh chlsy lywrpwl, cy)
1031    (message couldn displayed device copyright inf...
1032    (hermon hebrew translates forbidden known watc...
Name: text, Length: 894, dtype: object
pairs 

 89%|████████▉ | 958/1071 [01:07<00:26,  4.19it/s]

0      (vance whale spent vance buyer wallet value ho...
1      (vance spent vance buyer holder price market d...
2      (vance spent vance buyer holder price market d...
3      (vance spent vance buyer holder price market d...
4                                                (, unk)
                             ...                        
147    (scarlett welcome vance trumpvp elon endorsed,...
148    (announcement time change rules airdrop vance ...
149    (announcement time change rules airdrop vance ...
150    (announcement time change rules airdrop vance ...
151    (announcement time change rules airdrop vance ...
Name: text, Length: 152, dtype: object
--- valid pairs: 4, invalid pairs: 0
pairs 0    (hello honorable boss excellency benefactor de...
2                                     (hello boss, cy)
3                                              (, unk)
4                                          (hello, fi)
Name: text, dtype: object
--- valid pairs: 217, invalid pairs: 0

pairs

 90%|████████▉ | 960/1071 [01:07<00:20,  5.45it/s]

 --- valid pairs: 151, invalid pairs: 0pairs 0                                        (, pl)
1                            (paying promo, en)
2                        (organic memecoin, en)
3      (hyfriend project successful chance, en)
4                                        (, ro)
                         ...                   
213                    (yeti welcome vance, en)
214                          (nice knowing, pl)
215                  (julito welcome vance, en)
216                                     (, unk)
217                          (nice knowing, pl)
Name: text, Length: 217, dtype: object
--- valid pairs: 1, invalid pairs: 0
0      (vance whale spent vance buyer wallet value ho...
1      (announcement time change rules airdrop vance ...
2                                                (, unk)
3      (vance spent vance labs listing position price...
4                                                (, unk)
                             ...                        
179    (v

 90%|█████████ | 967/1071 [01:08<00:07, 13.80it/s]

pairs 0    (, unk)
Name: text, dtype: object--- valid pairs: 2, invalid pairs: 0

pairs--- valid pairs: 4, invalid pairs: 0
pairs 0    (civil antifa start gone radio silent posts on...
1       (russell brand endorses funniest possible, en)
2                                              (, unk)
4    (announces withdrawing presidential race cites...
Name: text, dtype: object 
0    (dedicated correcting post users sharing stati...
1    (welcome communities haven advertised congratu...
Name: text, dtype: object


 91%|█████████ | 972/1071 [01:08<00:05, 19.60it/s]

--- valid pairs: 9, invalid pairs: 0
pairs 1          (need usdt genuine guys message friends, da)
3              (flash usdt trcavailable price want, en)
5     (mobile deposit checks good balanced checks wo...
7              (flash usdt trcavailable price want, en)
25                (flash usdt available price want, en)
26    (company need usdt daily basis price gaming fu...
30    (mobile deposit checks good balanced checks wo...
32    (mobile deposit checks good balanced checks wo...
35    (BOOKING LIgHT TICKETs MOVIE TICKETs REnTAL HO...
Name: text, dtype: object
--- valid pairs: 10, invalid pairs: 0
pairs--- valid pairs: 14, invalid pairs: 0 
pairs0     (mobile deposit checks good balanced checks wo...
1     (joeljohnsondo quizzes earn dollars easily ear...
2     (mobile deposit checks good balanced checks wo...
3     (mobile deposit checks good balanced checks wo...
4     (mobile deposit checks good balanced checks wo...
5     (mobile deposit checks good balanced checks wo...
6   

 91%|█████████ | 975/1071 [01:08<00:05, 16.47it/s]

--- valid pairs: 7, invalid pairs: 0
pairs --- valid pairs: 6, invalid pairs: 0
pairs0                                              (, unk)
2    (free free earning dollar watching videos join...
3    (order product want amazon half price shipped ...
4                               (join thank later, en)
5    (free free earning dollar watching videos join...
6                                              (, unk)
7    (escrow service provider community guarantee p...
Name: text, dtype: object 0                                       (usdt euro, da)
1     (hello welcome join dorado tanzania gold limit...
7     (hello free free free free free free welcome j...
9     (hello free free free free free free welcome j...
15    (hello good project start daily message info w...
18                                               (, lv)
Name: text, dtype: object
--- valid pairs: 32, invalid pairs: 0

pairs 0     (rate usdt minimum usdt funds accept clean mix...
1     (rate usdt minimum usdt funds accep

 91%|█████████▏| 978/1071 [01:08<00:06, 13.48it/s]

--- valid pairs: 16, invalid pairs: 0
pairs 3     (mobile deposit checks good balanced checks wo...
9     (mobile deposit checks good balanced checks wo...
10                                              (, unk)
12                                              (, unk)
13                                              (, unk)
14                        (flash usdt trcavailable, en)
15        (fully active apple auto method required, en)
16    (mobile deposit checks good balanced checks wo...
18    (mobile deposit checks good balanced checks wo...
19                        (flash usdt trcavailable, en)
21    (mobile deposit checks good balanced checks wo...
22    (mobile deposit checks good balanced checks wo...
24    (mobile deposit checks good balanced checks wo...
25    (details flash usdt confirmation transaction u...
27    (mobile deposit checks good balanced checks wo...
29             (flash usdt trcavailable price want, en)
Name: text, dtype: object
--- valid pairs: 4, invalid pairs:

 92%|█████████▏| 980/1071 [01:08<00:06, 13.19it/s]

--- valid pairs: 1, invalid pairs: 0
 0                  (project executed thanks airdrop, en)
1      (recieved like magic gotten years interest cry...
4                               (benefited giveaway, en)
5      (feel better receiving bonus wallet thanks giv...
6      (earn bonus stress wallet increased minutes pa...
                             ...                        
200    (perfect reward xpayment method fast amazing y...
201    (balance increased doubtful initially thanks t...
202                                (received thanks, en)
203                  (guys incredible finally bonus, en)
204    (event recovered happy firm opportunity comes,...
Name: text, Length: 201, dtype: object
pairs --- valid pairs: 52, invalid pairs: 03    (usdt sale bank transfer american company acco...
Name: text, dtype: object

--- valid pairs: 12, invalid pairs: 0
pairs pairs0     (free task site Virtu sign link sign bonus aut...
1     (looking platform earn couple hour capital sta...
2     (ear

 92%|█████████▏| 983/1071 [01:09<00:05, 14.98it/s]

--- valid pairs: 20, invalid pairs: 0
pairs 0                                               (, unk)
1                                               (, unk)
2     (apex finance earns online business investment...
3     (financial freedom world apex financeearns ape...
4     (financial freedom world apex financeearns ape...
5     (financial freedom world apex financeearns ape...
6     (financial freedom world apex financeearns ape...
7     (mobile deposit checks good balanced checks wo...
8     (mobile deposit checks good balanced checks wo...
9     (mobile deposit checks good balanced checks wo...
10    (financial freedom world apex financeearns ape...
11                                 (need buyr rate, en)
12    (mobile deposit checks good balanced checks wo...
13                                              (, unk)
14                                   (trcavailable, fr)
15                                              (, unk)
16                                   (selling usdt, no)
18  

 92%|█████████▏| 985/1071 [01:09<00:05, 15.05it/s]

--- valid pairs: 2965, invalid pairs: 0
pairs 0       (right wing obama liar actual bowman corrupt s...
1       (breaking virginia found guilty stealing justi...
2                                             (waves, en)
3       (breaking illegal aliens dropped skis coast oc...
4                          (trump fire tonight rally, en)
                              ...                        
3672    (police officers arrived burke home arrest tea...
3673                                              (, unk)
3674                                              (, unk)
3675                                              (, unk)
3676                                              (, unk)
Name: text, Length: 2965, dtype: object
--- valid pairs: 6, invalid pairs: 0
pairs 0                              (fosfomycin sachet, en)
1          (manufacturing dextro ambroxol menthol, tl)
2                                   (cough syrups, en)
3    (sodium alginate sodium bicarbonate calcium ca...
4          (c

 92%|█████████▏| 987/1071 [01:09<00:08,  9.72it/s]


pairs 0                      (manufacturing tummy rollon, en)
1                                               (, unk)
2             (manufacturing norethisterone tablet, en)
3                                               (, unk)
4     (manufacturing lornoxicam thiocolchicoside tab...
5       (manufacturing ursodeoxycholic acid tablet, en)
6     (manufacturing acetylcysteine arginine calcium...
7              (manufacturing racecadotril capsule, ro)
8     (manufacturing saccharomyces boulardii billion...
9                 (manufacturing cetirizine tablet, ro)
10    (manufacturing game changer derma range cream ...
11               (manufacturing misoprostol tablet, en)
12                                              (, unk)
13            (manufacturing dydrogesterone tablet, da)
14                                              (, unk)
15                  (manufacturing carnosine syrup, ro)
16              (manufacturing levofloxacin tablet, en)
17                      (manufacturing de

 92%|█████████▏| 990/1071 [01:09<00:07, 10.63it/s]

0                                              (, unk)
1    (manufacturing esomeprazole domperidone tablet...
2                                              (, unk)
Name: text, dtype: object
--- valid pairs: 36, invalid pairs: 0
pairs 0       (manufacturing ursodeoxycholic acid tablet, en)
1           (manufacturing linezolid tablets party, en)
2     (manufacturing diclofenac methyl salicylate vi...
3       (manufacturing zinc oxide cream nappy heal, en)
4     (diclofenac linseed methyl salicylate menthol,...
6     (manufacturing diphenhydramine hydrochloride a...
7               (manufacturing famciclovir tablets, en)
8                 (manufacturing linezolid tablets, en)
9             (manufacturing sucralfate oxetacaine, ro)
10               (manufacturing deflazacort tablet, ro)
11                                      (carnosine, it)
12    (manufacturing etroricoxib thiocolchicoside ta...
13              (manufacturing luliconazole lotion, ro)
14        (manufacturing cholecalcife

 93%|█████████▎| 992/1071 [01:10<00:06, 11.57it/s]

--- valid pairs: 2, invalid pairs: 0
pairs 0    (need large honest reliable company game funds...
1                                               (, sk)
Name: text, dtype: object
--- valid pairs: 6, invalid pairs: 0
pairs 0    (manufacturing diclofenac linseed thiocolchico...
1    (manufacturing etodolac thiocolchicoside table...
2             (manufacturing levosulpiride tablet, ro)
3             (manufacturing trypsin chymotrypsin, en)
4                                              (, unk)
6       (manufacturing desonide ointment ointment, fr)
Name: text, dtype: object


 93%|█████████▎| 994/1071 [01:10<00:06, 11.36it/s]

--- valid pairs: 0, invalid pairs: 0
pairs Series([], Name: text, dtype: object)
--- valid pairs: 1, invalid pairs: 0
pairs 0    (watch, en)
Name: text, dtype: object
--- valid pairs: 1, invalid pairs: 0
pairs 0    (welcome, cy)
Name: text, dtype: object


 93%|█████████▎| 997/1071 [01:10<00:05, 13.52it/s]

--- valid pairs: 3259, invalid pairs: 0
pairs 0       (adrenochrome psychedelic drug extracted brain...
1       (elon musk corporations pulling advertise goin...
2       (breaking shane macgowan dies aged bullshit dy...
3                                                 (, unk)
4                                                 (, unk)
                              ...                        
4658    (raganrok revolte france century castle caught...
4660                                               (, so)
4661                   (guilty fuck able walk street, en)
4662                                              (, unk)
4663    (best explanation weird traffic europe night l...
Name: text, Length: 3259, dtype: object
--- valid pairs: 43, invalid pairs: 0
pairs --- valid pairs: 3568, invalid pairs: 0--- valid pairs: 4, invalid pairs: 0

pairspairs 0                                               (, unk)
1                                               (, unk)
2     (sachin mishra account ifs

 93%|█████████▎| 999/1071 [01:10<00:06, 11.11it/s]

--- valid pairs: 3, invalid pairs: 0
pairs 0    (group protected click link start human verifi...
1                    (portal acts jill spare link, en)
2                                               (, so)
Name: text, dtype: object
--- valid pairs: 2737, invalid pairs: 0
pairs 0                              (scary stuff thankyou, en)
1                                                 (, unk)
2       (crazy kids group composed song called tomorro...
3                               (totally interesting, en)
4       (need meds pharma need better diet diet absolu...
                              ...                        
3815    (happens boil lemon know lemon peel nutritious...
3816    (science grounding body energy earth energy we...
3817    (reason told tesla airships involved deliverin...
3818    (fryers convenience design optimal health stri...
3819    (deodorant benzene cancer causing chemical fou...
Name: text, Length: 2737, dtype: object


 94%|█████████▎| 1003/1071 [01:10<00:04, 14.16it/s]

--- valid pairs: 40, invalid pairs: 0
pairs --- valid pairs: 40, invalid pairs: 0
pairs 0          (manufacturing oxymetazolin nasal spray, tl)
1     (manufacturing collagen peptide type sodium hy...
2     (broaden horizons skin care cream production, en)
4     (manufacturing tannic acid zinc chloride cetri...
5                                               (, unk)
6                                               (, unk)
7     (manufacturing antioxidant lycopene vitamins t...
8     (manufacturing sitagliptin metformin tablets, en)
9             (manufacturing iron folic acid syrup, en)
10                     (manufacturing testosterone, en)
11     (manufacturing sitagliptin metformin tablet, en)
12    (manufacturing ferrous bisglycinate folic acid...
13    (manufacturing ambroxol terubtaline guaiphensi...
14                                              (, unk)
15             (manufacturing anti cold range kids, en)
16            (manufacturing methylcobalmin tablet, en)
18    (manufactu

 94%|█████████▍| 1005/1071 [01:10<00:05, 12.96it/s]

--- valid pairs: 2201, invalid pairs: 0
pairs 0       (alert getting reports users admins group cont...
1                              (operating satellites, en)
2            (agree permit holders cars aren allowed, en)
3                                                 (, unk)
4       (special guest debbie solaris contactee interd...
                              ...                        
2637    (breaking buckley step trillion asset manager ...
2638                                               (, da)
2639                                   (trump people, en)
2640                   (chicken farm destroyed texas, en)
2641    (breaking maryland state house lockdown baltim...
Name: text, Length: 2201, dtype: object
--- valid pairs: 425, invalid pairs: 0
pairs--- valid pairs: 337, invalid pairs: 0 
pairs0                 (received thanks brandon inviting, en)
1        (whooooohhhh event awesome received reward, en)
2                      (thank today happened needed, en)
3               

 94%|█████████▍| 1007/1071 [01:11<00:08,  7.46it/s]

--- valid pairs: 91, invalid pairs: 0
pairs 0                                                (, unk)
1                   (manufacturing entecavir tablet, en)
3                                                (, unk)
4      (manufacturing fosfomycin tromethamine sachet,...
5                    (manufacturing ubtan face wash, en)
                             ...                        
96     (manufacturing ivermectin albendazole suspensi...
98                                               (, unk)
99                        (manufacturing cetirizine, ro)
100                     (manufacturing pimecrolimus, ro)
101    (manufacturing sodium alignate sodium bicarbon...
Name: text, Length: 91, dtype: object


 94%|█████████▍| 1009/1071 [01:11<00:07,  8.40it/s]

--- valid pairs: 552, invalid pairs: 0
pairs 1      (vydaiushchiisia gruzinskii pisatel veka kazbe...
2                                                (, unk)
3                                                (, unk)
4      (nobelevskaia rech dmitriia muratova muratov v...
5                                                (, unk)
                             ...                        
803                                              (, unk)
804             (kogda stal zhertvoi svoego proekta, hr)
805                  (stanovlenie russkogo natsizma, hr)
806    (pozdravliaem vsekh druzei otmechaiushchikh pa...
807    (allakh kakoi molodets etot igilov stal predse...
Name: text, Length: 552, dtype: object
--- valid pairs: 266, invalid pairs: 0--- valid pairs: 942, invalid pairs: 0

pairspairs  0      (migration update vance migration improved sma...
1      (vance vance holder market chart trade trendin...
2      (vance vance fdtxn holder market chart trade t...
3                           

 94%|█████████▍| 1011/1071 [01:12<00:08,  6.77it/s]

--- valid pairs: 843, invalid pairs: 0
pairs 2                                                  (, ro)
3                        (lambo lambo lambo letsgooo, sw)
4                          (revenue share live claim, en)
5                        (lambo lambo lambo letsgooo, sw)
8                          (revenue share live claim, en)
                              ...                        
1083    (currently wallets love token consolidation wa...
1084    (whales sniffing want know alot opportunity bi...
1085                       (revenue share live claim, en)
1086                       (revenue share live claim, en)
1087                       (revenue share live claim, en)
Name: text, Length: 843, dtype: object


 95%|█████████▍| 1013/1071 [01:12<00:07,  7.79it/s]

--- valid pairs: 125, invalid pairs: 0
pairs 0      (manufacturing trypsin bromelain rutoside acec...
1       (manufacturing fosfomycin trometamol sachet, en)
2                  (manufacturing febuxostat tablet, en)
3                                                (, unk)
4      (manufacturing sodium alginate sodium bicarbon...
                             ...                        
124             (manufacturing oxcarbazepine tablet, ro)
125    (manufacturing streptococcus faecalis million ...
126          (manufacturing gamma benzene cetrimide, ro)
127                     (manufacturing iron formula, ro)
128    (manufacturing iron folic acid vitamin bsyrup,...
Name: text, Length: 125, dtype: object
--- valid pairs: 2532, invalid pairs: 0
pairs 1                                           (sheriff, en)
2                          (love book awesome little, en)
3                     (surprising transgender antifa, en)
4        (didn expect suge prison advice puffy squat, en)
5          

 95%|█████████▍| 1015/1071 [01:12<00:07,  7.55it/s]

--- valid pairs: 1227, invalid pairs: 0
pairs 0       (revshare holders sent wallets claimed manuall...
1                           (claim manually revshare, en)
2                                         (sell news, en)
3                                         (sell news, en)
4                                         (sell news, en)
                              ...                        
1585                                           (dude, fr)
1586                             (thanks want assure, en)
1587                                          (chart, en)
1588    (share feedback possibilities revenue share st...
1589    (bull markets born pessimism grow skepticism m...
Name: text, Length: 1227, dtype: object
--- valid pairs: 2704, invalid pairs: 0
pairs 0       (raid gone minutes tweet reaching likes replie...
1       (trumpbuy trumpfuvrmkftxn position trumpprice ...
2       (trumpbuy spent trumpbuyer holder market dext ...
3       (trumpbuy trumpwnfssnh holder trumpprice marke..

 95%|█████████▍| 1017/1071 [01:12<00:06,  8.28it/s]

--- valid pairs: 146, invalid pairs: 0
pairs 0                                                (, unk)
1      (manufacturing clobetasol calcipotriol ointmem...
2                               (manufacturing wash, en)
3      (manufacturing pyridoxine niacinamide cyanocob...
4                                                (, unk)
                             ...                        
148    (manufacturing clotrimazole lignocaine neomyci...
149               (manufacturing sitagliptin tablet, tl)
150                       (manufacturing artemether, en)
151                            (manufacturing liver, en)
152               (manufacturing clopidogrel tablet, en)
Name: text, Length: 146, dtype: object


 95%|█████████▌| 1018/1071 [01:13<00:07,  6.93it/s]

--- valid pairs: 2535, invalid pairs: 0
pairs 0                               (welcome samuel bayu, so)
1       (trumpbuy trumpapunps position trumpprice mark...
2       (trumpbuy trumpapunps position trumpprice mark...
3       (trumpbuy trumpixgfp lvvb holder trumpprice ma...
4       (follow alert augustinbrianfollowed trumponsol...
                              ...                        
2546    (trumpbuy spent trumpbuyer position market dex...
2547    (trumpbuy trumpthekfya position trumpprice mar...
2548    (website amazing best crypto meme project site...
2549    (trumpbuy trumpeyibas position trumpprice mark...
2550    (trumpbuy spent trumpbuyer position market dex...
Name: text, Length: 2535, dtype: object
--- valid pairs: 1367, invalid pairs: 0--- valid pairs: 3473, invalid pairs: 0

pairspairs  0                                                 (, unk)
1       (good morning world push friend follow johnny,...
2       (united states corporation america follow john...
3         

 95%|█████████▌| 1020/1071 [01:13<00:12,  4.23it/s]

--- valid pairs: 2828, invalid pairs: 0
pairs 0         (starting drama group wall automatic games, en)
2       (president donald trump took campaign trail go...
3       (watch police moved tuesday evening hamas prot...
4                                             (great, en)
5       (watch police moved tuesday evening hamas prot...
                              ...                        
3456                                               (, en)
3457                                              (, unk)
3458                           (trumplatest campaign, en)
3459                                   (creepy movie, en)
3460    (mich democrats passing laws systemic voter fr...
Name: text, Length: 2828, dtype: object


 95%|█████████▌| 1022/1071 [01:14<00:12,  3.90it/s]

--- valid pairs: 856, invalid pairs: 0
pairs 1                  (welcome hesitate need assistance, en)
2        (migration live read pinned massage migrate, en)
3        (migration live read pinned massage migrate, en)
4       (little confused stuff thanks admin help airdr...
5                              (load your ready bull, en)
                              ...                        
1104                                              (, unk)
1105    (mood latest market news trading volumes incre...
1106                         (claim yout tokens coin, en)
1107    (sure follow instructions group help messages,...
1108       (wife brought coin today hope gain profit, en)
Name: text, Length: 856, dtype: object
--- valid pairs: 673, invalid pairs: 0
pairs 0      (knigu iasa pereveli mozhno skachat perevodom,...
1      (reparatsii pridetsia platit chechenskoi respu...
3      (singapur goda rybatskoi derevushki prevratils...
5      (mobilizovannyi novisibirska aleksei kuliaev p...
6    

 96%|█████████▌| 1024/1071 [01:14<00:09,  5.15it/s]

--- valid pairs: 2726, invalid pairs: 0
pairs 0       (house oversight releases hunter biden transcr...
1                        (future medicine looks like, en)
2                                                 (, unk)
3                                      (pawning face, cy)
4                    (miranda devine face right time, en)
                              ...                        
3269    (tina peters epitome lesser magistrate doctrin...
3270                                              (, unk)
3271                                (lord proffessor, en)
3272                                              (, unk)
3273                                              (, unk)
Name: text, Length: 2726, dtype: object
--- valid pairs: 1056, invalid pairs: 0
pairs 0       (feel better receiving bonus wallet thanks giv...
1                            (good received giveaway, en)
2       (sincere gratitude team putting thanks bonus c...
3       (gotten bonus giveaway competition thanks team..

 96%|█████████▌| 1026/1071 [01:15<00:09,  4.85it/s]

--- valid pairs: 2960, invalid pairs: 0
pairs 1       (breaking israel launches ground invasion sout...
2       (armored truck dumped cash diego freeway trigg...
3                                              (hero, nl)
4                    (seen happen come camera idiots, en)
5                                                  (, sl)
                              ...                        
3735    (chinese hackers target trump candidate vance ...
3736    (jeff bezos killed washington post endorsement...
3737                                              (, unk)
3738                           (video weird platform, en)
3739                                           (live, da)
Name: text, Length: 2960, dtype: object


 96%|█████████▌| 1027/1071 [01:15<00:14,  3.10it/s]

--- valid pairs: 1047, invalid pairs: 0
pairs 0           (bonus came estimated time posting thank, en)
1                              (knew real appreciate, en)
2       (works real bonus minutes sending contribution...
3        (blessed bonus received minutes thanks team, en)
4              (happy coins today biggest best event, en)
                              ...                        
1122    (suprising mate platform filled surprises rece...
1123                (project executed thanks airdrop, en)
1124                                (received thanks, en)
1125         (wins participated event received bonus, en)
1126    (feel better receiving bonus wallet thanks giv...
Name: text, Length: 1047, dtype: object


 96%|█████████▌| 1028/1071 [01:16<00:12,  3.54it/s]

--- valid pairs: 725, invalid pairs: 0
--- valid pairs: 1297, invalid pairs: 0pairs
 pairs 0              (bull flag forming breakout, en)
1              (bull flag forming breakout, en)
2              (bull flag forming breakout, en)
3              (bull flag forming breakout, en)
4              (bull flag forming breakout, en)
                         ...                   
1653                       (boys wait load, en)
1654    (smart guys going heavy price good, en)
1659                (looks like start soon, af)
1660                                    (, unk)
1661                                (chart, en)
Name: text, Length: 1297, dtype: object
0                                                (, unk)
1      (vance spent vance buyer position price market...
2      (announcement time change rules airdrop vance ...
3                                  (welcome taisiia, en)
4                       (welcome liliia amvrosimova, it)
                             ...                        

 96%|█████████▌| 1029/1071 [01:16<00:12,  3.24it/s]

--- valid pairs: 3026, invalid pairs: 0
pairs 1                              (feel ladies emotions, en)
2                                  (interesting true, da)
3                                (stop spamming wall, en)
4                                                 (, unk)
5                        (come chat explain research, en)
                              ...                        
3988    (louis missouri louis firefighters responded n...
3990    (code rebel participant film rough learned thi...
3991                                              (, unk)
3992            (watch second time today interesting, en)
3993                                           (good, af)
Name: text, Length: 3026, dtype: object


 96%|█████████▋| 1031/1071 [01:16<00:11,  3.51it/s]

--- valid pairs: 2677, invalid pairs: 0
pairs 0       (jeff walz endorses trump president instead br...
1       (said happening illegals control know chat rem...
2       (talk vigilante justice going happening people...
3                          (obviously globalist king, en)
4                                                  (, pl)
                              ...                        
3641               (people taking start clean helene, en)
3642              (favorite video young youth gettin, en)
3644                                              (, unk)
3645                                   (fraud rumble, en)
3646    (like maui fires south east floods outcome pur...
Name: text, Length: 2677, dtype: object
--- valid pairs: 7006, invalid pairs: 0
pairs 0       (enemy state rubbish removes enemy state free ...
1       (artificial scarcity getting rich stuff produc...
2       (aristotle speaking similar plot write vince, en)
3                  (learned aristotle orwell masters, en

 96%|█████████▋| 1033/1071 [01:17<00:11,  3.35it/s]

--- valid pairs: 1676, invalid pairs: 0
pairs 0       (volume share started autocompound working use...
1       (revshare holders sent wallets claimed manuall...
2                           (claim manually revshare, en)
3                                         (sell news, en)
4                                         (sell news, en)
                              ...                        
2031                                           (here, en)
2033                                           (here, en)
2034                                           (here, en)
2035                                           (here, en)
2036                                           (here, en)
Name: text, Length: 1676, dtype: object


 97%|█████████▋| 1034/1071 [01:18<00:14,  2.57it/s]

--- valid pairs: 1063, invalid pairs: 0
pairs 0       (people knowing price works learn liquidity kn...
2                  (claimed token today brought coin, en)
3                         (alot balance claimed coin, en)
4                                     (trust process, en)
5                                     (trust process, en)
                              ...                        
1261    (vance vance xbtxn position market chart trade...
1262    (vance whale alert value bought xbtxn position...
1263                                              (, unk)
1264    (vance vance xtxn position market chart trade ...
1265    (announcement weekly revenue share update atte...
Name: text, Length: 1063, dtype: object


 97%|█████████▋| 1035/1071 [01:18<00:12,  2.89it/s]

--- valid pairs: 3161, invalid pairs: 0
pairs 0                                   (trump sentenced, en)
1                                                 (, unk)
3                                      (voting trump, da)
5                                         (look like, et)
6                                       (wasn creepy, en)
                              ...                        
3790    (cover white house photographer blows whistle ...
3791                                              (, unk)
3792    (breaking boeing charged criminal fraud compan...
3793        (vibe check capt kyle patriots meme team, en)
3794                     (improved design home depot, en)
Name: text, Length: 3161, dtype: object


 97%|█████████▋| 1036/1071 [01:19<00:18,  1.87it/s]

--- valid pairs: 981, invalid pairs: 0
pairs 2                                                 (, unk)
4       (prodvigaetsia uspekhu parallel iakoby nenavis...
6                                                 (, unk)
8                     (palestine ulitsa chest putina, fr)
10                                     (masha allakh, so)
                              ...                        
1234                                   (veruiushchii, et)
1235                    (pokhozhe chto dolgu sluzhby, sk)
1236                               (otriad bagateria, id)
1237                                   (voiska salat, fi)
1240            (pora podnimat tochechno terroristam, hr)
Name: text, Length: 981, dtype: object


 97%|█████████▋| 1037/1071 [01:20<00:18,  1.85it/s]

--- valid pairs: 2080, invalid pairs: 0
pairs 0                            (chart, en)
1                                (, unk)
2                   (hold milestone, en)
3       (lambo lambo lambo letsgooo, sw)
4         (revenue share live claim, en)
                      ...               
2640                     (sell news, en)
2641                     (sell news, en)
2642                     (sell news, en)
2643      (revenue share live claim, en)
2644                     (sell news, en)
Name: text, Length: 2080, dtype: object


 97%|█████████▋| 1038/1071 [01:21<00:23,  1.42it/s]

--- valid pairs: 2176, invalid pairs: 0
pairs 0                                     (welcome vance, en)
1                                              (dead, es)
2                                 (dead action vence, en)
3                                                  (, it)
4                                                 (, unk)
                              ...                        
2203                                              (, unk)
2204                                  (pump moooooon, tl)
2205    (launching soon pumpfun solana hyped pumpfun c...
2206                                  (pump moooooon, tl)
2207                                  (pump moooooon, tl)
Name: text, Length: 2176, dtype: object


 97%|█████████▋| 1039/1071 [01:23<00:33,  1.03s/it]

--- valid pairs: 4033, invalid pairs: 0
pairs 0                                                 (, unk)
1                            (mark levin trump cases, en)
3       (president trump elected november republicans ...
6       (structures damaged brush fire continued grow ...
7                                                 (, unk)
                              ...                        
5252                                              (, unk)
5253                                              (, unk)
5255    (join telegram follow rumble follow spotify fo...
5256                                      (good news, en)
5258         (know shot swearing wasn president vice, en)
Name: text, Length: 4033, dtype: object


 97%|█████████▋| 1040/1071 [01:24<00:36,  1.18s/it]

--- valid pairs: 7930, invalid pairs: 0
pairs 0       (think know truth think think living need wake...
1       (good morning believe time shine providing tru...
2                              (great weekend people, en)
3          (children world belonglove like subscribe, en)
4       (badass photo taken history remember president...
                              ...                        
7928                                    (watch share, en)
7929    (raids hunter biden found massive leaked child...
7930                                    (watch share, en)
7931    (raids hunter biden found massive leaked child...
7932                                    (watch share, en)
Name: text, Length: 7930, dtype: object


 97%|█████████▋| 1041/1071 [01:28<00:59,  2.00s/it]

--- valid pairs: 9174, invalid pairs: 0
pairs 0            (samhain prepare winter southern aryans, en)
1       (spoken speculated previous video keramik oche...
2                                                  (, en)
3                   (going fortifications hills town, en)
4                        (maybe advanced mkrematorsk, en)
                              ...                        
9585                                   (votes counts, en)
9586    (georgian prime minister irakli kobakhidze say...
9587                                              (, unk)
9588                        (vince public healthcare, en)
9589                      (think copayments increase, en)
Name: text, Length: 9174, dtype: object


 97%|█████████▋| 1042/1071 [01:28<00:42,  1.45s/it]

In [None]:
# Sanity-check summary of the intermediate frames built earlier in the notebook.

# One sample row from each frame, to eyeball the columns.
sample_without_messages = df_channels_without_message.head(1)
print("--- first rows of df_channels_without_message", sample_without_messages)
sample_preprocessed = df_preprocessed_non_empty_channels.head(1)
print("--- first rows of df_preprocessed_non_empty_channels", sample_preprocessed)
sample_english = df_english_preprocessed_non_empty_channels.head(1)
print("--- first rows of df_english_preprocessed_non_empty_channels:", sample_english)

# Size of the seed set.
seed_count = len(df_first_nodes)
print("\n---\n--- number of initial seeds:", seed_count)

# Distinct channel counts at each stage.
n_channels_without_messages = df_channels_without_message['channel_id'].nunique()
print("--- Unique channels in df_channels_without_message:", n_channels_without_messages)
# NOTE(review): the 180 below is a hard-coded literal, not recomputed from
# file_args, so it can go stale if the inputs change -- confirm it still holds.
print("--- Number of distinct channel_ids in file_args: 180")
n_channels_with_messages = df_preprocessed_non_empty_channels['channel_id'].nunique()
print("--- Distinct channels: Unique channel ids with messages in df_preprocessed_non_empty_channels(also before modification)", n_channels_with_messages)
n_english_channels = df_english_preprocessed_non_empty_channels['channel_id'].nunique()
print("--- Distinct english channels: Unique channels in df_english_preprocessed_non_empty_channels:", n_english_channels)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def _finish_plot(title, xlabel, ylabel):
    """Label the current matplotlib axes, tilt the x ticks and render the figure."""
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()


# Number of messages for every (channel, language) pair.
lang_counts = (
    df_preprocessed_non_empty_channels
    .groupby(['channel_id', 'language'])
    .size()
    .reset_index(name='count')
)

# Within each channel, put the language with the most messages first ...
lang_counts_sorted = lang_counts.sort_values(['channel_id', 'count'], ascending=[True, False])

# ... so that keeping the first row per channel yields its dominant language.
dominant_lang = lang_counts_sorted.drop_duplicates(subset='channel_id', keep='first')

# Bar chart: how many channels have each language as their dominant one.
plt.figure(figsize=(10, 5))
dominant_lang['language'].value_counts().plot(kind='bar')
_finish_plot("Dominant language per channel", "Language", "Number of channels")

# Histogram of the dates of all English pre-processed messages.
df_english_preprocessed_non_empty_channels['date'].hist(bins=120)
_finish_plot("Tutti i messaggi pre-processati", "Data", "Numero di messaggi")

print("Numero di testi pre-processati:", len(df_english_preprocessed_non_empty_channels['text_preprocessed']))
print("---")
print("Numero di canali unici:", df_english_preprocessed_non_empty_channels['channel_id'].nunique())
print("---")

# Histogram of the date of the most recent message of every channel:
# sort chronologically, then keep only the last row seen for each channel.
_last_message_per_channel = (
    df_english_preprocessed_non_empty_channels
    .sort_values(by='date')
    .drop_duplicates(subset='channel_id', keep='last')
)
_last_message_per_channel['date'].hist(bins=100)
_finish_plot("Data dell’ultimo messaggio per ciascun canale", "Data", "Numero di canali")


In [None]:
# DISABLED CELL: everything below is wrapped in a triple-quoted string, so none
# of it executes (the cell merely evaluates to that string).
# The wrapped code would encode every row of
# df_english_preprocessed_non_empty_channels['text_preprocessed'] with the
# 'all-distilroberta-v1' SentenceTransformer, or load the vectors from
# ../final/final_embeddings_{model_name}.npy when that file already exists.
# NOTE(review): the grid-search cell further down reads and writes the same
# .npy path for a 10% sample of the data, so the two cells can pick up each
# other's cache file.
# NOTE(review): the two trailing prints in the wrapped code would dump the whole
# text list and the whole embedding matrix to the output.
'''
os.environ["TOKENIZERS_PARALLELISM"] = "true"

device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

model_name = 'all-distilroberta-v1'
embedding_model=SentenceTransformer(model_name)

if os.path.exists(f'../final/final_embeddings_{model_name}.npy'):
    print('file found')
    embeddings = np.load(f'../final/final_embeddings_{model_name}.npy')
    print(f'embedding {model_name} loaded')
else:
    embedding_model = embedding_model.to(device)
    embeddings = embedding_model.encode(df_english_preprocessed_non_empty_channels['text_preprocessed'].tolist(), show_progress_bar=True, device=device)
    np.save(f'../final/final_embeddings_{model_name}.npy', embeddings)

print(f"df_english_preprocessed_non_empty_channels['text_preprocessed'].tolist(): {df_english_preprocessed_non_empty_channels['text_preprocessed'].tolist()}")
print(f"embeddings: {embeddings}")

'''

In [None]:
# DISABLED CELL: everything below is wrapped in a triple-quoted string, so none
# of it executes (the cell merely evaluates to that string).
# The wrapped code would fit one BERTopic model on the full English corpus with
# fixed UMAP / HDBSCAN settings and a CountVectorizer, save it to
# ../final/final_{model_name}.pkl unless that file already exists, and then
# reload it into `topic_model` / `topics`.
# It relies on `model_name` and `embeddings`, which the wrapped code of the
# previous (also disabled) cell would define.
# NOTE(review): within the visible part of the notebook, `vectorizer_model` is
# only defined inside this disabled code, yet the outlier-reduction cell further
# down passes `vectorizer_model` to update_topics - confirm where it comes from.
'''
umap_params = {'n_components': 5, 'n_neighbors': 5, 'min_dist': 0.0}
hdbscan_params = {'min_cluster_size': 500,'min_samples':100,'prediction_data':True}

output_path = f'../final/final_{model_name}.pkl'

if not os.path.exists(output_path):
    print(f"debug {len(df_english_preprocessed_non_empty_channels)} messaggi pre-processati.")

    umap_model = UMAP(**umap_params)
    hdbscan_model = HDBSCAN(**hdbscan_params)

    max_features_vectorizer = 1024
    min_df_vectorizer = 0.01
    max_df_vectorizer = 0.99 

    vectorizer_model = CountVectorizer(
        max_features=max_features_vectorizer, 
        min_df=min_df_vectorizer,
        max_df=max_df_vectorizer
    )

    print("embeddings.shape", embeddings.shape)

    t0 = time.time()
    topic_model = BERTopic(
        embedding_model=None,
        umap_model=umap_model,
        hdbscan_model=hdbscan_model,
        verbose=True,
        top_n_words=20,
        language='english', 
        vectorizer_model=vectorizer_model
    )

    print("embedding shape 0" + str(embeddings.shape[0]))
    print("embedding first", embeddings[0])
    print("len df "+ str(len(df_english_preprocessed_non_empty_channels['text_preprocessed'])))
    assert embeddings.shape[0] == len(df_english_preprocessed_non_empty_channels['text_preprocessed'])

    topics, probs = topic_model.fit_transform(df_english_preprocessed_non_empty_channels['text_preprocessed'], embeddings=embeddings)
    print(f"Execution time for {model_name} UMAP: {time.time()-t0}s")

    topics = np.array(topics)

    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    
    topic_model.save(output_path)
else:
    print(f"Model already saved at {output_path}")

topic_model=BERTopic.load(f'../final/final_{model_name}.pkl')
topics=np.array(topic_model.topics_)

'''

In [None]:
#from cuml.cluster import HDBSCAN
#from cuml.manifold import UMAP
from sentence_transformers import SentenceTransformer
from sklearn.metrics import silhouette_score
from octis.evaluation_metrics.diversity_metrics import TopicDiversity
from octis.evaluation_metrics.coherence_metrics import Coherence


# Draw a 10% sample of the English messages for the grid search.
# The seed is fixed on purpose: embeddings for this sample are cached on disk
# under a name that only depends on the embedding model, so an unseeded sample
# would silently pair a new set of rows with vectors computed for an old one
# after every kernel restart.
df_sampled = df_english_preprocessed_non_empty_channels.sample(frac=0.1, random_state=42)

# Whitespace-tokenised version of the sampled documents (one token list per
# document); get_metrics binds it as the default reference corpus for coherence.
texts = [sentence.split() for sentence in df_sampled['text_preprocessed'].to_list()]
# Expected structure of `texts`:
# texts = [
#     ["i", "love", "machine", "learning"],
#     ["data", "science", "is", "fun"],
#     ["deep", "learning", "models", "are", "powerful"]
# ]
def get_metrics(topic_model,texts=texts,topk=10):
    '''
    Score a fitted topic model with the OCTIS diversity and coherence metrics.

    Parameters:
        topic_model: object exposing get_topics(), which must return a mapping
            {topic_id: [(word, weight), ...]}; topic id -1 (outliers) is ignored.
        texts: tokenised reference corpus (list of token lists) handed to the
            coherence metric. Defaults to the module-level `texts` as it was
            when this function was defined.
        topk: number of leading words per topic that both metrics look at.
            Topics with fewer than `topk` non-empty words are left out.

    Return: diversity_score,coherence_score
        Both are NaN when no topic qualifies (e.g. every document is an
        outlier), because there is then nothing to score.
    '''
    # get_topics() looks like:
    #   {0: [('apple', 0.3), ('banana', 0.2), ...],
    #    1: [('data', 0.4), ('science', 0.3), ...], ...}
    topics_list = []
    for topic_id, topic_words in topic_model.get_topics().items():
        if topic_id == -1:
            continue
        # Keep only the words (drop the weights) and skip empty padding words.
        words = [entry[0] for entry in topic_words if entry[0] != '']
        if len(words) >= topk:
            topics_list.append(words)

    # Without at least one usable topic the metrics are undefined; return NaN
    # instead of letting the metric implementations fail on an empty list, so a
    # degenerate grid-search configuration does not abort the whole run.
    if not topics_list:
        return float('nan'), float('nan')

    # OCTIS expects the word lists wrapped as {"topics": [[w1, w2, ...], ...]}.
    model_output = {"topics": topics_list}

    # Diversity over the top-k words of every topic.
    topic_diversity = TopicDiversity(topk=topk)
    diversity_score = topic_diversity.score(model_output)

    # Coherence of the top-k words, measured against the reference corpus.
    coherence_metric = Coherence(topk=topk,texts=texts)
    coherence_score = coherence_metric.score(model_output)
    print(coherence_score)
    return diversity_score,coherence_score

# Step 2: pick the compute device (GPU when available) and allow the
# tokenizers library to parallelise.
os.environ["TOKENIZERS_PARALLELISM"] = "true"
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)

# SentenceTransformer models compared by the grid search, keyed by model name.
models = {
    name: SentenceTransformer(name)
    for name in ('all-distilroberta-v1', 'paraphrase-MiniLM-L6-v2', 'all-MiniLM-L6-v2')
}

# Step 3: embeddings are computed (or loaded from cache) inside the search loop.

# Step 4: parameter grids.
# UMAP: 5 output dimensions, every n_neighbors value for min_dist 0.0 first,
# then the same n_neighbors values for min_dist 0.1.
umap_params = [
    {'n_components': 5, 'n_neighbors': n_neighbors, 'min_dist': min_dist}
    for min_dist in (0.0, 0.1)
    for n_neighbors in (5, 25, 125)
]

# HDBSCAN: only the minimum cluster size is varied.
hdbscan_params = [{'min_cluster_size': size} for size in (100, 500)]

# Step 5: resume from a previous run when its result log exists, otherwise
# start from an empty log with the expected columns.
results_df = (
    pd.read_csv('../final/grid_search_results.csv')
    if os.path.exists('../final/grid_search_results.csv')
    else pd.DataFrame(columns=[
        'model',
        'umap_n_components',
        'umap_n_neighbors',
        'umap_min_dist',
        'hdbscan_min_cluster_size',
        'coherence',
        'diversity',
        'silhouette',
    ])
)

# Rows produced by the current run.
results = []

# Step 6: bookkeeping for the best configuration seen so far.
best_score = -1
best_model = best_params = None

# Alternative, smaller configuration kept for reference (not executed):
# models = {
#     'all-distilroberta-v1': SentenceTransformer('all-distilroberta-v1'),
#     'paraphrase-MiniLM-L6-v2': SentenceTransformer('paraphrase-MiniLM-L6-v2'),
#     'all-MiniLM-L6-v2': SentenceTransformer('all-MiniLM-L6-v2')
# }
#
# umap_params = [
#     {'n_components': 3, 'n_neighbors': 5, 'min_dist': 0.0},
#     {'n_components': 3, 'n_neighbors': 25, 'min_dist': 0.0},
# ]
# Grid search: for every embedding model and every UMAP x HDBSCAN setting, fit a
# BERTopic model on the sampled documents, save it, score it and append one row
# to ../final/grid_search_results.csv. Settings already present in results_df
# (loaded from a previous run) are skipped.

# Plain list of the sampled documents; encoded once per embedding model and
# passed to BERTopic in the same order as the embedding rows.
sample_docs = df_sampled['text_preprocessed'].tolist()

for model_name, model_instance in tqdm(models.items()):
    # Embeddings are cached per embedding model. A cache file is only trusted
    # when it has one row per sampled document: the same path may hold vectors
    # for a different number of rows, which would not line up with sample_docs.
    # A matching row count is necessary but not sufficient - the cache is only
    # valid if df_sampled is the very sample it was computed from.
    embeddings_path = f'../final/final_embeddings_{model_name}.npy'
    embeddings = None
    if os.path.exists(embeddings_path):
        cached_embeddings = np.load(embeddings_path)
        if cached_embeddings.shape[0] == len(sample_docs):
            embeddings = cached_embeddings
            print(f'embedding {model_name} loaded')
        else:
            print(f'cached embeddings for {model_name} have {cached_embeddings.shape[0]} rows, '
                  f'expected {len(sample_docs)}: recomputing')
    if embeddings is None:
        model_instance = model_instance.to(device)
        embeddings = model_instance.encode(sample_docs, show_progress_bar=True, device=device)
        # Never overwrite an existing (mismatching) cache: it may belong to a
        # different, expensive-to-recompute data set.
        if not os.path.exists(embeddings_path):
            os.makedirs(os.path.dirname(embeddings_path), exist_ok=True)
            np.save(embeddings_path, embeddings)

    for umap_config in umap_params:
        for hdbscan_config in hdbscan_params:
            # Skip combinations that are already logged in results_df.
            already_tested = (
                (len(results_df)>0) and
                ((results_df['model'] == model_name) &
                (results_df['umap_n_components'] == umap_config['n_components']) &
                (results_df['umap_n_neighbors'] == umap_config['n_neighbors']) &
                (results_df['umap_min_dist'] == umap_config['min_dist']) &
                (results_df['hdbscan_min_cluster_size'] == hdbscan_config['min_cluster_size'])).any()
            )
            if already_tested:
                print(f"Skipping already tested configuration: {model_name} with UMAP {umap_config} and HDBSCAN {hdbscan_config}")
                continue

            # Fresh UMAP / HDBSCAN instances for the current parameters.
            umap_model = UMAP(**umap_config)
            hdbscan_model = HDBSCAN(**hdbscan_config)

            # Step 7: fit BERTopic on the precomputed embeddings
            # (embedding_model=None: nothing is re-encoded).
            t0 = time.time()
            topic_model = BERTopic(
                embedding_model=None,
                umap_model=umap_model,
                hdbscan_model=hdbscan_model,
                verbose=True,
                top_n_words=20,
                language = 'english',
            )

            # Documents are passed as a plain list so that their order is the
            # only link to the embedding rows (no pandas index involved).
            topics, probs = topic_model.fit_transform(sample_docs, embeddings=embeddings)
            print(f"Execution time for {model_name} UMAP: {time.time()-t0}s")

            # Step 8: save the model; the target directory is created on demand
            # so the first run does not fail on a missing folder.
            model_filename = f'../final/bertopic_models/{model_name}_umap{umap_config["n_components"]}_umap{umap_config["n_neighbors"]}_umap{umap_config["min_dist"]}_hdbscan{hdbscan_config["min_cluster_size"]}.pkl'
            os.makedirs(os.path.dirname(model_filename), exist_ok=True)
            topic_model.save(model_filename)

            # Step 9: evaluation metrics.
            diversity,coherence=get_metrics(topic_model)

            # Silhouette on the UMAP-reduced space, outliers (-1) excluded.
            umap_model = topic_model.umap_model
            reduced_embedding=umap_model.transform(embeddings)
            topics=np.array(topic_model.topics_)
            clustered = topics != -1
            if np.unique(topics[clustered]).size >= 2:
                silhouette=silhouette_score(reduced_embedding[clustered],topics[clustered])
            else:
                # silhouette_score needs at least two clusters; record NaN for
                # degenerate configurations instead of aborting the search.
                silhouette = float('nan')

            # Track the best model by the unweighted mean of the three metrics.
            # A NaN metric makes avg_score NaN, which never compares greater, so
            # degenerate configurations cannot become the best model.
            avg_score = (coherence + diversity + silhouette) / 3

            if avg_score > best_score:
                best_score = avg_score
                best_model = topic_model
                best_params = {'model': model_name, 'umap': umap_config, 'hdbscan': hdbscan_config}

            topics=pd.Series(topics)

            # Step 10: log the results.
            results.append({
                'model': model_name,
                'umap_n_components': umap_config['n_components'],
                'umap_n_neighbors': umap_config['n_neighbors'],
                'umap_min_dist': umap_config['min_dist'],
                'hdbscan_min_cluster_size': hdbscan_config['min_cluster_size'],
                'coherence': coherence,
                'diversity': diversity,
                'silhouette': silhouette,
                'n_outliers':(topics==-1).sum(),
                # Count real topics only; subtracting 1 from nunique() was
                # wrong whenever a run produced no outliers.
                'n_topics':topics[topics!=-1].nunique(),
                # NOTE(review): these sizes are taken over all labels, so the
                # outlier bucket (-1) is included - confirm that is intended.
                'min_topic':topics.value_counts().min(),
                'max_topic':topics.value_counts().max(),

                })
            print(results[-1])

            # Rewrite the full log after every configuration so an interrupted
            # run can resume from the rows already written.
            pd.concat([results_df, pd.DataFrame(results)], ignore_index=True).to_csv('../final/grid_search_results.csv', index=False)
                


In [None]:
import pandas as pd
from bertopic import BERTopic

# Grid-search log, extended with the unweighted mean of the three metrics.
df_grid = pd.read_csv("../final/grid_search_results.csv")
df_grid['avg_score'] = (df_grid['silhouette'] + df_grid['coherence'] + df_grid['diversity'])/3

# For every criterion, the log row with the highest value.
best_models = {
    metric: df_grid.sort_values(by=metric, ascending=False).iloc[0]
    for metric in ('silhouette', 'coherence', 'diversity', 'avg_score')
}

# Load the saved BERTopic model behind each winning row and show its bar chart.
# The file name is rebuilt from the row using the same pattern the grid search
# used when saving.
topic_models = {}
for key, row in best_models.items():
    model_filename = f"../final/bertopic_models/{row['model']}_umap{row['umap_n_components']}_umap{row['umap_n_neighbors']}_umap{row['umap_min_dist']}_hdbscan{row['hdbscan_min_cluster_size']}.pkl"
    print(f"Loading model for best {key} from: {model_filename}")
    print(row)
    loaded_topic_model = BERTopic.load(model_filename)
    topic_models[key] = loaded_topic_model
    loaded_topic_model.visualize_barchart(top_n_topics=-1,n_words=20, width = 350,height=450).show()

# visualize_barchart arguments used above:
#   top_n_topics=-1  intended to show every topic except the outlier topic -1
#                    (e.g. top_n_topics=5 would show only the 5 largest topics).
#                    NOTE(review): confirm against the BERTopic documentation
#                    that a negative value really means "all topics".
#   n_words=20       show 20 words per topic (use n_words=10 for only 10)
#   width=350        chart width in pixels
#   height=450       chart height in pixels

# Earlier, print-only version of the selection above; kept as a string literal,
# so it does not run and is simply the value of this cell.
'''
df_grid = pd.read_csv("../final/grid_search_results.csv")
df_grid['avg_score'] = (df_grid['silhouette'] + df_grid['coherence'] + df_grid['diversity'])/3
model_with_top_silouette = df_grid.sort_values(by='silhouette').iloc[-1]
print(model_with_top_silouette)
print("----")
model_with_top_coherence = df_grid.sort_values(by='coherence').iloc[-1]
print(model_with_top_coherence)
print("----")
model_with_top_diversity = df_grid.sort_values(by='diversity').iloc[-1]
print(model_with_top_diversity)
print("---")
model_with_top_avg_score = df_grid.sort_values(by='avg_score').iloc[-1]
print(model_with_top_avg_score)
'''

In [None]:
# Documents and their parsed dates for the English corpus.
texts = df_english_preprocessed_non_empty_channels['text_preprocessed']
timestamps = pd.to_datetime(df_english_preprocessed_non_empty_channels['date'],format="%Y-%m-%d")

# Topic representation over time, with one bin per distinct date.
# The raw 'date' values (not the parsed `timestamps`) are what gets passed on;
# `timestamps` is only used to count the distinct dates.
# NOTE(review): `topic_model` is whatever model an earlier cell left in that
# variable - confirm it was fitted on exactly these documents.
_tot_documents = list(texts)
_tot_dates = list(df_english_preprocessed_non_empty_channels['date'])
_tot_tuning = {'global_tuning': False, 'evolution_tuning': True}

topics_over_time = topic_model.topics_over_time(
    _tot_documents,
    _tot_dates,
    nr_bins = timestamps.nunique(),
    **_tot_tuning)


In [None]:

# Line chart of the 5 most frequent topics over time.
fig = topic_model.visualize_topics_over_time(topics_over_time, top_n_topics=5)

# Recolour the traces with a palette of clearly distinguishable colours.
# zip() stops at the shorter sequence, so traces beyond the palette length keep
# the colour they already have.
distinct_colors = px.colors.qualitative.Alphabet
for trace, trace_color in zip(fig.data, distinct_colors):
    trace.line.color = trace_color

fig.show()


- reduce outliers

In [None]:
# Project the embeddings with the UMAP model stored inside the topic model.
umap_model = topic_model.umap_model
reduced_embeddings = umap_model.transform(embeddings)

# Try to re-assign outlier documents to a topic using the c-TF-IDF strategy.
# Note from the original author: the outlier's text is compared with each topic
# through a similarity between TF-IDF vectors (usually cosine similarity, from
# 0.0 = no similarity to 1.0 = identical), so a threshold of 0.1 is very low.
_outlier_docs = list(texts)
new_topics = topic_model.reduce_outliers(
    _outlier_docs,
    topics,
    strategy="c-tf-idf",
    threshold=0.1,
)

# Rebuild the topic representations for the re-assigned labels.
# NOTE(review): in the visible part of the notebook `vectorizer_model` is only
# defined inside a cell that is disabled (wrapped in a string) - confirm it
# exists when this cell runs.
topic_model.update_topics(
    _outlier_docs,
    topics=new_topics,
    vectorizer_model=vectorizer_model,
    top_n_words=20,
)
topic_model.get_topic_info()


In [None]:
# Turn the re-assigned topic labels into a NumPy array, then display the share
# of documents that ended up in each topic.
new_topics = np.array(new_topics)
_topic_shares = pd.Series(new_topics).value_counts(normalize=True)
_topic_shares