Connected to .venv (Python 3.9.16)

In [1]:
#!/usr/bin/env python
# coding: utf-8

# # Import libraries and models

# In[74]:


import spacy
from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Load the Spanish language model
nlp = spacy.load("es_core_news_sm")


# # Declare function

In [2]:
# Function to lemmatize a verb in Spanish
def lemmatize_spanish_verb(text):
    """Return the lemma of the first token tagged as a verb.

    Each token examined (up to and including the first verb) is printed as
    ``text pos lemma``.

    Args:
        text: Text to run through the module-level ``nlp`` pipeline.

    Returns:
        The lemma of the first token whose ``pos_`` equals ``"VERB"``, or an
        empty string when no token carries that tag.
    """
    for tok in nlp(text):
        print(tok.text, tok.pos_, tok.lemma_)
        if tok.pos_ == "VERB":
            # First verb wins; later tokens are neither printed nor examined.
            return tok.lemma_
    return ""
# Quick check with a single conjugated form. In the recorded run the model
# tagged this form as ADJ rather than VERB, so the printed lemma was empty.
verbo = "fliparás"
print(lemmatize_spanish_verb(verbo))

fliparás ADJ fliparás



In [3]:
import json
import time
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
import re
from unidecode import unidecode

# Exclusive upper bound on a tense block's position -> mode name.
# get_mode_by_index scans the keys in insertion order, so the bounds must
# stay in ascending order.
modes_index = dict([
    (10, "Indicativo"),
    (18, "Subjuntivo"),
    (20, "Imperativo"),
    (22, "Infinitivo"),
    (24, "Gerundio"),
    (25, "Participio"),
])

def get_mode_by_index(index):
    """Return the mode name that corresponds to a tense block's position.

    The keys of ``modes_index`` are scanned in insertion order and the first
    key strictly greater than ``index`` selects the mode.

    Args:
        index: Zero-based position of the tense block on the page.

    Returns:
        The mode name, or ``None`` when ``index`` is not below any key.
    """
    return next(
        (modes_index[bound] for bound in modes_index if index < bound),
        None,
    )

def initialize_dict_mode_time(gauche_div):
    """Build the empty ``{mode: {tense: []}}`` skeleton for one verb page.

    Args:
        gauche_div: Parsed element whose ``div.tempstab`` descendants each
            hold one tense block.

    Returns:
        A dict with one entry per mode in ``modes_index``; each mode maps the
        stripped text of every ``h3.tempsheader`` found in its blocks to an
        empty list.
    """
    conjugations = {mode_name: {} for mode_name in modes_index.values()}
    tense_blocks = gauche_div.find_all("div", class_="tempstab")
    for position, tense_block in enumerate(tense_blocks):
        # The mode is inferred purely from the block's position on the page.
        mode = get_mode_by_index(position)
        for header in tense_block.find_all("h3", class_="tempsheader"):
            conjugations[mode][header.text.strip()] = []
    return conjugations

def get_time(tense_div):
    """Return the tense name of a block: the stripped text of its first ``h3.tempsheader``."""
    header = tense_div.find("h3", class_="tempsheader")
    tense_name = header.text.strip()
    return tense_name

def get_conjugations(tense_div, mode):
    """Extract the conjugated forms listed inside one tense block.

    The text of the first ``div.tempscorps`` is rebuilt with ``<br>`` turned
    into line breaks, subject pronouns are removed, accents are stripped with
    ``unidecode`` and the text is split into one entry per line.

    Args:
        tense_div: Parsed ``div.tempstab`` element for a single tense.
        mode: Mode name; when it contains ``"Imperativo"`` the negation word
            "no", hyphens and all spaces are removed as well.

    Returns:
        List of accent-free conjugated forms.

    Raises:
        IndexError: If the block has no ``div.tempscorps`` child.
    """
    # Only the first "tempscorps" container of the block is read.
    html_elements = tense_div.find_all("div", class_="tempscorps")[0]
    # Join the text of every child node; a <br> marks the end of one form.
    conjugations = "".join(
        "\n" if element.name == "br" else element.text
        for element in html_elements
    )
    pronouns = ["yo", "tú", "él", "nosotros", "vosotros", "ellos"]
    # Remove pronouns only as whole words. A plain str.replace also deleted
    # the same letters inside verb forms (e.g. "apoyo" -> "apo").
    pronoun_pattern = r"\b(?:" + "|".join(map(re.escape, pronouns)) + r")\b"
    conjugations = re.sub(pronoun_pattern, "", conjugations)
    # Collapse runs of spaces, then strip accents.
    conjugations = unidecode(re.sub(' +', ' ', conjugations))
    if "Imperativo" in mode:
        # Drop the negation as a whole word only, so forms that contain
        # "no" (e.g. "dona", "conoce") are left intact.
        conjugations = re.sub(r"\bno\b", "", conjugations)
        conjugations = conjugations.replace("-", "").replace(" ", "")
        result = conjugations.strip().split("\n")
    else:
        # Each remaining line starts with the space left by its pronoun.
        result = conjugations.strip().split("\n ")
    return result
        
def cast_dict_to_df(conjugations_verb):
    """Flatten a ``{mode: {tense: forms}}`` mapping into a one-row DataFrame.

    Column names are ``"<mode>_<tense>"`` for the six known modes; tenses of
    any other mode keep their bare name.

    Args:
        conjugations_verb: Nested mapping of mode -> tense -> list of forms.

    Returns:
        A DataFrame with a single row and one column per tense.
    """
    known_modes = (
        'Indicativo',
        'Subjuntivo',
        'Imperativo',
        'Infinitivo',
        'Gerundio',
        'Participio',
    )
    flat_row = {}
    for mode, tenses in conjugations_verb.items():
        # Unknown modes get no prefix at all.
        prefix = mode + '_' if mode in known_modes else ''
        for tense, forms in tenses.items():
            flat_row[prefix + tense] = forms
    return pd.DataFrame([flat_row])

def scrape_verb_conjugations(verb, timeout=10, max_retries=2, retry_delay=1.0):
    """Download and parse the conjugation page of a single verb.

    Args:
        verb: Infinitive used to build the page URL.
        timeout: Seconds to wait for the server before giving up on a request.
        max_retries: Extra attempts made after a connection error or timeout.
        retry_delay: Base pause in seconds between attempts; it grows
            linearly with the attempt number.

    Returns:
        A one-row DataFrame built by ``cast_dict_to_df`` on success, or an
        empty dict when the response status is not 200 or the page has no
        ``div#gauche`` element.

    Raises:
        requests.exceptions.RequestException: If the request still fails
            after the last retry.
    """
    # URL of the website to scrape
    url = f"https://www.conjugacion.es/del/verbo/{verb}.php"
    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        try:
            # A timeout keeps one stalled connection from blocking the run.
            response = requests.get(url, timeout=timeout)
            break
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            if attempt == attempts - 1:
                raise
            # Back off a little before retrying a reset or stalled connection.
            time.sleep(retry_delay * (attempt + 1))
    if response.status_code != 200:
        return {}
    soup = BeautifulSoup(response.text, "html.parser")
    # The element with id "gauche" contains the verb conjugation data
    gauche_div = soup.find("div", id="gauche")
    if gauche_div is None:
        return {}
    conjugations_verb = initialize_dict_mode_time(gauche_div)
    for index, tense_div in enumerate(gauche_div.find_all("div", class_="tempstab")):
        # The mode is inferred from the position of the block on the page
        mode = get_mode_by_index(index)
        tense_name = get_time(tense_div)
        conjugations_verb[mode][tense_name] = get_conjugations(tense_div, mode)
    return cast_dict_to_df(conjugations_verb)
    
def read_verbs(verbs_path):
    """Read a UTF-8 text file and return its lines with surrounding whitespace stripped.

    Args:
        verbs_path: Path of the text file, one entry per line.

    Returns:
        List with one stripped string per line of the file.
    """
    with open(verbs_path, "r", encoding="utf-8") as handle:
        raw_lines = handle.readlines()
    return [line.strip() for line in raw_lines]

def read_dictionary(dictionary_path):
    """Load a previously saved verb dictionary from a JSON file.

    Args:
        dictionary_path: Path of the JSON file.

    Returns:
        The parsed JSON content, or an empty dict when the file cannot be
        opened or does not contain valid JSON.
    """
    try:
        with open(dictionary_path, "r", encoding="utf-8") as f:
            # ensure_ascii / indent are json.dump options. Passing them to
            # json.load raised a TypeError that the old bare except hid, so
            # the function always returned {}.
            return json.load(f)
    except (OSError, ValueError, TypeError):
        # Missing or unreadable file, bad path type, or malformed JSON.
        return {}

def create_dictionary(verbs, delay=0.1):
    """Scrape every verb in ``verbs`` and collect the results in one DataFrame.

    Args:
        verbs: Sequence of candidate infinitives.
        delay: Seconds to pause before each request.

    Returns:
        A ``(verbs_df_conjugated, errors)`` tuple. The DataFrame has one row
        per successfully scraped verb, with ``Infinitivo_Simple`` moved to
        the first column when present. ``errors`` lists the entries that
        produced no row: words not ending in "ar"/"er"/"ir", verbs whose
        request raised, and verbs whose page yielded no data.
    """
    frames = []
    errors = []
    pbar = tqdm(verbs, total=len(verbs))
    # Loop through the verbs you want to scrape
    for verb in pbar:
        if verb[-2:] not in ("ar", "er", "ir"):
            # Not an infinitive ending: skip the request but keep a record.
            errors.append(verb)
            continue
        pbar.set_description(f"Processing {verb}")
        time.sleep(delay)
        try:
            conjugation_data = scrape_verb_conjugations(verb)
        except Exception as e:
            errors.append(verb)
            # Handle errors during scraping (e.g., connection issues)
            print(f"Error scraping {verb}: {str(e)}")
            continue
        if isinstance(conjugation_data, pd.DataFrame) and not conjugation_data.empty:
            frames.append(conjugation_data)
        else:
            # The scraper returns {} when the page is missing or unparsable.
            errors.append(verb)
    # Concatenate once at the end instead of growing a frame per iteration.
    if frames:
        verbs_df_conjugated = pd.concat(frames, ignore_index=True)
    else:
        verbs_df_conjugated = pd.DataFrame()
    # Move the column "Infinitivo_Simple" to the first position
    if 'Infinitivo_Simple' in verbs_df_conjugated.columns:
        ordered_columns = ['Infinitivo_Simple'] + [
            column for column in verbs_df_conjugated.columns
            if column != 'Infinitivo_Simple'
        ]
        verbs_df_conjugated = verbs_df_conjugated[ordered_columns]
    return verbs_df_conjugated, errors

def write_dictionary(verbs_df, dictionary_path):
    """Write ``verbs_df`` to ``dictionary_path`` as CSV, without the index column."""
    verbs_df.to_csv(path_or_buf=dictionary_path, index=False)
        
def create_all_conjugations_list(verbs_df):
    """Flatten every conjugated form stored in ``verbs_df`` into one list.

    Cells are visited row by row, column by column. List and tuple cells
    contribute each of their items and a string cell contributes itself.
    Any other cell is skipped, such as the NaN that ``pd.concat`` inserts
    when a verb lacks a tense column.

    Args:
        verbs_df: DataFrame whose cells hold lists of conjugated forms.

    Returns:
        Flat list of all forms, in row-major order.
    """
    all_conjugations = []
    for row in verbs_df.itertuples(index=False, name=None):
        for cell in row:
            if isinstance(cell, (list, tuple)):
                all_conjugations.extend(cell)
            elif isinstance(cell, str):
                # Keep a bare string whole instead of exploding it into characters.
                all_conjugations.append(cell)
    return all_conjugations


def get_infinitive(verbs_df, range_index):
    """Return the infinitive stored in row ``range_index`` of ``verbs_df``.

    Args:
        verbs_df: DataFrame (or plain sequence of row mappings) whose rows
            carry an ``Infinitivo_Simple`` entry holding a list.
        range_index: Zero-based row position.

    Returns:
        The first item of that row's ``Infinitivo_Simple`` list.
    """
    if hasattr(verbs_df, "iloc"):
        # On a DataFrame, verbs_df[n] is a column lookup and raised KeyError;
        # rows have to be selected by position.
        row = verbs_df.iloc[range_index]
    else:
        row = verbs_df[range_index]
    return row['Infinitivo_Simple'][0]

def verify_is_verb(word, all_conjugations, verbs_df):
    """Return the infinitive of ``word`` when it is a known conjugated form.

    The word is lower-cased and accent-stripped before the lookup. The
    position of a form in the flat ``all_conjugations`` list is not a row
    number, so the owning row is found by scanning ``verbs_df`` directly.
    When several verbs share the form, the first matching row wins.

    Args:
        word: Word to look up.
        all_conjugations: Flat collection of known forms, used as a quick
            membership check.
        verbs_df: DataFrame with one row per verb, list-valued cells and an
            ``Infinitivo_Simple`` column.

    Returns:
        The infinitive, or an empty string when the word is not found or the
        inputs are unusable.
    """
    if not isinstance(word, str) or not all_conjugations:
        return ""
    if not hasattr(verbs_df, "iterrows"):
        return ""
    target = unidecode(word.lower())
    if target not in all_conjugations:
        return ""
    for _, row in verbs_df.iterrows():
        for cell in row:
            if isinstance(cell, (list, tuple)) and target in cell:
                infinitive = row.get('Infinitivo_Simple')
                if isinstance(infinitive, (list, tuple)) and infinitive:
                    return infinitive[0]
                return ""
    return ""

In [4]:
# Scrape a single verb and preview the resulting DataFrame.
# NOTE(review): scrape_verb_conjugations returns {} on a failed request, in
# which case .head() below would raise.
verbo = "aullar"
conjugations = scrape_verb_conjugations(verbo)
conjugations.head()

Unnamed: 0,Indicativo_Presente,Indicativo_Pretérito perfecto compuesto,Indicativo_Pretérito imperfecto,Indicativo_Pretérito pluscuamperfecto,Indicativo_Pretérito perfecto simple,Indicativo_Pretérito anterior,Indicativo_Futuro,Indicativo_Futuro perfecto,Indicativo_Condicional,Indicativo_Condicional perfecto,...,Subjuntivo_Pretérito pluscuamperfecto 2,Subjuntivo_Futuro,Subjuntivo_Futuro perfecto,Imperativo_Imperativo,Imperativo_Imperativo negativo,Infinitivo_Simple,Infinitivo_Compuesto,Gerundio_Simple,Gerundio_Compuesto,Participio_Pasado
0,"[aullo, aullas, aulla, aullamos, aullais, aullan]","[he aullado, has aullado, ha aullado, hemos au...","[aullaba, aullabas, aullaba, aullabamos, aulla...","[habia aullado, habias aullado, habia aullado,...","[aulle, aullaste, aullo, aullamos, aullasteis,...","[hube aullado, hubiste aullado, hubo aullado, ...","[aullare, aullaras, aullara, aullaremos, aulla...","[habre aullado, habras aullado, habra aullado,...","[aullaria, aullarias, aullaria, aullariamos, a...","[habria aullado, habrias aullado, habria aulla...",...,"[hubiese aullado, hubieses aullado, hubiese au...","[aullare, aullares, aullare, aullaremos, aulla...","[hubiere aullado, hubieres aullado, hubiere au...","[aulla, aulle, aullemos, aullad, aullen]","[aulles, aulle, aullemos, aulleis, aullen]",[aullar],[haber aullado],[aullando],[habiendo aullado],[aullado]


In [5]:
import random
# Load the candidate verbs, one stripped entry per line of the text file
verbs = read_verbs("./data/diccionario/verbos-espanol.txt")
# Take 20 random verbs not in order using random.sample
# NOTE(review): no seed is set, so every run scrapes a different sample.
verbs = random.sample(verbs, 20)
# Read the dictionary
# NOTE(review): this value is overwritten by the create_dictionary call below
# before it is ever used.
verbs_df = read_dictionary("./data/diccionario/verbos-espanol.json")
# Create the dictionary (scrapes every verb in `verbs`)
verbs_df, errores = create_dictionary(verbs)
# Create all conjugations list
all_conjugations = create_all_conjugations_list(verbs_df)
# Write the dictionary
write_dictionary(verbs_df, "./data/diccionario/verbos-espanol.csv")
# Save all conjugations list, one form per line
with open("./data/diccionario/all_conjugations.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(all_conjugations))
# Save errors, one verb per line
with open("./data/diccionario/errores.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(errores))

Processing encorachar:  80%|████████  | 16/20 [00:06<00:01,  2.34it/s]  


KeyboardInterrupt: 

In [6]:
import random
# Load the candidate verbs, one stripped entry per line of the text file
verbs = read_verbs("./data/diccionario/verbos-espanol.txt")
# Sampling is disabled here, so the whole list is scraped.
# verbs = random.sample(verbs, 20)
# Read the dictionary
# NOTE(review): this value is overwritten by the create_dictionary call below
# before it is ever used.
verbs_df = read_dictionary("./data/diccionario/verbos-espanol.json")
# Create the dictionary (scrapes every verb in `verbs`)
verbs_df, errores = create_dictionary(verbs)
# Create all conjugations list
all_conjugations = create_all_conjugations_list(verbs_df)
# Write the dictionary
write_dictionary(verbs_df, "./data/diccionario/verbos-espanol.csv")
# Save all conjugations list, one form per line
with open("./data/diccionario/all_conjugations.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(all_conjugations))
# Save errors, one verb per line
with open("./data/diccionario/errores.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(errores))

Processing acolchar:   3%|▎         | 311/10783 [01:53<1:01:43,  2.83it/s]     

Error scraping acolar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing acolgar:   3%|▎         | 313/10783 [01:54<54:20,  3.21it/s]     

Error scraping acolchonar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing acollarar:   3%|▎         | 316/10783 [01:55<53:43,  3.25it/s] 

Error scraping acollar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing acomodar:   3%|▎         | 321/10783 [01:56<35:27,  4.92it/s]   

Error scraping acombar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))
Error scraping acometer: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing acomplejar:   3%|▎         | 324/10783 [01:57<48:27,  3.60it/s]

Error scraping acompasar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing acondicionar:   3%|▎         | 327/10783 [01:57<36:56,  4.72it/s]

Error scraping aconchar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing aconsonantar:   3%|▎         | 331/10783 [01:58<41:51,  4.16it/s]

Error scraping aconsejar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing adestrar:   4%|▍         | 425/10783 [02:34<1:02:41,  2.75it/s]   


KeyboardInterrupt: 

In [7]:
#!/usr/bin/env python
# coding: utf-8

# # Import libraries and models

# In[74]:


import spacy
from tqdm import tqdm
import requests
from bs4 import BeautifulSoup
import pandas as pd
# Load the Spanish language model
nlp = spacy.load("es_core_news_sm")


# # Declare function

In [8]:
# Function to lemmatize a verb in Spanish
def lemmatize_spanish_verb(text):
    """Return the lemma of the first token tagged as a verb.

    Each token examined (up to and including the first verb) is printed as
    ``text pos lemma``.

    Args:
        text: Text to run through the module-level ``nlp`` pipeline.

    Returns:
        The lemma of the first token whose ``pos_`` equals ``"VERB"``, or an
        empty string when no token carries that tag.
    """
    for tok in nlp(text):
        print(tok.text, tok.pos_, tok.lemma_)
        if tok.pos_ == "VERB":
            # First verb wins; later tokens are neither printed nor examined.
            return tok.lemma_
    return ""
# Quick check with a single conjugated form. In the recorded run the model
# tagged this form as ADJ rather than VERB, so the printed lemma was empty.
verbo = "fliparás"
print(lemmatize_spanish_verb(verbo))

fliparás ADJ fliparás



In [9]:
import json
import time
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
import re
from unidecode import unidecode

# Exclusive upper bound on a tense block's position -> mode name.
# get_mode_by_index scans the keys in insertion order, so the bounds must
# stay in ascending order.
modes_index = dict([
    (10, "Indicativo"),
    (18, "Subjuntivo"),
    (20, "Imperativo"),
    (22, "Infinitivo"),
    (24, "Gerundio"),
    (25, "Participio"),
])

def get_mode_by_index(index):
    """Return the mode name that corresponds to a tense block's position.

    The keys of ``modes_index`` are scanned in insertion order and the first
    key strictly greater than ``index`` selects the mode.

    Args:
        index: Zero-based position of the tense block on the page.

    Returns:
        The mode name, or ``None`` when ``index`` is not below any key.
    """
    return next(
        (modes_index[bound] for bound in modes_index if index < bound),
        None,
    )

def initialize_dict_mode_time(gauche_div):
    """Build the empty ``{mode: {tense: []}}`` skeleton for one verb page.

    Args:
        gauche_div: Parsed element whose ``div.tempstab`` descendants each
            hold one tense block.

    Returns:
        A dict with one entry per mode in ``modes_index``; each mode maps the
        stripped text of every ``h3.tempsheader`` found in its blocks to an
        empty list.
    """
    conjugations = {mode_name: {} for mode_name in modes_index.values()}
    tense_blocks = gauche_div.find_all("div", class_="tempstab")
    for position, tense_block in enumerate(tense_blocks):
        # The mode is inferred purely from the block's position on the page.
        mode = get_mode_by_index(position)
        for header in tense_block.find_all("h3", class_="tempsheader"):
            conjugations[mode][header.text.strip()] = []
    return conjugations

def get_time(tense_div):
    """Return the tense name of a block: the stripped text of its first ``h3.tempsheader``."""
    header = tense_div.find("h3", class_="tempsheader")
    tense_name = header.text.strip()
    return tense_name

def get_conjugations(tense_div, mode):
    """Extract the conjugated forms listed inside one tense block.

    The text of the first ``div.tempscorps`` is rebuilt with ``<br>`` turned
    into line breaks, subject pronouns are removed, accents are stripped with
    ``unidecode`` and the text is split into one entry per line.

    Args:
        tense_div: Parsed ``div.tempstab`` element for a single tense.
        mode: Mode name; when it contains ``"Imperativo"`` the negation word
            "no", hyphens and all spaces are removed as well.

    Returns:
        List of accent-free conjugated forms.

    Raises:
        IndexError: If the block has no ``div.tempscorps`` child.
    """
    # Only the first "tempscorps" container of the block is read.
    html_elements = tense_div.find_all("div", class_="tempscorps")[0]
    # Join the text of every child node; a <br> marks the end of one form.
    conjugations = "".join(
        "\n" if element.name == "br" else element.text
        for element in html_elements
    )
    pronouns = ["yo", "tú", "él", "nosotros", "vosotros", "ellos"]
    # Remove pronouns only as whole words. A plain str.replace also deleted
    # the same letters inside verb forms (e.g. "apoyo" -> "apo").
    pronoun_pattern = r"\b(?:" + "|".join(map(re.escape, pronouns)) + r")\b"
    conjugations = re.sub(pronoun_pattern, "", conjugations)
    # Collapse runs of spaces, then strip accents.
    conjugations = unidecode(re.sub(' +', ' ', conjugations))
    if "Imperativo" in mode:
        # Drop the negation as a whole word only, so forms that contain
        # "no" (e.g. "dona", "conoce") are left intact.
        conjugations = re.sub(r"\bno\b", "", conjugations)
        conjugations = conjugations.replace("-", "").replace(" ", "")
        result = conjugations.strip().split("\n")
    else:
        # Each remaining line starts with the space left by its pronoun.
        result = conjugations.strip().split("\n ")
    return result
        
def cast_dict_to_df(conjugations_verb):
    """Flatten a ``{mode: {tense: forms}}`` mapping into a one-row DataFrame.

    Column names are ``"<mode>_<tense>"`` for the six known modes; tenses of
    any other mode keep their bare name.

    Args:
        conjugations_verb: Nested mapping of mode -> tense -> list of forms.

    Returns:
        A DataFrame with a single row and one column per tense.
    """
    known_modes = (
        'Indicativo',
        'Subjuntivo',
        'Imperativo',
        'Infinitivo',
        'Gerundio',
        'Participio',
    )
    flat_row = {}
    for mode, tenses in conjugations_verb.items():
        # Unknown modes get no prefix at all.
        prefix = mode + '_' if mode in known_modes else ''
        for tense, forms in tenses.items():
            flat_row[prefix + tense] = forms
    return pd.DataFrame([flat_row])

def scrape_verb_conjugations(verb, timeout=10, max_retries=2, retry_delay=1.0):
    """Download and parse the conjugation page of a single verb.

    Args:
        verb: Infinitive used to build the page URL.
        timeout: Seconds to wait for the server before giving up on a request.
        max_retries: Extra attempts made after a connection error or timeout.
        retry_delay: Base pause in seconds between attempts; it grows
            linearly with the attempt number.

    Returns:
        A one-row DataFrame built by ``cast_dict_to_df`` on success, or an
        empty dict when the response status is not 200 or the page has no
        ``div#gauche`` element.

    Raises:
        requests.exceptions.RequestException: If the request still fails
            after the last retry.
    """
    # URL of the website to scrape
    url = f"https://www.conjugacion.es/del/verbo/{verb}.php"
    attempts = max(0, max_retries) + 1
    for attempt in range(attempts):
        try:
            # A timeout keeps one stalled connection from blocking the run.
            response = requests.get(url, timeout=timeout)
            break
        except (requests.exceptions.ConnectionError, requests.exceptions.Timeout):
            if attempt == attempts - 1:
                raise
            # Back off a little before retrying a reset or stalled connection.
            time.sleep(retry_delay * (attempt + 1))
    if response.status_code != 200:
        return {}
    soup = BeautifulSoup(response.text, "html.parser")
    # The element with id "gauche" contains the verb conjugation data
    gauche_div = soup.find("div", id="gauche")
    if gauche_div is None:
        return {}
    conjugations_verb = initialize_dict_mode_time(gauche_div)
    for index, tense_div in enumerate(gauche_div.find_all("div", class_="tempstab")):
        # The mode is inferred from the position of the block on the page
        mode = get_mode_by_index(index)
        tense_name = get_time(tense_div)
        conjugations_verb[mode][tense_name] = get_conjugations(tense_div, mode)
    return cast_dict_to_df(conjugations_verb)
    
def read_verbs(verbs_path):
    """Read a UTF-8 text file and return its lines with surrounding whitespace stripped.

    Args:
        verbs_path: Path of the text file, one entry per line.

    Returns:
        List with one stripped string per line of the file.
    """
    with open(verbs_path, "r", encoding="utf-8") as handle:
        raw_lines = handle.readlines()
    return [line.strip() for line in raw_lines]

def read_dictionary(dictionary_path):
    """Load a previously saved verb dictionary from a JSON file.

    Args:
        dictionary_path: Path of the JSON file.

    Returns:
        The parsed JSON content, or an empty dict when the file cannot be
        opened or does not contain valid JSON.
    """
    try:
        with open(dictionary_path, "r", encoding="utf-8") as f:
            # ensure_ascii / indent are json.dump options. Passing them to
            # json.load raised a TypeError that the old bare except hid, so
            # the function always returned {}.
            return json.load(f)
    except (OSError, ValueError, TypeError):
        # Missing or unreadable file, bad path type, or malformed JSON.
        return {}

def create_dictionary(verbs, delay=0.3):
    """Scrape every verb in ``verbs`` and collect the results in one DataFrame.

    Args:
        verbs: Sequence of candidate infinitives.
        delay: Seconds to pause before each request.

    Returns:
        A ``(verbs_df_conjugated, errors)`` tuple. The DataFrame has one row
        per successfully scraped verb, with ``Infinitivo_Simple`` moved to
        the first column when present. ``errors`` lists the entries that
        produced no row: words not ending in "ar"/"er"/"ir", verbs whose
        request raised, and verbs whose page yielded no data.
    """
    frames = []
    errors = []
    pbar = tqdm(verbs, total=len(verbs))
    # Loop through the verbs you want to scrape
    for verb in pbar:
        if verb[-2:] not in ("ar", "er", "ir"):
            # Not an infinitive ending: skip the request but keep a record.
            errors.append(verb)
            continue
        pbar.set_description(f"Processing {verb}")
        time.sleep(delay)
        try:
            conjugation_data = scrape_verb_conjugations(verb)
        except Exception as e:
            errors.append(verb)
            # Handle errors during scraping (e.g., connection issues)
            print(f"Error scraping {verb}: {str(e)}")
            continue
        if isinstance(conjugation_data, pd.DataFrame) and not conjugation_data.empty:
            frames.append(conjugation_data)
        else:
            # The scraper returns {} when the page is missing or unparsable.
            errors.append(verb)
    # Concatenate once at the end instead of growing a frame per iteration.
    if frames:
        verbs_df_conjugated = pd.concat(frames, ignore_index=True)
    else:
        verbs_df_conjugated = pd.DataFrame()
    # Move the column "Infinitivo_Simple" to the first position
    if 'Infinitivo_Simple' in verbs_df_conjugated.columns:
        ordered_columns = ['Infinitivo_Simple'] + [
            column for column in verbs_df_conjugated.columns
            if column != 'Infinitivo_Simple'
        ]
        verbs_df_conjugated = verbs_df_conjugated[ordered_columns]
    return verbs_df_conjugated, errors

def write_dictionary(verbs_df, dictionary_path):
    """Write ``verbs_df`` to ``dictionary_path`` as CSV, without the index column."""
    verbs_df.to_csv(path_or_buf=dictionary_path, index=False)
        
def create_all_conjugations_list(verbs_df):
    """Flatten every conjugated form stored in ``verbs_df`` into one list.

    Cells are visited row by row, column by column. List and tuple cells
    contribute each of their items and a string cell contributes itself.
    Any other cell is skipped, such as the NaN that ``pd.concat`` inserts
    when a verb lacks a tense column.

    Args:
        verbs_df: DataFrame whose cells hold lists of conjugated forms.

    Returns:
        Flat list of all forms, in row-major order.
    """
    all_conjugations = []
    for row in verbs_df.itertuples(index=False, name=None):
        for cell in row:
            if isinstance(cell, (list, tuple)):
                all_conjugations.extend(cell)
            elif isinstance(cell, str):
                # Keep a bare string whole instead of exploding it into characters.
                all_conjugations.append(cell)
    return all_conjugations


def get_infinitive(verbs_df, range_index):
    """Return the infinitive stored in row ``range_index`` of ``verbs_df``.

    Args:
        verbs_df: DataFrame (or plain sequence of row mappings) whose rows
            carry an ``Infinitivo_Simple`` entry holding a list.
        range_index: Zero-based row position.

    Returns:
        The first item of that row's ``Infinitivo_Simple`` list.
    """
    if hasattr(verbs_df, "iloc"):
        # On a DataFrame, verbs_df[n] is a column lookup and raised KeyError;
        # rows have to be selected by position.
        row = verbs_df.iloc[range_index]
    else:
        row = verbs_df[range_index]
    return row['Infinitivo_Simple'][0]

def verify_is_verb(word, all_conjugations, verbs_df):
    """Return the infinitive of ``word`` when it is a known conjugated form.

    The word is lower-cased and accent-stripped before the lookup. The
    position of a form in the flat ``all_conjugations`` list is not a row
    number, so the owning row is found by scanning ``verbs_df`` directly.
    When several verbs share the form, the first matching row wins.

    Args:
        word: Word to look up.
        all_conjugations: Flat collection of known forms, used as a quick
            membership check.
        verbs_df: DataFrame with one row per verb, list-valued cells and an
            ``Infinitivo_Simple`` column.

    Returns:
        The infinitive, or an empty string when the word is not found or the
        inputs are unusable.
    """
    if not isinstance(word, str) or not all_conjugations:
        return ""
    if not hasattr(verbs_df, "iterrows"):
        return ""
    target = unidecode(word.lower())
    if target not in all_conjugations:
        return ""
    for _, row in verbs_df.iterrows():
        for cell in row:
            if isinstance(cell, (list, tuple)) and target in cell:
                infinitive = row.get('Infinitivo_Simple')
                if isinstance(infinitive, (list, tuple)) and infinitive:
                    return infinitive[0]
                return ""
    return ""

In [10]:
import random
# Load the candidate verbs, one stripped entry per line of the text file
verbs = read_verbs("./data/diccionario/verbos-espanol.txt")
# Sampling is disabled here, so the whole list is scraped.
# verbs = random.sample(verbs, 20)
# Read the dictionary
# NOTE(review): this value is overwritten by the create_dictionary call below
# before it is ever used.
verbs_df = read_dictionary("./data/diccionario/verbos-espanol.json")
# Create the dictionary (scrapes every verb in `verbs`)
verbs_df, errores = create_dictionary(verbs)
# Create all conjugations list
all_conjugations = create_all_conjugations_list(verbs_df)
# Write the dictionary
write_dictionary(verbs_df, "./data/diccionario/verbos-espanol.csv")
# Save all conjugations list, one form per line
with open("./data/diccionario/all_conjugations.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(all_conjugations))
# Save errors, one verb per line
with open("./data/diccionario/errores.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(errores))

Processing adormir:   4%|▍         | 455/10783 [04:05<1:27:43,  1.96it/s]      

Error scraping adormentar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing adscribir:   4%|▍         | 464/10783 [04:09<1:03:51,  2.69it/s]

Error scraping adrizar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing azafranar:  16%|█▌        | 1728/10783 [15:39<1:02:19,  2.42it/s]      

Error scraping azacanear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing bonificar:  18%|█▊        | 1941/10783 [17:45<1:19:41,  1.85it/s]   

Error scraping bombear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing borbollar:  18%|█▊        | 1943/10783 [17:46<1:18:08,  1.89it/s]

Error scraping boquear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing cacear:  19%|█▉        | 2036/10783 [18:44<2:05:48,  1.16it/s]      

Error scraping cacarear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing cachañear:  19%|█▉        | 2037/10783 [18:44<1:44:08,  1.40it/s]

Error scraping cacear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing caligrafiar:  19%|█▉        | 2087/10783 [19:13<1:18:25,  1.85it/s]  

Error scraping calificar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing coyotear:  27%|██▋       | 2957/10783 [27:54<1:10:48,  1.84it/s]          

Error scraping covar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing desimanar:  37%|███▋      | 3986/10783 [38:04<1:01:43,  1.84it/s]        

Error scraping desimaginar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing desimponer:  37%|███▋      | 3988/10783 [38:05<58:16,  1.94it/s]  

Error scraping desimantar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing desinfartar:  37%|███▋      | 3995/10783 [38:09<1:00:59,  1.85it/s]      

Error scraping desindustrializar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing ensoberbecer:  52%|█████▏    | 5648/10783 [53:30<30:36,  2.80it/s]     

Error scraping ensobear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing escoscar:  56%|█████▌    | 5996/10783 [56:50<43:59,  1.81it/s]       

Error scraping escosar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing escrachar:  56%|█████▌    | 5999/10783 [56:52<41:24,  1.93it/s] 

Error scraping escotorrar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing escribir:  56%|█████▌    | 6001/10783 [56:52<29:04,  2.74it/s] 

Error scraping escrachar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing garrafiñar:  61%|██████▏   | 6612/10783 [1:03:04<38:24,  1.81it/s]     

Error scraping garpar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing garrar:  61%|██████▏   | 6616/10783 [1:03:06<37:44,  1.84it/s]     

Error scraping garrapiñar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing hambrear:  63%|██████▎   | 6781/10783 [1:04:53<38:50,  1.72it/s]    

Error scraping hamaquear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing individualizar:  65%|██████▌   | 7033/10783 [1:07:33<25:48,  2.42it/s] 

Error scraping indisponer: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing parcializar:  76%|███████▌  | 8207/10783 [1:20:03<24:30,  1.75it/s]      

Error scraping parchear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing proclamar:  80%|████████  | 8643/10783 [1:24:42<20:23,  1.75it/s]        

Error scraping procesar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing procrastinar:  80%|████████  | 8644/10783 [1:24:43<18:32,  1.92it/s]

Error scraping proclamar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing prodigar:  80%|████████  | 8647/10783 [1:24:44<18:59,  1.87it/s]    

Error scraping procurar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing proferir:  80%|████████  | 8652/10783 [1:24:47<20:00,  1.77it/s]

Error scraping profazar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing profesar:  80%|████████  | 8653/10783 [1:24:48<18:13,  1.95it/s]

Error scraping proferir: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing profesionalizar:  80%|████████  | 8654/10783 [1:24:48<17:11,  2.06it/s]

Error scraping profesar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing ranchar:  82%|████████▏ | 8816/10783 [1:26:32<18:38,  1.76it/s]         

Error scraping rampar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing ranciar:  82%|████████▏ | 8819/10783 [1:26:33<13:21,  2.45it/s] 

Error scraping ranchear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing roncear:  89%|████████▊ | 9565/10783 [1:34:30<11:19,  1.79it/s]        

Error scraping roncar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing ronquear:  89%|████████▊ | 9569/10783 [1:34:32<11:13,  1.80it/s] 

Error scraping rondinear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing tatarear:  94%|█████████▍| 10131/10783 [1:40:30<06:41,  1.62it/s]       

Error scraping tataratear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing tatemar:  94%|█████████▍| 10132/10783 [1:40:30<05:57,  1.82it/s] 

Error scraping tatarear: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing tatuar:  94%|█████████▍| 10133/10783 [1:40:31<05:26,  1.99it/s] 

Error scraping tatemar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing taucar:  94%|█████████▍| 10134/10783 [1:40:31<05:05,  2.12it/s]

Error scraping tatuar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing techar:  94%|█████████▍| 10138/10783 [1:40:33<05:36,  1.92it/s]     

Error scraping teatralizar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing traspalar:  96%|█████████▋| 10385/10783 [1:43:10<02:56,  2.26it/s]      

Error scraping trasoñar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing traspalear:  96%|█████████▋| 10386/10783 [1:43:10<02:50,  2.32it/s]

Error scraping traspalar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing trasparecer:  96%|█████████▋| 10388/10783 [1:43:12<03:04,  2.15it/s]

Error scraping traspapelar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing traspintar:  96%|█████████▋| 10394/10783 [1:43:15<03:13,  2.01it/s] 

Error scraping traspillar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing vampirizar:  98%|█████████▊| 10564/10783 [1:45:01<02:07,  1.72it/s]   

Error scraping valuar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing verter:  99%|█████████▊| 10626/10783 [1:45:56<13:35,  5.19s/it]       

Error scraping vertebrar: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))


Processing zurrir: 100%|██████████| 10783/10783 [1:47:35<00:00,  1.67it/s]      
