In [59]:
from io import BytesIO, StringIO

import pandas as pd

from src.crawler import WebCrawler
from src.parser import VerbParser

# Project-local helpers imported from src.crawler / src.parser; their internals are not shown here.
crawler = WebCrawler()
parser = VerbParser()
# Verb to look up; it is interpolated into the conjugation-page URL below.
search_verb = "ser"
url = f"https://www.spanishdict.com/conjugate/{search_verb}"

# Fetch the page and hand the result to the parser. `html` is reused by a later
# cell as the input to pd.read_html.
# NOTE(review): crawl_page presumably returns the page markup as text — confirm str vs bytes.
html = crawler.crawl_page(url)
parser.set_html_soup(html)

In [26]:
# Parsed document held by the parser (populated by parser.set_html_soup).
doc = parser.html_soup
# pandas (>= 2.1) deprecates passing literal HTML markup to read_html, so wrap
# it in a file-like object. Both str and bytes are handled because the return
# type of crawl_page is not visible from this notebook.
html_buffer = StringIO(html) if isinstance(html, str) else BytesIO(html)
# One DataFrame per <table> element found in the page.
tables = pd.read_html(html_buffer)
tables

[          0       1
 0  Present:  siendo
 1     Past:    sido,
                   0        1          2          3            4        5
 0               NaN  Present  Preterite  Imperfect  Conditional   Future
 1                yo      soy        fui        era        sería     seré
 2                tú     eres     fuiste       eras       serías    serás
 3       él/ella/Ud.       es        fue        era        sería     será
 4          nosotros    somos     fuimos     éramos     seríamos  seremos
 5          vosotros     sois   fuisteis      erais      seríais   seréis
 6  ellos/ellas/Uds.      son     fueron       eran       serían    serán,
                   0        1                   2         3
 0               NaN  Present           Imperfect    Future
 1                yo      sea        fuera, fuese     fuere
 2                tú     seas      fueras, fueses    fueres
 3       él/ella/Ud.      sea        fuera, fuese     fuere
 4          nosotros   seamos  fuéramos, fu

In [57]:
"""
    For each table, convert columns 1 through the last one into root keys:
    the column name (row 0) becomes the root key.
    for tense in (row 0 of each column) in table, from column 1 -> X:
        for each pronoun in rows:
            pronoun_dictionary = { pronoun: [data at pronoun, column] }
        tense_dictionary = { tense: pronoun_dictionary }
    table_dictionary = { ... table_name: tense_dictionary }

    I need to somehow know the table name!

    Then create a sub-dictionary with the row name (col 0, row X) as key
    and the cell at [root key, sub key] as value, so that tenses such as
    "Present" become root keys and pronouns become sub keys, e.g.
    "Present": {"yo": ...}
                                        
    
"""
# Row index -> subject pronoun label. Numbering starts at 1 because row 0 of
# each conjugation table holds the tense headers rather than a conjugation.
pronoun_dict = dict(
    enumerate(
        ['yo', 'tú', 'él/ella/Ud.', 'nosotros', 'vosotros', 'ellos/ellas/Uds.'],
        start=1,
    )
)

# Position of a table in the parsed table list -> name of that table.
table_names = dict(
    enumerate(
        [
            "Participles",  # requires special case as pronouns don't match up
            "Indicative",
            "Subjunctive",
            "Imperative",
            "Progressive",
            "Perfect",
            "Perfect Subjunctive",
        ]
    )
)



In [61]:
"""
    This is the code that creates a verb deck!
    Now I need to make it pretty :)
"""

def tenses_from_table(table, pronouns):
    """Convert one conjugation table into ``{tense: {pronoun: conjugation}}``.

    Row 0 of every column is treated as that column's tense header and the
    remaining rows as conjugations, keyed by row index through ``pronouns``.
    Columns whose header cell is missing (NaN/None) are skipped; in the tables
    displayed earlier in this notebook that is the pronoun-label column.

    Args:
        table: DataFrame as produced by ``pd.read_html`` with no header row
            set, so column headers live in row 0.
        pronouns: Mapping of row index -> pronoun label.

    Returns:
        dict mapping each tense header to a ``{pronoun: conjugated verb}`` dict.

    Raises:
        KeyError: If a conjugation row index has no entry in ``pronouns``.
    """
    tenses = {}
    for column in table.to_dict().values():
        # .get keeps an empty table (no row 0) from raising; None is treated
        # as a missing header just like NaN.
        tense_header = column.get(0)
        # pd.isna expresses "header cell is empty" directly, instead of
        # inferring it from the value happening to be a float.
        if pd.isna(tense_header):
            continue
        tenses[tense_header] = {
            pronouns[row_idx]: conjugated_verb
            for row_idx, conjugated_verb in column.items()
            if row_idx != 0  # row 0 is the tense header, not a conjugation
        }
    return tenses


# Final shape: {verb: {table name: {tense: {pronoun: conjugated verb}}}}
verb_deck = {search_verb: {}}
for i, table in enumerate(tables):
    if i == 0:
        # Participles table: its rows are not pronouns, so it needs a
        # special case at a later time.
        continue
    # Fall back to a generated name so an unexpected extra table on the page
    # does not raise KeyError and throw away everything parsed so far.
    current_table_name = table_names.get(i, f"Table {i}")
    verb_deck[search_verb][current_table_name] = tenses_from_table(table, pronoun_dict)

verb_deck

{'ser': {'Indicative': {'Present': {'yo': 'soy',
    'tú': 'eres',
    'él/ella/Ud.': 'es',
    'nosotros': 'somos',
    'vosotros': 'sois',
    'ellos/ellas/Uds.': 'son'},
   'Preterite': {'yo': 'fui',
    'tú': 'fuiste',
    'él/ella/Ud.': 'fue',
    'nosotros': 'fuimos',
    'vosotros': 'fuisteis',
    'ellos/ellas/Uds.': 'fueron'},
   'Imperfect': {'yo': 'era',
    'tú': 'eras',
    'él/ella/Ud.': 'era',
    'nosotros': 'éramos',
    'vosotros': 'erais',
    'ellos/ellas/Uds.': 'eran'},
   'Conditional': {'yo': 'sería',
    'tú': 'serías',
    'él/ella/Ud.': 'sería',
    'nosotros': 'seríamos',
    'vosotros': 'seríais',
    'ellos/ellas/Uds.': 'serían'},
   'Future': {'yo': 'seré',
    'tú': 'serás',
    'él/ella/Ud.': 'será',
    'nosotros': 'seremos',
    'vosotros': 'seréis',
    'ellos/ellas/Uds.': 'serán'}},
  'Subjunctive': {'Present': {'yo': 'sea',
    'tú': 'seas',
    'él/ella/Ud.': 'sea',
    'nosotros': 'seamos',
    'vosotros': 'seáis',
    'ellos/ellas/Uds.': 'sean'},