In [1]:
import sqlite3
import pandas as pd

In [2]:
# Open the Anki collection file as a SQLite database and keep one cursor
# around for the queries in the cells below.
collection_path = 'collection.anki21'
conn = sqlite3.connect(collection_path)
cursor = conn.cursor()

## Get the different prompts

In [55]:
# Spanish <-> Catalan verb lookup table; only the two verb columns are kept.
# NOTE(review): 'latin1' can decode any byte sequence without raising, so a
# UTF-8 file would be read "successfully" with garbled accents -- confirm the
# file's real encoding.
verb_columns = ['spanish_verb', 'catalan_verb']
cat_sp_df = pd.read_csv(
    'spanish_to_catalan_verbs.tsv',
    sep='\t',
    encoding='latin1',
)[verb_columns]


In [47]:
# Fetch the sort field (used here as the note text) and the tag string of
# every note in the collection.
cursor.execute("SELECT sfld, tags FROM notes")
rows = cursor.fetchall()

# Notes whose text opens with one of these phrases are left out of
# `notes` / `tags` (presumably the deck's instruction cards -- confirm).
intro_phrases = ["El verbo en",
                 "Before",
                 "During",
                 "Grammar",
                 "For ",
                 "I hesitated",
                 "If you see"]

# str.startswith accepts a tuple of prefixes, so one call covers the whole
# list and a phrase added to `intro_phrases` is picked up automatically.
intro_prefixes = tuple(intro_phrases)

# `notes` and `tags` are parallel lists: tags[i] belongs to notes[i].
# NOTE(review): assumes sfld always comes back as a str -- confirm.
notes = []
tags = []
for sort_field, tag_string in rows:
    if sort_field.startswith(intro_prefixes):
        continue
    notes.append(sort_field)
    tags.append(tag_string)

In [56]:
# Put the kept notes and their tag strings side by side in one frame.
df = pd.DataFrame({'note': notes, 'tags': tags})

# The base verb is the second piece of the note when it is split on '::…',
# truncated at the first '…}}'.
after_marker = df['note'].str.split('::…', expand=True)[1]
df['spanish_verb'] = after_marker.str.split('…}}', expand=True)[0]

# Attach the Catalan equivalent. The inner join drops every note whose verb
# has no row in cat_sp_df (verbs no equivalent was added for).
df = pd.merge(df, cat_sp_df, how='inner', on='spanish_verb')

In [67]:
# Peek at one tag string (the row labelled 1) to see how tags are delimited.
df['tags'][1]

' ends_in_er extreme_irregularity irregular_verb participio regular_form ser '

In [74]:
# get rid of tú_vos
# (row counts are printed before and after to show how many cards were dropped)
print(len(df.index))
# regex=False: the tag is matched as a literal substring, not as a pattern.
is_tu_vos = df['tags'].str.contains(' tú_vos ', regex=False)
# .copy() makes the filtered frame independent of the original, so adding
# columns to `df` afterwards does not raise SettingWithCopyWarning.
df = df.loc[~is_tu_vos].copy()
print(len(df.index))

3761
2991


In [77]:
# get rid of subjunctive future
# (row counts are printed before and after to show how many cards were dropped)
print(len(df.index))
# regex=False: the tag is matched as a literal substring, not as a pattern.
is_subjunctive_future = df['tags'].str.contains(' subjuntivo_futuro ', regex=False)
# .copy() makes the filtered frame independent of the original, so adding
# columns to `df` afterwards does not raise SettingWithCopyWarning.
df = df.loc[~is_subjunctive_future].copy()
print(len(df.index))

2991
2666


In [78]:
# tags I care about that I can merge on


def _code_from_tags(frame, column, code_by_tag, default='0'):
    """Fill ``frame[column]`` with a code derived from each row's tag string.

    Every row starts at ``default``. Then, for each ``tag -> code`` pair in
    ``code_by_tag`` (in dict order), rows whose ``tags`` string contains the
    tag surrounded by single spaces are set to ``code``. When a row carries
    several of the tags, the last matching entry wins.

    Parameters
    ----------
    frame : pandas.DataFrame
        Frame with a string column named ``tags``; modified in place.
    column : str
        Name of the column to create or overwrite.
    code_by_tag : dict
        Maps a tag name (without surrounding spaces) to the code to store.
    default : str
        Value for rows that match none of the tags.
    """
    frame[column] = default
    for tag, code in code_by_tag.items():
        # The spaces on both sides keep e.g. 'presente' from matching inside
        # 'subjuntivo_presente'; regex=False matches the tag literally.
        has_tag = frame['tags'].str.contains(' ' + tag + ' ', regex=False)
        frame.loc[has_tag, column] = code


# 1, 2, 3rd person
person_map = {'yo': '1',
              'tú': '2',
              'él_ella_usted': '3',
              'nosotros': '1',
              'vosotros': '2',
              'ellos_ellas_ustedes': '3'}
_code_from_tags(df, 'person', person_map)

# singular or plural
sing_or_plural_map = {'yo': 'S',
                      'tú': 'S',
                      'él_ella_usted': 'S',
                      'nosotros': 'P',
                      'vosotros': 'P',
                      'ellos_ellas_ustedes': 'P'}
_code_from_tags(df, 'sing_or_plural', sing_or_plural_map)

# mood
mood_map = {'presente': 'I',
            'imperfecto': 'I',
            'indefinido': 'I',
            'futuro': 'I',
            'condicional': 'I',
            'subjuntivo_presente': 'S',
            'subjuntivo_pasado': 'S',
            'imperativo': 'M',
            'negative_imperativo': 'M',
            'gerundio': 'G',
            'participio': 'P'}
_code_from_tags(df, 'mood', mood_map)

# tense
# NOTE(review): every entry except 'indefinido' carries the same code as
# mood_map above, so this column currently repeats the mood for those tags.
# TODO: confirm the intended tense values.
tense_map = {'presente': 'I',
             'imperfecto': 'I',
             'indefinido': 'passat_perifrastic',
             'futuro': 'I',
             'condicional': 'I',
             'subjuntivo_presente': 'S',
             'subjuntivo_pasado': 'S',
             'imperativo': 'M',
             'negative_imperativo': 'M',
             'gerundio': 'G',
             'participio': 'P'}
_code_from_tags(df, 'tense', tense_map)


                                 # 'tense'


# 2. By Tense
# tag:presente
# tag:imperfecto
# tag:indefinido
# tag:futuro
# tag:condicional
# tag:subjuntivo_presente
# tag:subjuntivo_pasado
# tag:imperativo
# tag:negative_imperativo
# tag:subjuntivo_futuro
# tag:gerundio
# tag:participio

In [None]:
# TODO: add a version of the past using "ahir" (Catalan for "yesterday") for the passat simple