In [1]:
import sqlite3
import pandas as pd

In [2]:
conn = sqlite3.connect('collection.anki21')
cursor = conn.cursor()

## Get the different prompts

In [3]:
# spanish <-> catalan verbs
cat_sp_df = pd.read_csv('spanish_to_catalan_verbs.csv', sep=',',
                        encoding='utf-8')
cat_sp_df = cat_sp_df[['spanish_verb', 'catalan_verb']]
cat_sp_df.rename({'catalan_verb': 'inf_verb'}, axis=1, inplace=True)

In [4]:
cat_sp_df.tail()

Unnamed: 0,spanish_verb,inf_verb
61,delinquir,delinquir
62,reír,riure
63,corroer,corroure
64,raer,raer
65,europeizar,europeïtzar


In [5]:
# get the tags and notes for each card
cursor.execute("SELECT sfld, tags FROM notes")
rows = cursor.fetchall()
notes = []
tags = []
intro_phrases = ["El verbo en",
                 "Before",
                 "During",
                 "Grammar",
                 "For ", 
                 "I hesitated",
                 "If you see"]
for row in rows:
    if row[0].startswith(intro_phrases[0]): pass
    elif row[0].startswith(intro_phrases[1]): pass
    elif row[0].startswith(intro_phrases[2]): pass
    elif row[0].startswith(intro_phrases[3]): pass
    elif row[0].startswith(intro_phrases[4]): pass
    elif row[0].startswith(intro_phrases[5]): pass
    elif row[0].startswith(intro_phrases[6]): pass
    else:
        notes.append(row[0])
        tags.append(row[1])

In [6]:
# make into a df
df = pd.DataFrame(data=notes,
                  columns=['note'])
df['tags'] = tags

# get the base verb and add catalan equivalent
# (inner merge to get rid of verbs I didn't add equivalents for)
df['spanish_verb'] = df['note'].str.split('::…', expand=True)[1].str.split('…}}', expand=True)[0]
df = df.merge(cat_sp_df,
              how='inner',
              on='spanish_verb')

In [7]:
# get rid of tú_vos
print(len(df.index))
df = df.loc[~df.tags.str.contains(' tú_vos ')]
print(len(df.index))

3761
2991


In [8]:
# get rid of subjunctive future
print(len(df.index))
df = df.loc[~df.tags.str.contains(' subjuntivo_futuro ')]
print(len(df.index))

2991
2666


In [9]:
# tags I care about that I can merge on 

# 1, 2, 3rd person
person_map = {'yo': '1',
              'tú': '2',
              'él_ella_usted': '3',
              'nosotros': '1',
              'vosotros': '2',
              'ellos_ellas_ustedes': '3'}
df['person'] = '0'
for key, item in person_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'person'] = person_map[key]
    
# singular or plural
sing_or_plural_map = {'yo': 'S',
              'tú': 'S',
              'él_ella_usted': 'S',
              'nosotros': 'P',
              'vosotros': 'P',
              'ellos_ellas_ustedes': 'P'}
df['sing_or_plural'] = '0'
for key, item in sing_or_plural_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'sing_or_plural'] = sing_or_plural_map[key]
df.loc[df.tags.str.contains(' participio '), 'sing_or_plural'] = 'S'

# mood
mood_map = {'presente': 'I',
           'imperfecto': 'I',
           'indefinido': 'I',
           'futuro': 'I',
           'condicional': 'I',
           'subjuntivo_presente': 'S',
           'subjuntivo_pasado': 'S',
           'imperativo': 'M',
           'negative_imperativo': 'M',
           'gerundio': 'G',
           'participio': 'P'}
df['mood'] = '0'
for key, item in mood_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'mood'] = mood_map[key]

# tense
tense_map = {'presente': 'P',
           'imperfecto': 'I',
           'indefinido': 'passat_perifrastic',
           'futuro': 'F',
           'condicional': 'C',
           'subjuntivo_presente': 'P',
           'subjuntivo_pasado': 'I'}
df['tense'] = '0'
for key, item in tense_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'tense'] = tense_map[key]

# pos_neg_cmd
pos_neg_map = {'imperativo': 'pos',
               'negative_imperativo': 'neg'}
df['pos_neg_cmd'] = '0'
for key, item in pos_neg_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'pos_neg_cmd'] = pos_neg_map[key]

In [10]:
# TODO add a version of the past using "ahir" 
# for the passat simple (ie he anat)

In [11]:
## TODO -- add neg. commands for everything by just sticking a no everywhere in the pos. cmds

In [12]:
# # split the note up so I can translate and replace shit
# # by the different tense / mood combos
# split_dict = {'gerundio': ['Mientras estoy estudiando…', 2],
#                   'participio': ['Hugo ha', 1]}
#               # 'presente': ['⊙ Ahora mismo, ⊙', 1],
#               # 'imperfecto': ['⇠ En esa época, a menudo, ⇠', 1],
#               # 'indefinido': ['↧ En aquel momento ↧', 1],
#               # 'futuro': ['→ En el futuro, →', 1],
#               # 'condicional': ['…', 2],
#               # 'subjuntivo_presente': ['〰 Resulta divertido que 〰', 1],
#               # 'subjuntivo_pasado': ['↫ Fue sorprendente que ↫', 1],
#               # 'imperativo': ['¡', 1]}
# # tag:negative_imperativo}
# df['note1'] = ''
# for key, item in split_dict.items():
#     split_phrase = item[0]
#     split_num = item[1]-1
#     print(df.loc[df.tags.str.contains(' '+key+' ')].note.str.split(split_phrase,
#                                                                     expand=True))
#     # df.loc[df.tags.str.contains(' '+key+' '), 'note1'] = df.loc[df.tags.str.contains(' '+key+' ')].note.str.split(split_phrase,
#     #                                                                 expand=True)[split_num]

In [13]:
# context phrases to translate
df['context_phrase' ] = df.note.str.split('(', expand=True)[1].str.split(')', expand=True)[0]
temp = pd.DataFrame(data=df.context_phrase.unique().tolist(), columns=['spanish_phrase'])
temp.to_csv('spanish_context_phrases.tsv', sep='\t', index=False)

In [14]:
# condicional phrases to translate
temp = df.loc[df.tense=='C']
temp['spanish_phrase'] = temp.note.str.split('…', expand=True)[1]
temp.spanish_phrase.unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


array([' Si ocurriese esto, ', ' Si acaeciera algo, ',
       ' Si pasara algo, ', ' Si aconteciese esto, ',
       ' Si se produjera esto, '], dtype=object)

In [15]:
# imperative phrases to translate
temp = df.loc[df.mood=='M']
temp['spanish_phrase'] = temp.note.str.split(',¡', expand=True)[0]
temp.spanish_phrase.unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


array(['Señora, por favor', 'Vámonos equipo, por favor',
       'Tú y tú, por favor', 'Señores, por favor',
       '—No le alcanza para reembolsar el préstamo.—Pues, ¡ {{c1::deba::…deber…}} el monto, y ya está !(él/ella/usted)',
       '—No nos alcanza para devolver el dinero.—Pues, ¡ {{c1::debamos::…deber…}} el monto, y ya está !(nosotros)',
       '—No nos alcanza para restituir la fianza.—Pues, ¡ {{c1::debed::…deber…}} el monto, y ya está !(vosotros)',
       '—No les alcanza para abonar el tratamiento.—Pues, ¡ {{c1::deban::…deber…}} el monto, y ya está !(ellos/ellas/ustedes)'],
      dtype=object)

In [16]:
# translated spanish to cat. phrases to stick on the cards
trans_df = pd.read_csv('spanish_catalan_context_phrases.csv', sep=',',
                        encoding='utf-8')

In [17]:
# replace all translated df stuff w/ catalan
for ind, entry in trans_df.iterrows():
    sp = entry.spanish_phrase
    ca = entry.catalan_phrase
    df.note = df.note.str.replace(sp, ca)

  """


In [18]:
merge_cols = ['inf_verb', 'mood', 'tense', 'person',
              'sing_or_plural', 'pos_neg_cmd']

In [19]:
df.loc[df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols).head()

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase


In [20]:
# add the catalan conjugations
cat_conj_df = pd.read_csv('catalan_verbs_parsed.tsv', sep='\t', encoding='utf-8')
# cat_conj_df.loc[cat_conj_df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols)

In [21]:

# df.note.values[0]

print(len(df.index))
cat_conj_df.head()

for c in merge_cols:
    df[c] = df[c].astype(str)
    cat_conj_df[c] = cat_conj_df[c].astype(str)
df = df.merge(cat_conj_df, 
              how='left', 
              on=merge_cols)
print(len(df.index))

# # replace the infinitive and conjugated verb in the notes
# df['pref'] = df.note.str.split('{{c1::', expand=True)[0]+'{{c1::'
# df['suff'] = '…}}'+df.note.str.split('…}}', expand=True)[1]
# df['cat_note'] df.pref+




# # also make the question


2666
2666


In [22]:
df.loc[df.conj_verb=='prohibeixin'].sort_values(by=merge_cols)

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,conj_verb,code,region,verb_type,gender,haver_verb,anar_verb
1804,"Senjors, si us plau, (que) {{c1::prohíban::…p...",ellos_ellas_ustedes ends_in_ir imperativo pro...,prohibir,prohibir,3,P,M,0,pos,que,prohibeixin,VMM03P0Y,central,M,0,,
1795,〰 Resulta divertit que 〰ells/elles/vostès {{c1...,ellos_ellas_ustedes ends_in_ir prohibir regul...,prohibir,prohibir,3,P,S,P,0,a los niños que griten,prohibeixin,VMSP3P0Y,central,M,0,,


In [23]:
df.loc[df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols).head()

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,conj_verb,code,region,verb_type,gender,haver_verb,anar_verb


In [24]:
# cat_conj_df.loc[(cat_conj_df.inf_verb=='ser')]
                # (cat_conj_df.person=='0')&\
                # (cat_conj_df.sing_or_plural=='0')&\
                # (cat_conj_df.mood=='G')&\
                # (cat_conj_df.tense=='0')&\
                # (cat_conj_df.pos_neg_cmd=='0')]

In [26]:
df.to_csv('table_to_make_cards.csv', sep=',', index=False)