In [1]:
import sqlite3
import pandas as pd

In [2]:
conn = sqlite3.connect('collection.anki21')
cursor = conn.cursor()

## Get the different prompts

In [3]:
# spanish <-> catalan verbs
cat_sp_df = pd.read_csv('spanish_to_catalan_verbs.csv', sep=',',
                        encoding='utf-8')
cat_sp_df = cat_sp_df[['spanish_verb', 'catalan_verb']]
cat_sp_df.rename({'catalan_verb': 'inf_verb'}, axis=1, inplace=True)

In [4]:
# get the tags and notes for each card
cursor.execute("SELECT sfld, tags FROM notes")
rows = cursor.fetchall()
notes = []
tags = []
intro_phrases = ["El verbo en",
                 "Before",
                 "During",
                 "Grammar",
                 "For ", 
                 "I hesitated",
                 "If you see"]
for row in rows:
    if row[0].startswith(intro_phrases[0]): pass
    elif row[0].startswith(intro_phrases[1]): pass
    elif row[0].startswith(intro_phrases[2]): pass
    elif row[0].startswith(intro_phrases[3]): pass
    elif row[0].startswith(intro_phrases[4]): pass
    elif row[0].startswith(intro_phrases[5]): pass
    elif row[0].startswith(intro_phrases[6]): pass
    else:
        notes.append(row[0])
        tags.append(row[1])

In [5]:
# make into a df
df = pd.DataFrame(data=notes,
                  columns=['note'])
df['tags'] = tags

# get the base verb and add catalan equivalent
# (inner merge to get rid of verbs I didn't add equivalents for)
df['spanish_verb'] = df['note'].str.split('::…', expand=True)[1].str.split('…}}', expand=True)[0]
df = df.merge(cat_sp_df,
              how='inner',
              on='spanish_verb')

In [6]:
df.loc[df.note.str.contains('::eras')].tags.values[0]

' ends_in_er extreme_irregularity imperfecto irregular_form irregular_verb ser tú_vos '

In [7]:
# print(len(df.loc[df.tags.str.contains(' tú_vos ')]))
# print(len(df.loc[df.tags.str.contains(' tú ')]))

# print(df.loc[df.tags.str.contains(' tú_vos ')].tags.values[0])
# print(df.loc[df.tags.str.contains(' tú ')].tags.values[0])
# df.loc[df.tags.str.contains('yo')]

In [8]:
# # get rid of tú_vos
# print(len(df.index))
# df = df.loc[~((df.tags.str.contains(' tú_vos '))&
#             ~(df.tags.str.contains(' tú ')))]
# print(len(df.index))

In [9]:
# get rid of subjunctive future
print(len(df.index))
df = df.loc[~df.tags.str.contains(' subjuntivo_futuro ')]
print(len(df.index))

3761
3371


In [10]:
# remove hay 
df = df.loc[~df.tags.str.contains('idiom')]

In [11]:
# tags I care about that I can merge on 

# 1, 2, 3rd person
person_map = {'yo': '1',
              'tú': '2',
              'tú_vos': '2',
              'él_ella_usted': '3',
              'nosotros': '1',
              'vosotros': '2',
              'ellos_ellas_ustedes': '3'}
df['person'] = '0'
for key, item in person_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'person'] = person_map[key]
    
# singular or plural
sing_or_plural_map = {'yo': 'S',
              'tú': 'S',
              'tú_vos': 'S',
              'él_ella_usted': 'S',
              'nosotros': 'P',
              'vosotros': 'P',
              'ellos_ellas_ustedes': 'P'}
df['sing_or_plural'] = '0'
for key, item in sing_or_plural_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'sing_or_plural'] = sing_or_plural_map[key]
df.loc[df.tags.str.contains(' participio '), 'sing_or_plural'] = 'S'

# mood
mood_map = {'presente': 'I',
           'imperfecto': 'I',
           'indefinido': 'I',
           'futuro': 'I',
           'condicional': 'I',
           'subjuntivo_presente': 'S',
           'subjuntivo_pasado': 'S',
           'imperativo': 'M',
           'negative_imperativo': 'M',
           'gerundio': 'G',
           'participio': 'P'}
df['mood'] = '0'
for key, item in mood_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'mood'] = mood_map[key]

# tense
tense_map = {'presente': 'P',
           'imperfecto': 'I',
           'indefinido': 'passat_perifrastic',
           'futuro': 'F',
           'condicional': 'C',
           'subjuntivo_presente': 'P',
           'subjuntivo_pasado': 'I'}
df['tense'] = '0'
for key, item in tense_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'tense'] = tense_map[key]

# pos_neg_cmd
pos_neg_map = {'imperativo': 'pos',
               'negative_imperativo': 'neg'}
df['pos_neg_cmd'] = '0'
for key, item in pos_neg_map.items():
    thing = ' '+key+' '
    df.loc[df.tags.str.contains(thing), 'pos_neg_cmd'] = pos_neg_map[key]

In [12]:
merge_cols = ['inf_verb', 'mood', 'tense', 'person',
              'sing_or_plural', 'pos_neg_cmd']
df.loc[df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols).head()

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd


In [13]:
# TODO add a version of the past using "ahir" 
# for the passat simple (ie he anat)

In [14]:
## TODO -- add neg. commands for everything by just sticking a no everywhere in the pos. cmds

In [15]:
# # split the note up so I can translate and replace shit
# # by the different tense / mood combos
# split_dict = {'gerundio': ['Mientras estoy estudiando…', 2],
#                   'participio': ['Hugo ha', 1]}
#               # 'presente': ['⊙ Ahora mismo, ⊙', 1],
#               # 'imperfecto': ['⇠ En esa época, a menudo, ⇠', 1],
#               # 'indefinido': ['↧ En aquel momento ↧', 1],
#               # 'futuro': ['→ En el futuro, →', 1],
#               # 'condicional': ['…', 2],
#               # 'subjuntivo_presente': ['〰 Resulta divertido que 〰', 1],
#               # 'subjuntivo_pasado': ['↫ Fue sorprendente que ↫', 1],
#               # 'imperativo': ['¡', 1]}
# # tag:negative_imperativo}
# df['note1'] = ''
# for key, item in split_dict.items():
#     split_phrase = item[0]
#     split_num = item[1]-1
#     print(df.loc[df.tags.str.contains(' '+key+' ')].note.str.split(split_phrase,
#                                                                     expand=True))
#     # df.loc[df.tags.str.contains(' '+key+' '), 'note1'] = df.loc[df.tags.str.contains(' '+key+' ')].note.str.split(split_phrase,
#     #                                                                 expand=True)[split_num]

In [16]:
# context phrases to translate
df['context_phrase' ] = df.note.str.split('(', expand=True)[1].str.split(')', expand=True)[0]
temp = pd.DataFrame(data=df.context_phrase.unique().tolist(), columns=['spanish_phrase'])
temp.to_csv('spanish_context_phrases.tsv', sep='\t', index=False)

In [17]:
# condicional phrases to translate
temp = df.loc[df.tense=='C']
temp['spanish_phrase'] = temp.note.str.split('…', expand=True)[1]
# temp.spanish_phrase.unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [18]:
# imperative phrases to translate
temp = df.loc[df.mood=='M']
temp['spanish_phrase'] = temp.note.str.split(',¡', expand=True)[0]
temp.spanish_phrase.unique()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


array(['Hombre, por favor', 'Mujer, por favor', 'Señora, por favor',
       'Vámonos equipo, por favor', 'Tú y tú, por favor',
       'Señores, por favor',
       '—No me alcanza para saldar la deuda.—Pues, ¡ {{c1::debe::…deber…}} el monto, y ya está !(tú)',
       '—No me alcanza para garpar el alquiler.—Pues, ¡ {{c1::debé::…deber…}} el monto, y ya está !(vos)',
       '—Me molesta estar endeudado.—Pues, ¡ no {{c1::debas::…deber…}} nada a nadie !(tú)',
       '—No le alcanza para reembolsar el préstamo.—Pues, ¡ {{c1::deba::…deber…}} el monto, y ya está !(él/ella/usted)',
       '—No nos alcanza para devolver el dinero.—Pues, ¡ {{c1::debamos::…deber…}} el monto, y ya está !(nosotros)',
       '—No nos alcanza para restituir la fianza.—Pues, ¡ {{c1::debed::…deber…}} el monto, y ya está !(vosotros)',
       '—No les alcanza para abonar el tratamiento.—Pues, ¡ {{c1::deban::…deber…}} el monto, y ya está !(ellos/ellas/ustedes)'],
      dtype=object)

In [19]:
# translated spanish to cat. phrases to stick on the cards
trans_df = pd.read_csv('spanish_catalan_context_phrases.csv', sep=',',
                        encoding='utf-8')

In [20]:
# replace all translated df stuff w/ catalan
df['cast_note'] = df.note.tolist()
for ind, entry in trans_df.iterrows():
    sp = entry.spanish_phrase
    ca = entry.catalan_phrase
    df.note = df.note.str.replace(sp, ca)

  


In [21]:
merge_cols = ['inf_verb', 'mood', 'tense', 'person',
              'sing_or_plural', 'pos_neg_cmd']

In [22]:
df.loc[df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols).head()

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,cast_note


In [23]:
# add the catalan conjugations
cat_conj_df = pd.read_csv('catalan_verbs_parsed.tsv', sep='\t', encoding='utf-8')
# cat_conj_df.loc[cat_conj_df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols)

In [24]:
cat_conj_df.loc[cat_conj_df.conj_verb=='eres'][merge_cols+['conj_verb']]

Unnamed: 0,inf_verb,mood,tense,person,sing_or_plural,pos_neg_cmd,conj_verb
994,ser,I,I,2,S,0,eres


In [25]:
df.loc[(df.mood=='I')&
       (df.tense=='I')&
       (df.inf_verb=='ser')]
# [merge_cols]

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,cast_note
9,"⇠ En aquella època, sovint, ⇠jo {{c1::era::…se...",ends_in_er extreme_irregularity imperfecto ir...,ser,ser,1,S,I,I,0,consciente de ello,"⇠ En esa época, a menudo, ⇠yo {{c1::era::…ser…..."
10,"⇠ En aquella època, sovint, ⇠tu {{c1::eras::…s...",ends_in_er extreme_irregularity imperfecto ir...,ser,ser,2,0,I,I,0,consciente de ello,"⇠ En esa época, a menudo, ⇠tú/vos {{c1::eras::..."
11,"⇠ En aquella època, sovint, ⇠ell/ella/vostè {{...",ends_in_er extreme_irregularity imperfecto ir...,ser,ser,3,S,I,I,0,consciente de ello,"⇠ En esa época, a menudo, ⇠él/ella/usted {{c1:..."
12,"⇠ En aquella època, sovint, ⇠nosaltres {{c1::é...",dicho ends_in_er extreme_irregularity imperfe...,ser,ser,1,P,I,I,0,conscientes de ello,"⇠ En esa época, a menudo, ⇠nosotros {{c1::éram..."
13,"⇠ En aquella època, sovint, ⇠vosaltres {{c1::e...",ends_in_er extreme_irregularity imperfecto ir...,ser,ser,2,P,I,I,0,conscientes de ello,"⇠ En esa época, a menudo, ⇠vosotros {{c1::erai..."
14,"⇠ En aquella època, sovint, ⇠ells/elles/vostès...",ellos_ellas_ustedes ends_in_er extreme_irregu...,ser,ser,3,P,I,I,0,conscientes de ello,"⇠ En esa época, a menudo, ⇠ellos/ellas/ustedes..."


In [26]:

# df.note.values[0]

print(len(df.index))
cat_conj_df.head()

for c in merge_cols:
    df[c] = df[c].astype(str)
    cat_conj_df[c] = cat_conj_df[c].astype(str)
df = df.merge(cat_conj_df, 
              how='left', 
              on=merge_cols)
print(len(df.index))

# # replace the infinitive and conjugated verb in the notes
# df['pref'] = df.note.str.split('{{c1::', expand=True)[0]+'{{c1::'
# df['suff'] = '…}}'+df.note.str.split('…}}', expand=True)[1]
# df['cat_note'] df.pref+




# # also make the question


3370
3370


In [27]:
l = len(df.loc[df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols))
assert l == 0 
# df.loc[df.duplicated(subset=merge_cols, keep=False)].sort_values(by=merge_cols)

In [28]:
# replace inf verb and conj verb in note
df['note1'] = df.note.str.split('{{', expand=True)[0]
df['note2'] = df.note.str.split('}}', expand=True)[1]
df['cat_note'] = df.note1+'{{c1::'+df.conj_verb+'::…'+\
                 df.inf_verb+'…}}'+df.note2
df.cat_note[0]

'A mesura que estic estudiant…ella està {{c1::sent::…ser…}} (vigilada)'

In [29]:
df.loc[df.note.str.contains('::eras')]

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,...,conj_verb,code,region,verb_type,gender,haver_verb,anar_verb,note1,note2,cat_note
10,"⇠ En aquella època, sovint, ⇠tu {{c1::eras::…s...",ends_in_er extreme_irregularity imperfecto ir...,ser,ser,2,0,I,I,0,consciente de ello,...,,,,,,,,"⇠ En aquella època, sovint, ⇠tu",(conscient d'això),


In [30]:
# tags for tense
d = {'P': 'present',
     'I': 'imperfet',
     'passat_perifrastic': 'passat_perifrastic',
     'F': 'futur', 
     'C': 'condicional',
     '0': ''}
df['tense_tag'] = df.tense.map(d)

# tags for mood
d = {'G': 'gerundi',
     'P': 'participi',
     'I': 'indicatiu',
     'S': 'subjuntiu',
     'M': 'imperatiu'}
df['mood_tag'] = df.mood.map(d)

# tags for ending
df['ending_tag'] = ''
df.loc[df.inf_verb.str.endswith('ir'), 'ending_tag'] = '-ir'
df.loc[df.inf_verb.str.endswith('re'), 'ending_tag'] = '-re'
df.loc[df.inf_verb.str.endswith('ar'), 'ending_tag'] = '-ar'
df.loc[df.inf_verb.str.endswith('er'), 'ending_tag'] = '-er'

# tags for personal pronoun

df['pronoun_tag'] = ''

# jo
df.loc[(df.sing_or_plural=='S')&
       (df.person=='1'), 'pronoun_tag'] = 'jo'

# tu 
df.loc[(df.sing_or_plural=='S')&
       (df.person=='2'), 'pronoun_tag'] = 'tu'

# ell/ella/vostè
df.loc[(df.sing_or_plural=='S')&
       (df.person=='3'), 'pronoun_tag'] = 'ell/ella/vostè'

# nosaltres
df.loc[(df.sing_or_plural=='P')&
       (df.person=='1'), 'pronoun_tag'] = 'nosaltres'

# vosaltres
df.loc[(df.sing_or_plural=='P')&
       (df.person=='2'), 'pronoun_tag'] = 'vosaltres'

# ells/elles/vostès
df.loc[(df.sing_or_plural=='P')&
       (df.person=='3'), 'pronoun_tag'] = 'ells/elles/vostès'

df['cat_tags'] = ' '+df['tense_tag']+\
                 ' '+df['mood_tag']+\
                 ' '+df['ending_tag']+\
                 ' '+df['pronoun_tag']+\
                 ' '+df['inf_verb']+' '
df.cat_tags = df.cat_tags.str.replace('  ', ' ')
df.cat_tags = df.cat_tags.str.replace('  ', ' ')
df.cat_tags = df.cat_tags.str.replace('  ', ' ')
df.loc[df.cat_tags.str.contains('  ')]

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,...,haver_verb,anar_verb,note1,note2,cat_note,tense_tag,mood_tag,ending_tag,pronoun_tag,cat_tags


In [40]:
# where's my tu form imperfect indicativ?
df.loc[(df.mood == 'I')&
       (df.tense=='I')&
       (df.sing_or_plural=='S')&
       (df.person=='0')].head()

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,...,haver_verb,anar_verb,note1,note2,cat_note,tense_tag,mood_tag,ending_tag,pronoun_tag,cat_tags


In [41]:
df.loc[df.note.str.contains('::eras')]

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,...,haver_verb,anar_verb,note1,note2,cat_note,tense_tag,mood_tag,ending_tag,pronoun_tag,cat_tags
10,"⇠ En aquella època, sovint, ⇠tu {{c1::eras::…s...",ends_in_er extreme_irregularity imperfecto ir...,ser,ser,2,0,I,I,0,consciente de ello,...,,,"⇠ En aquella època, sovint, ⇠tu",(conscient d'això),,imperfet,indicatiu,-er,,imperfet indicatiu -er ser


In [38]:

df.loc[df.note.str.contains('::eras')][['note1', 'note2', 'cat_note', 'conj_verb', 'inf_verb']]

Unnamed: 0,note1,note2,cat_note,conj_verb,inf_verb
10,"⇠ En aquella època, sovint, ⇠tu",(conscient d'això),,,ser


In [32]:
df.cat_tags.values[:5]

array([' gerundi -er ser ', ' participi -er ser ',
       ' present indicatiu -er jo ser ', ' present indicatiu -er tu ser ',
       ' present indicatiu -er ser '], dtype=object)

In [33]:
# df.loc[df.pronoun_tag == ''].mood_tag.unique()
# df.loc[(df.pronoun_tag == '')&
#        (df.mood_tag=='indicatiu')].note.values[0]
# df.loc[df.tags.str.contains('idiom')]

In [34]:
# add new tags
df.to_csv('table_to_make_cards.csv', sep='|', index=False)
# replace in db in update_anki_db.ipynb


In [35]:
len(df.index)

3370

In [36]:
temp = df.loc[df.note.str.contains('::eras')]
temp.to_csv('table_to_make_cards_eras.csv', sep='|', index=False, 
           encoding='utf-8')


In [37]:
df.loc[df.cat_note.isnull()]

Unnamed: 0,note,tags,spanish_verb,inf_verb,person,sing_or_plural,mood,tense,pos_neg_cmd,context_phrase,...,haver_verb,anar_verb,note1,note2,cat_note,tense_tag,mood_tag,ending_tag,pronoun_tag,cat_tags
4,"⊙ Ara mateix, ⊙vos {{c1::sos::…ser…}} (conscie...",ends_in_er extreme_irregularity irregular_for...,ser,ser,2,0,I,P,0,consciente de ello,...,,,"⊙ Ara mateix, ⊙vos",(conscient d'això),,present,indicatiu,-er,,present indicatiu -er ser
10,"⇠ En aquella època, sovint, ⇠tu {{c1::eras::…s...",ends_in_er extreme_irregularity imperfecto ir...,ser,ser,2,0,I,I,0,consciente de ello,...,,,"⇠ En aquella època, sovint, ⇠tu",(conscient d'això),,imperfet,indicatiu,-er,,imperfet indicatiu -er ser
16,↧ En aquell moment ↧tu {{c1::fuiste::…ser…}} (...,ends_in_er extreme_irregularity indefinido ir...,ser,ser,2,0,I,passat_perifrastic,0,consciente de ello,...,,,↧ En aquell moment ↧tu,(conscient d'això),,passat_perifrastic,indicatiu,-er,,passat_perifrastic indicatiu -er ser
22,"→ En el futur, →tu {{c1::serás::…ser…}} (consc...",ends_in_er extreme_irregularity futuro irregu...,ser,ser,2,0,I,F,0,consciente de ello,...,,,"→ En el futur, →tu",(conscient d'això),,futur,indicatiu,-er,,futur indicatiu -er ser
28,"… Si sucediera esto, …tu {{c1::serías::…ser…}}...",condicional ends_in_er extreme_irregularity i...,ser,ser,2,0,I,C,0,consciente de ello,...,,,"… Si sucediera esto, …tu",(conscient d'això),,condicional,indicatiu,-er,,condicional indicatiu -er ser
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3346,"… Si sucediera esto, …tu {{c1::europeizarías::...",condicional ends_in_ar europeizar irregular_v...,europeizar,europeïtzar,2,0,I,C,0,el sabor del plato,...,,,"… Si sucediera esto, …tu",(el gust del plat),,condicional,indicatiu,-ar,,condicional indicatiu -ar europeïtzar
3352,〰 Resulta divertit que 〰tu {{c1::europeíces::…...,ends_in_ar europeizar irregular_form irregula...,europeizar,europeïtzar,2,0,S,P,0,el sabor del plato,...,,,〰 Resulta divertit que 〰tu,(el gust del plat),,present,subjuntiu,-ar,,present subjuntiu -ar europeïtzar
3358,↫ Va ser sorprenent que ↫tu {{c1::europeizaras...,ends_in_ar europeizar irregular_verb low_irre...,europeizar,europeïtzar,2,0,S,I,0,el sabor del plato,...,,,↫ Va ser sorprenent que ↫tu,(el gust del plat),,imperfet,subjuntiu,-ar,,imperfet subjuntiu -ar europeïtzar
3364,"Hombre, si us plau, {{c1::europeizá::…europei...",ends_in_ar europeizar imperativo irregular_ve...,europeizar,europeïtzar,2,0,M,0,pos,el sabor del plato,...,,,"Hombre, si us plau,",(el gust del plat) !(vos),,,imperatiu,-ar,,imperatiu -ar europeïtzar
