In [1]:
import pandas as pd

## Figure out how to parse the different verb forms

In [48]:
df = pd.read_csv('esquitxar_example.txt', sep=' ', header=None, names=['conj_verb', 'inf_verb', 'code'])

In [49]:
# keep only verbal forms
df = df.loc[df.code.str.startswith('VM')]

In [50]:
# region
df['region'] = 'central'
df.loc[df.code.str.endswith('B'), 'region'] = 'balear'
df.loc[df.code.str.endswith('V'), 'region'] = 'valencia'
df.head()

Unnamed: 0,conj_verb,inf_verb,code,region
0,esquitx,esquitxar,VMIP1S0B,balear
1,esquitxa,esquitxar,VMIP3S00,central
2,esquitxa,esquitxar,VMM02S00,central
3,esquitxada,esquitxar,VMP00SF0,central
4,esquitxades,esquitxar,VMP00PF0,central


In [51]:
# mood
# I = indicative
# M = imperative
# P = participle
# G = gerund
# N = infinitive
# S = subjunctive

df['mood'] = df.code.str.split('VM', expand=True)[1].str.slice(0,1)

In [76]:
# gender for participles
df['gender'] = df.code.str.split('VM', expand=True)[1].str.slice(4,5)

In [52]:
# tense
# P = present
# I = past imperfect
# F = future
# S = simple past
# C = conditional
df['tense'] = df.code.str.split('VM', expand=True)[1].str.slice(1,2)

In [53]:
# person (1st 2nd 3rd)
df['person'] = df.code.str.split('VM', expand=True)[1].str.slice(2,3)

In [54]:
# singular or plural
df['sing_or_plural'] = df.code.str.split('VM', expand=True)[1].str.slice(3,4)
df.sing_or_plural.unique()

array(['S', 'P', '0'], dtype=object)

In [57]:
# keep only central forms
df = df.loc[df.region == 'central']

# remove passat simple 
df = df.loc[df.tense != 'S']

In [71]:
# add perfet, passat perifrastic, negative command

In [81]:
# perfet
haver = [['he', '1', 'S', 'I', 'perfet'],
         ['has', '2', 'S', 'I', 'perfet'],
         ['ha', '3', 'S', 'I', 'perfet'],
         ['hem', '1', 'P', 'I', 'perfet'],
         ['heu', '2', 'P', 'I', 'perfet'],
         ['han', '3', 'P', 'I', 'perfet']]
haver_df = pd.DataFrame(data=haver,
                        columns=['haver_verb',
                                 'person', 
                                 'sing_or_plural', 
                                 'mood',
                                 'tense'])
participle = df.loc[(df.mood=='P')&\
                    (df.gender=='M')&\
                    (df.sing_or_plural=='S')].conj_verb.values[0]
haver_df['conj_verb'] = haver_df.haver_verb+' '+participle
df = pd.concat([df, haver_df], axis=0)

In [87]:
# passat perifrastic
anar = [['vaig', '1', 'S', 'I', 'passat_perifrastic'],
         ['vas', '2', 'S', 'I', 'passat_perifrastic'],
         ['va', '3', 'S', 'I', 'passat_perifrastic'],
         ['vam', '1', 'P', 'I', 'passat_perifrastic'],
         ['vau', '2', 'P', 'I', 'passat_perifrastic'],
         ['van', '3', 'P', 'I', 'passat_perifrastic']]
anar_df = pd.DataFrame(data=anar,
                        columns=['anar_verb',
                                 'person', 
                                 'sing_or_plural', 
                                 'mood',
                                 'tense'])
infinitive = df.loc[(df.mood=='N')].conj_verb.values[0]
anar_df['conj_verb'] = anar_df.anar_verb+' '+infinitive
df = pd.concat([df, anar_df], axis=0)

In [99]:
# negative command -- just the subjunctive present
neg_cmd_df = df.loc[(df.mood=='S')&(df.tense=='P')]
neg_cmd_df['pos_neg_cmd'] = 'neg'

# add positive / neg designations for commands
df['pos_neg_cmd'] = '0'
df.loc[df.mood=='M', 'pos_neg_cmd'] = 'pos'

# now add 
df = pd.concat([df, neg_cmd_df], axis=0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,conj_verb,inf_verb,code,region,mood,tense,person,sing_or_plural,gender,haver_verb,anar_verb,pos_neg_cmd
44,esquitxem,esquitxar,VMSP1P00,central,S,P,1,P,0,,,neg
56,esquitxeu,esquitxar,VMSP2P00,central,S,P,2,P,0,,,neg
58,esquitxi,esquitxar,VMSP1S0Y,central,S,P,1,S,0,,,neg
59,esquitxi,esquitxar,VMSP3S0Y,central,S,P,3,S,0,,,neg
61,esquitxin,esquitxar,VMSP3P0Y,central,S,P,3,P,0,,,neg
62,esquitxis,esquitxar,VMSP2S0Y,central,S,P,2,S,0,,,neg


In [73]:
# other synthetic tenses?
# plusquamperfet, passat anterior, passat anterior preifrastic, 
# futur perfet... etc.

Unnamed: 0,haver_verb,person,sing_or_plural,mood,conj_verb,inf_verb,code,region,tense,gender
0,he,1,S,I,esquitxaria,esquitxar,VMIC1S00,central,C,0
1,he,1,S,I,esquitxaré,esquitxar,VMIF1S00,central,F,0
2,he,1,S,I,esquitxava,esquitxar,VMII1S00,central,I,0
3,he,1,S,I,esquitxo,esquitxar,VMIP1S0C,central,P,0
4,has,2,S,I,esquitxaries,esquitxar,VMIC2S00,central,C,0
5,has,2,S,I,esquitxaràs,esquitxar,VMIF2S00,central,F,0
6,has,2,S,I,esquitxaves,esquitxar,VMII2S00,central,I,0
7,has,2,S,I,esquitxes,esquitxar,VMIP2S00,central,P,0
8,ha,3,S,I,esquitxa,esquitxar,VMIP3S00,central,P,0
9,ha,3,S,I,esquitxaria,esquitxar,VMIC3S00,central,C,0
