In [377]:
import pandas as pd
import numpy as np
import pickle
import re,pyperclip
debug=0

### Déclarations

In [2]:
class formesPatron:
    '''
    Accumulator of the forms that match one pattern, used to compute the
    Minimal Generalisation (cf. MGL).
    '''
    def __init__(self):
        # Forms recorded so far, in insertion order.
        self.formes=[]

    def ajouterForme(self,forme):
        '''Record one more form for this pattern.'''
        self.formes.append(forme)

    def calculerGM(self):
        '''
        Build the generalised context shared by all recorded forms.

        Forms are aligned on their right edge and compared position by
        position over the length of the shortest form.  A leading "*" is
        emitted when the forms do not all have the same length.  A position
        where some form has "." yields "."; any other position yields the
        joined `extent` of `fs.lattice[phonemes]` (`fs` is defined outside
        this class -- presumably a feature-system lattice; confirm).

        Returns whatever `patron2regexp` (defined elsewhere) makes of the
        list of positions.  Raises ValueError when no form was recorded.
        '''
        longueurs=[len(forme) for forme in self.formes]
        plusCourte=min(longueurs)
        plusLongue=max(longueurs)
        if debug:
            print (plusCourte, plusLongue)
        positions=["*"] if plusLongue>plusCourte else []
        # distance counts from the end of the form: -1 is the last segment.
        for distance in xrange(plusCourte, 0, -1):
            phonemes=set(forme[-distance] for forme in self.formes)
            if debug:
                print (phonemes)
            if "." in phonemes:
                positions.append(".")
            else:
                positions.append("".join(fs.lattice[phonemes].extent))
        return patron2regexp(positions)

class pairePatrons:
    '''
    Accumulator of (form1, form2, pattern) triples for one pair of cells,
    used to compute the Minimal Generalisations (cf. MGL).
    '''
    def __init__(self,case1,case2):
        self.case1=case1
        self.case2=case2
        # pattern "a-b" -> formesPatron collecting the case1 forms
        self.patrons1={}
        # mirrored pattern "b-a" -> formesPatron collecting the case2 forms
        self.patrons2={}

    def ajouterFormes(self,forme1,forme2,patron):
        '''
        Record forme1 under `patron` and forme2 under the mirrored pattern.

        `patron` must contain exactly one "-" (otherwise the unpacking
        below raises ValueError before anything is stored).
        '''
        (gauche,droite)=patron.split("-")
        miroir=droite+"-"+gauche
        for table,cle,forme in ((self.patrons1,patron,forme1),
                                (self.patrons2,miroir,forme2)):
            if cle not in table:
                table[cle]=formesPatron()
            table[cle].ajouterForme(forme)

    def calculerGM(self):
        '''
        Return a pair of dicts (direction 1, direction 2) mapping each
        pattern to the result of its accumulator's calculerGM().
        '''
        resultats=[]
        for etiquette,table in (("patron1",self.patrons1),
                                ("patron2",self.patrons2)):
            resultat={}
            for patron in table:
                if debug:
                    print (etiquette, patron)
                resultat[patron]=table[patron].calculerGM()
            resultats.append(resultat)
        return (resultats[0],resultats[1])

In [3]:
class paireClasses:
    '''
    Two classesPaire objects for one pair of cells, one per direction
    (case1 -> case2 in classes1, case2 -> case1 in classes2).
    '''
    def __init__(self,case1,case2):
        self.case1=case1
        self.case2=case2
        self.nom=case1+"-"+case2
        self.classes1=classesPaire(case1,case2)
        self.classes2=classesPaire(case2,case1)

    def ajouterPatron(self,n,patron,motif):
        '''
        Register (patron, motif) in direction n (1 or 2).
        Any other n is ignored, with a message when `debug` is set.
        '''
        if n not in (1,2):
            if debug:
                print ("le numéro de forme n'est pas dans [1,2]",n)
            return
        cible=self.classes1 if n==1 else self.classes2
        cible.ajouterPatron(patron,motif)

    def ajouterPaire(self,forme1,forme2):
        '''Register the pair in both directions (swapped for direction 2).'''
        for classes,depart,arrivee in ((self.classes1,forme1,forme2),
                                       (self.classes2,forme2,forme1)):
            classes.ajouterPaire(depart,arrivee)

    def calculerClasses(self):
        '''Return the two direction objects as a tuple (classes1, classes2).'''
        return (self.classes1,self.classes2)

    
class classesPaire:
    '''
    Patterns, classes and transformations for one ordered pair of cells.

    ajouterPatron : register a pattern and its associated motif (MGL)
    ajouterPaire : register a pair of forms; computes the class of forme1 and the rule selected
    sortirForme : compute the output forms predicted for a form, with their respective weights
    '''
    def __init__(self,case1,case2):
        self.case1=case1
        self.case2=case2
        self.nom=case1+"-"+case2
        # Contextual statistics: class id -> {rule: count} / class id -> total count.
        self.classe={}
        self.nbClasse={}
        # pattern -> motif (regular expression matched against the input form)
        self.patrons={}
        # pattern -> regex fragment to replace / replacement string
        self.entree={}
        self.sortie={}
        # Same statistics for the context-free classification.
        self.classeCF={}
        self.nbClasseCF={}

    def ajouterPatron(self,patron,motif):
        '''
        Register `patron` ("input-output", exactly one "-") with its motif.
        Each "." of the input side becomes a capturing group; the output side
        is converted by `remplacementSortie` (defined elsewhere).
        '''
        self.patrons[patron]=motif
        (entree,sortie)=patron.split("-")
        self.entree[patron]=entree.replace(u".",u"(.)")
        self.sortie[patron]=remplacementSortie(sortie)

    def _appliquer(self,patron,forme):
        '''Apply the transformation of `patron` to `forme`.'''
        # The +"$" forces right alignment for suffixal transformations.
        return re.sub(self.entree[patron]+"$",self.sortie[patron],forme)

    def _compter(self,classe,nbClasse,idClasse,regle):
        '''Increment the counter of class idClasse and of `regle` inside it.'''
        if idClasse not in classe:
            classe[idClasse]={}
            nbClasse[idClasse]=0
        classe[idClasse][regle]=classe[idClasse].get(regle,0)+1
        nbClasse[idClasse]+=1

    def ajouterPaire(self,forme1,forme2):
        '''
        Compute the class of forme1 (the patterns it matches) and the rule
        that derives forme2 from it, then increment the class counter and
        the counter of that rule inside the class.

        This is done twice: context-free (only the input side of the pattern
        has to match the end of forme1) and contextual (the motif has to
        match).  When no rule of the class derives forme2 the pair is counted
        under the empty rule "".  When several do, the last one iterated wins.
        '''
        classeFormeCF=[]
        regleFormeCF=""
        classeForme=[]
        regleForme=""
        for patron in self.patrons:
            dansClasseCF=re.match(".*"+patron.split("-")[0]+"$",forme1)
            dansClasse=re.match(self.patrons[patron],forme1)
            if not (dansClasseCF or dansClasse):
                continue
            derive=(forme2==self._appliquer(patron,forme1))
            if dansClasseCF:
                classeFormeCF.append(patron)
                if derive:
                    regleFormeCF=patron
            if dansClasse:
                classeForme.append(patron)
                if derive:
                    regleForme=patron
        self._compter(self.classeCF,self.nbClasseCF,", ".join(classeFormeCF),regleFormeCF)
        self._compter(self.classe,self.nbClasse,", ".join(classeForme),regleForme)

    def sortirForme(self,forme,contextFree=False):
        '''
        Return {output form: weight} for `forme`.

        The class of `forme` is the list of patterns it matches (context-free
        or contextual, see ajouterPaire).  If that class was seen by
        ajouterPaire, each of its rules contributes its count divided by the
        class total; otherwise every matching pattern gets the same weight.
        Returns {} when no pattern matches.

        Pairs counted under the empty rule "" (no rule derived the observed
        output) have no transformation to apply: they are skipped, so the
        weights may sum to less than 1.  Previously this raised KeyError.
        '''
        classeForme=[]
        sortieForme={}
        for patron in self.patrons:
            if contextFree:
                filterF1=".*"+patron.split("-")[0]+"$"
            else:
                filterF1=self.patrons[patron]
            if re.match(filterF1,forme):
                classeForme.append(patron)
        if not classeForme:
            if debug:
                print (forme)
                print ("pas de patron")
            return sortieForme
        idClasseForme=", ".join(classeForme)
        if contextFree:
            nbClasse=self.nbClasseCF
            classe=self.classeCF
        else:
            nbClasse=self.nbClasse
            classe=self.classe
        if idClasseForme in nbClasse:
            nTotal=nbClasse[idClasseForme]
            for patron in classe[idClasseForme]:
                if patron not in self.entree:
                    # "" = pairs no rule could explain; nothing to apply.
                    continue
                sortie=self._appliquer(patron,forme)
                # NOTE(review): two rules yielding the same output overwrite
                # each other here instead of adding up -- confirm intent.
                sortieForme[sortie]=float(classe[idClasseForme][patron])/nTotal
        else:
            if debug:
                print (forme)
                print ("pas de classe",idClasseForme)
                print ("%.2f par forme de sortie" % (float(1)/len(classeForme)))
            nTotal=len(classeForme)
            for patron in classeForme:
                sortie=self._appliquer(patron,forme)
                sortieForme[sortie]=float(1)/nTotal
        return sortieForme
        

### Ouvertures

In [4]:
# Load the "L4L" rule set (used below as a dict keyed by (case1, case2)).
# Pickle data must be read in binary mode.
# NOTE: unpickling executes code from the file; only load files you trust.
with open("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-01-X-Regles.pkl","rb") as inFile:
    reglesL4L=pickle.load(inFile)
# reglesL4L[(u'ps2P', u'is1S')].patrons

In [5]:
# Load the "Head" rule set (used below as a dict keyed by (case1, case2)).
# Pickle data must be read in binary mode.
# NOTE: unpickling executes code from the file; only load files you trust.
with open("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-Regles.pkl","rb") as inFile:
    reglesHead=pickle.load(inFile)
# reglesHead[(u'ps2P', u'is1S')].patrons

In [6]:
# Load the "Tail" rule set (used below as a dict keyed by (case1, case2)).
# Pickle data must be read in binary mode.
# NOTE: unpickling executes code from the file; only load files you trust.
with open("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-02-X-Regles.pkl","rb") as inFile:
    reglesTail=pickle.load(inFile)
# reglesTail[(u'ps2P', u'is1S')].patrons

In [7]:
# Load the morphome-based rule set (used below as a dict keyed by (case1, case2)).
# Pickle data must be read in binary mode.
# NOTE: unpickling executes code from the file; only load files you trust.
with open("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-01-X-Morphomes-Regles.pkl","rb") as inFile:
    reglesMorphomes=pickle.load(inFile)
# reglesMorphomes[(u'ps2P', u'is1S')].patrons

### Manipulations

In [35]:
# Print, for one pair of cells, the pattern -> motif table of each of the
# three rule sets so they can be compared by eye.
paire=(u'ppFS', u'inf')
# Labels, in the same order as the list of rule sets iterated below.
nRegles="L4L Head Tail".split(" ")
for n,regles in enumerate([reglesL4L,reglesHead,reglesTail]):
    print nRegles[n],regles[paire].nom
    print
    for k,v in regles[paire].patrons.iteritems():
        print k,v
    print
    print "================="
    print

L4L ppFS-inf

e-Etr ^(.*)ne$
Ert-rir ^(.*[uOo][fv])Ert$
y-avwar ^(.*)y$
iz-âdr ^(.*)priz$
zy-dr ^(.*)kuzy$
y-Etr ^(.*[pk][E96aO][mnr])y$
O.t-u.ir ^mOrt$
iz-Erir ^(.*[E96aOêûâô])kiz$
- ^(.*[ptkbdgfsSvzZmnJNjlrwHiyEe926auOoêûâô][ptkbdgfsSvzZmnJNjlrwHE96aOêûâô])e$
oz-Or ^(.*)kloz$
Eky-ivr ^vEky$
iz-war ^(.*)asiz$
-r ^(.*[ptkbdgfsSvzZmnJNjlrwHiyEe926auOoêûâô][iye2])$
y-ir ^(.*[ptbdfsvzmnlr])y$
ly-dr ^muly$
t-dr ^(.*[ptkbdgfsSvzZjlrwH][iyEe926uOoêûô])t$
y-uvwar ^(.*[E96O])my$
y-war ^(.*[fsSvzZlr])y$
t-r ^(.*[ptkbdgfsSvzZjrwH][iEe])t$
y-r ^(.*[ptbdfsvzrE96O][jrwHiyEe926auOoêûâô][ptkbdg])y$
y-6vwar ^(.*[tdsz])y$
y-watr ^akry$
i-r ^(.*)sHivi$
z-r ^sirkôsiz$
iz-Etr ^(.*)miz$


Head ppFS-inf

e-Etr ^(.*)ne$
Ert-rir ^(.*[uOo][fv])Ert$
y-avwar ^y$
iz-âdr ^(.*[rE6a])priz$
zy-dr ^(.*)kuzy$
y-Etr ^(.*[pk][E96aO][mnr])y$
iz-Erir ^(.*[E96aOêûâô])kiz$
- ^(.*[ptkbdgfsSvzZmnJNjlrwHiyEe926auOoêûâô][ptkbdgfsSvzZmnJNjlrwHE96Oêûô])e$
oz-Or ^kloz$
Eky-ivr ^vEky$
iz-war ^asiz$
-r ^(.*[ptkbdgfsSvzZmnJNjlrwHE96aO

In [42]:
# For every pattern of the L4L rules for this pair, print the L4L motif and,
# on the next line, the motif of the same pattern in the morphome rules.
# Raises KeyError if a pattern is missing from reglesMorphomes.
paire=(u'fi3S', u'ii3S')
for k,v in reglesL4L[paire].patrons.iteritems():
    print k,v
    print k,reglesMorphomes[paire].patrons[k]
    print


ra-sE ^(.*[ptkbdgfsSvzZmnJNjlrwHE96aOêûâô])ira$
ra-sE ^(.*[ptkbdgfsSvzZmnJNjlrwHiyEe926auOoêûâô])ira$

9.6ra-6.E ^(.*)dEZ9n6ra$
9.6ra-6.E ^(.*)dEZ9n6ra$

jEra-EjE ^(.*)sjEra$
jEra-EjE ^(.*)sjEra$

ira-E ^(.*[ptkbdgfsSvzZmnJNjrwHiyEe926auOoêûâô][fvjrwHiyEe926auOoêûâô][ptkbdgfsSvzZmnJNjr])ira$
ira-E ^(.*[ptkbdgfsSvzZmnJNjrwHiyEe926auOoêûâô][fvjrwHiyEe926auOoêûâô][ptkbdgfsSvzZmnJNjr])ira$

ra-wE ^(.*[ptkfsS][lr])ura$
ra-wE ^(.*[ptkbdgfsSvzZ][lr])ura$

Odra-alE ^(.*[fv])Odra$
Odra-alE ^(.*[fv])Odra$

6r-E ^sâbl6r$
6r-E ^sâbl6r$

d6ra- ^sud6ra$
d6ra- ^sud6ra$

ra-E ^(.*[ptkbdgfsSvzZmnJNjlrwHE96Oêûô][mnJNjlrwHiyEe926auOoêûâô][ptkbdgfsSvzZjrE6])ra$
ra-E ^(.*[ptkbdgfsSvzZmnJNjlrwHE96Oêûô][mnJNjlrwHiyEe926auOoêûâô][ptkbdgfsSvzZjrE6])ra$

O.6ra-o.E ^(.*)O([ptbdfsvzmnlr])6ra$
O.6ra-o.E ^(.*)O([ptbdfsvzmnlr])6ra$

Ora-avE ^sOra$
Ora-avE ^sOra$

9.6ra-2.E ^(.*[ptkbdgfsSvzZmnJNlr])9([ptkbdgfsSvzZmnJNjlr])6ra$
9.6ra-2.E ^(.*[ptkbdgfsSvzZmnJNjlr])9([ptkbdgfsSvzZmnJNjlr])6ra$

ra-zE ^(.*[ptbdfsvzjlH][i

In [32]:
# Load the morphome table and show the distinct (case, morphome) pairs,
# preceded by how many there are.
morphomes=pd.read_pickle("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-01-X-Morphomes.pkl")
print len(morphomes["case morphome".split(" ")].drop_duplicates())
morphomes["case morphome".split(" ")].drop_duplicates()

32


Unnamed: 0,case,morphome
0,inf,inf
1,ppMS,ppMP/ppMS
2,ppFS,ppFS/ppFP
3,pi3S,pi2S/pi3S
4,pP,pP
6,pi1S,pi1S
7,pi3P,pi3P
8,pi2P,pi2P
11,ai3S,ai3S/ai2S/is3S
12,fi3S,fi2S/fi3S


In [46]:
# Display the full pattern -> motif dict of the L4L rules for fi3S -> ii3S.
reglesL4L[(u'fi3S', u'ii3S')].patrons

{u'6r-E': u'^s\xe2bl6r$',
 u'6ra-E': u'^(.*[ptkbdgfsSvzZmnJNjlrwHiyEe926auOo\xea\xfb\xe2\xf4][ptkbdgfsSvzZmnJNjlr])6ra$',
 u'9.6ra-2.E': u'^(.*[ptkbdgfsSvzZmnJNlr])9([ptkbdgfsSvzZmnJNjlr])6ra$',
 u'9.6ra-6.E': u'^(.*)dEZ9n6ra$',
 u'E.6ra-6.E': u'^(.*[ptkbdgfsSvzZmnJNlr])E([ptbdfsvzmnl])6ra$',
 u'Era-wajE': u'^(.*)vEra$',
 u'O.6ra-o.E': u'^(.*)O([ptbdfsvzmnlr])6ra$',
 u'Odra-alE': u'^(.*[fv])Odra$',
 u'Ora-avE': u'^sOra$',
 u'd6ra-': u'^sud6ra$',
 u'dra-lE': u'^vudra$',
 u'dra-zE': u'^(.*)kudra$',
 u'ira-E': u'^(.*[ptkbdgfsSvzZmnJNjrwHiyEe926auOo\xea\xfb\xe2\xf4][fvjrwHiyEe926auOo\xea\xfb\xe2\xf4][ptkbdgfsSvzZmnJNjr])ira$',
 u'ira-alE': u'^ira$',
 u'ira-jE': u'^(.*[ptkbdgfsSvzZmnJNlr])ira$',
 u'jEra-EjE': u'^(.*)sjEra$',
 u'j\xeadra-6nE': u'^(.*[ptbdfsvz])j\xeadra$',
 u'ora-avE': u'^ora$',
 u'r-sE': u'^sErvir$',
 u'ra-E': u'^(.*[ptkbdgfsSvzZmnJNjlrwHE96O\xea\xfb\xf4][mnJNjlrwHiyEe926auOo\xea\xfb\xe2\xf4][ptkbdgfsSvzZjrE6])ra$',
 u'ra-HE': u'^(.*)yra$',
 u'ra-jE': u'^(.*[ptkbdgfsSvzZmnJN

# Votes Max et InterMax

In [83]:
# Sorted list of the distinct cells that occur as first member of a key of
# reglesHead; used below as the list of cells of a paradigm.
cases=sorted(list(set(paire[0] for paire in reglesHead.keys())))
cases

[u'ai1P',
 u'ai1S',
 u'ai2P',
 u'ai2S',
 u'ai3P',
 u'ai3S',
 u'fi1P',
 u'fi1S',
 u'fi2P',
 u'fi2S',
 u'fi3P',
 u'fi3S',
 u'ii1P',
 u'ii1S',
 u'ii2P',
 u'ii2S',
 u'ii3P',
 u'ii3S',
 u'inf',
 u'is1P',
 u'is1S',
 u'is2P',
 u'is2S',
 u'is3P',
 u'is3S',
 u'pI1P',
 u'pI2P',
 u'pI2S',
 u'pP',
 u'pc1P',
 u'pc1S',
 u'pc2P',
 u'pc2S',
 u'pc3P',
 u'pc3S',
 u'pi1P',
 u'pi1S',
 u'pi2P',
 u'pi2S',
 u'pi3P',
 u'pi3S',
 u'ppFP',
 u'ppFS',
 u'ppMP',
 u'ppMS',
 u'ps1P',
 u'ps1S',
 u'ps2P',
 u'ps2S',
 u'ps3P',
 u'ps3S']

In [398]:
# Hand-written sample inputs: lexeme -> {cell: known form}.
# Used by the single-lexeme cells further down (sourceCells=sources[source]).
sources={"abasourdir":{"ppMS":u"abazurdi",
             "ppFS":u"abazurdi",
             "ppMP":u"abazurdi"},
         "accroire":{"inf":u"akrwar"},
         "affermir":{"inf":u"afErmir",
             "pi3S":u"afErmi"},
         "agonir":{"ppFS":u"agOni"},
         "appendre":{"inf":u"apâdr"},
        }

In [359]:
# Input paradigms: one row per lexeme, one column per cell (see dfS0.head()
# further down); cells with no known form are NaN.
dfS0=pd.read_csv("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-paradigmes.csv",sep=";",index_col=0,encoding="utf8")
# All lexemes, in file order.
lexemes=dfS0.lexeme.tolist()

### Calcul des élections

In [375]:
def votes(sourceCells,bLatex=False):
    '''
    Elect one form per cell of the paradigm from the known forms of a lexeme.

    sourceCells : dict {cell: known form}
    bLatex : when True, known forms are wrapped in \\textbf{} and cells
             without a candidate get "?" instead of NaN

    Reads the globals `targetCells`, `cases` and `reglesHead`; the two
    election loops iterate over `cases`, so every cell of `cases` must also
    be in `targetCells`.

    For each target cell every known form proposes a distribution
    {output form: weight} through reglesHead[(source, target)].sortirForme.

    Returns (electMax, electInterMax), two dicts {cell: form}:
      electMax      : best-scoring candidate of the accumulated distribution
      electInterMax : best-scoring candidate among the forms proposed by
                      every source cell (intersection of the proposals)
    A cell present in sourceCells always keeps its known form.

    NOTE(review): the accumulated distribution starts from the proposals of
    the first source cell and only adds the scores of forms still in the
    running intersection, so forms proposed only by later source cells never
    enter electMax and the result depends on the iteration order of
    sourceCells.  Kept as is -- confirm whether a plain sum was intended.
    '''
    targetParadigm={}
    targetDistributions={}
    localDebug=0
    for targetCell in targetCells:
        if localDebug: print(targetCell)
        # None = no source cell seen yet (also covers an empty sourceCells).
        targetValues=None
        targetDistribution={}
        for sourceCell,sourceForm in sourceCells.items():
            distribution=reglesHead[(sourceCell, targetCell)].sortirForme(sourceForm)
            if localDebug:
                print("%s %s %s"%(sourceCell,sourceForm,distribution))
            if targetValues is None:
                targetValues=set(distribution.keys())
                targetDistribution=distribution
            else:
                targetValues=targetValues&set(distribution.keys())
                for targetValue in targetValues:
                    targetDistribution[targetValue]+=distribution[targetValue]
        targetParadigm[targetCell]=targetValues if targetValues is not None else set()
        targetDistributions[targetCell]=targetDistribution

    def elire(target,candidats):
        # One election: known form if any, else best candidate, else placeholder.
        if target in sourceCells:
            if bLatex:
                return u"\\textbf{%s}"%sourceCells[target]
            return sourceCells[target]
        if candidats:
            return max(candidats, key=candidats.get)
        if bLatex:
            return "?"
        return np.nan

    electMax={}
    electInterMax={}
    for target in cases:
        forms=targetDistributions[target]
        # Keep only the forms every source cell agreed on for THIS target
        # (the original filtered on the stale `forms` of the last cell).
        commonForms={k:v for k,v in forms.items() if k in targetParadigm[target]}
        electMax[target]=elire(target,forms)
        electInterMax[target]=elire(target,commonForms)
    return electMax,electInterMax

In [380]:
targetCells=cases
dElectMax={}
dElectInterMax={}
for n,source in enumerate(lexemes[:]):
    if n%10==0: print n,
    sourceCells=dfS0[dfS0.lexeme.str.contains(ur"^%s$"%source)].T.dropna().drop("lexeme").to_dict().values()[0]
    # sourceCells=sources[source]
    electMax,electInterMax=votes(sourceCells)
    dElectMax[source]=electMax
    dElectInterMax[source]=electInterMax

0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170 180 190 200 210 220 230 240 250 260 270 280 290 300 310 320 330 340 350 360 370 380 390 400 410 420 430 440 450 460 470 480 490 500 510 520 530 540 550 560 570 580 590 600 610 620 630 640 650 660 670 680 690 700 710 720 730 740 750 760 770 780 790 800 810 820 830 840 850 860 870 880 890 900 910 920 930 940 950 960 970 980 990 1000 1010 1020 1030 1040 1050 1060 1070 1080 1090 1100 1110 1120 1130 1140 1150 1160 1170 1180 1190 1200 1210 1220 1230 1240 1250 1260 1270 1280 1290 1300 1310 1320 1330 1340 1350 1360 1370 1380 1390 1400 1410 1420 1430 1440 1450 1460 1470 1480 1490 1500 1510 1520 1530 1540 1550 1560 1570 1580 1590 1600 1610 1620 1630 1640 1650 1660 1670 1680 1690 1700 1710 1720 1730 1740 1750 1760 1770 1780 1790 1800 1810 1820 1830 1840 1850 1860 1870 1880 1890 1900 1910 1920 1930 1940 1950 1960 1970 1980 1990 2000 2010 2020 2030 2040 2050 2060 2070 2080 2090 2100 2110 2120 2130 2140 2150 2160 2170 2180 2190 2200 2210 2

In [393]:
# One row per lexeme, one column per cell; the lexeme ends up in column "index".
dfMax=pd.DataFrame.from_records(dElectMax).T.reset_index()

In [394]:
# Save the "Max" paradigms next to the input paradigms.
dfMax.to_csv("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-paradigmes-Max.csv",sep=";",encoding="utf8")

In [395]:
# One row per lexeme, one column per cell; the lexeme ends up in column "index".
dfInterMax=pd.DataFrame.from_records(dElectInterMax).T.reset_index()

In [396]:
# Save the "InterMax" paradigms next to the input paradigms.
dfInterMax.to_csv("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-paradigmes-InterMax.csv",sep=";",encoding="utf8")

In [456]:
# Single-lexeme, step-by-step version of the first half of votes(): builds
# targetParadigm / targetDistributions as globals for the cells below.
source=u"affermir"
targetCells=cases
sourceCells=sources[source]
# cell -> set of forms proposed by every source cell (running intersection)
targetParadigm={}
# cell -> accumulated {form: score}
targetDistributions={}
localDebug=0
for targetCell in targetCells:
    if localDebug: print targetCell
    targetInit=True
    for sourceCell,sourceForm in sourceCells.iteritems():
        distribution=reglesHead[(sourceCell, targetCell)].sortirForme(sourceForm)
        if localDebug:
            print sourceCell, sourceForm,
            print reglesHead[(sourceCell, targetCell)].sortirForme(sourceForm)
        if targetInit:
            # First source cell: its proposals seed both the set and the scores.
            targetValues=set(distribution.keys())
            targetDistribution=distribution
            targetInit=False
        else:
            if localDebug: print "avant",targetValues
            targetValues=targetValues&set(distribution.keys())
            if localDebug: print "après",targetValues
            # Only forms still in the intersection receive the extra score.
            for targetValue in targetValues:
                targetDistribution[targetValue]+=distribution[targetValue]
    targetParadigm[targetCell]=targetValues
    targetDistributions[targetCell]=targetDistribution
    if localDebug: print

In [457]:
# "Max" election for the current lexeme: known form (in bold) if the cell is
# a source cell, else the best-scoring candidate, else "?".
# Note: `forms` stays defined after the loop with the value of the last cell.
electMax={}
for target in cases:
    forms=targetDistributions[target]
    if target in sourceCells:
        electMax[target]=ur"\textbf{%s}"%sourceCells[target]
        print target,"=",sourceCells[target]
    elif forms:
        print target,">",max(forms, key=forms.get),max(forms.values())
        electMax[target]=max(forms, key=forms.get)
    else:
        print target,"pas de forme"
        electMax[target]="?"

ai1P pas de forme
ai1S > afErmi 1.0
ai2P pas de forme
ai2S pas de forme
ai3P > afErmir 2.0
ai3S > afErmi 1.5
fi1P > afErmirô 1.38888888889
fi1S > afErmirE 1.51724137931
fi2P > afErmire 1.42105263158
fi2S > afErmira 1.36842105263
fi3P > afErmirô 1.48
fi3S > afErmira 1.57575757576
ii1P > afErmisjô 1.32142857143
ii1S > afErmisE 1.02666666667
ii2P > afErmisje 0.881118881119
ii2S > afErmisE 1.33333333333
ii3P > afErmisE 1.07333333333
ii3S > afErmisE 1.34680851064
inf = afErmir
is1P pas de forme
is1S pas de forme
is2P pas de forme
is2S pas de forme
is3P pas de forme
is3S pas de forme
pI1P > afErmisô 0.946428571429
pI2P > afErmise 1.19345238095
pI2S > afErmi 2.0
pP > afErmisâ 1.12834224599
pc1P > afErmirjô 2.0
pc1S > afErmirE 1.47619047619
pc2P > afErmirje 1.5
pc2S > afErmirE 1.41666666667
pc3P > afErmirE 1.42857142857
pc3S > afErmirE 1.56666666667
pi1P > afErmô 0.888888888889
pi1S > afErmi 2.0
pi2P > afErmise 1.25617283951
pi2S > afErmi 2.0
pi3P > afErmis 0.897435897436
pi3S = afErmi
ppFP > 

In [458]:
electInterMax={}
for target in cases:
    commonForms={k:v for k,v in targetDistributions[target].iteritems() if k in forms}
    if target in sourceCells:
        electInterMax[target]=ur"\textbf{%s}"%sourceCells[target]
        print target,"=",sourceCells[target]
    elif commonForms:
        print target,">",max(commonForms, key=commonForms.get),max(commonForms.values())
        electInterMax[target]=max(commonForms, key=commonForms.get)
    else:
        print target,"pas de forme"
        electInterMax[target]="?"

ai1P pas de forme
ai1S > afErmi 1.0
ai2P pas de forme
ai2S pas de forme
ai3P pas de forme
ai3S > afErmi 1.5
fi1P pas de forme
fi1S pas de forme
fi2P pas de forme
fi2S pas de forme
fi3P pas de forme
fi3S pas de forme
ii1P pas de forme
ii1S pas de forme
ii2P pas de forme
ii2S pas de forme
ii3P pas de forme
ii3S pas de forme
inf = afErmir
is1P pas de forme
is1S pas de forme
is2P pas de forme
is2S pas de forme
is3P pas de forme
is3S pas de forme
pI1P pas de forme
pI2P pas de forme
pI2S > afErmi 2.0
pP pas de forme
pc1P pas de forme
pc1S pas de forme
pc2P pas de forme
pc2S pas de forme
pc3P pas de forme
pc3S pas de forme
pi1P pas de forme
pi1S > afErmi 2.0
pi2P pas de forme
pi2S > afErmi 2.0
pi3P > afErmis 0.897435897436
pi3S = afErmi
ppFP > afErmi 1.71641791045
ppFS > afErmi 1.47153598281
ppMP > afErmi 1.70883940621
ppMS > afErmi 1.16666666667
ps1P pas de forme
ps1S > afErmis 1.08846153846
ps2P pas de forme
ps2S > afErmis 1.1
ps3P pas de forme
ps3S > afErmis 1.07291666667


In [459]:
# Display the Common-Max paradigm computed above.
electInterMax

{u'ai1P': '?',
 u'ai1S': u'afErmi',
 u'ai2P': '?',
 u'ai2S': '?',
 u'ai3P': '?',
 u'ai3S': u'afErmi',
 u'fi1P': '?',
 u'fi1S': '?',
 u'fi2P': '?',
 u'fi2S': '?',
 u'fi3P': '?',
 u'fi3S': '?',
 u'ii1P': '?',
 u'ii1S': '?',
 u'ii2P': '?',
 u'ii2S': '?',
 u'ii3P': '?',
 u'ii3S': '?',
 u'inf': u'\\textbf{afErmir}',
 u'is1P': '?',
 u'is1S': '?',
 u'is2P': '?',
 u'is2S': '?',
 u'is3P': '?',
 u'is3S': '?',
 u'pI1P': '?',
 u'pI2P': '?',
 u'pI2S': u'afErmi',
 u'pP': '?',
 u'pc1P': '?',
 u'pc1S': '?',
 u'pc2P': '?',
 u'pc2S': '?',
 u'pc3P': '?',
 u'pc3S': '?',
 u'pi1P': '?',
 u'pi1S': u'afErmi',
 u'pi2P': '?',
 u'pi2S': u'afErmi',
 u'pi3P': u'afErmis',
 u'pi3S': u'\\textbf{afErmi}',
 u'ppFP': u'afErmi',
 u'ppFS': u'afErmi',
 u'ppMP': u'afErmi',
 u'ppMS': u'afErmi',
 u'ps1P': '?',
 u'ps1S': u'afErmis',
 u'ps2P': '?',
 u'ps2S': u'afErmis',
 u'ps3P': '?',
 u'ps3S': u'afErmis'}

### Tableaux espaces thématiques

In [460]:
def sampa2api(sampa):
    '''
    Convert a transcription in the notebook's SAMPA-like notation to IPA.

    sampa : text, or UTF-8 encoded bytes (decoded first)
    Returns the converted text.

    The byte-string test uses `bytes`, which is `str` on Python 2 (same
    behaviour as before) and does not try to decode text on Python 3.
    '''
    if isinstance(sampa,bytes):
        api=sampa.decode("utf8")
    else:
        api=sampa
    # Applied in this order: the two-character symbols must be handled
    # before the single letters they contain (e.g. R" before R).
    remplacements=(
        (u'n"',u'n'),
        (u't"',u't'),
        (u'z"',u'z'),
        (u'R"',u'ʁ'),
        (u'p"',u'p'),
        (u'S',u'ʃ'),
        (u'Z',u'ʒ'),
        (u'N',u'ŋ'),
        (u'J',u'ɲ'),
        (u'r',u'ʁ'),
        (u'H',u'ɥ'),
        (u'E',u'ɛ'),
        (u'2',u'ø'),
        (u'9',u'œ'),
        (u'6',u'ə'),
        (u'O',u'ɔ'),
        (u'è',u'e'),
        (u'ò',u'o'),
        (u'â',u'ɑ̃'),
        (u'ê',u'ɛ̃'),
        (u'û',u'œ̃'),
        (u'ô',u'ɔ̃'),
        (u'@',u'ə'),
        (u'R',u'ʁ'),
    )
    for symbole,equivalent in remplacements:
        api=api.replace(symbole,equivalent)
    return api

In [461]:
# Colour palettes (not referenced in the visible part of this notebook).
coulMF=["orange","brown","yellow","lime",
          "green","teal","lightgray","pink","cyan","magenta"]
coulMT=["blue","darkgray","purple","red","olive","violet"]
# Cell -> colour specification passed to \cellcolor{}; cells sharing a colour
# presumably share a morphome -- confirm against the morphome table.
couleurCaseMorphome={
    "pi1S":"brown!25","pi2S":"brown","pi3S":"brown",
    "pi3P":"brown!75",
    "ii1S":"orange","ii2S":"orange","ii3S":"orange","ii3P":"orange",
    "pi1P":"orange!25","pi2P":"orange!25","ii1P":"orange!25","ii2P":"orange!25",
    "fi1S":"lime","fi2P":"lime",
    "pc1S":"lime","pc2S":"lime","pc3S":"lime","pc3P":"lime",
    "pc1P":"lime!25","pc2P":"lime!25",
    "fi2S":"lime!75","fi3S":"lime!75",
    "fi1P":"lime!50","fi3P":"lime!50",
    "ps1S":"teal","ps2S":"teal","ps3S":"teal","ps3P":"teal",
    "ps1P":"teal!25","ps2P":"teal!25",
    "ai2S":"pink!75","ai3S":"pink!75","is3S":"pink!75",
    "is1S":"pink","is2S":"pink","is3P":"pink",
    "ai1S":"pink!25","ai1P":"pink!25","ai2P":"pink!25","ai3P":"pink!25",
    "is1P":"pink!25","is2P":"pink!25",
    "pI2S":"yellow!25","pI1P":"blue!25","pI2P":"blue!25",
    "ppMS":"cyan","ppMP":"cyan",
    "ppFS":"magenta","ppFP":"magenta",
    "inf":"cyan!25","pP":"blue!10"
            }
# Cell -> colour specification used for all the tables generated below
# (stem-space colouring, judging by the name).
couleurCaseStem={
    "pi1S":"brown","pi2S":"brown","pi3S":"brown",
    "pi3P":"brown!75",
    "ii1S":"orange","ii2S":"orange","ii3S":"orange","ii3P":"orange",
    "pi1P":"orange","pi2P":"orange","ii1P":"orange","ii2P":"orange",
    "fi1S":"lime","fi2P":"lime",
    "pc1S":"lime","pc2S":"lime","pc3S":"lime","pc3P":"lime",
    "pc1P":"lime","pc2P":"lime",
    "fi2S":"lime","fi3S":"lime",
    "fi1P":"lime","fi3P":"lime",
    "ps1S":"teal!50","ps2S":"teal!50","ps3S":"teal!50","ps3P":"teal!50",
    "ps1P":"blue!50","ps2P":"blue!50",
    "ai2S":"pink","ai3S":"pink","is3S":"pink",
    "is1S":"pink","is2S":"pink","is3P":"pink",
    "ai1S":"pink","ai1P":"pink","ai2P":"pink","ai3P":"pink",
    "is1P":"pink","is2P":"pink",
    "pI2S":"yellow!50","pI1P":"blue!25","pI2P":"blue!25",
    "ppMS":"magenta!50","ppMP":"magenta!50",
    "ppFS":"magenta!50","ppFP":"magenta!50",
    "inf":"cyan!50","pP":"blue!10"
            }
%store couleurCaseStem

# Thresholds and matching colours for the legend row built by makeTabular's
# makeLineCoulLim; that row pairs them by position over listLimites, so the
# sixth colour is not used there.
listLimites=[2,8,32,128,512]
listLimCoul=["red","pink","orange","green","teal","white"]


Stored 'couleurCaseStem' (dict)


In [462]:
def makeTabular(dictColours,title="",coulLim=False, cat="V"):
    '''
    Build the LaTeX source of a coloured paradigm table.

    dictColours : {cell name: colour specification for \\cellcolor}
    title : text placed under the table
    coulLim : when True, append a legend row built from the globals
              listLimites / listLimCoul
    cat : "V" for the verbal paradigm (7 tenses x 6 persons, imperative,
          non-finite forms), "A" for a 2x2 table (m/f x s/p)

    Every cell contains its own name as text, so that callers can substitute
    the actual forms afterwards.  Cells missing from dictColours are white
    ("V") or black ("A").

    Returns the table as one string.  Raises ValueError for any other `cat`
    (this used to fail later with an UnboundLocalError).
    '''
    dLineCode={"pi":u"Présent","ii":u"Imparfait","fi":u"Futur", "pc":u"Conditionnel", 
            "ps":u"Subj. prés.","ai":u"Passé", "is":u"Subj. imparf.","pI":u"Impératif"}
    colonnes={"V":r"rcccccc","A":r"cc"}
    if cat not in colonnes:
        raise ValueError("cat must be 'V' or 'A', got %r"%(cat,))
    # Column order of a tense row: 1S 2S 3S 1P 2P 3P.
    persons=[per+nb for nb in ["S","P"] for per in ["1","2","3"]]

    def makeCell(case,defaultColour):
        # One coloured cell whose text is the cell name itself.
        if case in dictColours:
            colour=dictColours[case]
        else:
            colour=defaultColour
        return r"\cellcolor{%s}%s"%(colour,case)

    def makeRow(cells):
        return r" & ".join(cells)+r"\\"

    def makeLine6(tenseCode):
        # Tense with the six person/number cells.
        return makeRow([dLineCode[tenseCode]]
                       +[makeCell(tenseCode+person,"white") for person in persons])

    def makeLine3(tenseCode):
        # Tense that only has 2S, 1P and 2P (imperative); other columns are dashes.
        line=[dLineCode[tenseCode]]
        for person in persons:
            if person in ["2S","1P","2P"]:
                line.append(makeCell(tenseCode+person,"white"))
            else:
                line.append(r"---")
        return makeRow(line)

    def makeLineNF():
        # Non-finite forms.
        return makeRow([u"Inf/P. prés./P. passé"]
                       +[makeCell(case,"white") for case in ["inf","pP","ppMS","ppMP","ppFS","ppFP"]])

    def makeLineMF(nombre):
        # Masculine and feminine cells for one number.
        return makeRow([makeCell(genre+nombre,"black") for genre in "mf"])

    def makeLineCoulLim():
        # Legend: one coloured "<limit" cell per threshold.
        line=[]
        for numLimite,limite in enumerate(listLimites):
            line.append(r"\cellcolor{%s}%s"%(listLimCoul[numLimite],"$<$"+str(limite)))
        return r"\hline\hline "+r" & ".join(line)+r"\\"

    top=[
        r"\begin{center}",
        r"\begin{tabular}{%s}"%colonnes[cat],
        r"\hline"
        ]
    bottom=[
        r"\hline",
        r"\end{tabular}\\",
        title,
        r"\end{center}",
        r"\bigskip",
        r""
        ]
    tabular=["\n".join(top)]
    if cat=="V":
        for tenseCode in ["pi","ii","fi","pc", "ps","ai", "is"]:
            tabular.append(makeLine6(tenseCode))
        tabular.append(makeLine3("pI"))
        tabular.append(makeLineNF())
    else:
        for number in "sp":
            tabular.append(makeLineMF(number))
    if coulLim:
        tabular.append(makeLineCoulLim())
    tabular.append("\n".join(bottom))
    return "\n".join(tabular)

### Tableaux des Votes directs
- Max
- Common-Max

In [463]:
# Table of the "Max" election for the current lexeme: every cell name in the
# template is replaced by the elected form (converted to IPA), and the result
# is copied to the clipboard.
paradigmeTabular=makeTabular(couleurCaseStem)
for case in cases:
    paradigmeTabular=paradigmeTabular.replace(case,sampa2api(electMax[case]))
pyperclip.copy(source+u" — Max\n"+paradigmeTabular)

In [464]:
# Same table for the "Common-Max" election, copied to the clipboard.
paradigmeTabular=makeTabular(couleurCaseStem)
for case in cases:
    paradigmeTabular=paradigmeTabular.replace(case,sampa2api(electInterMax[case]))
pyperclip.copy(source+u" — Common-Max\n"+paradigmeTabular)

# Tableaux des Votes SWIM

In [465]:
# (Re)load the input paradigms: one row per lexeme, one column per cell.
dfS0=pd.read_csv("/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-paradigmes.csv",sep=";",index_col=0,encoding="utf8")
dfS0.head()

Unnamed: 0,lexeme,ai1P,ai1S,ai2P,ai2S,ai3P,ai3S,fi1P,fi1S,fi2P,...,ppFP,ppFS,ppMP,ppMS,ps1P,ps1S,ps2P,ps2S,ps3P,ps3S
0,abaisser,,,,,,,,abEs6rE,,...,abEse,abEse,abEse,abEse,,,,,,
1,abandonner,,abâdOnE,,,abâdOnEr,abâdOna,abâdOn6rô,abâdOn6rE,abâdOn6re,...,abâdOne,abâdOne,abâdOne,abâdOne,,,,abâdOn,,abâdOn
2,abasourdir,,,,,,,,,,...,,abazurdi,abazurdi,abazurdi,,,,,,
3,abattre,,,,,abatir,abati,abatrô,abatrE,abatre,...,abaty,abaty,abaty,abaty,,abat,,abat,,abat
4,abdiquer,,,,,,,,,,...,,,,abdike,,,,,,


In [466]:
parS0=dfS0[dfS0.lexeme.str.contains(u"^%s$"%source)].T.dropna().drop("lexeme").to_dict().values()[0]
paradigmeTabular=makeTabular(couleurCaseStem)
for case in cases:
    if case in parS0:
        paradigmeTabular=paradigmeTabular.replace(case,ur"\textbf{%s}"%sampa2api(parS0[case]))
    else:
        paradigmeTabular=paradigmeTabular.replace(case,"?")
pyperclip.copy(source+u" — Input\n"+paradigmeTabular)

### Extraction des paradigmes

In [467]:
def texPar(df):
    '''
    Render the paradigm of the global lexeme `source` found in `df` as a LaTeX table.

    Relies on notebook globals: `source` (lexeme to look up), `cases` (case labels
    present in the template), `parS0` (input paradigm; its cases are set in bold),
    `makeTabular`, `couleurCaseStem` and `sampa2api`.

    df: DataFrame with a "lexeme" column and one column per case.
    Returns the template produced by makeTabular(couleurCaseStem) in which every
    case label is replaced by its form (through sampa2api), in bold when the case
    also belongs to parS0, or by "?" when `df` has no form for that case.
    Raises IndexError when no lexeme of `df` matches `source`.
    '''
    lignes = df[df.lexeme.str.contains(u"^%s$" % source)]
    if lignes.empty:
        # Same exception type as the former `.values()[0]` on an empty dict,
        # but with a message that names the missing lexeme.
        raise IndexError("no lexeme matching %r in the paradigm table" % (source,))
    # list(...) keeps the lookup valid when dict.values() is a view (Python 3).
    par = list(lignes.T.dropna().drop("lexeme").to_dict().values())[0]
    paradigmeTabular = makeTabular(couleurCaseStem)
    for case in cases:
        if case not in par:
            # Test `par` first: a case attested in parS0 may be missing from
            # this table, and indexing par[case] would then raise KeyError.
            cellule = "?"
        elif case in parS0:
            cellule = u"\\textbf{%s}" % sampa2api(par[case])
        else:
            cellule = sampa2api(par[case])
        paradigmeTabular = paradigmeTabular.replace(case, cellule)
    return paradigmeTabular

In [493]:
# Lexeme whose paradigm is looked up and rendered by every cell that references `source`.
source=u"clouer"

### Swim Syntaxique

In [483]:
# Swim1 paradigms: load the CSV and copy the titled LaTeX table for `source`
# to the clipboard.
dfS1 = pd.read_csv(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-paradigmes-Swim1.csv",
    sep=";",
    index_col=0,
    encoding="utf8",
)
pyperclip.copy(source + u" — Swim1\n" + texPar(dfS1))

In [484]:
# Swim1-StemSpace paradigms: load the CSV and copy the titled LaTeX table for
# `source` to the clipboard.
dfS1Stem = pd.read_csv(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-paradigmes-Swim1-StemSpace.csv",
    sep=";",
    index_col=0,
    encoding="utf8",
)
pyperclip.copy(source + u" — Swim1-StemSpace\n" + texPar(dfS1Stem))

In [485]:
# Swim2-StemSpace paradigms: load the CSV and copy the titled LaTeX table for
# `source` to the clipboard.
dfS2Stem = pd.read_csv(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-paradigmes-Swim2-StemSpace.csv",
    sep=";",
    index_col=0,
    encoding="utf8",
)
pyperclip.copy(source + u" — Swim2-StemSpace\n" + texPar(dfS2Stem))

### Swim Morphomique

In [486]:
# Morphomes-Swim1 paradigms: load the CSV and copy the titled LaTeX table for
# `source` to the clipboard.
dfM1 = pd.read_csv(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-Morphomes-Swim1.csv",
    sep=";",
    index_col=0,
    encoding="utf8",
)
pyperclip.copy(source + u" — Morphomes-Swim1\n" + texPar(dfM1))

In [496]:
# Morphomes-Swim2 paradigms: load the CSV and copy the titled LaTeX table for
# `source` to the clipboard.
dfM2 = pd.read_csv(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-Morphomes-Swim2.csv",
    sep=";",
    index_col=0,
    encoding="utf8",
)
# Render the frame loaded just above. The cell used to pass dfM2Stem, which is
# only defined by a later cell: NameError on a fresh kernel, and otherwise the
# StemSpace table copied under the "Morphomes-Swim2" title.
pyperclip.copy(source + u" — Morphomes-Swim2\n" + texPar(dfM2))

In [498]:
# Morphomes-Swim2-StemSpace paradigms: load the CSV and copy the titled LaTeX
# table for `source` to the clipboard.
dfM2Stem = pd.read_csv(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-Morphomes-Swim2-StemSpace.csv",
    sep=";",
    index_col=0,
    encoding="utf8",
)
pyperclip.copy(source + u" — Morphomes-Swim2-StemSpace\n" + texPar(dfM2Stem))

In [329]:
# Load the pickled morphome table, then map every case to the list of labels
# obtained by splitting its "morphome" string on "/".
dfMorphomes = pd.read_pickle(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-Morphomes.pkl"
)
dMorphomes = (
    dfMorphomes[["case", "morphome"]]
    .drop_duplicates()
    .set_index("case")
    .to_dict()["morphome"]
)
dMorphomes = dict(
    (cle, valeur.split("/")) for cle, valeur in dMorphomes.items()
)

### Morphomes

In [330]:
# Load the morphome-level paradigms. For every column that dMorphomes maps to
# more than one label, duplicate that column under each of the other labels,
# then copy the titled LaTeX table for `source` to the clipboard.
dfM0 = pd.read_csv(
    "/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/L4L/IMM21/IMM21-00-X-Morphomes.csv",
    sep=";",
    index_col=0,
    encoding="utf8",
)
# Iterate over a snapshot of the column names: the loop adds columns to dfM0.
for c in dfM0.columns.tolist():
    if c not in dMorphomes or len(dMorphomes[c]) <= 1:
        continue
    for col in dMorphomes[c]:
        if col == c:
            continue
        dfM0[col] = dfM0[c]
pyperclip.copy(source + u" — Morphomes\n" + texPar(dfM0))

### Population des cases

In [506]:
def texParPop(dPop):
    '''
    Render per-case values (e.g. population counts) as a LaTeX paradigm table.

    Relies on notebook globals: `cases` (case labels present in the template),
    `parS0` (input paradigm; its cases are set in bold), `makeTabular` and
    `couleurCaseStem`.

    dPop: dict mapping a case label to the value to display; values may be
          strings or anything "%s" can format (e.g. ints).
    Returns the template produced by makeTabular(couleurCaseStem) in which every
    case label is replaced by its value, in bold when the case also belongs to
    parS0, or by "?" when dPop has no entry for that case.
    '''
    paradigmeTabular = makeTabular(couleurCaseStem)
    for case in cases:
        if case not in dPop:
            # Test dPop first: a case attested in parS0 may be missing from
            # dPop, and indexing dPop[case] would then raise KeyError.
            cellule = "?"
        elif case in parS0:
            cellule = u"\\textbf{%s}" % (dPop[case],)
        else:
            # "%s" leaves strings unchanged and lets non-string values through,
            # whereas str.replace would reject them with a TypeError.
            cellule = "%s" % (dPop[case],)
        paradigmeTabular = paradigmeTabular.replace(case, cellule)
    return paradigmeTabular


In [507]:
# Count associated with each paradigm case, listed in decreasing order
# (presumably the number of attested forms per case -- TODO confirm).
dPop = {
    'inf': 3870,
    'ppMS': 3866,
    'pi3S': 3688,
    'ppFS': 2677,
    'pi3P': 2439,
    'pi1S': 2347,
    'ppMP': 2309,
    'pi2S': 2084,
    'pP': 2027,
    'pI2S': 2023,
    'pI2P': 1757,
    'fi3S': 1635,
    'ppFP': 1608,
    'ii3S': 1536,
    'pi2P': 1301,
    'fi1S': 1218,
    'ii1S': 1123,
    'pc3S': 1080,
    'ai3S': 1031,
    'pi1P': 974,
    'pI1P': 932,
    'fi3P': 896,
    'ii2P': 826,
    'pc1S': 782,
    'ii3P': 769,
    'fi2P': 719,
    'fi2S': 694,
    'ii2S': 686,
    'fi1P': 654,
    'pc3P': 444,
    'pc2S': 436,
    'ii1P': 426,
    'ps3S': 407,
    'ps2S': 373,
    'ai3P': 356,
    'pc2P': 329,
    'ps1S': 231,
    'ai1S': 197,
    'pc1P': 108,
    'ai2S': 57,
    'ai1P': 39,
    'ps2P': 29,
    'is3S': 28,
    'ps3P': 26,
    'ps1P': 11,
    'is1S': 10,
    'ai2P': 6,
    'is2P': 4,
    'is3P': 3,
    'is2S': 3,
    'is1P': 2,
}

In [508]:
# texParPop substitutes the values with str.replace, so convert the counts to
# strings before rendering, then print the resulting LaTeX table.
dPop = dict((cle, str(valeur)) for cle, valeur in dPop.items())
print(texParPop(dPop))

\begin{center}
\begin{tabular}{rcccccc}
\hline
Présent & \cellcolor{brown}2347 & \cellcolor{brown}2084 & \cellcolor{brown}\textbf{3688} & \cellcolor{orange}974 & \cellcolor{orange}1301 & \cellcolor{brown!75}2439\\
Imparfait & \cellcolor{orange}1123 & \cellcolor{orange}686 & \cellcolor{orange}1536 & \cellcolor{orange}426 & \cellcolor{orange}826 & \cellcolor{orange}769\\
Futur & \cellcolor{lime}1218 & \cellcolor{lime}694 & \cellcolor{lime}1635 & \cellcolor{lime}654 & \cellcolor{lime}719 & \cellcolor{lime}896\\
Conditionnel & \cellcolor{lime}782 & \cellcolor{lime}436 & \cellcolor{lime}1080 & \cellcolor{lime}108 & \cellcolor{lime}329 & \cellcolor{lime}444\\
Subj. prés. & \cellcolor{teal!50}231 & \cellcolor{teal!50}373 & \cellcolor{teal!50}407 & \cellcolor{blue!50}11 & \cellcolor{blue!50}29 & \cellcolor{teal!50}26\\
Passé & \cellcolor{pink}197 & \cellcolor{pink}57 & \cellcolor{pink}1031 & \cellcolor{pink}39 & \cellcolor{pink}6 & \cellcolor{pink}356\\
Subj. imparf. & \cellcolor{pink}10 & \ce