# Trouver les CF sous-jacentes dans les règles
Pour trouver les CF sous-jacentes dans les règles, on commence par calculer les contextes de transformation réciproque pour chaque paire de cases, c'est-à-dire pour chaque famille de règles de transformation.

## Importations
- codecs pour les encodages
- pandas et numpy pour les calculs sur tableaux
- matplotlib pour les graphiques
- itertools pour les itérateurs sophistiqués (paires sur liste, ...)

In [119]:
# -*- coding: utf8 -*-
import codecs,glob
import features
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import itertools as it
import networkx as nx
import pickle,yaml
#%pylab inline
#pd.options.display.mpl_style = 'default'
debug=False

### Préparation des matrices de traits

In [120]:
# Hand the 'bdlexique.ini' configuration to the project-local `features` module
# (presumably the phoneme feature definitions - confirm in features.py).
features.add_config('bdlexique.ini')
# Feature system for the 'phonemes' entry; `fs` is not referenced again in the visible cells.
fs=features.FeatureSystem('phonemes')

In [121]:
# Data directory. The second assignment overrides the first one, so only the
# /Volumes path is actually used.
rep="/Users/gilles/ownCloud/Recherche/Boye/HDR/Data/Longitudinales/"
rep="/Volumes/gilles/Transfert/Copies-iMac-GB/2015-Data/Longitudinales/"
# Every file of `rep` whose name ends in "X-Regles.pkl".
fichiers=glob.glob(rep+"*X-Regles.pkl")
# Keep the file name only and cut it before "Regles",
# e.g. 'Longitudinal-00-T10000-F3663-X-'.
samples=[f.rsplit("/",1)[-1].split("Regles")[0] for f in fichiers]
# Index the prefixes by the integer found in their second "-"-separated field.
samples={int(s.split("-")[1]):s for s in samples}
samples

{0: 'Longitudinal-00-T10000-F3663-X-',
 1: 'Longitudinal-01-T20000-F5816-X-',
 2: 'Longitudinal-02-T30000-F7497-X-',
 3: 'Longitudinal-03-T40000-F8900-X-',
 4: 'Longitudinal-04-T50000-F10082-X-',
 5: 'Longitudinal-05-T60000-F11252-X-',
 6: 'Longitudinal-06-T70000-F12207-X-',
 7: 'Longitudinal-07-T80000-F13155-X-',
 8: 'Longitudinal-08-T90000-F14038-X-',
 9: 'Longitudinal-09-T100000-F14858-X-',
 10: 'Longitudinal-10-T110000-F15616-X-',
 11: 'Longitudinal-11-T120000-F16280-X-',
 12: 'Longitudinal-12-T130000-F16948-X-',
 13: 'Longitudinal-13-T140000-F17587-X-',
 14: 'Longitudinal-14-T150000-F18240-X-',
 15: 'Longitudinal-15-T160000-F18824-X-',
 16: 'Longitudinal-16-T170000-F19369-X-',
 17: 'Longitudinal-17-T180000-F19921-X-',
 18: 'Longitudinal-18-T190000-F20415-X-',
 19: 'Longitudinal-19-T200000-F20877-X-',
 20: 'Longitudinal-20-T210000-F21393-X-',
 21: 'Longitudinal-21-T220000-F21883-X-',
 22: 'Longitudinal-22-T230000-F22343-X-',
 23: 'Longitudinal-23-T240000-F22786-X-',
 24: 'Longitudi

In [122]:
# Two hard-coded choices, both overridden by the lookup that follows.
sample="Longitudinal-55-T1100000-F41134-X-"
sample="Longitudinal-69-T2500000-F51641-X-"
# Sample actually used: entry 129 of `samples` (KeyError when no file with that number was found).
sample=samples[129]
# File-name endings appended to rep+sample when the rule pickles are opened.
fRulesPMS="Regles.pkl"
fRulesPMO="Morphomes-Regles.pkl"

# Classe pour la gestion des patrons, des classes et des transformations

In [123]:
class paireClasses:
    def __init__(self,case1,case2):
        self.case1=case1
        self.case2=case2
        self.nom=case1+"-"+case2
        self.classes1=classesPaire(case1,case2)
        self.classes2=classesPaire(case2,case1)

    def ajouterPatron(self,n,patron,motif):
        if n==1:
            self.classes1.ajouterPatron(patron,motif)
        elif n==2:
            self.classes2.ajouterPatron(patron,motif)
        else:
            print "le numéro de forme n'est pas dans [1,2]",n

    def ajouterPaire(self,forme1,forme2):
        self.classes1.ajouterPaire(forme1,forme2)
        self.classes2.ajouterPaire(forme2,forme1)
        
    def calculerClasses(self):
        return(self.classes1,self.classes2)

    
class classesPaire:
    '''
    Patterns, classes and transformations for one direction case1 -> case2.

    ajouterPatron : register a pattern and its associated motif (MGL)
    ajouterPaire : add a pair of forms, work out the class of forme1 and the rule that was selected
    sortirForme : compute the output forms for a form, each with its coefficient

    Attributes
    patrons : pattern -> motif, the regex a form must re.match for the pattern to apply
    entree, sortie : pattern -> search regex / replacement handed to re.sub
    classe : class id -> {selected pattern: count}; the key "" counts the pairs
             that no applicable pattern reproduces
    nbClasse : class id -> number of pairs recorded in that class
    classeCF, nbClasseCF : tables with the same layout, read by sortirForme(contextFree=True);
             no method of this class fills them
    A class id is the ", "-joined list of the patterns that apply to a form.
    '''
    def __init__(self,case1,case2):
        self.case1=case1
        self.case2=case2
        self.nom=case1+"-"+case2
        self.classe={}
        self.nbClasse={}
        # start empty so that the context-free lookup works before anything fills them
        self.classeCF={}
        self.nbClasseCF={}
        self.patrons={}
        self.entree={}
        self.sortie={}

    def ajouterPatron(self,patron,motif):
        '''
        Register a pattern written "entree-sortie" together with its motif.

        NOTE(review): remplacementSortie is not defined in this class, it must
        exist at module level when this method is called.
        '''
        self.patrons[patron]=motif
        (entree,sortie)=patron.split("-")
        # each "." of the input side becomes a capture group
        self.entree[patron]=entree.replace(u".",u"(.)")
        self.sortie[patron]=remplacementSortie(sortie)

    def _appliquer(self,patron,forme):
        '''Apply the pattern to the end of forme and return the resulting form.'''
        # the trailing "$" forces right alignment, for suffixal transformations
        return re.sub(self.entree[patron]+"$",self.sortie[patron],forme)

    def ajouterPaire(self,forme1,forme2):
        '''
        Work out the class of forme1 (every pattern whose motif matches it) and
        the selected rule (a pattern of that class turning forme1 into forme2),
        then increment the class counter and, inside the class, the rule counter.

        When several patterns give forme2 the last one met is kept; when none
        does, the pair is counted under the rule "".
        '''
        classeForme=[]
        regleForme=""
        for patron in self.patrons:
            if re.match(self.patrons[patron],forme1):
                classeForme.append(patron)
                if forme2==self._appliquer(patron,forme1):
                    regleForme=patron
        idClasseForme=", ".join(classeForme)
        if idClasseForme not in self.classe:
            self.classe[idClasseForme]={}
            self.nbClasse[idClasseForme]=0
        regles=self.classe[idClasseForme]
        regles[regleForme]=regles.get(regleForme,0)+1
        self.nbClasse[idClasseForme]+=1

    def sortirForme(self,forme,contextFree=False):
        '''
        Return {output form: coefficient} for forme.

        The class of forme is looked up in classe/nbClasse, or in
        classeCF/nbClasseCF when contextFree is true (the filter is then
        ".*" + input side of the pattern + "$" instead of the motif).
        Known class: each recorded rule gives its output with count/total.
        Unknown class: each applicable pattern gives its output with 1/len(class).
        No applicable pattern: empty dict.
        Two patterns giving the same output overwrite each other, they are not summed.
        '''
        classeForme=[]
        sortieForme={}
        for patron in self.patrons:
            if contextFree:
                filterF1=".*"+patron.split("-")[0]+"$"
            else:
                filterF1=self.patrons[patron]
            if re.match(filterF1,forme):
                classeForme.append(patron)
        if not classeForme:
            return sortieForme
        idClasseForme=", ".join(classeForme)
        if contextFree:
            # getattr: instances restored from an older pickle have no CF tables
            nbClasse=getattr(self,"nbClasseCF",{})
            classe=getattr(self,"classeCF",{})
        else:
            nbClasse=self.nbClasse
            classe=self.classe
        if idClasseForme in nbClasse:
            nTotal=nbClasse[idClasseForme]
            for patron in classe[idClasseForme]:
                # the "" entry (pairs explained by no rule) has no transformation:
                # it stays in the total but produces no output form
                if patron not in self.entree:
                    continue
                sortie=self._appliquer(patron,forme)
                sortieForme[sortie]=float(classe[idClasseForme][patron])/nTotal
        else:
            nTotal=len(classeForme)
            for patron in classeForme:
                sortie=self._appliquer(patron,forme)
                sortieForme[sortie]=float(1)/nTotal
        return sortieForme
        

## Ouvrir les fichiers de règles

In [124]:
# The rule files are pickles: loading one can run arbitrary code, so only open
# files produced by a trusted run of the rule-extraction step.
# rulesPMS is indexed below by (case, case) pairs and each value exposes .patrons.
# The file handle is no longer called `input`, which hid the builtin of that name.
with open(rep+sample+fRulesPMS, 'rb') as fichierRegles:
    rulesPMS = pickle.load(fichierRegles)
with open(rep+sample+fRulesPMO, 'rb') as fichierRegles:
    rulesPMO = pickle.load(fichierRegles)


## Conversion Positions <=> Regex

In [125]:
def getRegexPositions(positions):
    '''
    Build a regex string from a list of positions.

    A position shorter than two characters is copied as it is; a longer one
    becomes a character class "[...]" with its characters in sorted order.
    '''
    morceaux=[]
    for position in positions:
        if len(position)<2:
            morceaux.append(position)
        else:
            morceaux.append("[%s]"%"".join(sorted(position)))
    return "".join(morceaux)

In [126]:
def getPositionsRegex(regex):
    '''
    Split a regex into its list of positions (inverse of getRegexPositions).

    ".*" becomes the single position "X", parentheses are dropped, a leading
    "^" and a trailing "$" are removed, a character class "[abc]" becomes the
    position "abc" and every other character is a position of its own.
    Returns [] for an empty pattern, including one made only of parentheses.
    '''
    if regex=="": return []
    # plain raw strings: the ur"" prefix does not exist in Python 3 and both
    # patterns are pure ASCII
    regex=re.sub(r"[()]","",regex.replace(".*","X"))
    chunks=[c for c in re.split(r"(\[[^\]]+\])",regex) if c!=""]
    # nothing left once the parentheses are gone (e.g. "()")
    if not chunks: return []
    if chunks[0].startswith("^"): chunks[0]=chunks[0][1:]
    if chunks[-1].endswith("$"): chunks[-1]=chunks[-1][:-1]
    result=[]
    for chunk in chunks:
        if chunk.startswith("["):
            # character class: one position holding all its characters
            result.append(chunk.replace("[","").replace("]",""))
        else:
            # literal run: one position per character
            result.extend(chunk)
    return result

## Calcul des intersections

In [127]:
def getIntersectionPos(l1,l2):
    '''
    Return the set of characters allowed by both positions.

    "X" and "." stand for anything: when one side is such a joker the result
    is the set of characters of the other side (l1 is tested first).
    Otherwise it is the intersection of the two character sets.
    '''
    jokers=("X",".")
    if l1 in jokers:
        return set(l2)
    if l2 in jokers:
        return set(l1)
    return set(l1)&set(l2)

def getIntersectionRegex(gP1,gP2,debug=False):
    '''
    Intersect two lists of positions, aligned on their right end.

    The last positions are compared first (getIntersectionPos), then the ones
    before, over the length of the shorter list. The leading positions that
    only the longer list has are kept unchanged. Neither argument is modified.

    Returns the list of resulting positions, each one a string with its
    characters in sorted order so that the result is the same from one run to
    the next, or [] as soon as one aligned pair has no character in common or
    one resulting position is empty.

    debug is not used; it is kept so that existing calls stay valid.
    '''
    # work on reversed copies: index 0 is the last position of each list
    pMin,pMax=sorted([gP1[::-1],gP2[::-1]],key=len)
    temp=[]
    for courte,longue in zip(pMin,pMax):
        commun=getIntersectionPos(courte,longue)
        if not commun:
            return []
        temp.append(commun)
    # what is left of the longer list is taken over as it is
    temp.extend(set(position) for position in pMax[len(pMin):])
    result=["".join(sorted(c)) for c in reversed(temp)]
    if "" in result:
        return []
    return result

## Calcul des transformations

In [128]:
def transformeExp(gPositions,patron):
    '''
    Apply the transformation patron ("entree-sortie") to a list of positions.

    Both sides of patron are cut at their "." and read from the right, chunk
    by chunk, together with the positions:
    - a "." copies the current position to the result;
    - a literal chunk of the input side skips as many positions as it has
      characters (they are not compared with the chunk) and puts the
      characters of the output chunk of same rank in their place;
    - an empty input chunk only inserts the output chunk of same rank.
    The positions not reached by the pattern are copied unchanged and empty
    positions are dropped from the result. gPositions is not modified.

    Raises ValueError when patron does not contain exactly one "-", and
    IndexError when the output side has fewer chunks than the input side or
    when a "." has no position left to copy.
    '''
    positions=gPositions[::-1]
    e0,s0=patron.split("-")
    # plain raw strings: the ur"" prefix does not exist in Python 3
    e1=re.split(r"(\.)",e0)[::-1]
    s1=re.split(r"(\.)",s0)[::-1]
    result=[]
    lPosition=0
    for nChunk,chunk in enumerate(e1):
        if chunk==".":
            result.append(positions[lPosition])
            lPosition+=1
        else:
            # everything is built right to left, hence the reversed output chunk
            result.extend(s1[nChunk][::-1])
            lPosition+=len(chunk)
    result.extend(positions[lPosition:])
    result.reverse()
    return [r for r in result if r!=""]
        

## Calcul des contraintes

In [129]:
def nouvellesContraintes(contraintes,paire,addEdge=False,gPatrons=[]):
    '''
    Send a list of contexts through every rule of paire.

    contraintes : list of contexts, each one a list of positions
    paire : (source case, target case); selects the rules in rulesPMS[paire].patrons,
            or in gPatrons[paire] when gPatrons is given
    addEdge : when true, every successful transformation also adds the edge
              "source:regex" -> "target:regex" to the global graph reseauTrans,
              with the pattern stored under key
    gPatrons : optional mapping paire -> {pattern: motif}; only read, never modified

    Each context is intersected with the context of each rule; when they are
    compatible the intersection is transformed by the rule and collected.
    Returns the collected contexts, or contraintes itself when no rule applied.
    Reads the globals rulesPMS, reseauTrans and debug.
    '''
    if not gPatrons:
        patrons=rulesPMS[paire].patrons
    else:
        patrons=gPatrons[paire]
    # the positions of a rule do not depend on the context: compute them once
    reglesPositions=[(p,getPositionsRegex(patrons[p])) for p in patrons]
    result1=[]
    for r in contraintes:
        for p,regexRegle in reglesPositions:
            regex=getIntersectionRegex(r,regexRegle)
            if not regex:
                continue
            trans=transformeExp(regex,p)
            if not trans:
                continue
            result1.append(trans)
            if addEdge:
                pointA=paire[0]+":"+getRegexPositions(regex)
                pointB=paire[1]+":"+getRegexPositions(trans)
                if debug: print(pointA+"-"+pointB)
                reseauTrans.add_edge(pointA,pointB,key=p)
            if debug:
                # single formatted argument: same output with the print
                # statement of Python 2 and the print function of Python 3
                print("TRANS %s %s %s"%(r,p,regex))
                print("=> %s"%(trans,))
    if result1:
        return result1
    return contraintes
 
def getContraintes(A,B,gPatrons=[]):
    '''
    Round trip A -> B -> A: the unconstrained context [[u"X"]] goes through the
    rules of (A,B), and what comes out goes through the rules of (B,A).
    gPatrons is passed on unchanged to nouvellesContraintes.
    '''
    return nouvellesContraintes(
        nouvellesContraintes([[u"X"]],(A,B),gPatrons=gPatrons),
        (B,A),
        gPatrons=gPatrons)

def getListesContraintes(dictA,A,nodes,gPatrons=[]):
    '''
    Return a copy of dictA in which every node of nodes is mapped to
    getContraintes(A,node); dictA itself is left untouched.
    '''
    lDictA=dict(dictA)
    lDictA.update((node,getContraintes(A,node,gPatrons=gPatrons)) for node in nodes)
    return lDictA

def mergeContraintesAB(contraintesA,contraintesB):
    '''
    Intersect every context of contraintesA with every context of contraintesB.

    The non-empty intersections go through their regex string
    (getRegexPositions) to remove duplicates, and are returned as lists of
    positions again (getPositionsRegex).
    '''
    regexCommunes={
        getRegexPositions(commun)
        for commun in (getIntersectionRegex(a,b) for a in contraintesA for b in contraintesB)
        if commun
    }
    return [getPositionsRegex(regex) for regex in regexCommunes]

def mergeContraintes(dictA):
    '''
    Merge all the lists of contexts of dictA into one.

    The empty lists are ignored, whatever their place in the dictionary, and
    the others are combined one after the other with mergeContraintesAB.
    Returns [] when dictA is empty or holds only empty lists, and the list
    itself when a single one is not empty.
    '''
    # values() rather than keys()[0]: works on Python 2 and 3, and an empty
    # first entry no longer wipes out the whole result
    nonVides=[contraintes for contraintes in dictA.values() if contraintes]
    if not nonVides:
        return []
    contraintes=nonVides[0]
    for suivantes in nonVides[1:]:
        contraintes=mergeContraintesAB(contraintes,suivantes)
    return contraintes

## Patrons manuels

## Définition des cases à prendre en compte

In [130]:
# One entry per distinct first element of the keys of rulesPMS (presumably
# (case, case) pairs, as nouvellesContraintes indexes it); order is that of the set.
paradigmeCases=list(set([p[0] for p in rulesPMS.keys()]))
# Disabled alternative: drop the cases whose name contains "1", "2", "ai" or "is".
#paradigmeCases=[c for c in paradigmeCases if not "1" in c and not "2" in c and not "ai" in c and not "is" in c]
selectionCases=paradigmeCases
# Disabled alternative: testCells is not defined in the visible cells.
#selectionCases=testCells
selectionCases

[u'ii1P',
 u'pP',
 u'is1S',
 u'ii1S',
 u'ppMS',
 u'ppMP',
 u'is3P',
 u'is3S',
 u'ai1P',
 u'ii3S',
 u'ps3S',
 u'inf',
 u'ii3P',
 u'pi2S',
 u'ps3P',
 u'pi2P',
 u'ppFS',
 u'ppFP',
 u'is2S',
 u'is2P',
 u'ai1S',
 u'ps1S',
 u'pI2S',
 u'fi2P',
 u'fi2S',
 u'pI2P',
 u'ps2P',
 u'ps2S',
 u'ps1P',
 u'ai2P',
 u'ai2S',
 u'pc2P',
 u'pc2S',
 u'ii2P',
 u'pi1S',
 u'pi1P',
 u'ii2S',
 u'pi3P',
 u'pi3S',
 u'ai3S',
 u'pc3S',
 u'pc3P',
 u'pc1P',
 u'is1P',
 u'fi3S',
 u'fi3P',
 u'ai3P',
 u'fi1P',
 u'fi1S',
 u'pc1S',
 u'pI1P']

## Calcul des contraintes sur le réseau

## Trouver les relations symétriques

# Contraintes en étoile

In [131]:
# contraintesCase[a][b]: contexts obtained in case a when the unconstrained
# context [[u"X"]] goes through the rules of the pair (b,a).
contraintesCase={c:{} for c in selectionCases}
# Filled in by the next cell, one merged list of contexts per case.
contraintesConsolidees={}
for a in contraintesCase:
    # every other case b
    for b in [c for c in contraintesCase if c!=a]:
        contraintesTemp=nouvellesContraintes([[u"X"]],(b,a))
        contraintesCase[a][b]=contraintesTemp
print contraintesCase["ii1P"]

{u'pP': [[u's', u'a', u'v', u'j', u'\xf4'], [u'X', u'\xea269\xf4EHJOS\xe2Zabedgfikjmlonpsrutwvy\xfbz', u'bZdgfkSpstvz', u'rl', u'i', u'j', u'\xf4'], [u'X', u'uoO', u'vf', u'r', u'i', u'\xf4'], [u'X', u'zfSlsrvZ', u'i', u'j', u'\xf4'], [u'X', u'\xea269\xf4EHJOS\xe2Zabedgfikjmlonpsrutwvy\xfbz', u'\xea269\xf4EHJOSZbedgfikjmlonpsrutwvy\xfbz', u'j', u'\xf4'], [u'X', u'\xea69\xf4EHJOS\xe2Zabdgfkjmlnpsrtwv\xfbz', u'2aEiHjO9ruow6ye', u'Jj', u'\xf4'], [u'a', u'v', u'j', u'\xf4']], u'is1S': [[u'k', u'r', u'w', u'a', u'j', u'\xf4'], [u'p', u'l', u'E', u'z', u'j', u'\xf4'], [u'n', u'E', u's', u'j', u'\xf4'], [u'X', u'bdfpstvz', u'6', u'n', u'j', u'\xf4'], [u'X', u'p', u'r', u'6', u'n', u'j', u'\xf4'], [u'b', u'y', u'v', u'j', u'\xf4'], [u'f', u'H', u'i', u'j', u'\xf4'], [u'd', u'6', u'v', u'j', u'\xf4'], [u'X', u'abEdf6Opsrtv9z', u'a\xeaeijr6E\xe2', u'bdfmnpstvz', u'j', u'\xf4'], [u'r', u'6', u'Z', u'w', u'a', u'J', u'\xf4'], [u'b', u'a', u't', u'i', u's', u'j', u'\xf4'], [u's', u'i', u't', u'y', 

In [132]:
# First consolidation: merge, for each case, the contexts coming from all the other cases.
for c in contraintesCase:
    contraintesConsolidees[c]=mergeContraintes(contraintesCase[c])

# Second pass: start again from the consolidated contexts of b instead of [[u"X"]].
# contraintesConsolidees is only read here, so every pair sees the result of the first pass.
for a in contraintesCase:
    for b in [c for c in contraintesCase if c!=a]:
        contraintesTemp=nouvellesContraintes(contraintesConsolidees[b],(b,a))
        contraintesCase[a][b]=contraintesTemp

# Second consolidation, on the contexts of the second pass.
for c in contraintesCase:
    contraintesConsolidees[c]=mergeContraintes(contraintesCase[c])

In [133]:
# Directed graph of the transformations. nouvellesContraintes(..., addEdge=True)
# adds its edges to this global; nodes are named "case:regex".
reseauTrans=nx.DiGraph()
for k in contraintesCase:    
    for elt in [c for c in contraintesCase if c!=k]:
        print k,elt,contraintesConsolidees[k]
        # called for its side effect on reseauTrans only, the returned contexts are dropped
        nouvellesContraintes(contraintesConsolidees[k],(k,elt),addEdge=True)

ii1P pP []
ii1P pc1P []
ii1P ii1S []
ii1P ppMS []
ii1P ppMP []
ii1P is3P []
ii1P is3S []
ii1P ai1P []
ii1P ii3S []
ii1P ps3S []
ii1P ai1S []
ii1P ii3P []
ii1P pi2S []
ii1P inf []
ii1P ps1S []
ii1P ppFS []
ii1P ppFP []
ii1P is2S []
ii1P is2P []
ii1P ps3P []
ii1P pi2P []
ii1P pI2S []
ii1P fi2P []
ii1P fi2S []
ii1P pI2P []
ii1P ps2P []
ii1P ps2S []
ii1P ps1P []
ii1P ai2P []
ii1P ai2S []
ii1P pc2P []
ii1P pc2S []
ii1P ii2P []
ii1P pi1S []
ii1P pi1P []
ii1P ii2S []
ii1P pi3P []
ii1P pi3S []
ii1P ai3S []
ii1P pc3S []
ii1P pc3P []
ii1P is1S []
ii1P is1P []
ii1P fi3S []
ii1P fi3P []
ii1P ai3P []
ii1P fi1P []
ii1P fi1S []
ii1P pc1S []
ii1P pI1P []
pP ii1P []
pP pc1P []
pP ii1S []
pP ppMS []
pP ppMP []
pP is3P []
pP is3S []
pP ai1P []
pP ii3S []
pP ps3S []
pP ai1S []
pP ii3P []
pP pi2S []
pP inf []
pP ps1S []
pP ppFS []
pP ppFP []
pP is2S []
pP is2P []
pP ps3P []
pP pi2P []
pP pI2S []
pP fi2P []
pP fi2S []
pP pI2P []
pP ps2P []
pP ps2S []
pP ps1P []
pP ai2P []
pP ai2S []
pP pc2P []
pP pc2S []
pP

pI2P pI2S []
pI2P fi2P []
pI2P fi2S []
pI2P ps2P []
pI2P ps2S []
pI2P ps1P []
pI2P ai2P []
pI2P ai2S []
pI2P pc2P []
pI2P pc2S []
pI2P ii2P []
pI2P pi1S []
pI2P pi1P []
pI2P ii2S []
pI2P pi3P []
pI2P pi3S []
pI2P ai3S []
pI2P pc3S []
pI2P pc3P []
pI2P is1S []
pI2P is1P []
pI2P fi3S []
pI2P fi3P []
pI2P ai3P []
pI2P fi1P []
pI2P fi1S []
pI2P pc1S []
pI2P pI1P []
ps2P ii1P []
ps2P pP []
ps2P pc1P []
ps2P ii1S []
ps2P ppMS []
ps2P ppMP []
ps2P is3P []
ps2P is3S []
ps2P ai1P []
ps2P ii3S []
ps2P ps3S []
ps2P ai1S []
ps2P ii3P []
ps2P pi2S []
ps2P inf []
ps2P ps1S []
ps2P ppFS []
ps2P ppFP []
ps2P is2S []
ps2P is2P []
ps2P ps3P []
ps2P pi2P []
ps2P pI2S []
ps2P fi2P []
ps2P fi2S []
ps2P pI2P []
ps2P ps2S []
ps2P ps1P []
ps2P ai2P []
ps2P ai2S []
ps2P pc2P []
ps2P pc2S []
ps2P ii2P []
ps2P pi1S []
ps2P pi1P []
ps2P ii2S []
ps2P pi3P []
ps2P pi3S []
ps2P ai3S []
ps2P pc3S []
ps2P pc3P []
ps2P is1S []
ps2P is1P []
ps2P fi3S []
ps2P fi3P []
ps2P ai3P []
ps2P fi1P []
ps2P fi1S []
ps2P pc1S []
ps

is1S ai1P []
is1S ii3S []
is1S ps3S []
is1S ai1S []
is1S ii3P []
is1S pi2S []
is1S inf []
is1S ps1S []
is1S ppFS []
is1S ppFP []
is1S is2S []
is1S is2P []
is1S ps3P []
is1S pi2P []
is1S pI2S []
is1S fi2P []
is1S fi2S []
is1S pI2P []
is1S ps2P []
is1S ps2S []
is1S ps1P []
is1S ai2P []
is1S ai2S []
is1S pc2P []
is1S pc2S []
is1S ii2P []
is1S pi1S []
is1S pi1P []
is1S ii2S []
is1S pi3P []
is1S pi3S []
is1S ai3S []
is1S pc3S []
is1S pc3P []
is1S is1P []
is1S fi3S []
is1S fi3P []
is1S ai3P []
is1S fi1P []
is1S fi1S []
is1S pc1S []
is1S pI1P []
is1P ii1P []
is1P pP []
is1P pc1P []
is1P ii1S []
is1P ppMS []
is1P ppMP []
is1P is3P []
is1P is3S []
is1P ai1P []
is1P ii3S []
is1P ps3S []
is1P ai1S []
is1P ii3P []
is1P pi2S []
is1P inf []
is1P ps1S []
is1P ppFS []
is1P ppFP []
is1P is2S []
is1P is2P []
is1P ps3P []
is1P pi2P []
is1P pI2S []
is1P fi2P []
is1P fi2S []
is1P pI2P []
is1P ps2P []
is1P ps2S []
is1P ps1P []
is1P ai2P []
is1P ai2S []
is1P pc2P []
is1P pc2S []
is1P ii2P []
is1P pi1S []
is1

In [134]:
reseauSimTrans=nx.Graph()
for edge in reseauTrans.edges():
    a,b=sorted(edge)
    if reseauTrans.has_edge(a,b) and reseauTrans.has_edge(b,a) and not reseauSimTrans.has_edge(a,b):
        reseauSimTrans.add_edge(a,b)
cliques=list(nx.find_cliques(reseauSimTrans))
cliquesCompletes=0
print len(cliques)
print len(selectionCases)
for c in cliques:
    if len(c)==len(selectionCases) or len(c)>len(selectionCases)-1:
        cliquesCompletes+=1
        for e in sorted(c):
            if e.startswith("inf") and e.endswith("]e"):
#                print sorted(c)
                print e
#        print sorted(c)
print cliquesCompletes

0
51
0
