# Modèle de diffusion : INDEPENDENT CASCADES

In [1]:
#IMPORT
import numpy as np

### Lecture des donnees

In [2]:
file_train = open("cascades_train.txt")
file_test = open("cascades_test.txt")


data_train=[]
for i in file_train.readlines():
    tab = [[float(j.split(":")[0]),float(j.split(":")[1])] for j in i.split(";")[:-1]]
    tab.sort(key=lambda x: x[1])
    data_train.append(tab)

    
data_test=[]
for i in file_test.readlines():
    tab = [[float(j.split(":")[0]),float(j.split(":")[1])] for j in i.split(";")[:-1]]
    tab.sort(key=lambda x: x[1])
    data_test.append(tab)

### Creation des liste de successeurs

In [3]:
def getSuccsOfTab(tab):
    succ=[]
    for i in tab:
        succ_i=[]
        for j in tab:
            if(i[1]<j[1]):
                succ_i.append(j[0])
        succ.append([i[0],succ_i])
    return succ

def unique(liste):
    seen=set()
    seen_add =seen.add
    return [x for x in liste if not(x in seen or seen_add(x))]

In [4]:
succs_train = []
for line in data_train:
    succs_train.append(getSuccsOfTab(line))

succs_test = []
for line in data_test:
    succs_test.append(getSuccsOfTab(line))

In [5]:
succs_train

[[[56.0,
   [42.0,
    41.0,
    93.0,
    89.0,
    8.0,
    3.0,
    43.0,
    73.0,
    84.0,
    61.0,
    98.0,
    86.0,
    48.0,
    47.0,
    96.0,
    50.0,
    55.0,
    20.0,
    12.0,
    5.0,
    52.0,
    54.0,
    34.0,
    4.0,
    82.0]],
  [26.0,
   [42.0,
    41.0,
    93.0,
    89.0,
    8.0,
    3.0,
    43.0,
    73.0,
    84.0,
    61.0,
    98.0,
    86.0,
    48.0,
    47.0,
    96.0,
    50.0,
    55.0,
    20.0,
    12.0,
    5.0,
    52.0,
    54.0,
    34.0,
    4.0,
    82.0]],
  [42.0,
   [89.0,
    8.0,
    3.0,
    43.0,
    73.0,
    84.0,
    61.0,
    98.0,
    86.0,
    48.0,
    47.0,
    96.0,
    50.0,
    55.0,
    20.0,
    12.0,
    5.0,
    52.0,
    54.0,
    34.0,
    4.0,
    82.0]],
  [41.0,
   [89.0,
    8.0,
    3.0,
    43.0,
    73.0,
    84.0,
    61.0,
    98.0,
    86.0,
    48.0,
    47.0,
    96.0,
    50.0,
    55.0,
    20.0,
    12.0,
    5.0,
    52.0,
    54.0,
    34.0,
    4.0,
    82.0]],
  [93.0,
   [89.0,
    8.0,
    

## Création des graphes

In [6]:
#optimisation au niveau du temps possible
def getGraph(succs):
    graph={}
    for h in succs:
        for i in h:
            try:
                for j in i[1]:
                    graph[i[0]].append(j)
                graph[i[0]]=unique(graph[i[0]])
                graph[i[0]].sort()
            except KeyError:
                graph[i[0]]=i[1]
    return graph

In [7]:
graph_train = getGraph(succs_train)
graph_test = getGraph(succs_test)

## Apprentissage

#### Fonction pour avoir la probabilité, P chapeau

In [8]:
#liste correspond a un D
def getListPrec(liste):
    prec=[]
    for i in liste:
        if (i[1]>1):
            p=[]
            for j in liste:
                if (i[1]>j[1]):
                    p.append(j[0])
            prec.append((i[0],p))
    return prec

In [9]:
#listePrec obtenue avec getListPrec dun D
def getProbaOfList(listePrec,graph_weight_d):
    a={}
    for i in listePrec:
        prod=1
        for pre in i[1]:
            prod= prod *(1 - graph_weight_d[pre][i[0]])
        a[i[0]]=1-prod
    return a

In [10]:
#fonction permettant de savoir si il existe une infection de l'element u sur v, lors d'un episode D
#si oui retourne les indices
def existLinkUV(d,u,v):
    listeSucc = getSuccsOfTab(d)
    listeU = [i[0] for i in listeSucc]
    if u in listeU:
        indiceU = np.where(np.array(listeU)==u)[0][0]
        listeV = listeSucc[indiceU][1]
        if v in listeV:
            return True
    return False

In [12]:
uv = existLinkUV(getSuccsOfTab(data_train[0]),56,42)
print(uv)

True


### Poids du graphe assignés en random

In [13]:
#creation des poids du graph dico
def getGraphWeightRandom(graph):
    graph_weight_d={}
    for i in graph:
        dico={}
        for e in graph[i]:
            rando=np.random.rand()
            dico[e]=rando
        graph_weight_d[i]=dico
    return graph_weight_d

In [14]:
gInit_train = getGraphWeightRandom(graph_train)

#### Fonction d'apprentissage

In [34]:
def fitModele(data_train,nbIt=1,eps=1e-1):
    succs_train = []
    for line in data_train:
        succs_train.append(getSuccsOfTab(line))
    graph_train = getGraph(succs_train)   
        
    #init    
    graph_cur = getGraphWeightRandom(graph_train)
    
    #it=0
    #ep=100000
    #while((it<nbIt) & (ep>eps)):
        #probas_chapiteau=[getProbaOfList(getListPrec(d),graph_cur) for d in data_train]
        
    # met a jour les poids

    for m,u in enumerate(graph_cur):
        if((m % 10)==0):
            print(u)
        for v in graph_cur[u]:
            #new_graph=copy.deepcopy(graph_cur)
            s=0
            nbUV=0
            nbnotUV=0
            for idx,d in enumerate(data_train[:3]):
                probas_chapiteau = getProbaOfList(getListPrec(d),graph_cur)
                if(existLinkUV(d,u,v)):
                    nbUV+=1
                    s+=graph_cur[u][v]*1.0/probas_chapiteau[v]
                else:
                    nbnotUV+=1
            graph_cur[u][v]= s*1.0/(nbUV+nbnotUV)
                
        #diff=0
        #for u in graph_cur:
        #    for v in graph_cur[u]:
        #        diff+=abs(new_graph[u][v]-graph_cur[u][v])
        #print ("difference :   "+ str(diff))
        #graph_cur=copy.deepcopy(new_graph)
        #it+=1
    return graph_cur

In [35]:
pc = fitModele(data_train)


0.0
10.0
20.0
30.0
40.0
50.0
60.0
70.0
80.0
90.0


In [36]:
import copy
def inference(listeT1, graph):
    listeUP=[]
    listeU=[]
    listeTmp=[]
    
    for u in listeT1:
        for v in graph[u]:
            if (np.random.rand()<graph[u][v]):
                if(v not in listeU):
                    listeU.append(v)
                    listeUP.append((v,graph[u][v]))
                    listeTmp.append(v)
                    
    t=1
    while((len(listeTmp)>0) &(t<15)):
        listeTmp2=[]
        for u in listeTmp:
            for v in graph[u]:
                if (np.random.rand()<graph[u][v]):
                    if(v not in listeU):
                        listeU.append(v)
                        listeUP.append((v,graph[u][v]))
                        listeTmp2.append(v)
        listeTmp=copy.deepcopy(listeTmp2)
        t+=1
        
    listeUP.sort(key=lambda elem: elem[1])
    return  [i[0] for i in listeUP]

t1=[68.0]
print(inference(t1,pc))

[90.0, 5.0, 48.0, 75.0, 2.0, 54.0, 8.0, 98.0, 89.0, 47.0, 43.0, 19.0, 65.0, 79.0, 4.0, 71.0, 12.0, 20.0, 58.0, 99.0, 9.0, 61.0, 34.0, 11.0, 3.0, 46.0, 7.0, 50.0, 25.0, 67.0, 62.0, 70.0, 68.0, 55.0, 57.0, 96.0, 40.0, 17.0, 94.0, 30.0, 52.0, 29.0, 16.0, 86.0, 82.0, 73.0, 72.0]


In [37]:
def MAP(graph,data_test):
    s=0
    for d in data_test:
        listeD=[i[0] for i in d if (i[1]>1)]
        listeT1=[i[0] for i in d if (i[1]==1)]
        listeU=inference(listeT1,graph)
        s2=0
        for i,u in enumerate(listeU):
            intersection = [val for val in listeD if val in listeU[:i]]
            s2+=len(intersection)*1.0/(i+1)
        s+=s2/len(listeD)
    return s/len(data_test)
MAP(pc,data_test)     

0.21882119620981702

----

# BROUILLON

In [143]:
probas_chapiteau=[getProbaOfList(getListPrec(d),gInit_train) for d in data_train]

In [153]:
indice = np.where(np.array(probas_chapiteau[0])[:,0]==4)[0][0]
probas_chapiteau[0][indice][1]

0.9999999999972592