Orkut is a free on-line social network where users form friendship each other. Orkut also allows users form a group which other members can then join. We consider such user-defined groups as ground-truth communities. We provide the Orkut friendship social network and ground-truth communities. This data is provided by Alan Mislove et al.

We regard each connected component in a group as a separate ground-truth community. We remove the ground-truth communities which have less than 3 nodes. We also provide the top 5,000 communities with highest quality which are described in our paper. As for the network, we provide the largest connected component.

The network that we are going to use is Orkut's social graph. I decided to use this dataset because Orkut was a community base social network  base on intrests. This caracteristic is crucial since it allows us to run our epidemiology simulaiton using a real network based on a real-social necessity.

The graph is undirected.

In [1]:
from igraph import *
import numpy as np
import matplotlib.pyplot as plt
import itertools

In [5]:
class Epidemy(Graph):
    '''Epidemy extends the igraph's class Graph. Ciao
    The additional functionalities are:
    
    1 Built-in getters for graph metrics
        1.1 Plotting
            a)
            b)
        1.2 Metrics
            a) distribution model
            b) eggr
    
    '''
    
    graph = None
    patient_zero = None
    sentinels = None
    global I
    
    def __init__(self, graph_edge_list, patient_zero = None, sentinels = None):
        '''The compartment label is a byte that can take values 0, 1 and 2, indicating respectivly
        0 - Suscebtible node
        1 - Infected node'''
        
        
        self.graph = Graph.Read_Ncol(graph_edge_list, directed=False)
        self.graph.vs["compartment"] = np.zeros(len(self.graph.vs), dtype = np.uint8)
        
        if patient_zero is None:
            self.patient_zero = np.random.choice(self.graph.vs)
        else:
            self.patient_zero = patient_zero
        if sentinels is None:
            self.sentinels = np.random.choice(self.graph.vs)
        else:
            self.sentinels = sentinels
             
    
    #setters  
    def setPatientZero(patient_zero):
        self.patient_zero = patient_zero
        
    def setSentinels(sentinels):
        self.sentinels = sentinels
        
    def resetSentinels(self):
        self.graph.vs["iteration"] = np.nan
    
     
    #getters
    def getDegreeDistribution(self):
        x,y = np.unique(self.graph.degree(), return_counts=True)
        print(len(self.graph.degree()))
        return (x,y)
    
    def getMaxDegreeVertex(self):
        return self.graph.vs[self.graph.degree().index(self.graph.maxdegree())]
    
    #Epidemy Utilities
    def printDegreeDistribution(self, loglog=False):
        '''Prints the degree distirbution of the underlying network using a logarithmic scale'''
        x,y = self.getDegreeDistribution()
        if loglog:
            plt.scatter(np.log(x), np.log(y))
        else:
            plt.scatter(x, y)
        plt.show()
        
        
        
    #Epidemic Models
    def SIR(self, beta = 0.4, mu = 0.1 , sentinels = None, patient_zero = None):
        """Simulate an epidemy outbreaks using a sir model"""
        
        if sentinels is None:
            self.sentinels = set([np.random.choice(self.graph.vs)])
        if patient_zero is None:
            I = set([np.random.choice(self.graph.vs)])
        else:
            I = set([self.patient_zero])
        
        self.resetSentinels();
        
        
        for iteration in itertools.count():
            if(len(I)==0):
                break
                
            print()
            dI = set([j for j in list(itertools.chain.from_iterable(self.graph.neighborhood(I)))
                      if self.graph.vs[j]["compartment"] == 0 and np.random.random() < beta])
            self.graph.vs[dI]["compartment"] = 1 


            # Finds new removed nodes and update the status
            dR = set([k for k in I if np.random.random() < mu])
            self.graph.vs[dR]["compartment"] = 0
            
            self.graph.vs[dI & self.sentinels]["iteration"] = iteration
            

            I = (I | dI) - (dR)

In [6]:
a = Epidemy('dblp2.txt')

In [None]:
a.

In [8]:
a.SIR(patient_zero = None)

10
10
10
10
10
10
10
10
10
10


10


10


10


10


10


10


10


10


10


10


In [None]:
a.printDegreeDistribution(True)

In [30]:
beta =0.1
dI = set([j for j in list(itertools.chain.from_iterable(a.graph.neighborhood(set([a.getMaxDegreeVertex()])))) if a.graph.vs[j]["compartment"] == 0 and np.random.random() < beta])
dI

{3307,
 3333,
 11392,
 14758,
 18953,
 22289,
 33323,
 36149,
 39415,
 39472,
 39473,
 45612,
 45616,
 45627,
 45643,
 45652,
 45656,
 45676,
 45682,
 45686,
 45688,
 61346,
 63111,
 81671,
 106446,
 116258,
 124695,
 124714,
 124716,
 124718,
 124720,
 124723,
 124726,
 124728,
 124735}

In [20]:
dI

{2935,
 3282,
 3295,
 6418,
 11380,
 11385,
 11392,
 28667,
 45614,
 45632,
 45638,
 45654,
 45670,
 45677,
 45681,
 45683,
 45691,
 48239,
 57680,
 63115,
 81671,
 85212,
 110783,
 116258,
 124694,
 124705,
 124707,
 124712,
 124716,
 124733,
 124747}

In [15]:
a.graph.neighborhood(set(a.getMaxDegreeVertex()))

TypeError: 'igraph.Vertex' object is not iterable

In [2]:
np.nan

NameError: name 'np' is not defined

In [5]:
np.random.random()

0.8879140882566305

In [5]:
class prova():
    x = 0
    
    def xa(self):
        print 10

In [6]:
prova = prova()

In [8]:
prova.xa()

10
