In [10]:
library('igraph')
library('Matrix')
library('pracma')
library('igraph')
library('entropy')
library('infotheo')

In [11]:
#' Load an ego network and connect the ego node to every other vertex.
#'
#' Reads `<parent_path><name>.edges` as an "ncol" edge list, appends one
#' vertex named `name`, and adds an edge from that vertex to each vertex
#' that was read from the file.
#'
#' @param parent_path Prefix pasted directly in front of `name`, so it must
#'   already end with a path separator (e.g. "gplus/").
#' @param name Ego node ID; also the base name of the `.edges` file.
#' @param directed Forwarded to `read_graph()`; build a directed graph?
#' @return The igraph graph including the ego vertex and its edges.
get_graph <- function(parent_path, name, directed = FALSE) {
    node_edge_file <- paste0(parent_path, name, ".edges")
    graph <- read_graph(node_edge_file, format = "ncol", directed = directed)
    graph <- add_vertices(graph, nv = 1, name = name)

    # add_vertices() appends, so the ego is the vertex with the highest id.
    # Using that id (instead of a name lookup) keeps the edge list well formed
    # even if the same name already occurs in the edge file.
    ego_index <- vcount(graph)

    # seq_len() is empty when the file contributed no vertices, whereas
    # 1:(n - 1) would count down to 0 and produce invalid vertex ids.
    others <- seq_len(ego_index - 1L)
    if (length(others) > 0L) {
        # rbind() recycles ego_index, giving (ego, v1, ego, v2, ...) pairs
        # once flattened column by column.
        graph <- add_edges(graph, as.vector(rbind(ego_index, others)))
    }
    graph
}

In [12]:
# Count the .circles files under "gplus" that contain more than two lines.
# `pattern` is a regular expression, not a shell glob, so match the literal
# ".circles" suffix and anchor it at the end of the file name.
files <- list.files(path = "gplus", pattern = "\\.circles$", full.names = TRUE)
has_more_than_two <- vapply(
    files,
    function(c_file) length(readLines(c_file)) > 2,
    logical(1)
)
count <- sum(has_more_than_two)
print(count)

[1] 43


In [13]:
# Ego node IDs (as strings) that the following cells iterate over.
node_list <- c(
    "109327480479767108490",
    "115625564993990145546",
    "101373961279443806744"
)

In [14]:
# For each ego network, print the mean and variance of the in- and out-degree
# and save a histogram of each distribution as a PDF under plots/.

# Initialised but not filled in this cell; kept because it is a global that
# other cells may reference.
graph_list <- list()

# pdf() does not create missing directories, so make sure plots/ exists.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)

for (node in node_list) {
    graph <- get_graph("gplus/", node, directed = TRUE)

    # Compute each degree vector once and reuse it for the stats and the plot.
    in_degree <- degree(graph, mode = "in")
    out_degree <- degree(graph, mode = "out")

    print(sprintf("in-degree : mean=%5.3f, Variance=%5.3f", mean(in_degree), var(in_degree)))
    pdf(sprintf("plots/q19_%s_in_degree.pdf", node))
    hist(in_degree, main = paste("In-degree Distribution of Node ID ", node), xlab = "Degree")
    dev.off()

    print(sprintf("out-degree : mean=%5.3f, Variance=%5.3f", mean(out_degree), var(out_degree)))
    pdf(sprintf("plots/q19_%s_out_degree.pdf", node))
    hist(out_degree, main = paste("Out-degree Distribution of Node ID ", node), xlab = "Degree")
    dev.off()
}

[1] "in-degree : mean=14.062, Variance=96.001"
[1] "out-degree : mean=14.062, Variance=4588.177"


[1] "in-degree : mean=43.640, Variance=1020.621"
[1] "out-degree : mean=43.640, Variance=9351.303"
[1] "in-degree : mean=298.118, Variance=86408.770"
[1] "out-degree : mean=298.118, Variance=166186.737"


In [16]:
# For each ego network, detect communities with the walktrap algorithm,
# print the modularity of the partition, and save a plot of the community
# structure as a PDF under plots/.

# pdf() does not create missing directories, so make sure plots/ exists.
dir.create("plots", showWarnings = FALSE, recursive = TRUE)

for (node in node_list) {
    print(sprintf("Node ID: %s", node))
    graph <- get_graph("gplus/", node, directed = TRUE)

    # cluster_walktrap() is the current name of the deprecated
    # walktrap.community() alias.
    node_community <- cluster_walktrap(graph)
    node_modularity <- modularity(node_community)
    print(sprintf("Modularity of Node ID %s is %2.6f", node, node_modularity))

    pdf(sprintf("plots/q20_%s_community_structure.pdf", node))
    # layout_with_fr is the current name of layout.fruchterman.reingold.
    plot(
        node_community,
        graph,
        main = sprintf("Community Structure (node ID =%s)", node),
        vertex.size = 5,
        vertex.label = NA,
        edge.color = "grey",
        layout = layout_with_fr
    )
    dev.off()
}

[1] "Node ID: 109327480479767108490"
[1] "Modularity of Node ID 109327480479767108490 is 0.252765"
[1] "Node ID: 115625564993990145546"
[1] "Modularity of Node ID 115625564993990145546 is 0.319473"
[1] "Node ID: 101373961279443806744"
[1] "Modularity of Node ID 101373961279443806744 is 0.191090"


In [19]:

# For each ego network, compare the walktrap communities with the circles
# listed in the node's .circles file: compute the entropies H(C) and H(K),
# the conditional entropies H(C|K) and H(K|C), and from them the homogeneity
# and completeness scores, then print all six values.
for (node in node_list) {
    # Separator between nodes. A bare expression inside a loop body is not
    # auto-printed, so it has to be printed explicitly.
    print(strrep("**", 75))
    print(sprintf("Node ID: %s", node))

    # Passing the path lets readLines() open and close the file itself;
    # wrapping it in file(..., open = "r") leaves the connection open.
    node_circles_file_name <- paste0("gplus/", node, ".circles")
    node_circles_file_content <- readLines(node_circles_file_name)

    # One circle per line: tab-separated, first field is dropped and the
    # remaining fields are the member node IDs.
    circles <- lapply(
        strsplit(node_circles_file_content, "\t"),
        function(fields) fields[-1]
    )

    # N: number of distinct nodes that appear in at least one circle.
    all_circle <- unique(unlist(circles))
    N <- length(all_circle)

    # H(C): entropy over the circles.
    h_c <- 0
    for (circle in circles) {
        a_i <- length(circle)
        # An empty circle contributes nothing; without this guard
        # 0 * log10(0) evaluates to NaN and poisons the sum.
        if (a_i != 0) {
            h_c <- h_c - ((a_i / N) * log10(a_i / N))
        }
    }

    # Find the community structure.
    node_graph <- get_graph("gplus/", node, directed = TRUE)
    node_community <- cluster_walktrap(node_graph)
    vertex_names <- V(node_graph)$name

    h_ck <- 0
    h_kc <- 0
    h_k <- 0

    # Match each community against every circle.
    for (j in seq_len(max(node_community$membership))) {
        community_nodes <- vertex_names[which(node_community$membership == j)]

        # b_i: members of this community that have circle information.
        b_i <- length(intersect(community_nodes, all_circle))
        if (b_i != 0) {
            h_k <- h_k - ((b_i / N) * log10(b_i / N))
            for (n in seq_along(circles)) {
                # c_ij: nodes shared by community j and circle n.
                common_nodes <- intersect(community_nodes, circles[[n]])
                c_ij <- length(unique(common_nodes))
                A_i <- length(circles[[n]])
                if (c_ij != 0) {
                    h_ck <- h_ck - ((c_ij / N) * log10(c_ij / b_i))
                    h_kc <- h_kc - ((c_ij / N) * log10(c_ij / A_i))
                }
            }
        }
    }

    # Homogeneity and completeness from the (conditional) entropies.
    homogeneity <- 1 - (h_ck / h_c)
    completeness <- 1 - (h_kc / h_k)

    print(sprintf("Entropy H(C) = %5.8f", h_c))
    print(sprintf("Entropy H(K) = %5.8f", h_k))
    print(sprintf("Conditional Entropy H(C|K) = %5.8f", h_ck))
    print(sprintf("Conditional Entropy H(K|C) = %5.8f", h_kc))

    print(sprintf("Homogeneity h= %5.8f", homogeneity))
    print(sprintf("Completeness c= %5.8f", completeness))

    cat("\n")
}

[1] "Node ID: 109327480479767108490"
[1] "Entropy H(C) = 0.45634767"
[1] "Entropy H(K) = 0.43655637"
[1] "Conditional Entropy H(C|K) = 0.06759188"
[1] "Conditional Entropy H(K|C) = 0.29254781"
[1] "Homogeneity h= 0.85188512"
[1] "Completeness c= 0.32987391"

[1] "Node ID: 115625564993990145546"
[1] "Entropy H(C) = 3.67636649"
[1] "Entropy H(K) = 0.46955527"
[1] "Conditional Entropy H(C|K) = 2.01505212"
[1] "Conditional Entropy H(K|C) = 2.07729483"
[1] "Homogeneity h= 0.45189030"
[1] "Completeness c= -3.42396235"

[1] "Node ID: 101373961279443806744"
[1] "Entropy H(C) = 0.16690804"
[1] "Entropy H(K) = 0.21425076"
[1] "Conditional Entropy H(C|K) = 0.16626265"
[1] "Conditional Entropy H(K|C) = 0.53653499"
[1] "Homogeneity h= 0.00386671"
[1] "Completeness c= -1.50423839"

