In [15]:
import os

import igraph as ig
import plotly.plotly as py
from plotly.graph_objs import *

<div style="text-align:center"><h1> GRAPH-FUNCTIONS </h1></div>

In [129]:
def get_sequence_map(filename):
    """Read a " , "-separated mapping file into a dict keyed by its second column.

    Every non-blank line is split on " , ". The second field (with trailing
    whitespace/newline removed) becomes the key and the first field becomes
    the value. Later lines overwrite earlier ones that share the same key.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    dict
        Mapping ``second_field -> first_field``.

    Raises
    ------
    IndexError
        If a non-blank line does not contain the " , " separator.
    """
    url_dict = {}
    # Context manager so the handle is closed even if parsing fails.
    with open(filename, "r") as handle:
        for line in handle:
            if not line.strip():
                continue  # tolerate blank lines (e.g. at the end of the file)
            fields = line.split(" , ")
            url_dict[fields[1].rstrip()] = fields[0]
    return url_dict

def get_sequence_tuple_list(filename):
    """Read a " , "-separated file into a list of tuples, one per non-blank line.

    Each line is split on " , " and the trailing newline is removed from the
    last field; all fields are kept as strings, in file order.

    Parameters
    ----------
    filename : str
        Path of the text file to read.

    Returns
    -------
    list of tuple of str
        One tuple per non-blank line.
    """
    url_list = []
    # Context manager so the handle is closed even if parsing fails.
    with open(filename, "r") as handle:
        for line in handle:
            if not line.strip():
                continue  # a blank line would otherwise produce a bogus ("",) entry
            elements = line.split(" , ")
            elements[-1] = elements[-1].rstrip("\n")
            url_list.append(tuple(elements))
    return url_list

def get_sequences(filename, min_len=1):
    """Lazily yield the sequences stored in `filename`, one per line.

    Each line is split on the " -1 " separator and the final field
    (everything after the last separator, including the newline) is
    discarded. Sequences with fewer than `min_len` remaining items are
    skipped.

    Parameters
    ----------
    filename : str
        Path of the sequence file to read.
    min_len : int, optional
        Minimum number of items a sequence must have to be yielded.

    Yields
    ------
    list of str
        The items of one sequence, in file order.
    """
    # The file stays open only while the generator is being consumed and is
    # closed when it is exhausted or closed.
    with open(filename, "r") as handle:
        for line in handle:
            sequence = line.split(" -1 ")[:-1]  # drop the trailing field
            if len(sequence) >= min_len:
                yield sequence

# NOTE: get_labels() is not called by any cell shown in this notebook.
def get_labels(sequenceMap):
    """Return the values of `sequenceMap` ordered by the index in their key.

    Keys are expected to look like "u_<index>"; the value stored under
    "u_<index>" is placed at position <index> of the returned list, whose
    length equals the number of entries in `sequenceMap`.
    """
    ordered = [None for _ in sequenceMap]
    for node_id, label in sequenceMap.items():
        position = int(node_id.replace("u_", ""))
        ordered[position] = label
    return ordered

def get_color(n):
    """Return the hex colour string at position `n` of a fixed 8-colour palette.

    Parameters
    ----------
    n : int
        Palette index (normal list indexing rules apply).

    Returns
    -------
    str
        A "#RRGGBB" colour string.

    Raises
    ------
    IndexError
        If `n` is outside the palette.
    """
    # Entry 4 used to be "##76FF03" (doubled '#'), which is not a valid hex colour.
    colors = ["#FF8F00", "#FFFFFF", "#FFFF00", "#00E5FF", "#76FF03", "#2979FF", "#F50057", "#9C27B0"]
    return colors[n]


def create_graph(sequences, seqMap):
    """Build a directed igraph graph from node-ID sequences.

    Parameters
    ----------
    sequences : iterable of sequence of str
        Each sequence lists node IDs of the form "u_<index>". Every pair of
        consecutive IDs contributes one directed edge; duplicate edges are
        added only once.
    seqMap : dict or list
        Determines the number of vertices (``len(seqMap)``) and their
        attributes:

        * dict -- maps "u_<index>" to a label stored in the vertex "name"
          attribute;
        * list -- each item is indexed as ``(label, "u_<index>", community)``;
          the label goes to "name", the community to "community" and
          ``get_color(int(community))`` to "color".

        Any other type only fixes the vertex count; no attributes are set.

    Returns
    -------
    igraph.Graph
        The directed graph.
    """
    graph = ig.Graph(directed=True)
    graph.add_vertices(len(seqMap))  # one vertex per mapped ID

    # isinstance (rather than an exact type check) also accepts subclasses
    # such as OrderedDict.
    if isinstance(seqMap, dict):
        for key, label in seqMap.items():
            graph.vs[int(key.replace("u_", ""))]["name"] = label

    elif isinstance(seqMap, list):
        for el in seqMap:
            vertex = graph.vs[int(el[1].replace("u_", ""))]
            vertex["name"] = el[0]
            vertex["community"] = el[2]
            vertex["color"] = get_color(int(el[2]))

    # Collect unique edges in first-seen order and add them in a single call:
    # this avoids one get_eid lookup plus one add_edge call per candidate edge.
    seen = set()
    edges = []
    for seq in sequences:
        if len(seq) < 2:
            continue  # a single node contributes no edge
        ids = [int(node.replace("u_", "")) for node in seq]
        for edge in zip(ids, ids[1:]):
            if edge not in seen:
                seen.add(edge)
                edges.append(edge)
    if edges:
        graph.add_edges(edges)
    return graph

<div style="text-align:center"><h1> PLOT-FUNCTIONS </h1></div>

In [17]:
def gplot(graph, graph_name):
    """Draw `graph` as an interactive 3D network with Plotly.

    Vertices are placed with igraph's 'kk' layout in three dimensions,
    edges are drawn as thin grey lines and vertices as markers whose hover
    text is the vertex "name" attribute.

    Credentials are no longer embedded in the notebook. If both the
    ``PLOTLY_USERNAME`` and ``PLOTLY_API_KEY`` environment variables are set
    they are passed to ``py.sign_in``; otherwise no explicit sign-in is done
    and Plotly is left to use whatever credentials it already has
    configured (presumably its credentials file -- verify for the installed
    plotly version).

    Parameters
    ----------
    graph : igraph.Graph
        Graph to draw. The vertex attributes "color" and "name" are used
        when present; otherwise a single default colour and the vertex
        indices are used.
    graph_name : str
        Used both as the figure title and as the Plotly file name.

    Returns
    -------
    object
        Whatever ``py.iplot`` returns, so that a cell ending in
        ``gplot(...)`` can render the figure as its output.
    """
    username = os.environ.get("PLOTLY_USERNAME")
    api_key = os.environ.get("PLOTLY_API_KEY")
    if username and api_key:
        py.sign_in(username, api_key)

    layt = graph.layout('kk', dim=3)
    coords = [layt[k] for k in range(graph.vcount())]

    Xn = [point[0] for point in coords]  # x-coordinates of nodes
    Yn = [point[1] for point in coords]  # y-coordinates of nodes
    Zn = [point[2] for point in coords]  # z-coordinates of nodes

    Xe = []
    Ye = []
    Ze = []

    # Each edge contributes its two end points followed by None, which
    # breaks the line so consecutive edges are not joined together.
    for e in graph.es:
        start, end = coords[e.source], coords[e.target]
        Xe += [start[0], end[0], None]  # x-coordinates of edge ends
        Ye += [start[1], end[1], None]  # y-coordinates of edge ends
        Ze += [start[2], end[2], None]  # z-coordinates of edge ends

    # Graphs built from a dict map or by cluster_graph() carry no "color"
    # attribute; fall back to one colour instead of raising KeyError.
    vertex_attrs = graph.vertex_attributes()
    if "color" in vertex_attrs:
        node_colors = graph.vs["color"]
    else:
        node_colors = "#2979FF"
    if "name" in vertex_attrs:
        node_labels = graph.vs["name"]
    else:
        node_labels = [str(k) for k in range(graph.vcount())]

    trace1 = Scatter3d(
        x = Xe,
        y = Ye,
        z = Ze,
        mode = 'lines',
        line = Line(
            color = 'rgb(125,125,125)',
            width = 0.5
        ),
        hoverinfo = 'none'
    )

    trace2 = Scatter3d(
        x = Xn,
        y = Yn,
        z = Zn,
        mode = 'markers',
        name = 'actors',
        marker = Marker(
            symbol = 'dot',
            size = 6,
            color = node_colors,
            colorscale = 'Viridis',
            line = Line(
                color = 'rgb(50,50,50)',
                width = 0.5
            )
        ),
        text = node_labels,
        hoverinfo = 'text'
    )

    # Shared settings that hide every visual element of the three axes.
    axis = dict(
        showbackground = False,
        showline = False,
        zeroline = False,
        showgrid = False,
        showticklabels = False,
        title = ''
    )

    layout = Layout(
        title = graph_name,
        width = 1000,
        height = 1000,
        showlegend = False,
        scene = Scene(
            xaxis = XAxis(axis),
            yaxis = YAxis(axis),
            zaxis = ZAxis(axis),
        ),
        margin = Margin(
            t = 100
        ),
        hovermode = 'closest',
        annotations = Annotations([
                Annotation(
                    showarrow = False,
                    text = "Data source: <a href='#'>[1]</a>",
                    xref = 'paper',
                    yref = 'paper',
                    x = 0,
                    y = 0.1,
                    xanchor = 'left',
                    yanchor = 'bottom',
                    font = Font(
                        size = 14
                    )
                )
        ]),
    )

    data = Data([trace1, trace2])
    fig = Figure(data=data, layout=layout)

    # Return the result instead of discarding it so the notebook can display it.
    return py.iplot(fig, filename=graph_name)

<div style="text-align:center"><h1> RANDOM-WALK GRAPH </h1></div>

In [18]:
# Directory with the random-walk dataset.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
rw_path = "/home/chris/Scrivania/url2vec/dataset/depth-100k/seqLen-10/cs.illinois.eduRandomWalk.depth.100000.seqLen.10/"
rw_map_path = rw_path + "sequencesMapUrl.txt"  # mapping file read by get_sequence_map
rw_seq_path = rw_path + "sequencesIDs.txt"     # sequence file read by get_sequences

rw_sequence_map = get_sequence_map(rw_map_path)
rw_sequences = get_sequences(rw_seq_path, min_len=1)
rw_graph = create_graph(rw_sequences, rw_sequence_map)

# Graph size: number of vertices, then number of edges.
print(rw_graph.vcount())
print(rw_graph.ecount())

760
16043


In [6]:
# Plot the random-walk graph; the string is used as both figure title and Plotly file name.
gplot(rw_graph, "random walk network")


Woah there! Look at all those points! Due to browser limitations, the Plotly SVG drawing functions have a hard time graphing more than 500k data points for line charts, or 40k points for other types of charts. Here are some suggestions:
(1) Use the `plotly.graph_objs.Scattergl` trace object to generate a WebGl graph.
(2) Trying using the image API to return an image instead of a graph URL
(3) Use matplotlib
(4) See if you can create your visualization with fewer data points



Estimated Draw Time Slow



The draw time for this plot will be slow for clients without much RAM.


<div style="text-align:center"><h1> RANDOM-WALK-FROM-HOMEPAGE GRAPH </h1></div>

In [19]:
# Directory with the random-walk-from-homepage dataset.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
rwfh_path = "/home/chris/Scrivania/url2vec/dataset/depth-100k/seqLen-10/cs.illinois.eduRandomWalkFromHomepage.depth.100000.seqLen.10/"
rwfh_map_path = rwfh_path + "sequencesMapUrl.txt"  # mapping file read by get_sequence_map
rwfh_seq_path = rwfh_path + "sequencesIDs.txt"     # sequence file read by get_sequences

rwfh_sequence_map = get_sequence_map(rwfh_map_path)
rwfh_sequences = get_sequences(rwfh_seq_path, min_len=1)
rwfh_graph = create_graph(rwfh_sequences, rwfh_sequence_map)

# Graph size: number of vertices, then number of edges.
print(rwfh_graph.vcount())
print(rwfh_graph.ecount())

603
6917


In [73]:
# Plot the random-walk-from-homepage graph; the string is used as both figure title and Plotly file name.
gplot(rwfh_graph, "random walk from homepage network")

<div style="text-align:center"><h1> RANDOM-WALK-WITH-LISTS GRAPH </h1></div>

In [131]:
# Directory with the random-walk-with-lists dataset.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
rwwl_path = "/home/chris/Scrivania/url2vec/dataset/depth-100k/seqLen-10/cs.illinois.eduRandomWalkLists.depth.100000.seqLen.10/"
rwwl_map_path = rwwl_path + "sequencesMapUrl.txt"  # mapping file read by get_sequence_map
rwwl_seq_path = rwwl_path + "sequencesIDs.txt"     # sequence file read by get_sequences

rwwl_sequence_map = get_sequence_map(rwwl_map_path)
rwwl_sequences = get_sequences(rwwl_seq_path, min_len=1)
rwwl_graph = create_graph(rwwl_sequences, rwwl_sequence_map)

# Graph size: number of vertices, then number of edges.
print(rwwl_graph.vcount())
print(rwwl_graph.ecount())

908
10109


In [75]:
# Plot the random-walk-with-lists graph; the string is used as both figure title and Plotly file name.
gplot(rwwl_graph, "random walk with lists network")

<div style="text-align:center"><h1> MANUALLY-COLORED-RWWL GRAPH </h1></div>

In [133]:
# Same RWWL sequences, but the vertex map comes from the manually clustered
# file, read as tuples so create_graph also sets "community" and "color".
color_rwwl_path = rwwl_path + "sequencesMapUrl-manually-clusterized.txt"
rwwl_tuplist = get_sequence_tuple_list(color_rwwl_path)
# A fresh generator is required: get_sequences() results can be consumed only once.
rwwl_sequences_wc = get_sequences(rwwl_seq_path, min_len=1)
rwwl_graph_wc = create_graph(rwwl_sequences_wc, rwwl_tuplist)

# Graph size: number of vertices, then number of edges.
print(rwwl_graph_wc.vcount())
print(rwwl_graph_wc.ecount())

908
10109


In [134]:
# Plot the manually coloured RWWL graph; the string is used as both figure title and Plotly file name.
gplot(rwwl_graph_wc, "RWWL network - Manually colored")

<div style="text-align:center"><h1> CLUSTERING </h1></div>

In [105]:
# Run igraph's fast-greedy community detection on the RWWL graph, then
# convert the resulting dendrogram into a flat clustering with 6 clusters.
# NOTE(review): rwwl_graph is built as a directed graph; confirm that the
# installed igraph version accepts directed input for community_fastgreedy().
vertex_dendogram = rwwl_graph.community_fastgreedy()
vertex_clustering = vertex_dendogram.as_clustering(6)

In [106]:
# Cluster index of every vertex, listed in vertex order.
# NOTE(review): this displays one entry per vertex; consider showing a slice instead.
vertex_clustering.membership

[0,
 1,
 2,
 2,
 1,
 3,
 2,
 4,
 3,
 1,
 1,
 2,
 0,
 4,
 4,
 0,
 3,
 1,
 0,
 0,
 4,
 0,
 2,
 1,
 1,
 5,
 5,
 2,
 1,
 1,
 3,
 0,
 2,
 0,
 0,
 0,
 2,
 3,
 3,
 1,
 1,
 0,
 0,
 0,
 2,
 1,
 1,
 2,
 2,
 0,
 1,
 1,
 5,
 1,
 4,
 5,
 1,
 1,
 1,
 1,
 0,
 3,
 2,
 0,
 3,
 3,
 5,
 2,
 2,
 3,
 5,
 5,
 5,
 5,
 2,
 2,
 4,
 2,
 2,
 1,
 5,
 5,
 5,
 2,
 2,
 2,
 3,
 1,
 5,
 5,
 2,
 2,
 2,
 1,
 1,
 1,
 4,
 3,
 5,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 1,
 5,
 1,
 2,
 2,
 2,
 2,
 5,
 1,
 1,
 2,
 5,
 2,
 5,
 1,
 1,
 1,
 0,
 4,
 1,
 3,
 2,
 2,
 3,
 3,
 3,
 5,
 5,
 1,
 1,
 1,
 1,
 2,
 2,
 0,
 2,
 4,
 4,
 3,
 4,
 4,
 5,
 2,
 0,
 2,
 2,
 1,
 1,
 4,
 2,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 3,
 3,
 3,
 1,
 3,
 1,
 1,
 4,
 5,
 1,
 5,
 3,
 2,
 3,
 2,
 3,
 2,
 2,
 1,
 3,
 3,
 3,
 0,
 5,
 1,
 5,
 3,
 1,
 4,
 2,
 2,
 2,
 2,
 1,
 1,
 5,
 2,
 1,
 1,
 0,
 2,
 1,
 4,
 2,
 2,
 2,
 1,
 2,
 1,
 2,
 4,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 1,
 4,
 3,
 1,
 3,
 0,
 2,
 2,
 1,
 1,
 2,
 2,
 3,
 2,
 3,
 1,
 1,
 1,
 1,
 4,
 1,
 1,
 1,
 5,
 1,
 1,
 1,


In [107]:
# Number of vertices in each cluster. Uses the call form of print, which is
# consistent with the other cells and valid on both Python 2 and Python 3.
print(vertex_clustering.sizes())

[167, 308, 238, 56, 78, 61]


In [110]:
# One hex colour per cluster index. The fifth entry used to be "##76FF03"
# (doubled '#'), which is not a valid hex colour.
colors = ["#FF8F00", "#FFFFFF", "#FFFF00", "#00E5FF", "#76FF03", "#2979FF", "#F50057"]
# Colour every vertex of rwwl_graph (modified in place) by its cluster index.
rwwl_graph.vs["color"] = [colors[cluster] for cluster in vertex_clustering.membership]

In [111]:
# Plot the RWWL graph again, now that its vertices carry a "color" attribute from the clustering.
gplot(rwwl_graph, "rwwl colored")

In [72]:
# Build the graph of clusters from the clustering; both combine arguments
# are explicitly None.
cg = vertex_clustering.cluster_graph(combine_vertices=None, combine_edges=None)
# gplot uses the "name" attribute as hover text, so label each vertex with its own index.
cg.vs["name"] = [vertex.index for vertex in cg.vs]
gplot(cg, "cg")