In [2]:
from julia.api import Julia
jl = Julia(compiled_modules=False)

In [3]:
import os
import networkx as nx
import pandas as pd
import random
import numpy as np
from julia import Main
import matplotlib.pyplot as plt

In [58]:
class DataSets:
    base_path = 'data/'
    CORA =     {'path': 'cora/cora.cites', 'sep': '\t', 'name': 'Cora'}  # ~ 5K edges 2K nodes
    FACEBOOK = {'path': 'facebook/facebook_combined.txt', 'sep': ' ', 'name': 'Facebook'} # ~88K edges 4K nodes
    ENRON_EMAILS = {'path': 'enron/email-Enron.txt', 'sep': '\t', 'name': 'Enron Emails'} # ~ 184K edges 33K nodes
    GRQC = {'path': 'grqc/ca-GrQc.txt', 'sep': '\t', 'name': 'General Relativity & Quantum Cosmology collab network'}

    # returns an networkx graph object representing the dataset
    # if lcc is true, it returns only largest connected component
    @classmethod
    def get_undirected_networkx_graph(cls, dataset, lcc=True):
        path = cls.base_path + dataset['path']
        separator = dataset['sep']
        edgelist = pd.read_csv(path, sep=separator, names=['target', 'source'], comment='#')
        G = nx.from_pandas_edgelist(edgelist)
        if lcc == True:
            gs = [G.subgraph(c) for c in nx.connected_components(G)]
            G = max(gs, key=len)
        return G
    
    @classmethod
    def print_graph_properties(cls, dataset):
        path = cls.base_path + dataset['path']
        separator = dataset['sep']
        edgelist = pd.read_csv(path, sep=separator, names=['target', 'source'], comment='#')
        G = nx.from_pandas_edgelist(edgelist)
        print("--", dataset['name'], "--")
        print("FULL GRAPH - nodes:", G.number_of_nodes(), "edges:", G.number_of_edges())
        gs = [G.subgraph(c) for c in nx.connected_components(G)]
        G = max(gs, key=len)
        print("LCC -        nodes:", G.number_of_nodes(), "edges:", G.number_of_edges(), '\n')

In [63]:
DataSets.print_graph_properties(DataSets.GRQC)

-- General Relativity & Quantum Cosmology collab network --
FULL GRAPH - nodes: 5242 edges: 14496
LCC -        nodes: 4158 edges: 13428 



In [75]:
# if node numbers start at 0, add one to every node (Laplacians.sparsify doesn't like it)
G = DataSets.get_undirected_networkx_graph(DataSets.GRQC, False)

Main.eval("@eval Main import Base.MainInclude: include")
Main.include("script.jl")
    
edgelist = list(G.edges())
source_nodes = [x[0] for x in edgelist]
target_nodes = [x[1] for x in edgelist]

Gsparse = Main.sparsifyGraph(source_nodes, target_nodes)
Gsparse

RuntimeError: <PyCall.jlwrap (in a Julia function called from Python)
JULIA: PosDefException: matrix is not positive definite; Cholesky factorization failed.
Stacktrace:
 [1] #cholesky!#6 at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/SuiteSparse/src/cholmod.jl:1308 [inlined]
 [2] cholesky(::SuiteSparse.CHOLMOD.Sparse{Float64}; shift::Float64, check::Bool, perm::Nothing) at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/SuiteSparse/src/cholmod.jl:1346
 [3] cholesky at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/SuiteSparse/src/cholmod.jl:1339 [inlined]
 [4] #cholesky#9 at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/SuiteSparse/src/cholmod.jl:1458 [inlined]
 [5] cholesky at /Users/julia/buildbot/worker/package_macos64/build/usr/share/julia/stdlib/v1.5/SuiteSparse/src/cholmod.jl:1458 [inlined]
 [6] #130 at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/solverInterface.jl:79 [inlined]
 [7] wrapInterface(::Laplacians.var"#130#131", ::SparseMatrixCSC{Float64,Int64}; tol::Int64, maxits::Float64, maxtime::Float64, verbose::Bool, pcgIts::Array{Int64,1}, params::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/solverInterface.jl:36
 [8] #125#126 at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/solverInterface.jl:64 [inlined]
 [9] lapWrapConnected(::Function, ::SparseMatrixCSC{Float64,Int64}; tol::Float64, maxits::Float64, maxtime::Float64, verbose::Bool, pcgIts::Array{Int64,1}, params::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/solverInterface.jl:120
 [10] lapWrapConnected(::Function, ::SparseMatrixCSC{Float64,Int64}) at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/solverInterface.jl:113
 [11] lapWrapComponents(::Function, ::SparseMatrixCSC{Float64,Int64}; tol::Float64, maxits::Int64, maxtime::Float64, verbose::Bool, pcgIts::Array{Int64,1}, params::Base.Iterators.Pairs{Symbol,ApproxCholParams,Tuple{Symbol},NamedTuple{(:params,),Tuple{ApproxCholParams}}}) at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/solverInterface.jl:243
 [12] #approxchol_lap#216 at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/approxChol.jl:756 [inlined]
 [13] sparsify(::SparseMatrixCSC{Float64,Int64}; ep::Int64, matrixConcConst::Float64, JLfac::Float64) at /Users/andrewkawabata/.julia/packages/Laplacians/CD1YV/src/sparsify.jl:21
 [14] sparsifyGraph(::Array{Int64,1}, ::Array{Int64,1}) at /Users/andrewkawabata/Desktop/notebooks/Research/experiment-1/script.jl:6
 [15] invokelatest(::Any, ::Any, ::Vararg{Any,N} where N; kwargs::Base.Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}) at ./essentials.jl:710
 [16] invokelatest(::Any, ::Any, ::Vararg{Any,N} where N) at ./essentials.jl:709
 [17] _pyjlwrap_call(::Function, ::Ptr{PyCall.PyObject_struct}, ::Ptr{PyCall.PyObject_struct}) at /Users/andrewkawabata/.julia/packages/PyCall/tqyST/src/callback.jl:28
 [18] pyjlwrap_call(::Ptr{PyCall.PyObject_struct}, ::Ptr{PyCall.PyObject_struct}, ::Ptr{PyCall.PyObject_struct}) at /Users/andrewkawabata/.julia/packages/PyCall/tqyST/src/callback.jl:44>

In [48]:
G = DataSets.get_undirected_networkx_graph(DataSets.FACEBOOK)
el = list(G.edges())

In [None]:
'''
def sparsify_spectral(G, ep):
    Main.eval("@eval Main import Base.MainInclude: include")
    Main.include("script.jl")
    
    edgelist = list(G.edges())
    source_nodes = [x[0] for x in edgelist]
    target_nodes = [x[1] for x in edgelist]
    
    Gsparse = Main.sparsifyGraph(source_nodes,target_nodes)
    return Gsparse'''