In [60]:
from collections import defaultdict
import glob
import os

import pandas as pd


## Graphs

In [61]:
# Directory that is scanned for per-graph "<graph>.OUT" property files.
graph_dir = "/scratch/cluster/qduong/graphs/graph_properties_mtx/"

# Columns of the resulting table, in display order. Every column is seeded
# with an empty list so the frame still has a 'Graph' column to index on when
# the directory contains no matching files (otherwise set_index raises KeyError).
property_columns = ['Graph', 'n1', 'num_edges', 'avg_degree', 'sparsity', 'num_cc']
data = defaultdict(list, {column: [] for column in property_columns})

for path in glob.glob(os.path.join(graph_dir, "*.OUT")):
    filename = os.path.basename(path)

    # Skip auxiliary outputs. The check is done on the file name only, so a
    # directory component containing one of these words cannot hide every graph.
    if "temporal_edges" in filename or "nodeid" in filename or "coord" in filename:
        continue

    # Strip only the trailing ".OUT" extension ("foo.mtx.OUT" -> "foo.mtx").
    graph = os.path.splitext(filename)[0]

    # Only the first line of the file is read; it is split once and reused.
    with open(path) as f:
        fields = f.readline().split()

    # Field positions: 0=n1, 1=n2, 2=is_square, 3=num_edges, 4=avg_degree,
    # 5=avg_degree_2, 6=sparsity, 7=num_cc. Fields 1, 2 and 5 are not used here.
    assert len(fields) == 8, (
        f"{path}: expected 8 whitespace-separated fields on the first line, "
        f"got {len(fields)}"
    )

    data['Graph'].append(graph)
    data['n1'].append(int(fields[0]))
    data['num_edges'].append(int(fields[3]))
    data['avg_degree'].append(float(fields[4]))
    data['sparsity'].append(float(fields[6]))
    data['num_cc'].append(int(fields[7]))

# One row per graph, indexed and sorted by graph name.
df = pd.DataFrame(data).set_index('Graph').sort_index()
df

Unnamed: 0_level_0,n1,num_edges,avg_degree,sparsity,num_cc
Graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
amazon-2008.mtx,735323,5158388,7.015132,0.000954,1
amazon0601.mtx,403394,3387388,8.39722,0.002082,7
belgium_osm.mtx,1441295,1549970,1.075401,7.5e-05,1
citationCiteseer.mtx,268495,1156647,4.30789,0.001604,1
cnr-2000.mtx,325557,3216152,9.878921,0.003034,1
coAuthorsCiteseer.mtx,227320,814134,3.581445,0.001576,1
coAuthorsDBLP.mtx,299067,977676,3.269087,0.001093,1
coPapersCiteseer.mtx,434102,16036720,36.942285,0.00851,1
coPapersDBLP.mtx,540486,15245729,28.207445,0.005219,1
com-Youtube.mtx,1134890,2987624,2.632523,0.000232,1


## Graph type breakdown

In [62]:
# Graph names (index labels of the properties table) grouped by category.

# Graphs assigned to the citation category.
citation_graphs = [
    "citationCiteseer.mtx",
    "coAuthorsCiteseer.mtx",
    "coAuthorsDBLP.mtx",
    "coPapersCiteseer.mtx",
    "coPapersDBLP.mtx",
    "dblp-2010.mtx",
]

# Graphs assigned to the road category.
road_graphs = [
    "belgium_osm.mtx",
    "luxembourg_osm.mtx",
    "netherlands_osm.mtx",
]

# Graphs assigned to the web category.
# Deliberately left out (kept here for reference only):
#   com-Youtube_Communities_all.mtx
#   com-Youtube_Communities_top5000.mtx
#   wiki-topcats_Categories.mtx
web_graphs = [
    "amazon-2008.mtx",
    "amazon0601.mtx",
    "cnr-2000.mtx",
    "com-Youtube.mtx",
    "eu-2005.mtx",
    "flickr.mtx",
    "in-2004.mtx",
    "soc-LiveJournal1.mtx",
    "sx-stackoverflow.mtx",
    "sx-stackoverflow_A2Q.mtx",
    "sx-stackoverflow_C2A.mtx",
    "sx-stackoverflow_C2Q.mtx",
    "web-Google.mtx",
    "wiki-topcats.mtx",
    "wikipedia-20051105.mtx",
    "wikipedia-20060925.mtx",
]

### Road graphs
- [`belgium_osm.mtx`](https://sparse.tamu.edu/DIMACS10/belgium_osm)
- [`luxembourg_osm.mtx`](https://sparse.tamu.edu/DIMACS10/luxembourg_osm)
- [`netherlands_osm.mtx`](https://sparse.tamu.edu/DIMACS10/netherlands_osm)

In [63]:
# Select the road-graph rows (all columns), in the order given by road_graphs.
road_df = df.loc[road_graphs, :]
road_df

Unnamed: 0_level_0,n1,num_edges,avg_degree,sparsity,num_cc
Graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
belgium_osm.mtx,1441295,1549970,1.075401,7.5e-05,1
luxembourg_osm.mtx,114599,119666,1.044215,0.000911,1
netherlands_osm.mtx,2216688,2441238,1.1013,5e-05,1


### Web graphs
- [`cnr-2000.mtx`](https://sparse.tamu.edu/LAW/cnr-2000)
- [`com-Youtube.mtx`](https://sparse.tamu.edu/SNAP/com-Youtube)
- [`soc-LiveJournal1.mtx`](https://sparse.tamu.edu/SNAP/soc-LiveJournal1)
- [`sx-stackoverflow.mtx`](https://sparse.tamu.edu/SNAP/sx-stackoverflow)
- [`web-Google.mtx`](https://sparse.tamu.edu/SNAP/web-Google)
- [`wikipedia-20060925.mtx`](https://sparse.tamu.edu/Gleich/wikipedia-20060925)

In [72]:
# Select the web-graph rows (all columns), in the order given by web_graphs.
web_df = df.loc[web_graphs, :]
web_df

Unnamed: 0_level_0,n1,num_edges,avg_degree,sparsity,num_cc
Graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
amazon-2008.mtx,735323,5158388,7.015132,0.000954,1
amazon0601.mtx,403394,3387388,8.39722,0.002082,7
cnr-2000.mtx,325557,3216152,9.878921,0.003034,1
com-Youtube.mtx,1134890,2987624,2.632523,0.000232,1
eu-2005.mtx,862664,19235140,22.297372,0.002585,1
flickr.mtx,820878,9837214,11.983771,0.00146,1
in-2004.mtx,1382908,16917053,12.232956,0.000885,96
soc-LiveJournal1.mtx,4847571,68993773,14.232648,0.000294,1876
sx-stackoverflow.mtx,2601977,36233450,13.925354,0.000535,23580
sx-stackoverflow_A2Q.mtx,2601977,16266395,6.251552,0.00024,45250


In [75]:
# Display only these six web graphs (all columns), in the order listed.
web_df.loc[
    [
        'cnr-2000.mtx',
        'com-Youtube.mtx',
        'soc-LiveJournal1.mtx',
        'sx-stackoverflow.mtx',
        'web-Google.mtx',
        'wikipedia-20060925.mtx',
    ],
    :,
]

Unnamed: 0_level_0,n1,num_edges,avg_degree,sparsity,num_cc
Graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
cnr-2000.mtx,325557,3216152,9.878921,0.003034,1
com-Youtube.mtx,1134890,2987624,2.632523,0.000232,1
soc-LiveJournal1.mtx,4847571,68993773,14.232648,0.000294,1876
sx-stackoverflow.mtx,2601977,36233450,13.925354,0.000535,23580
web-Google.mtx,916428,5105039,5.570584,0.000608,2746
wikipedia-20060925.mtx,2983494,37269096,12.491762,0.000419,1151


### Citation graphs
- [`coPapersCiteseer.mtx`](https://sparse.tamu.edu/DIMACS10/coPapersCiteseer)
- [`coAuthorsCiteseer.mtx`](https://sparse.tamu.edu/DIMACS10/coAuthorsCiteseer)
- [`dblp-2010.mtx`](https://sparse.tamu.edu/LAW/dblp-2010)

In [65]:
# Select the citation-graph rows (all columns), in the order given by citation_graphs.
citation_df = df.loc[citation_graphs, :]
citation_df

Unnamed: 0_level_0,n1,num_edges,avg_degree,sparsity,num_cc
Graph,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
citationCiteseer.mtx,268495,1156647,4.30789,0.001604,1
coAuthorsCiteseer.mtx,227320,814134,3.581445,0.001576,1
coAuthorsDBLP.mtx,299067,977676,3.269087,0.001093,1
coPapersCiteseer.mtx,434102,16036720,36.942285,0.00851,1
coPapersDBLP.mtx,540486,15245729,28.207445,0.005219,1
dblp-2010.mtx,326186,807700,2.476195,0.000759,22954


### Summary

All graphs linked in the road, web, and citation sections above:

- [`belgium_osm.mtx`](https://sparse.tamu.edu/DIMACS10/belgium_osm)
- [`cnr-2000.mtx`](https://sparse.tamu.edu/LAW/cnr-2000)
- [`coAuthorsCiteseer.mtx`](https://sparse.tamu.edu/DIMACS10/coAuthorsCiteseer)
- [`coPapersCiteseer.mtx`](https://sparse.tamu.edu/DIMACS10/coPapersCiteseer)
- [`com-Youtube.mtx`](https://sparse.tamu.edu/SNAP/com-Youtube)
- [`dblp-2010.mtx`](https://sparse.tamu.edu/LAW/dblp-2010)
- [`luxembourg_osm.mtx`](https://sparse.tamu.edu/DIMACS10/luxembourg_osm)
- [`netherlands_osm.mtx`](https://sparse.tamu.edu/DIMACS10/netherlands_osm)
- [`soc-LiveJournal1.mtx`](https://sparse.tamu.edu/SNAP/soc-LiveJournal1)
- [`sx-stackoverflow.mtx`](https://sparse.tamu.edu/SNAP/sx-stackoverflow)
- [`web-Google.mtx`](https://sparse.tamu.edu/SNAP/web-Google)
- [`wikipedia-20060925.mtx`](https://sparse.tamu.edu/Gleich/wikipedia-20060925)