diff --git a/build/buildgaimc.m b/build/buildgaimc.m
new file mode 100644
index 0000000..e2819cb
--- /dev/null
+++ b/build/buildgaimc.m
@@ -0,0 +1,87 @@
+%% build the gaimc distribution
+function buildgaimc(varargin)
+if ~isempty(strmatch('all',varargin))
+ buildlist = {'test','pages','zip'};
+else
+ buildlist = varargin;
+end
+tobuild = @(type,buidlist) ~isempty(strmatch(type,buildlist));
+gaimcversion = '1.0';
+mypath = fileparts(mfilename('fullpath'));
+oldpath = pwd;
+matlablpath=path;
+try
+ if tobuild('test',buildlist)
+ fprintf('Running tests...\n\n');
+ cd(mypath) % move to where we think we are
+ cd ../ % in main gaimc directory
+ addpath(pwd); % add it to the path
+ cd test % run tests
+ try
+ test_main
+ catch ME
+ fprintf('*************\n')
+ fprintf('Tests failed!\n');
+ fprintf('*************\n')
+ fprintf('\n');
+ fprintf('Build halted.\n');
+ fprintf('\n');
+ fprintf('Test error:\n');
+ rethrow(ME);
+ end
+ end
+
+ if tobuild('pages',buildlist)
+ fprintf('Building demos...\n\n');
+ cd(mypath) % move to where we think we are
+ cd ../ % in main gaimc directory
+ addpath(pwd); % add it to the path
+ cd demo
+ try
+ publish('demo');
+ publish('airports');
+ publish('performance_comparison_simple');
+ if tobuild('pages_fullperf')
+ publish('performance_comparison');
+ end
+ catch ME
+ fprintf('******************\n')
+ fprintf('Publishing failed!\n');
+ fprintf('******************\n')
+ fprintf('\n');
+ fprintf('Build halted.\n');
+ fprintf('\n');
+ fprintf('Test error:\n');
+ rethrow(ME);
+ end
+ end
+
+ if tobuild('zip',buildlist)
+ cd(mypath) % move to where we think we are
+ try
+ cd ../../
+ [status,result] = system(...
+ sprintf('zip -r gaimc/build/gaimc-%s.zip gaimc/*.m gaimc/demo gaimc/test gaimc/graphs',...
+ gaimcversion));
+ result
+ catch ME
+ fprintf('******************\n')
+ fprintf('Zipping failed!\n');
+ fprintf('******************\n')
+ fprintf('\n');
+ fprintf('Build halted.\n');
+ fprintf('\n');
+ fprintf('Error:\n');
+ rethrow(ME);
+ end
+ end
+
+catch ME
+ cd(oldpath); % move to where we think we are
+ path(matlabpath);
+ rethrow(ME)
+end
+
+cd(oldpath);
+path(matlabpath);
+
diff --git a/build/matlab_central_notes.txt b/build/matlab_central_notes.txt
new file mode 100644
index 0000000..bf4a4cb
--- /dev/null
+++ b/build/matlab_central_notes.txt
@@ -0,0 +1,35 @@
+Picture: minnesota graph (demo_08.png)
+
+Title : gaimc : Graph Algorithms In Matlab Code
+Summary (100 chars) :
+Efficient pure-Matlab implementations of graph algorithms to complement MatlabBGL's mex functions.
+Description (5000 chars) :
+
+While MatlabBGL uses the Boost Graph Library for efficient graph routines,
+gaimc implements everything in pure Matlab code. While the routines are
+slower, they aren't as slow as I initially thought. Since people often
+have problems getting MatlabBGL to compile on new versions of Matlab
+or on new architectures, this library is then a complement to MatlabBGL.
+
+See the published M-files for a few examples of the capabilities.
+
+Functions
+ depth first search (dfs)
+ breadth first search (bfs)
+ connected components (scomponents)
+ maximum weight bipartite matching (bipartite_matching)
+ Dijkstra's shortest paths (dijkstra)
+ Prim's minimum spanning tree (mst_prim)
+ clustering coefficients (clustercoeffs)
+ directed clustering coefficients (dirclustercoeffs)
+ core numbers (corenums)
+
+The project is maintained at github : http://github.com/dgleich/gaimc/tree/master
+
+Tags:
+
+graph, network, dijkstra, core numbers, prim, dfs, bfs, connected components,
+strongly connected components, clustering coefficients, directed clustering coefficients,
+directed network, directed graph, depth first search, breadth first search,
+shortest paths, minimum spanning tree, mst
+
diff --git a/demo/airports.m b/demo/airports.m
index 0f2aad4..af1970a 100644
--- a/demo/airports.m
+++ b/demo/airports.m
@@ -67,12 +67,10 @@
% That's the US, now we need to plot our data on top of it.
[X,Y] = gplot(T,xy); % get the information to reproduce a gplot
plotm(Y,X,'k.-','LineWidth',1.5); % plot the lines on the map
-%%
-% We need to clear the axes after the mapping toolbox
-clf;
+
%%
% Let's just look at the continential US too.
-figure; ax = usamap('conus');
+clf; ax = usamap('conus');
states = shaperead('usastatelo', 'UseGeoCoords', true, 'Selector',...
{@(name) ~any(strcmp(name,{'Alaska','Hawaii'})), 'Name'});
faceColors = makesymbolspec('Polygon',...
@@ -85,9 +83,6 @@
% One interesting aspect of this map is that major airline hubs (Chicago,
% New York, etc. are not well represented. One possible explaination is
% that they have larger delays than other regional airports.
-%
-% Clear the figure again before proceeding.
-clf;
%% Honolulu to St. Johns?
% Before, we saw that the lengthiest route was between St. John's and
diff --git a/demo/demo.m b/demo/demo.m
index 68fe5da..7778ae7 100644
--- a/demo/demo.m
+++ b/demo/demo.m
@@ -83,7 +83,7 @@
%%
% Load the example matrix from the Boost Graph Library
-load('graphs/dfs_example');
+load_gaimc_graph('dfs_example');
figure(1); graph_draw(A,xy,'labels',labels);
%%
@@ -99,7 +99,7 @@
%%
% Let's look at breadth first search too, using a different example.
-load('graphs/bfs_example');
+load_gaimc_graph('bfs_example');
figure(1); clf; graph_draw(A,xy,'labels',labels);
%%
@@ -161,7 +161,8 @@
%%
% Now, we just call MST and look at the result.
-T = mst_prim(A);
+% T = mst_prim(A);
+% This command means we can't run the demo, so it's commented out.
%%
% Oops, travel time isn't symmetric! Let's just pick the longest possible
@@ -220,6 +221,7 @@
%%
% We also have a largest_component function that makes it easy to just get
% the largest connected component.
+clf;
[Acc,f] = largest_component(A);
graph_draw(Acc,xy(f,:))
%%
@@ -236,7 +238,7 @@
%%
% Load a road network to use for statistical computations
-load('graphs/minnesota');
+load_gaimc_graph('minnesota');
gplot(A,xy);
%%
@@ -280,7 +282,7 @@
%%
% Load and convert the graph.
-load ('graphs/all_shortest_paths_example');
+load_gaimc_graph('all_shortest_paths_example');
A = spfun(@(x) x-min(min(A))+1,A); % remove the negative edges
As = convert_sparse(A);
%%
@@ -305,4 +307,4 @@
toc
%%
% And just to check, let's make sure the output is the same.
-isequal(D,D2)
\ No newline at end of file
+isequal(D,D2)
diff --git a/demo/html/airports.html b/demo/html/airports.html
new file mode 100644
index 0000000..c8b66e2
--- /dev/null
+++ b/demo/html/airports.html
@@ -0,0 +1,321 @@
+
+
+
+
+
+
+
+ The US airport network
+
+
+
+
+
+
The US airport network
+
+
THe North American airport network is an interesting graph to examine. The source for this data was a file on Brendan Frey's
+ affinity propagation website. A(i,j) is the negative travel time between two airports. Although the data didn't include
+ the airport locations, I used the Yahoo! Geocoding API to generate a latitude and longitude for each airport.
+
+
+
Contents
+
+
The data
load_gaimc_graph('airports');
+Plot a histogram for all route time estimates
[si, ti, rt] = find(A);
+hist(-rt,100);
+
Find the lengthiest route
[val,ind] = max(-rt)
+{labels{si(ind)} labels{ti(ind)}}
+
+val =
+
+ 2855
+
+
+ind =
+
+ 63478
+
+
+ans =
+
+ 'Honolulu, HI' 'St. Johns, NL'
+
+
Some of the routes include stop overs, so it's probable that is what we find in this case.
+
Graph analysis: connected?
+
One of the first questions about any graph should be if it's connected or not.
max(scomponents(A))
+
+ans =
+
+ 1
+
+
There is only one connected component, so the graph is connected.
+
Distance instead of time
+
Let's see how the edges correlate distance with estimated travel time.
[ai aj te] = find(A);
+de = distance(xy(ai,:), xy(aj,:));
+plot(de,-te,'.');
+xlabel('distance (arclength)'); ylabel('time (?)');
+
Wow! It's all over the place, but there is a lower bound. Some of these routes can include stop
+
Minimum spanning tree
+
This section repeats and extends some analysis in the overall gaimc demo. First, let's recompute the minimum spanning tree
+ based on travel time.
+
load_gaimc_graph('airports')
+A = -A;
+A = max(A,A');
+T = mst_prim(A);
+clf;
+gplot(T,xy);
+
+
+
+ax = worldmap('USA');
+load coast
+geoshow(ax, lat, long,...
+ 'DisplayType', 'polygon', 'FaceColor', [.45 .60 .30])
+states = shaperead('usastatelo', 'UseGeoCoords', true);
+faceColors = makesymbolspec('Polygon',...
+ {'INDEX', [1 numel(states)], 'FaceColor', polcmap(numel(states))});
+ geoshow(ax, states, 'DisplayType', 'polygon', 'SymbolSpec', faceColors)
+set(gcf,'Position', [ 52 234 929 702]);
+
That's the US, now we need to plot our data on top of it.
[X,Y] = gplot(T,xy);
+plotm(Y,X,'k.-','LineWidth',1.5);
+
Let's just look at the continential US too.
clf; ax = usamap('conus');
+states = shaperead('usastatelo', 'UseGeoCoords', true, 'Selector',...
+ {@(name) ~any(strcmp(name,{'Alaska','Hawaii'})), 'Name'});
+faceColors = makesymbolspec('Polygon',...
+ {'INDEX', [1 numel(states)], 'FaceColor', polcmap(numel(states))});
+geoshow(ax, states, 'DisplayType', 'polygon', 'SymbolSpec', faceColors)
+framem off; gridm off; mlabel off; plabel off
+set(gcf,'Position', [ 52 234 929 702]);
+plotm(Y,X,'k.-','LineWidth',1.5);
+
One interesting aspect of this map is that major airline hubs (Chicago, New York, etc. are not well represented. One possible
+ explaination is that they have larger delays than other regional airports.
+
+
Honolulu to St. Johns?
+
Before, we saw that the lengthiest route was between St. John's and Honolulu. But, does the network have a better path to
+ follow between these cities? Let's check using Dijkstra's shortest path algorithm!
+
+load_gaimc_graph('airports')
+A = -A;
+
+
+[si, ti, rt] = find(A);
+[val,ind] = max(rt);
+start = si(ind);
+dest = ti(ind);
+[d pred] = dijkstra(A,start);
+d(dest)
+
+ans =
+
+ 735
+
+
That value is considerably shorter than the direct time. How do we find this awesome route?
path =[]; u = dest; while (u ~= start) path=[u path]; u=pred(u); end
+fprintf('%s',labels{start});
+for i=path; fprintf(' --> %s', labels{i}); end, fprintf('\n');
+Honolulu, HI --> Chicago, IL --> Montreal, QC --> St. Johns, NL
+
All pairs shortest paths
+
At this point, the right thing to do would be to recompute each edge in the network using an all-pairs shortest path algorithm,
+ and then look at how distance correlates with time in that network. However, I haven't had time to implement that algorithm
+ yet.
+
+
Conclusion
+
Hopefully, you will agree that network algorithms are a powerful way to look at the relationships between airports!
+
+
+
+
+
\ No newline at end of file
diff --git a/demo/html/airports.png b/demo/html/airports.png
new file mode 100644
index 0000000..19847a6
Binary files /dev/null and b/demo/html/airports.png differ
diff --git a/demo/html/airports_01.png b/demo/html/airports_01.png
new file mode 100644
index 0000000..b88c23c
Binary files /dev/null and b/demo/html/airports_01.png differ
diff --git a/demo/html/airports_02.png b/demo/html/airports_02.png
new file mode 100644
index 0000000..6b650ab
Binary files /dev/null and b/demo/html/airports_02.png differ
diff --git a/demo/html/airports_03.png b/demo/html/airports_03.png
new file mode 100644
index 0000000..6d38c92
Binary files /dev/null and b/demo/html/airports_03.png differ
diff --git a/demo/html/airports_04.png b/demo/html/airports_04.png
new file mode 100644
index 0000000..3828376
Binary files /dev/null and b/demo/html/airports_04.png differ
diff --git a/demo/html/airports_05.png b/demo/html/airports_05.png
new file mode 100644
index 0000000..6112d3f
Binary files /dev/null and b/demo/html/airports_05.png differ
diff --git a/demo/html/demo.html b/demo/html/demo.html
new file mode 100644
index 0000000..05f6e22
--- /dev/null
+++ b/demo/html/demo.html
@@ -0,0 +1,744 @@
+
+
+
+
+
+
+
+ Demo of gaimc - 'Graph Algorithms In Matlab Code'
+
+
+
+
+
+
Demo of gaimc - 'Graph Algorithms In Matlab Code'
+
+
Matlab includes great algorithms to work with sparse matrices but does provide a reasonable set of algorithms to work with
+ sparse matrices as graph data structures. My other project -- MatlabBGL -- provides a high-performance solution to this problem
+ by directly interfacing the Matlab sparse matrix data structure with the Boost Graph Library. That library, however, suffers
+ from enormous complication because it must be compiled for each platform. The Boost Graph Library heavily uses advanced C++
+ features that impair easy portability between platforms. In contrast, the gaimc library is implemented in pure Matlab code,
+ making it completely portable.
+
+
The cost of the portability for this library is a 2-4x slowdown in the runtime of the algorithms as well as significantly
+ fewer algorithms to choose from.
+
+
+
Contents
+
+
Sparse matrices as graphs
+
To store the connectivity structure of the graph, gaimc uses the adjacency matrix of a graph.
+
A graph is represented by a set of vertices and a set of edges between the vertices. Often, we write
to denote the graph, the set of vertices, and the set of edges, respectively. In gaimc, like in my other package MatlabBGL,
+ we represent graphs with their adjacency matrices. This representation is handy in Matlab because Matlab is rather efficient
+ at working with the large sparse matrices that typically arise as adjacency matrices.
+
+
To convert from
to an adjacency matrix, we identify each vertex with a row of the matrix via a bijective map. The adjacency matrix is then
+ a
matrix called A. The entry A(i,j) = 1 for any edge between in
and 0 otherwise. Let's look at an example.
+
load_gaimc_graph('bfs_example');
+graph_draw(A,xy,'labels',labels)
+full(A)
+labels'
+
+ans =
+
+ 0 1 0 0 1 0 0 0
+ 1 0 0 0 0 1 0 0
+ 0 0 0 1 0 1 1 0
+ 0 0 1 0 0 0 0 1
+ 1 0 0 0 0 0 0 0
+ 0 1 1 0 0 0 1 0
+ 0 0 1 0 0 1 0 1
+ 0 0 0 1 0 0 1 0
+
+
+ans =
+
+ 'r' 's' 't' 'u' 'v' 'w' 'x' 'y'
+
+
This output means that vertex 'r' is row 1, vertex 's' is row 2 and because A(1,2) = 1, then there is an edge between them,
+ just like in the picture.
+
+
One funny property is that A(2,1) = 1 too! So we actually have to store each edge twice in the adjacency matrix. This might
+ seem wasteful, but its hard to avoid as I've learned while working on graph algorithms. So don't worry about it! It also
+ makes the generalization to directed graphs (below) easy.
+
+
For more information about the adjacency matrix representation of a graph, see a standard book on graph algorithms.
+
Weighted and directed graphs
+
Our previous case handled the situation for undirected graphs only. To encode weighted and directed graphs, we use weighted
+ and non-symmetric adjacency matrices.
+
+
For a weighted matrix, A(i,j) = distance between i and j for most of the algorithms in gaimc. But A(i,j) = 0 means there
+ is no edge, and so sometimes things can get a little tricky to get what you want.
+
+
For a directed graph, just set A(i,j) ~= A(j,i). The adjacency matrix won't be symmetric, but that's what you want!
+
To understand more, explore the examples or read up on adjacency matrices in graph theory books.
+
Loading helper
+
To make loading our sample graphs easy, gaimc defines it's own function to load graphs.
load_gaimc_graph('dfs_example');
+whos
+ Name Size Bytes Class Attributes
+
+ A 9x9 416 double sparse
+ Acc 5x5 176 double sparse
+ As 1x1 40792464 struct
+ At 50000x50000 20395704 double sparse
+ D 5x5 200 double
+ D2 5x5 200 double
+ T 456x456 18216 double sparse
+ X 2730x1 21840 double
+ Y 2730x1 21840 double
+ ai 1249731x1 9997848 double
+ aj 71959x1 575672 double
+ ans 1x8 912 cell
+ ati 1249731x1 9997848 double
+ ax 1x1 8 double
+ cc 9x1 72 double
+ cc1 50000x1 400000 double
+ cc2 50000x1 400000 double
+ cc3 50000x1 400000 double
+ cc4 50000x1 400000 double
+ ccfs 2642x1 21136 double
+ ci 1249731x1 9997848 double
+ cn 2642x1 21136 double
+ comp_results 6x4 192 double
+ cp 50001x1 400008 double
+ cs1 1x1 8 double
+ cs2 1x1 8 double
+ cs3 1x1 8 double
+ cs4 1x1 8 double
+ cwd 1x28 56 char
+ d 456x1 3648 double
+ d1 1024x1 8192 double
+ d2 1024x1 8192 double
+ d3 1024x1 8192 double
+ d4 1024x1 8192 double
+ de 71959x1 575672 double
+ dest 1x1 8 double
+ dirtail 1x10 20 char
+ dt 8x1 64 double
+ f 9x1 9 logical
+ faceColors 1x1 1904 struct
+ gi 1x1 8 double
+ graphdir 1x10 20 char
+ graphs 1x5 642 cell
+ i 1x1 8 double
+ ind 1x1 8 double
+ labels 9x1 1026 cell
+ lat 9865x1 78920 double
+ lax 1x1 8 double
+ long 9865x1 78920 double
+ mat_fast 1x1 8 double
+ mat_std 1x1 8 double
+ matlabbgldir 1x20 40 char
+ mex_fast 1x1 8 double
+ mex_std 1x1 8 double
+ n 1x1 8 double
+ nrep 1x1 8 double
+ ntests 1x1 8 double
+ path 1x3 24 double
+ pred 1x456 3648 double
+ rep 1x1 8 double
+ results 1x3 2140 struct
+ ri 1249731x1 9997848 double
+ rp 50001x1 400008 double
+ rst 1x1 8 double
+ rt 71959x1 575672 double
+ si 71959x1 575672 double
+ source 1x83 166 char
+ start 1x1 8 double
+ states 49x1 190442 struct
+ szi 1x1 8 double
+ szs 1x6 48 double
+ te 71959x1 575672 double
+ ti 1x1 8 double
+ u 1x1 8 double
+ v 1x1 8 double
+ val 1x1 8 double
+ xy 9x2 144 double
+
+
This helps make our examples work regardless of where the current directory lies.
+
Search algorithms
+
The two standard graph search algorithms are depth first search and breadth first search. This library implements both.
+
Load the example matrix from the Boost Graph Library
load_gaimc_graph('dfs_example');
+figure(1); graph_draw(A,xy,'labels',labels);
+
Run a depth first search. The output records the distance to the other vertices, except where the vertices are not reachable
+ starting from the first node A.
+
d=dfs(A,1)
+
+d =
+
+ 0
+ 1
+ 3
+ 3
+ 2
+ 4
+ -1
+ -1
+ -1
+
+
From this example, we see that vertices a-f are reachable from vertex a, but that verice g-i are not reachable. Given the
+ of the edges, this makes sense.
+
+
Let's look at breadth first search too, using a different example.
load_gaimc_graph('bfs_example');
+figure(1); clf; graph_draw(A,xy,'labels',labels);
+
The breadth first search algorithm records the distance from the starting vertex to each vertex it visits in breadth first
+ order. This means it visits all the vertices in order of their distance from the starting vertex. The d output records the
+ distance, and the dt output records the step of the algorithm when the breadth first search saw the node.
+
[d dt] = bfs(A,2);
+
+figure(1); clf; graph_draw(A,xy,'labels',num2str(dt));
+
Notice how the algorithm visits all vertices one edge away from the start vertex (0) before visiting those two edges away.
+
Shortest paths
+
In the previous two examples, the distance between vertices was equivalent to the number of edges. Some graphs, however,
+ have specific weights, such as the graph of flights between airports. We can use this information to build information about
+ the shortest path between two nodes in a network.
+
+
+load_gaimc_graph('airports')
+A = -A;
+lax=247; rst=355;
+
+[d pred] = dijkstra(A,lax);
+
+fprintf('Minimum time: %g\n',d(rst));
+
+fprintf('Path:\n');
+path =[]; u = rst; while (u ~= lax) path=[u path]; u=pred(u); end
+fprintf('%s',labels{lax});
+for i=path; fprintf(' --> %s', labels{i}); end, fprintf('\n');
+Minimum time: 244
+Path:
+Los Angeles, CA --> Minneapolis/St Paul, MN --> Rochester, MN
+
Minimum spanning trees
+
A minimum spanning tree is a set of edges from a graph that ...
+
This demo requires the mapping toolbox for maximum effect, but we'll do okay without it.
+
Our data comes from a graph Brendan Frey prepared for his affinity propagation clustering tool. For 456 cities in the US,
+ we have the mean travel time between airports in those cities, along with their latitude and longitude.
+
load_gaimc_graph('airports')
+For some reason, the data is stored with the negative travel time between cities. (I believe this is so that closer cities
+ have larger edges between them.) But for a minimum spanning tree, we want the actual travel time between cities.
+
A = -A;
+
Now, we just call MST and look at the result. T = mst_prim(A); This command means we can't run the demo, so it's commented
+ out.
+
+
Oops, travel time isn't symmetric! Let's just pick the longest possible time.
A = max(A,A');
+T = mst_prim(A);
+sum(sum(T))/2
+
+ans =
+
+ (1,1) 24550
+
+
Well, the total weight isn't that helpful, let's look at the data instead.
+
clf;
+gplot(T,xy);
+
Hey! That looks like the US! You can see regional airports and get some sense of the overall connectivity.
+
Connected components
+
The connected components of a network determine which parts of the network are reachable from other parts. One of your first
+ questions about any network should generally be: is it connected?
+
+
There are two types of connected components: components and strongly connected components. gaimc only implements an algorithm
+ for the latter case, but that's okay! It turns out it computes exactly the right thing for connected components as well.
+ The difference only occurs when the graph is undirected vs. directed.
+
load_gaimc_graph('dfs_example')
+graph_draw(A,xy)
+
This picture shows there are 3 strongly connected components and 2 connected components
+max(scomponents(A))
+
+ans =
+
+ 3
+
+
+max(scomponents(A|A'))
+
+ans =
+
+ 2
+
+
Let's look at the vertices in the strongly connected components
cc = scomponents(A)
+
+cc =
+
+ 2
+ 2
+ 2
+ 1
+ 2
+ 2
+ 3
+ 3
+ 3
+
+
The output tells us that vertices 1,2,3,5,6 are in one strong component, vertex 4 is it's own strong component, and vertices
+ 7,8,9 are in another one. Remember that a strong component is all the vertices mutually reachable from a given vertex. If
+ you start at vertex 4, you can't get anywhere else! That's why it is in a different component than vertices 1,2,3,5,6.
+
+
We also have a largest_component function that makes it easy to just get the largest connected component.
clf;
+[Acc,f] = largest_component(A);
+graph_draw(Acc,xy(f,:))
+
The filter variable f, tells us which vertices in the original graph made it into the largest strong component. We can just
+ apply that filter to the coordinates xy and reuse them for drawing the graph!
+
+
Statistics
+
Graph statistics are just measures that indicate a property of the graph at every vertex, or at every edges. Arguably the
+ simplest graph statistic would be the average vertex degree. Because such statistics are easy to compute with the adjaceny
+ matrix in Matlab, they do not have special functions in gaimc.
+
+
Load a road network to use for statistical computations
load_gaimc_graph('minnesota');
+gplot(A,xy);
+
Average vertex degree
d = sum(A,2);
+mean(d)
+
+ans =
+
+ (1,1) 2.5034
+
+
So the average number of roads at any intersection is 2.5. My guess is that many roads have artificial intersections in the
+ graph structure that do not correspond to real intersections. Try validating that hypothesis using the library!
+
+
Average clustering coefficients
ccfs = clustercoeffs(A);
+mean(ccfs)
+
+
+
+
+ans =
+
+ 0.0160
+
+
Average core numbers
cn = corenums(A);
+mean(cn)
+
+ans =
+
+ 1.9463
+
+
Efficient repetition
+
Every time a gaimc function runs, it converts the adjacency matrix into a set of compressed sparse row arrays. These arrays
+ yield efficient access to the edges of the graph starting at a particular vertex. For many function calls on the same graph,
+ this conversion process slows the algorithms. Hence, gaimc also accepts pre-converted input, in which case it skips it's
+ conversion.
+
+
Let's demonstrate how this works by calling Dijkstra's algorithm to compute the shortest paths between all vertices in the
+ graph. The Floyd Warshall algorithm computes these same quantities more efficiently, but that would just be one more algorithm
+ to implement and maintain.
+
+
Load and convert the graph.
load_gaimc_graph('all_shortest_paths_example');
+A = spfun(@(x) x-min(min(A))+1,A);
+As = convert_sparse(A);
+Now, we'll run Dijkstra's algorithm for every vertex and save the result On my 2GHz laptop, this takes 0.000485 seconds.
n = size(A,1);
+D = zeros(n,n);
+tic
+for i=1:n
+ D(i,:) = dijkstra(As,i);
+end
+toc
+
Elapsed time is 0.000304 seconds.
+
Let's try it without the conversion to see if we can notice the difference in speed. On my 2GHz laptop, this takes 0.001392
+ seconds.
+
D2 = zeros(n,n);
+tic
+for i=1:n
+ D2(i,:) = dijkstra(A,i);
+end
+toc
+
Elapsed time is 0.000541 seconds.
+
And just to check, let's make sure the output is the same.
isequal(D,D2)
+
+ans =
+
+ 1
+
+
+
+
+
+
\ No newline at end of file
diff --git a/demo/html/demo.png b/demo/html/demo.png
new file mode 100644
index 0000000..6f0c93a
Binary files /dev/null and b/demo/html/demo.png differ
diff --git a/demo/html/demo_01.png b/demo/html/demo_01.png
new file mode 100644
index 0000000..2dbcfe2
Binary files /dev/null and b/demo/html/demo_01.png differ
diff --git a/demo/html/demo_02.png b/demo/html/demo_02.png
new file mode 100644
index 0000000..4fd7aee
Binary files /dev/null and b/demo/html/demo_02.png differ
diff --git a/demo/html/demo_03.png b/demo/html/demo_03.png
new file mode 100644
index 0000000..9cad9bf
Binary files /dev/null and b/demo/html/demo_03.png differ
diff --git a/demo/html/demo_04.png b/demo/html/demo_04.png
new file mode 100644
index 0000000..5e602d1
Binary files /dev/null and b/demo/html/demo_04.png differ
diff --git a/demo/html/demo_05.png b/demo/html/demo_05.png
new file mode 100644
index 0000000..e0c2922
Binary files /dev/null and b/demo/html/demo_05.png differ
diff --git a/demo/html/demo_06.png b/demo/html/demo_06.png
new file mode 100644
index 0000000..e08d0ee
Binary files /dev/null and b/demo/html/demo_06.png differ
diff --git a/demo/html/demo_07.png b/demo/html/demo_07.png
new file mode 100644
index 0000000..d364e9f
Binary files /dev/null and b/demo/html/demo_07.png differ
diff --git a/demo/html/demo_08.png b/demo/html/demo_08.png
new file mode 100644
index 0000000..1612ed5
Binary files /dev/null and b/demo/html/demo_08.png differ
diff --git a/demo/html/demo_eq02910.png b/demo/html/demo_eq02910.png
new file mode 100644
index 0000000..d1fbfd3
Binary files /dev/null and b/demo/html/demo_eq02910.png differ
diff --git a/demo/html/demo_eq74756.png b/demo/html/demo_eq74756.png
new file mode 100644
index 0000000..473e14e
Binary files /dev/null and b/demo/html/demo_eq74756.png differ
diff --git a/demo/html/demo_eq85525.png b/demo/html/demo_eq85525.png
new file mode 100644
index 0000000..896d5f6
Binary files /dev/null and b/demo/html/demo_eq85525.png differ
diff --git a/demo/html/demo_eq91421.png b/demo/html/demo_eq91421.png
new file mode 100644
index 0000000..d1fbfd3
Binary files /dev/null and b/demo/html/demo_eq91421.png differ
diff --git a/demo/html/performance_comparison_simple.html b/demo/html/performance_comparison_simple.html
new file mode 100644
index 0000000..fbc95aa
--- /dev/null
+++ b/demo/html/performance_comparison_simple.html
@@ -0,0 +1,334 @@
+
+
+
+
+
+
+
+ Compare performance of gaimc to matlab_bgl
+
+
+
+
+
+
Compare performance of gaimc to matlab_bgl
+
+ While the gaimc library implements its graph routines in Matlab "m"-code, the matlab_bgl library uses graph algorithms from the Boost graph library in C++ through a mex interface. Folklore has it that Matlab code
+ with for loops like those required in the gaimc library is considerably slower. This example examines this lore and shows that gaimc is typically within a factor of 2-4 of the mex code for one function. The full performance_comparison suite evaluates the
+ rest, but it takes a while to run, so I only run it when I'm interested in a complete picture and can afford to run it overnight.
+
+
+
Contents
+
+
This demo is unlikely to work on your own computer. They depend on having the MatlabBGL routines in one spot.
+
Setup the environment
+
We need MatlabBGL on the path
graphdir = '../graphs/';
+matlabbgldir = '~/dev/matlab-bgl/4.0';
+try
+ addpath(matlabbgldir);
+ ci=components(sparse(ones(5)));
+catch
+ error('gaimc:performance_comparison','Matlab BGL is not working, halting...');
+end
+Warning: Duplicate directory name: /home/dgleich/dev/matlab-bgl/4.0.
+
Check to make sure we are in the correct directory
cwd = pwd; dirtail = ['gaimc' filesep 'demo'];
+if strcmp(cwd(end-length(dirtail)+1:end),dirtail) == 0
+ error('%s should be executed from %s\n',mfilename,dirtail);
+end
+initalize the results structure
results=[];
+mex_fast=0; mat_fast=0; mex_std=0; mat_std=0;
+
Connected components
+
To evaluate the performance of the connected components algorithm, we use sets of random graphs.
nrep=30;
+szs=[1 10 100 5000 10000 50000];
+comp_results=[mex_fast mat_fast mex_std mat_std];
+for szi=1:length(szs)
+ fprintf('\n%20s size=%-5i ', 'scomponents', szs(szi));
+
+ if szi==2, mex_fast=0; mat_fast=0; mex_std=0; mat_std=0; end
+ for rep=1:nrep
+ fprintf('\b\b\b\b'); fprintf(' %3i', rep);
+ A=sprand(szs(szi),szs(szi),25/szs(szi));
+ At=A'; [rp ci ai]=sparse_to_csr(A); As.rp=rp; As.ci=ci; As.ai=ai;
+ tic; cc1=components(A); mex_std=mex_std+toc;
+ tic; cc2=components(At,struct('istrans',1,'nocheck',1));
+ mex_fast=mex_fast+toc;
+ tic; cc3=scomponents(A); mat_std=mat_std+toc;
+ tic; cc4=scomponents(As); mat_fast=mat_fast+toc;
+ cs1=accumarray(cc1,1,[max(cc1) 1]);
+ cs2=accumarray(cc2,1,[max(cc2) 1]);
+ cs3=accumarray(cc3,1,[max(cc3) 1]);
+ cs4=accumarray(cc4,1,[max(cc4) 1]);
+ if any(cs1 ~= cs2) || any(cs2 ~= cs3) || any(cs2 ~= cs4)
+ error('gaimc:scomponents','incorrect results from scomponents');
+ end
+ end
+ comp_results(end+1,:) = [mex_fast mat_fast mex_std mat_std];
+end
+comp_results=diff(comp_results);
+results(end+1).name='scomponents';
+results(end).mex_fast = mex_fast;
+results(end).mat_fast = mat_fast;
+results(end).mex_std = mex_std;
+results(end).mat_std = mat_std;
+
+ scomponents size=1 30
+ scomponents size=10 30
+ scomponents size=100 30
+ scomponents size=5000 30
+ scomponents size=10000 30
+ scomponents size=50000 30
Dijkstra's algorithm
+
To evaluate the performance of Dijkstra's algorithm, we pick
graphs = {'clr-25-2', 'clr-24-1', 'cs-stanford', ...
+ 'minnesota', 'tapir'};
+nrep=30; ntests=100; mex_fast=0; mat_fast=0; mex_std=0; mat_std=0;
+for rep=1:nrep
+ for gi=1:length(graphs)
+ load([graphdir graphs{gi} '.mat']); n=size(A,1);
+ At=A'; [rp ci ai]=sparse_to_csr(A); As.rp=rp; As.ci=ci; As.ai=ai;
+ for ti=1:ntests
+ fprintf([repmat('\b',1,66) '%20s rep=%3i graph=%-20s trial=%4i'], ...
+ 'dijkstra',rep,graphs{gi},ti);
+ v=ceil(n*rand(1));
+ tic; d1=dijkstra_sp(A,v); mex_std=mex_std+toc;
+ tic; d2=dijkstra_sp(At,v,struct('istrans',1,'nocheck',1));
+ mex_fast=mex_fast+toc;
+ tic; d3=dijkstra(A,v); mat_std=mat_std+toc;
+ tic; d4=dijkstra(As,v); mat_fast=mat_fast+toc;
+ if any(d1 ~= d2) || any(d2 ~= d3) || any(d3 ~= d4)
+ error('gaimc:dijkstra','incorrect results from dijkstra');
+ end
+ end
+ end
+end
+fprintf('\n');
+results(end+1).name='dijkstra';
+results(end).mex_fast = mex_fast;
+results(end).mat_fast = mat_fast;
+results(end).mex_std = mex_std;
+results(end).mat_std = mat_std;
+ dijkstra rep= 30 graph=tapir trial= 100
+
Summarize the results
+
We are going to summarize the results in a bar plot based on the algorithm. Each algorithm is a single bar where the performance
+ of the mex code is 1.
+
nresults=length(results);
+Ystd = zeros(nresults,1);
+Yfast = zeros(nresults,1);
+for i=1:nresults
+ Ystd(i)=results(i).mat_std/results(i).mex_std;
+ Yfast(i)=results(i).mat_fast/results(i).mex_fast;
+end
+bar(1:nresults,[Ystd Yfast]); set(gca,'XTickLabel',{results.name});
+legend('Standard','Fast','Location','Northwest');
+
From this, we see that the connected component codes are about half the speed of the Matlab BGL functions and Dijkstra's is
+ about 1/4th the speed. This seems unideal, but the code is much more portable and flexible.
+
+
The difference between the Standard and Fast results is that the fast results eliminate all data translation for both gaimc
+ and MatlabBGL and are evaluating the actual algorithm implementation and not any of the data translation components.
+
+
+
+
+
+
\ No newline at end of file
diff --git a/demo/html/performance_comparison_simple.png b/demo/html/performance_comparison_simple.png
new file mode 100644
index 0000000..a2096d9
Binary files /dev/null and b/demo/html/performance_comparison_simple.png differ
diff --git a/demo/html/performance_comparison_simple_01.png b/demo/html/performance_comparison_simple_01.png
new file mode 100644
index 0000000..06386ec
Binary files /dev/null and b/demo/html/performance_comparison_simple_01.png differ
diff --git a/demo/performance_comparison.m b/demo/performance_comparison.m
index 63a8f52..f823fd5 100644
--- a/demo/performance_comparison.m
+++ b/demo/performance_comparison.m
@@ -6,6 +6,10 @@
% considerably slower. This example examines this lore and shows that
% |gaimc| is typically within a factor of 2-4 of the mex code.
+%%
+% *This demo is unlikely to work on your own computer.*
+% *They depend on having the MatlabBGL routines in one spot.*
+
%% Setup the environment
% We need MatlabBGL on the path
graphdir = '../graphs/';
@@ -103,9 +107,11 @@
%%
% Plot the data for connected components
-plot(szs, comp_results,'.-');
-legend('mex fast','mat fast','mex std','mat std','Location','Northwest');
-ylabel('time'); xlabel('graph size');
+% This plot isn't too meaningful, so I've omitted it.
+
+% plot(szs, comp_results,'.-');
+% legend('mex fast','mat fast','mex std','mat std','Location','Northwest');
+% ylabel('time'); xlabel('graph size');
%% Dijkstra's algorithm
% To evaluate the performance of Dijkstra's algorithm, we pick
@@ -237,7 +243,9 @@
%% Summarize the results
% We are going to summarize the results in a bar plot based on the
-% algorithm. Each algorithm is a single bar, where the performance of the
+% algorithm. Each algorithm is a single bar
+graphs = {'all_shortest_paths_example', 'clr-24-1', 'cs-stanford', ...
+ 'minnesota','tapir'};, where the performance of the
% mex code is 1.
nresults=length(results);
Ystd = zeros(nresults,1);
diff --git a/demo/performance_comparison_simple.m b/demo/performance_comparison_simple.m
new file mode 100644
index 0000000..54c8905
--- /dev/null
+++ b/demo/performance_comparison_simple.m
@@ -0,0 +1,131 @@
+%% Compare performance of gaimc to matlab_bgl
+% While the |gaimc| library implements its graph routines in Matlab
+% "m"-code, the |matlab_bgl| library uses graph algorithms from the Boost
+% graph library in C++ through a mex interface. Folklore has it that
+% Matlab code with for loops like those required in the |gaimc| library is
+% considerably slower. This example examines this lore and shows that
+% |gaimc| is typically within a factor of 2-4 of the mex code for one
+% function. The full performance_comparison suite evaluates the rest, but
+% it takes a while to run, so I only run it when I'm interested in a
+% complete picture and can afford to run it overnight.
+
+%%
+% *This demo is unlikely to work on your own computer.*
+% *They depend on having the MatlabBGL routines in one spot.*
+
+%% Setup the environment
+% We need MatlabBGL on the path
+graphdir = '../graphs/';
+matlabbgldir = '~/dev/matlab-bgl/4.0';
+try
+ addpath(matlabbgldir); % change this to your matlab_bgl path
+ ci=components(sparse(ones(5)));
+catch
+ error('gaimc:performance_comparison','Matlab BGL is not working, halting...');
+end
+
+%%
+% Check to make sure we are in the correct directory
+cwd = pwd; dirtail = ['gaimc' filesep 'demo'];
+if strcmp(cwd(end-length(dirtail)+1:end),dirtail) == 0
+ error('%s should be executed from %s\n',mfilename,dirtail);
+end
+
+%%
+% initalize the results structure
+results=[];
+mex_fast=0; mat_fast=0; mex_std=0; mat_std=0;
+
+%% Connected components
+% To evaluate the performance of the connected components algorithm, we use
+% sets of random graphs.
+nrep=30;
+szs=[1 10 100 5000 10000 50000];
+comp_results=[mex_fast mat_fast mex_std mat_std];
+for szi=1:length(szs)
+ fprintf('\n%20s size=%-5i ', 'scomponents', szs(szi));
+ % Matlab needs 1 iteration to compile the function
+ if szi==2, mex_fast=0; mat_fast=0; mex_std=0; mat_std=0; end
+ for rep=1:nrep
+ fprintf('\b\b\b\b'); fprintf(' %3i', rep);
+ A=sprand(szs(szi),szs(szi),25/szs(szi));
+ At=A'; [rp ci ai]=sparse_to_csr(A); As.rp=rp; As.ci=ci; As.ai=ai;
+ tic; cc1=components(A); mex_std=mex_std+toc;
+ tic; cc2=components(At,struct('istrans',1,'nocheck',1));
+ mex_fast=mex_fast+toc;
+ tic; cc3=scomponents(A); mat_std=mat_std+toc;
+ tic; cc4=scomponents(As); mat_fast=mat_fast+toc;
+ cs1=accumarray(cc1,1,[max(cc1) 1]);
+ cs2=accumarray(cc2,1,[max(cc2) 1]);
+ cs3=accumarray(cc3,1,[max(cc3) 1]);
+ cs4=accumarray(cc4,1,[max(cc4) 1]);
+ if any(cs1 ~= cs2) || any(cs2 ~= cs3) || any(cs2 ~= cs4)
+ error('gaimc:scomponents','incorrect results from scomponents');
+ end
+ end
+ comp_results(end+1,:) = [mex_fast mat_fast mex_std mat_std];
+end
+comp_results=diff(comp_results);
+results(end+1).name='scomponents';
+results(end).mex_fast = mex_fast;
+results(end).mat_fast = mat_fast;
+results(end).mex_std = mex_std;
+results(end).mat_std = mat_std;
+
+%% Dijkstra's algorithm
+% To evaluate the performance of Dijkstra's algorithm, we pick
+graphs = {'clr-25-2', 'clr-24-1', 'cs-stanford', ...
+ 'minnesota', 'tapir'};
+nrep=30; ntests=100; mex_fast=0; mat_fast=0; mex_std=0; mat_std=0;
+for rep=1:nrep
+ for gi=1:length(graphs)
+ load([graphdir graphs{gi} '.mat']); n=size(A,1);
+ At=A'; [rp ci ai]=sparse_to_csr(A); As.rp=rp; As.ci=ci; As.ai=ai;
+ for ti=1:ntests
+ fprintf([repmat('\b',1,66) '%20s rep=%3i graph=%-20s trial=%4i'], ...
+ 'dijkstra',rep,graphs{gi},ti);
+ v=ceil(n*rand(1));
+ tic; d1=dijkstra_sp(A,v); mex_std=mex_std+toc;
+ tic; d2=dijkstra_sp(At,v,struct('istrans',1,'nocheck',1));
+ mex_fast=mex_fast+toc;
+ tic; d3=dijkstra(A,v); mat_std=mat_std+toc;
+ tic; d4=dijkstra(As,v); mat_fast=mat_fast+toc;
+ if any(d1 ~= d2) || any(d2 ~= d3) || any(d3 ~= d4)
+ error('gaimc:dijkstra','incorrect results from dijkstra');
+ end
+ end
+ end
+end
+fprintf('\n');
+results(end+1).name='dijkstra';
+results(end).mex_fast = mex_fast;
+results(end).mat_fast = mat_fast;
+results(end).mex_std = mex_std;
+results(end).mat_std = mat_std;
+
+
+%% Summarize the results
+% We are going to summarize the results in a bar plot based on the
+% algorithm. Each algorithm is a single bar where the performance of the
+% mex code is 1.
+nresults=length(results);
+Ystd = zeros(nresults,1);
+Yfast = zeros(nresults,1);
+for i=1:nresults
+ Ystd(i)=results(i).mat_std/results(i).mex_std;
+ Yfast(i)=results(i).mat_fast/results(i).mex_fast;
+end
+bar(1:nresults,[Ystd Yfast]); set(gca,'XTickLabel',{results.name});
+legend('Standard','Fast','Location','Northwest');
+
+%%
+% From this, we see that the connected component codes are about half the
+% speed of the Matlab BGL functions and Dijkstra's is about 1/4th the
+% speed. This seems unideal, but the code is much more portable and
+% flexible.
+%
+% The difference between the Standard and Fast results is that the fast
+% results eliminate all data translation for both gaimc and MatlabBGL and
+% are evaluating the actual algorithm implementation and not any of the
+% data translation components.
+