Permalink
Browse files

Merge branch 'master' of git@github.com:dgleich/gaimc

Conflicts:

	sparse_to_csr.m
  • Loading branch information...
2 parents 346aaaa + 617635c commit 9bccc1064c47346c6dcd203f33cb24da6e25372d David Gleich committed May 16, 2009
View
@@ -0,0 +1,3 @@
+*~
+*.asv
+
View
@@ -2,7 +2,7 @@
% Graph Algorithms in Matlab Code (gaimc)
% Written by David Gleich
% Version 1.0 (beta)
-% 2008-04-21
+% 2008-2009
%=========================================
%
% Search algorithms
@@ -17,17 +17,34 @@
%
% Connected components
% scomponents - Compute strongly connected components
+% largest_component - Selects only the largest component
%
% Statistics
% clustercoeffs - Compute clustering coefficients
% dirclustercoeffs - Compute directed clustering coefficients
% corenums - Compute core numbers
%
+% Drawing
+% graph_draw - Draw an adjacency matrix (from Leon Peshkin)
+%
% Helper functions
% sparse_to_csr - Compressed sparse row arrays from a matrix
+% load_gaimc_graph - Loads a sample graph from the library
% David Gleich
-% Copyright, Stanford University, 2008
+% Copyright, Stanford University, 2008-2009
% History
% 2008-04-10: Initial version
+
+
+% TODO for release
+% Fix mlintrpt errors
+% Update copyright info everywhere
+% Implement bipartite matching
+
+
+% Future todos
+% Implement weighted core nums
+% More testing
+% Implement all pairs shortest paths with Floyd Warshall
View
2 bfs.m
@@ -11,6 +11,8 @@
% [...] = bfs(A,u,v) stops the bfs when it hits the vertex v
%
% Example:
+% load_gaimc_graph('bfs_example.mat') % use the dfs example from Boost
+% d = bfs(A,1)
%
% See also DFS
View
@@ -8,11 +8,14 @@
% control normalization and weighted computation. If normalized=0 or
% false, then the computation is not normalized by d*(d-1) in the
% unweighted case. If weighted=0 or false, then the weights of the matrix
-% A are ignored.
+% A are ignored. Either parameter will assume its default value if you
+% specify an empty matrix.
%
% See also DIRCLUSTERCOEFFS
%
% Example:
+% load_gaimc_graph('clique-10');
+% cc = clustercoeffs(A) % they are all equal! as we expect in a clique
if ~exist('normalized','var') || isempty(normalized), normalized=true; end
if ~exist('weighted','var') || isempty(weighted), weighted=true; end
@@ -37,7 +40,7 @@
end
n=length(rp)-1;
-cc=zeros(n,1); ind=false(n,1); cache=zeros(n,1); degs=zeros(n,1);
+cc=zeros(n,1); ind=false(n,1); cache=zeros(n,1);
ew=1; ew2=1;
for v=1:n
for rpi=rp(v):rp(v+1)-1
View
@@ -0,0 +1,22 @@
+function As = convert_sparse(A)
+% CONVERT_SPARSE Convert a sparse matrix to the native gaimc representation
+%
+% As = convert_sparse(A) returns a struct with the three arrays defining
+% the compressed sparse row structure of A.
+%
+% Example:
+% load('graphs/all_shortest_paths_example')
+% As = convert_sparse(A)
+%
+% See also SPARSE_TO_CSR SPARSE
+
+% David Gleich
+% Copyright, Stanford University, 2008-2009
+
+% History
+% 2009-04-29: Initial coding
+
+[rp ci ai] = sparse_to_csr(A);
+As.rp = rp;
+As.ci = ci;
+As.ai = ai;
View
@@ -15,7 +15,7 @@
% Decomposition of Networks." Sept. 1 2002.
%
% Example:
-% load('graphs/cores_example.mat')
+% load_gaimc_graph('cores_example'); % the graph A has three components
% corenums(A)
%
View
@@ -0,0 +1,124 @@
+%% The US airport network
+% The North American airport network is an interesting graph to examine.
+% The source for this data was a file on Brendan Frey's affinity
+% propagation website. A(i,j) is the negative travel time between two
+% airports. Although the data didn't include the airport locations, I used
+% the Yahoo! Geocoding API to generate a latitude and longitude for each
+% airport.
+
+%% The data
+load_gaimc_graph('airports');
+%%
+% Plot a histogram for all route time estimates
+[si, ti, rt] = find(A);
+hist(-rt,100); % times are stored as negative values
+
+%%
+% Find the lengthiest route
+[val,ind] = max(-rt)
+{labels{si(ind)} labels{ti(ind)}}
+
+%%
+% Some of the routes include stopovers, so that is probably what
+% we find in this case.
+
+%% Graph analysis: connected?
+% One of the first questions about any graph should be if it's connected or
+% not.
+
+max(scomponents(A))
+
+%%
+% There is only one connected component, so the graph is connected.
+
+%% Distance instead of time
+% Let's see how the edges correlate distance with estimated travel time.
+[ai aj te] = find(A);
+de = distance(xy(ai,:), xy(aj,:));
+plot(de,-te,'.');
+xlabel('distance (arclength)'); ylabel('time (?)');
+
+%%
+% Wow! It's all over the place, but there is a lower bound. Some of
+% these routes can include stopovers.
+
+%% Minimum spanning tree
+% This section repeats and extends some analysis in the overall gaimc demo.
+% First, let's recompute the minimum spanning tree based on travel time.
+load_gaimc_graph('airports')
+A = -A; % we store the negative travel time
+A = max(A,A'); % travel time isn't symmetric
+T = mst_prim(A);
+clf;
+gplot(T,xy);
+
+
+% These next lines plot a map of the US with states colored.
+ax = worldmap('USA');
+load coast
+geoshow(ax, lat, long,...
+ 'DisplayType', 'polygon', 'FaceColor', [.45 .60 .30])
+states = shaperead('usastatelo', 'UseGeoCoords', true);
+faceColors = makesymbolspec('Polygon',...
+ {'INDEX', [1 numel(states)], 'FaceColor', polcmap(numel(states))});
+ geoshow(ax, states, 'DisplayType', 'polygon', 'SymbolSpec', faceColors)
+set(gcf,'Position', [ 52 234 929 702]);
+%%
+% That's the US, now we need to plot our data on top of it.
+[X,Y] = gplot(T,xy); % get the information to reproduce a gplot
+plotm(Y,X,'k.-','LineWidth',1.5); % plot the lines on the map
+%%
+% We need to clear the axes after the mapping toolbox
+clf;
+%%
+% Let's just look at the continental US too.
+figure; ax = usamap('conus');
+states = shaperead('usastatelo', 'UseGeoCoords', true, 'Selector',...
+ {@(name) ~any(strcmp(name,{'Alaska','Hawaii'})), 'Name'});
+faceColors = makesymbolspec('Polygon',...
+ {'INDEX', [1 numel(states)], 'FaceColor', polcmap(numel(states))});
+geoshow(ax, states, 'DisplayType', 'polygon', 'SymbolSpec', faceColors)
+framem off; gridm off; mlabel off; plabel off
+set(gcf,'Position', [ 52 234 929 702]);
+plotm(Y,X,'k.-','LineWidth',1.5); % plot the lines on the map
+%%
+% One interesting aspect of this map is that major airline hubs (Chicago,
+% New York, etc.) are not well represented. One possible explanation is
+% that they have larger delays than other regional airports.
+%
+% Clear the figure again before proceeding.
+clf;
+
+%% Honolulu to St. John's?
+% Before, we saw that the lengthiest route was between St. John's and
+% Honolulu. But, does the network have a better path to follow between
+% these cities? Let's check using Dijkstra's shortest path algorithm!
+
+% Reload the network to restore it.
+load_gaimc_graph('airports')
+A = -A;
+
+% find the longest route again
+[si, ti, rt] = find(A);
+[val,ind] = max(rt); % we've already negated above, so no need to redo it
+start = si(ind);
+dest = ti(ind);
+[d pred] = dijkstra(A,start); % compute the distance to everywhere from St. Johns
+d(dest)
+
+%%
+% That value is considerably shorter than the direct time. How do we find
+% this awesome route?
+path =[]; u = dest; while (u ~= start) path=[u path]; u=pred(u); end
+fprintf('%s',labels{start});
+for i=path; fprintf(' --> %s', labels{i}); end, fprintf('\n');
+
+%% All pairs shortest paths
+% At this point, the right thing to do would be to recompute each edge in
+% the network using an all-pairs shortest path algorithm, and then look at
+% how distance correlates with time in that network. However, I haven't
+% had time to implement that algorithm yet.
+
+%% Conclusion
+% Hopefully, you will agree that network algorithms are a powerful way to
+% look at the relationships between airports!
Oops, something went wrong.

0 comments on commit 9bccc10

Please sign in to comment.