Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Bring back index/about pages; add an indexing daemon

  • Loading branch information...
commit d4ccc12aa31941dd6aa7e65349af7a0a09211a13 1 parent 3f8998a
@gdb authored
View
50 bin/index-all
@@ -0,0 +1,50 @@
+#!/usr/bin/env ruby
+require 'optparse'
+require 'fileutils'
+
+require File.join(File.dirname(__FILE__), '../lib/anygit')
+
+def main
+ options = {}
+ optparse = OptionParser.new do |opts|
+ opts.banner = "Usage: #{$0} [options] <url>"
+
+ opts.on('-h', '--help', 'Display this message') do
+ puts opts
+ exit(1)
+ end
+
+ opts.on('-d', '--dont-delete', 'Leave dumps lying around') do
+ options[:dont_delete] = true
+ end
+
+ opts.on('-l', '--loop-forever', 'Loop') do
+ options[:loop_forever] = true
+ end
+ end
+ optparse.parse!
+
+ if ARGV.length != 0
+ puts optparse
+ return 1
+ end
+
+ Anygit::Model.init
+
+ while true
+ Anygit::Indexer.run_all(:dont_delete => options[:dont_delete])
+ break unless options[:loop_forever]
+ # Really want some notification from the web interface, but meh.
+ sleep(5)
+ end
+ return 0
+end
+
+if $0 == __FILE__
+ ret = main
+ begin
+ exit(ret)
+ rescue TypeError
+ exit(0)
+ end
+end
View
15 lib/anygit/indexer.rb
@@ -2,18 +2,27 @@ module Anygit::Indexer
TMPDIR_BASE = File.join(File.dirname(__FILE__), '../../tmp')
def self.run(repo, opts={})
+ Anygit.log.info("About to index #{repo}")
+
fp = FetchPackfile.new(repo, opts)
fp.run do |repo, path|
ip = IndexPackfile.new(repo, path, opts)
ip.run
end
- repo.needs_index = 'false'
+ repo.index_state = 'indexed'
+ repo.been_indexed = 'true'
repo.save
end
def self.run_all(opts={})
- Anygit::Model::Repo.all(:needs_index => 'true').each do |repo|
- run(repo, opts)
+ Anygit::Model::Repo.all(:index_state => 'pending').each do |repo|
+ begin
+ run(repo, opts)
+ rescue StandardError => e
+ Anygit.log.error("Error indexing #{repo}: #{e} (#{e.class})\n #{e.backtrace.join("\n ")}")
+ repo.index_state = 'failed'
+ repo.save
+ end
end
end
View
13 lib/anygit/model.rb
@@ -29,8 +29,9 @@ class Repo
# TODO: support aliasing
property :url, String, :length => 3000
property :template, String, :length => 3000
- # Actually just true/false
- property :needs_index, String, :index => true, :default => 'false'
+ property :index_state, String, :default => 'unstarted'
+ # Should really just be a bool
+ property :been_indexed, String, :index => true, :default => 'false'
property :created_at, DateTime
property :fetched_at, DateTime
@@ -56,7 +57,7 @@ class GitObject
include DataMapper::Resource
property :sha1, String, SHA1_KEY_OPTS
- property :type, String
+ property :type, String, :index => true
property :created_at, DateTime
def hex_sha1
@@ -71,6 +72,12 @@ class ObjectRepo
property :sha1, String, SHA1_KEY_OPTS
belongs_to :repo, :key => true
property :created_at, DateTime
+
+ def self.most_popular(limit)
+ or_name = Util.validate_table_name(ObjectRepo.storage_name)
+ r_name = Util.validate_table_name(Repo.storage_name)
+ repository(:default).adapter.select("SELECT b.url, a.repo_id, COUNT(*) as count FROM #{or_name} AS a JOIN #{r_name} AS b ON a.repo_id = b.id GROUP BY repo_id ORDER BY count DESC LIMIT ?", limit)
+ end
end
end
end
View
91 lib/anygit/web_interface.rb
@@ -7,30 +7,73 @@ class WebInterface < Sinatra::Base
LIMIT = 10
set :root, Pathname.new(File.join(File.dirname(__FILE__), '../..')).realpath
- get '/q/?' do
- @limit = LIMIT
- @collection = Model::GitObject.all(:limit => @limit)
- @sha1 = nil
- erb :q_many
+ get '/' do # Home page: delegates to the index helper, which renders the :index view.
+ index
+ end
+
+ post '/repos' do
+ query = {:url => params[:url]}
+
+ if r = Model::Repo.first(query)
+ if r.index_state == 'pending'
+ @flash = "Repo at #{r.url} is already marked for indexing; have no fear--we will get to it."
+ elsif r.index_state == 'indexed'
+ @flash = "Marking repo at #{r.url} for reindexing"
+ elsif r.index_state == 'failed'
+ @flash = "Last attempt to index repo at #{r.url} failed, but I'll try again."
+ else
+ raise "Invalid indexing state: #{r.index_state} for #{r}"
+ end
+ else
+ r = Model::Repo.create(query)
+ @flash = "Adding repo at #{r.url} to the index"
+ end
+
+ r.index_state = 'pending'
+ r.save
+
+ # Too lazy to actually add the flash
+ index
+ end
+
+ # Collects the statistics shown on the landing page (indexed repo count,
+ # per-type git object counts, and the five largest repos) and renders the
+ # :index view.
+ def index
+   # One COUNT query per object type; could probably be a single grouped
+   # query instead.
+   count_objects = lambda { |type| Model::GitObject.count(:type => type) }
+
+   @repo_count = Model::Repo.count(:been_indexed => 'true')
+   @commit_count = count_objects.call('commit')
+   @tree_count = count_objects.call('tree')
+   @blob_count = count_objects.call('blob')
+   @tag_count = count_objects.call('tag')
+
+   @largest_repos = Model::ObjectRepo.most_popular(5)
+   erb :index
end
- get '/q/:sha1' do
+ get '/about' do # Static "about" page: renders the :about view.
+ erb :about
+ end
+
+ get '/q/?:sha1?' do
@limit = LIMIT
- @sha1 = sha1 = params[:sha1]
+ @sha1 = sha1 = params[:sha1] || ''
if sha1.length > 40
halt 400, "SHA1s must be no more than 40 characters"
- elsif sha1 !~ /^[a-f0-9]+$/
+ elsif sha1 !~ /^[a-f0-9]*$/
halt 400, "SHA1s must be in hex"
end
- binary_sha1 = Util.sha1_to_bytes(sha1)
- if upper = upper_bound(binary_sha1)
- Anygit.log.info("Querying for objects bounded between [#{binary_sha1.inspect}, #{upper.inspect})")
- @collection = Model::GitObject.all(:sha1.gte => binary_sha1, :sha1.lt => upper, :limit => @limit)
+ if sha1 == ''
+ Anygit.log.info("Querying for all objects")
+ @collection = Model::GitObject.all(:limit => @limit)
else
- Anygit.log.info("Querying for objects bounded below by #{binary_sha1.inspect}")
- @collection = Model::GitObject.all(:sha1.gte => binary_sha1, :limit => @limit)
+ binary_sha1 = Util.sha1_to_bytes(sha1)
+ if upper = upper_bound(binary_sha1)
+ Anygit.log.info("Querying for objects bounded between [#{binary_sha1.inspect}, #{upper.inspect})")
+ @collection = Model::GitObject.all(:sha1.gte => binary_sha1, :sha1.lt => upper, :limit => @limit)
+ else
+ Anygit.log.info("Querying for objects bounded below by #{binary_sha1.inspect}")
+ @collection = Model::GitObject.all(:sha1.gte => binary_sha1, :limit => @limit)
+ end
end
if @collection.count == 1
@@ -43,26 +86,6 @@ class WebInterface < Sinatra::Base
dest = repo.webview(object.type, object.hex_sha1)
Anygit.log.info("Redirecting to #{dest}")
redirect(dest)
-
-# Might be useful if we start indexing all the arrows
-#
-# op_name = Util.validate_table_name(Anygit::Model::ObjectPointer.storage_name)
-# go_name = Util.validate_table_name(Anygit::Model::GitObject.storage_name)
-# # TODO: paginate, filter by type
-# @raw_pointers = repository(:default).adapter.select("
-# SELECT b.sha1, b.type
-# FROM #{op_name} AS a LEFT JOIN #{go_name} AS b
-# ON a.source = b.sha1
-# WHERE a.target = ?
-# LIMIT ?
-# ", object.sha1, @limit)
-# @git_objects = @raw_pointers.map do |pointer|
-# go = Model::GitObject.new
-# go.sha1 = pointer.sha1
-# go.type = pointer.type
-# go
-# end
-# erb :q_one
else
erb :q_many
end
View
81 views/about.erb
@@ -0,0 +1,81 @@
+<h2> What is anygit? </h2>
+
+<p> <b>anygit</b> is a project of the <a
+href="http://sipb.mit.edu">Student Information Processing Board</a>,
+MIT's student computing group. We seek to index the world's git
+repositories and provide our indexed data to the general public.
+Visit <a href="http://anyg.it/">http://anyg.it</a> to run a query.</p>
+
+<h2> What does it do? </h2>
+
+<p> Think of <b>anygit</b> as a search engine for <a
+href="http://git-scm.com/">git</a> repositories. We take the git
+object model and turn it on its head. </p>
+
+<p> Here's how it works: you give us a SHA1 (or a SHA1 prefix) of a
+git object. <b>anygit</b> will then consult our
+painstakingly-compiled index to tell you which repositories and
+other git objects the requested object appears in. </p>
+
+<p> In particular, </p>
+
+<ul>
+<li>
+For any requested object, <b>anygit</b> provides the list of
+repositories that object appears in, as well as any tags that may
+point to that object.
+</li>
+<li>
+For a blob, <b>anygit</b> will provide the trees that the
+blob appears in, as well as its filename in that tree.
+</li>
+<li>
+For a tree, we will spit back the set of supertrees of that tree (as
+well as any associated filenames) and any commits that point to that
+tree.
+</li>
+</ul>
+
+<h2> Where did this marvelous invention come from? </h2>
+
+<p> <b>anygit</b> was the brainchild of <a
+href="http://ebroder.net">Evan Broder</a>. In his extensive usage of
+git as a developer, Evan often found himself wondering how far his
+commits were traveling. He decided to put into place a project to
+track this. So one fateful night, he gathered around him a group of
+MIT students, and they all swore they would not rest until his vision
+became a reality. </p>
+
+<p> The chief developer for the project is <a
+href="http://gregbrockman.com">Greg Brockman</a>. Other contributors
+include <a href="http://web.mit.edu/davidben/www/">David
+Benjamin</a>, <a href="http://web.mit.edu/lizdenys/www/">Liz Denys</a>,
+<a href="http://nelhage.com">Nelson Elhage</a>, <b>Alan Huang</b>,
+<a href="http://web.mit.edu/price">Greg Price</a>, and
+<a href="http://www.comclub.org/~quentins/about">Quentin
+Smith</a>. </p>
+
+<h2> What are the intended use cases? </h2>
+
+<p> Dunno. Email us at <a
+href="mailto:anygit@mit.edu">anygit@mit.edu</a> if you have any great
+ideas. </p>
+
+<h2> Where can I get the code for anygit? </h2>
+
+<p> The code for anygit is freely available (under the MIT license) on
+<a href="http://github.com/ebroder/anygit">GitHub</a>. </p>
+
+<h2> I have no idea what's going on here, how can I learn more about git? </h2>
+
+<p> There are many excellent resources for git available for free on the internets: </p>
+
+<ul>
+<li><a href="http://blog.nelhage.com/2010/01/git-in-pictures/">Git in pictures</a></li>
+<li><a href="http://eagain.net/articles/git-for-computer-scientists/">Git for computer scientists</a></li>
+<li> <a href="http://marklodato.github.com/visual-git-guide/">Visual git guide</a> </li>
+</ul>
+
+<h2> Cool! I'd like to make some queries now. </h2>
+
+<p> Be our guest. Make a query from our <a href="http://anyg.it/">homepage</a>.</p>
View
84 views/index.erb
@@ -0,0 +1,84 @@
+<script type="text/javascript">
+ function setfocus(elt) { document.getElementById(elt).focus(); }
+
+ function expand() {
+ urlbox = document.getElementById('url');
+ text = urlbox.value;
+ parent = urlbox.parentNode;
+ parent.removeChild(urlbox);
+ if(urlbox.nodeName.toLowerCase() == 'input') {
+ urlbox = document.createElement('textarea');
+ urlbox.setAttribute('cols', '20');
+ urlbox.setAttribute('rows', '5');
+ document.getElementById('expand').innerHTML = '&#9650;';
+ } else {
+ urlbox = document.createElement('input');
+ urlbox.setAttribute('type', 'text');
+ urlbox.setAttribute('size', '20');
+ document.getElementById('expand').innerHTML = '&#9660;';
+ }
+ urlbox.setAttribute('id', 'url');
+ urlbox.setAttribute('name', 'url');
+ urlbox.setAttribute('title', 'URL of the repository');
+ urlbox.value = text;
+ parent.insertBefore(urlbox, document.getElementById('submit').previousSibling);
+ }
+</script>
+
+<table><tr><td>
+
+<div class="box" id="current">
+ <div class="info"><h2>Currently indexed:</h2></div>
+ <div id="repo"><b><%= @repo_count %></b> repos
+ <img src="/static/git-repo.png" onclick="setfocus('url');"></div>
+ <div id="blob"><b><%= @blob_count %></b> blobs
+ <img src="/static/git-blob.png" onclick="setfocus('sha1');"></div>
+ <div id="tree"><b><%= @tree_count %></b> trees
+ <img src="/static/git-tree.png" onclick="setfocus('sha1');"></div>
+ <div id="commit"><b><%= @commit_count %></b> commits
+ <img src="/static/git-commit.png" onclick="setfocus('sha1');"></div>
+ <div id="tag"><b><%= @tag_count %></b> tags
+ <img src="/static/git-tag.png" onclick="setfocus('sha1');"></div>
+</div>
+
+</td><td>
+
+<div class="box" id="request">
+ <div class="info"><h2>Request indexing:</h2></div>
+ <div id="add">
+ <p>Would you like your repository to be added to the index? Enter the Git URL here.</p>
+ <form action="/repos" method="POST">
+ <p> <label for="url">URL of the repository</label> <input type="text" name="url" id="url" /> </p>
+ <p> <input type="submit" value="Submit" /> </p>
+ </form>
+ </div>
+</div>
+
+<div class="box" id="search">
+ <div class="info"><h2>Object lookup:</h2></div>
+ <div id="sha">
+ <p>You can query for any Git object by going to <b>http://anyg.it/q/$sha1prefix</b>.</p>
+ <p>Alternatively, just enter your SHA1 prefix in the textfield:</p>
+ <form action="/q" method="GET">
+ <p> <label for="sha1">SHA-1 prefix to search for</label> <input type="text" name="sha1" id="sha1" /> </p>
+ <p> <input type="submit" value="Query" /> </p>
+ </form>
+ </div>
+</div>
+
+</td><td>
+
+<div class="box" id="stats">
+ <div class="info"><h2>Largest repositories:</h2></div>
+ <div id="largest">
+ <ol>
+<% @largest_repos.each do |stat| %>
+ <li> <b><%=h stat.url %></b><br />with <b><%=h stat.count %></b> git objects </li>
+<% end %>
+ </ol>
+ </div>
+</div>
+
+</td></tr></table>
+
+<br />
View
8 views/layout.erb
@@ -12,6 +12,14 @@
Welcome to <a href="/">anygit</a>, indexing the world's git repositories.
</div>
+<% if @flash %>
+ <div class="flash">
+ <ul id="flash-messages">
+ <li><%=h @flash %></li>
+ </ul>
+ </div>
+<% end %>
+
<div class="body">
<%= yield %>
</div>
Please sign in to comment.
Something went wrong with that request. Please try again.