Browse files

Initial commit & license

  • Loading branch information...
0 parents commit ce034680669258fb3acb368c61dd4b88efa8b212 @floere committed Aug 16, 2010
Showing with 10,707 additions and 0 deletions.
  1. +8 −0 .gitignore
  2. +7 −0 README.textile
  3. +165 −0 client/LICENSE
  4. +3 −0 client/README.textile
  5. +55 −0 client/Rakefile
  6. +10 −0 client/lib/search-engine.rb
  7. +37 −0 client/lib/search/convenience.rb
  8. +96 −0 client/lib/search/engine.rb
  9. +25 −0 client/lib/search/serializer.rb
  10. +113 −0 client/spec/search/convenience_spec.rb
  11. +184 −0 client/spec/search/engine_spec.rb
  12. +39 −0 client/spec/search/serializer_spec.rb
  13. +7 −0 client/spec/spec.opts
  14. +9 −0 client/spec/spec_helper.rb
  15. +2 −0 server/.bundle/config
  16. +14 −0 server/Gemfile
  17. +165 −0 server/LICENSE
  18. +13 −0 server/README.textile
  19. +50 −0 server/Rakefile
  20. +31 −0 server/TODO.textile
  21. +32 −0 server/doc/process.textile
  22. +10 −0 server/lib/bundling.rb
  23. +9 −0 server/lib/constants.rb
  24. +212 −0 server/lib/deployment.rb
  25. +6 −0 server/lib/picky-tasks.rb
  26. +22 −0 server/lib/picky.rb
  27. +26 −0 server/lib/picky/application.rb
  28. +1 −0 server/lib/picky/cacher/README
  29. +17 −0 server/lib/picky/cacher/generator.rb
  30. +7 −0 server/lib/picky/cacher/partial/default.rb
  31. +19 −0 server/lib/picky/cacher/partial/none.rb
  32. +7 −0 server/lib/picky/cacher/partial/strategy.rb
  33. +91 −0 server/lib/picky/cacher/partial/subtoken.rb
  34. +15 −0 server/lib/picky/cacher/partial_generator.rb
  35. +7 −0 server/lib/picky/cacher/similarity/default.rb
  36. +73 −0 server/lib/picky/cacher/similarity/double_levenshtone.rb
  37. +25 −0 server/lib/picky/cacher/similarity/none.rb
  38. +7 −0 server/lib/picky/cacher/similarity/strategy.rb
  39. +15 −0 server/lib/picky/cacher/similarity_generator.rb
  40. +7 −0 server/lib/picky/cacher/weights/default.rb
  41. +39 −0 server/lib/picky/cacher/weights/logarithmic.rb
  42. +7 −0 server/lib/picky/cacher/weights/strategy.rb
  43. +15 −0 server/lib/picky/cacher/weights_generator.rb
  44. +13 −0 server/lib/picky/configuration/configuration.rb
  45. +68 −0 server/lib/picky/configuration/field.rb
  46. +41 −0 server/lib/picky/configuration/indexes.rb
  47. +66 −0 server/lib/picky/configuration/type.rb
  48. +101 −0 server/lib/picky/cores.rb
  49. +14 −0 server/lib/picky/database/source.rb
  50. +187 −0 server/lib/picky/ext/ruby19/Makefile
  51. +7 −0 server/lib/picky/ext/ruby19/extconf.rb
  52. +20 −0 server/lib/picky/ext/ruby19/mkmf.log
  53. BIN server/lib/picky/ext/ruby19/performant.bundle
  54. +339 −0 server/lib/picky/ext/ruby19/performant.c
  55. BIN server/lib/picky/ext/ruby19/performant.o
  56. BIN server/lib/picky/ext/ruby19/unicode.bundle
  57. BIN server/lib/picky/ext/ruby19/url_escape.bundle
  58. +45 −0 server/lib/picky/extensions/array.rb
  59. +11 −0 server/lib/picky/extensions/hash.rb
  60. +15 −0 server/lib/picky/extensions/module.rb
  61. +18 −0 server/lib/picky/extensions/symbol.rb
  62. +23 −0 server/lib/picky/helpers/cache.rb
  63. +11 −0 server/lib/picky/helpers/gc.rb
  64. +45 −0 server/lib/picky/helpers/measuring.rb
  65. +27 −0 server/lib/picky/helpers/search.rb
  66. +328 −0 server/lib/picky/index/bundle.rb
  67. +109 −0 server/lib/picky/index/category.rb
  68. +38 −0 server/lib/picky/index/combined.rb
  69. +37 −0 server/lib/picky/index/type.rb
  70. +108 −0 server/lib/picky/indexers/base.rb
  71. +3 −0 server/lib/picky/indexers/default.rb
  72. +19 −0 server/lib/picky/indexers/field.rb
  73. +59 −0 server/lib/picky/indexers/solr.rb
  74. +164 −0 server/lib/picky/indexes.rb
  75. +4 −0 server/lib/picky/initializers/ext.rb
  76. +22 −0 server/lib/picky/initializers/mysql.rb
  77. +268 −0 server/lib/picky/loader.rb
  78. +19 −0 server/lib/picky/loggers/search.rb
  79. +23 −0 server/lib/picky/performant/array.rb
  80. +82 −0 server/lib/picky/query/allocation.rb
  81. +129 −0 server/lib/picky/query/allocations.rb
  82. +123 −0 server/lib/picky/query/base.rb
  83. +69 −0 server/lib/picky/query/combination.rb
  84. +106 −0 server/lib/picky/query/combinations.rb
  85. +92 −0 server/lib/picky/query/combinator.rb
  86. +15 −0 server/lib/picky/query/full.rb
  87. +56 −0 server/lib/picky/query/heuristics.rb
  88. +22 −0 server/lib/picky/query/live.rb
  89. +73 −0 server/lib/picky/query/qualifiers.rb
  90. +77 −0 server/lib/picky/query/solr.rb
  91. +215 −0 server/lib/picky/query/token.rb
  92. +98 −0 server/lib/picky/query/tokens.rb
  93. +159 −0 server/lib/picky/query/weigher.rb
  94. +37 −0 server/lib/picky/rack/harakiri.rb
  95. +103 −0 server/lib/picky/results/base.rb
  96. +19 −0 server/lib/picky/results/full.rb
  97. +19 −0 server/lib/picky/results/live.rb
  98. +156 −0 server/lib/picky/routing.rb
  99. +11 −0 server/lib/picky/signals.rb
  100. +1 −0 server/lib/picky/solr/pids/sunspot-solr.pid
  101. +73 −0 server/lib/picky/solr/schema_generator.rb
  102. +130 −0 server/lib/picky/tokenizers/base.rb
  103. +3 −0 server/lib/picky/tokenizers/default.rb
  104. +73 −0 server/lib/picky/tokenizers/index.rb
  105. +70 −0 server/lib/picky/tokenizers/query.rb
  106. +21 −0 server/lib/picky/umlaut_substituter.rb
  107. +5 −0 server/lib/tasks/application.rake
  108. +53 −0 server/lib/tasks/cache.rake
  109. +4 −0 server/lib/tasks/framework.rake
  110. +31 −0 server/lib/tasks/index.rake
  111. +45 −0 server/lib/tasks/server.rake
  112. +13 −0 server/lib/tasks/shortcuts.rake
  113. +36 −0 server/lib/tasks/solr.rake
  114. +11 −0 server/lib/tasks/spec.rake
  115. +13 −0 server/lib/tasks/statistics.rake
  116. +1 −0 server/prototype_project/README.textile
  117. +64 −0 server/spec/ext/performant_spec.rb
  118. +71 −0 server/spec/lib/application_spec.rb
  119. +89 −0 server/spec/lib/cacher/partial/subtoken_spec.rb
  120. +35 −0 server/spec/lib/cacher/partial_generator_spec.rb
  121. +60 −0 server/spec/lib/cacher/similarity/double_levenshtone_spec.rb
  122. +23 −0 server/spec/lib/cacher/similarity/none_spec.rb
  123. +22 −0 server/spec/lib/cacher/similarity_generator_spec.rb
  124. +30 −0 server/spec/lib/cacher/weights/logarithmic_spec.rb
  125. +21 −0 server/spec/lib/cacher/weights_generator_spec.rb
  126. +38 −0 server/spec/lib/configuration/configuration_spec.rb
  127. +55 −0 server/spec/lib/configuration/type_spec.rb
  128. +8 −0 server/spec/lib/configuration_spec.rb
  129. +65 −0 server/spec/lib/cores_spec.rb
  130. +37 −0 server/spec/lib/extensions/array_spec.rb
  131. +11 −0 server/spec/lib/extensions/hash_spec.rb
  132. +27 −0 server/spec/lib/extensions/module_spec.rb
  133. +85 −0 server/spec/lib/extensions/symbol_spec.rb
  134. +35 −0 server/spec/lib/helpers/cache_spec.rb
  135. +71 −0 server/spec/lib/helpers/gc_spec.rb
  136. +18 −0 server/spec/lib/helpers/measuring_spec.rb
  137. +50 −0 server/spec/lib/helpers/search_spec.rb
  138. +47 −0 server/spec/lib/index/bundle_partial_generation_speed_spec.rb
  139. +260 −0 server/spec/lib/index/bundle_spec.rb
  140. +203 −0 server/spec/lib/index/category_spec.rb
  141. +130 −0 server/spec/lib/indexers/base_spec.rb
  142. +26 −0 server/spec/lib/indexers/field_spec.rb
  143. +48 −0 server/spec/lib/loader_spec.rb
  144. +19 −0 server/spec/lib/loggers/search_spec.rb
  145. +13 −0 server/spec/lib/performant/array_spec.rb
  146. +194 −0 server/spec/lib/query/allocation_spec.rb
  147. +336 −0 server/spec/lib/query/allocations_spec.rb
  148. +104 −0 server/spec/lib/query/base_spec.rb
  149. +90 −0 server/spec/lib/query/combination_spec.rb
  150. +83 −0 server/spec/lib/query/combinations_spec.rb
  151. +112 −0 server/spec/lib/query/combinator_spec.rb
  152. +22 −0 server/spec/lib/query/full_spec.rb
  153. +47 −0 server/spec/lib/query/heuristics_spec.rb
  154. +61 −0 server/spec/lib/query/live_spec.rb
  155. +31 −0 server/spec/lib/query/qualifiers_spec.rb
  156. +51 −0 server/spec/lib/query/solr_spec.rb
  157. +297 −0 server/spec/lib/query/token_spec.rb
  158. +189 −0 server/spec/lib/query/tokens_spec.rb
  159. +233 −0 server/spec/lib/results/base_spec.rb
  160. +285 −0 server/spec/lib/routing_spec.rb
  161. +42 −0 server/spec/lib/solr/schema_generator_spec.rb
  162. +61 −0 server/spec/lib/tokenizers/base_spec.rb
  163. +51 −0 server/spec/lib/tokenizers/index_spec.rb
  164. +105 −0 server/spec/lib/tokenizers/query_spec.rb
  165. +84 −0 server/spec/lib/umlaut_substituter_spec.rb
  166. +7 −0 server/spec/spec.opts
  167. +15 −0 server/spec/spec_helper.rb
  168. +55 −0 server/spec/specific/speed_spec.rb
  169. +16 −0 server/test_project/Gemfile
  170. +2 −0 server/test_project/Rakefile
  171. +77 −0 server/test_project/app/application.rb
  172. +71 −0 server/test_project/app/config.rb
  173. BIN server/test_project/cache/index/test/isbn/full_isbn_index.dump
  174. BIN server/test_project/cache/index/test/isbn/full_isbn_similarity.dump
  175. BIN server/test_project/cache/index/test/isbn/full_isbn_weights.dump
  176. BIN server/test_project/cache/index/test/isbn/partial_isbn_index.dump
  177. BIN server/test_project/cache/index/test/isbn/partial_isbn_similarity.dump
  178. BIN server/test_project/cache/index/test/isbn/partial_isbn_weights.dump
  179. BIN server/test_project/cache/index/test/main/full_author_index.dump
  180. BIN server/test_project/cache/index/test/main/full_author_similarity.dump
  181. BIN server/test_project/cache/index/test/main/full_author_weights.dump
  182. BIN server/test_project/cache/index/test/main/full_blurb_index.dump
  183. BIN server/test_project/cache/index/test/main/full_blurb_similarity.dump
  184. BIN server/test_project/cache/index/test/main/full_blurb_weights.dump
  185. BIN server/test_project/cache/index/test/main/full_title_index.dump
  186. BIN server/test_project/cache/index/test/main/full_title_similarity.dump
  187. BIN server/test_project/cache/index/test/main/full_title_weights.dump
  188. BIN server/test_project/cache/index/test/main/full_year_index.dump
  189. BIN server/test_project/cache/index/test/main/full_year_similarity.dump
  190. BIN server/test_project/cache/index/test/main/full_year_weights.dump
  191. BIN server/test_project/cache/index/test/main/partial_author_index.dump
  192. BIN server/test_project/cache/index/test/main/partial_author_similarity.dump
  193. BIN server/test_project/cache/index/test/main/partial_author_weights.dump
  194. BIN server/test_project/cache/index/test/main/partial_blurb_index.dump
  195. BIN server/test_project/cache/index/test/main/partial_blurb_similarity.dump
  196. BIN server/test_project/cache/index/test/main/partial_blurb_weights.dump
  197. BIN server/test_project/cache/index/test/main/partial_title_index.dump
  198. BIN server/test_project/cache/index/test/main/partial_title_similarity.dump
  199. BIN server/test_project/cache/index/test/main/partial_title_weights.dump
  200. BIN server/test_project/cache/index/test/main/partial_year_index.dump
  201. BIN server/test_project/cache/index/test/main/partial_year_similarity.dump
  202. BIN server/test_project/cache/index/test/main/partial_year_weights.dump
  203. +1 −0 server/test_project/config/README
  204. +6 −0 server/test_project/config/db/base.yml
  205. +6 −0 server/test_project/config/db/indexes.yml
  206. +6 −0 server/test_project/config/db/origin.yml
  207. +1 −0 server/test_project/config/logging.rb
  208. +1 −0 server/test_project/data/books.csv
  209. +14 −0 server/test_project/data/generate_test_db.sql
  210. BIN server/test_project/data/test.db
  211. +48 −0 server/test_project/spec/app/application_spec.rb
  212. +35 −0 server/test_project/spec/integration_spec.rb
  213. +9 −0 server/test_project/spec/spec_helper.rb
  214. BIN server/test_project/test
8 .gitignore
@@ -0,0 +1,8 @@
+.DS_Store
+pkg/
+server/lib/picky/ext/ruby19/Makefile
+server/lib/picky/ext/ruby19/mkmf.log
+server/lib/picky/ext/ruby19/performant.bundle
+
+server/.yardoc
+server/doc
7 README.textile
@@ -0,0 +1,7 @@
+Note: In development. Expect release version in September 2010.
+
+h1. Picky
+
+h2. The combinatorial small-text search engine.
+
+The search engine is split into a server and a client; the client is meant to be used in e.g. a Rails application.
165 client/LICENSE
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
3 client/README.textile
@@ -0,0 +1,3 @@
+h1. Search engine client.
+
+# TODO Usage etc.
55 client/Rakefile
@@ -0,0 +1,55 @@
+require 'rake'
+require 'rake/gempackagetask'
+
+task :default => :spec
+
+# Gemming
+#
+desc "Create the picky search engine client"
+spec = Gem::Specification.new do |s|
+ s.name = 'picky-client'
+ s.version = '0.0.0'
+ s.author = 'Florian Hanke'
+ s.email = 'florian.hanke+picky-client@gmail.com'
+ s.homepage = 'http://floere.github.com/picky'
+ s.platform = Gem::Platform::RUBY
+ s.summary = 'picky Search Engine Client'
+ s.files = Dir["lib/**/*.rb"]
+ s.test_files = Dir["spec/**/*_spec.rb"]
+ s.has_rdoc = false
+ s.extra_rdoc_files = ['README.textile']
+end
+
+Rake::GemPackageTask.new(spec) do |pkg|
+ pkg.need_tar = true
+end
+
+require 'spec'
+require 'spec/rake/spectask'
+
+spec_root = File.join(File.dirname(__FILE__), 'spec')
+
+desc "Run all specs in spec directory (excluding plugin specs)"
+Spec::Rake::SpecTask.new(:spec) do |t|
+ t.spec_opts = ['--options', "\"#{File.join(spec_root, 'spec.opts')}\""]
+ t.spec_files = FileList[File.join(spec_root, '**', '*_spec.rb')]
+end
+namespace :spec do
+
+ desc "Run all specs in spec directory with RCov (excluding plugin specs)"
+ Spec::Rake::SpecTask.new(:rcov) do |t|
+ t.spec_opts = ['--options', "\"spec/spec.opts\""]
+ t.spec_files = FileList['spec/**/*_spec.rb']
+ t.rcov = true
+ t.rcov_opts = lambda do
+ IO.readlines("#{SEARCH_ROOT}/spec/rcov.opts").map {|l| l.chomp.split " "}.flatten
+ end
+ end
+
+ desc "Print Specdoc for all specs (excluding plugin specs)"
+ Spec::Rake::SpecTask.new(:doc) do |t|
+ t.spec_opts = ["--format", "specdoc", "--dry-run"]
+ t.spec_files = FileList['spec/**/*_spec.rb']
+ end
+
+end
10 client/lib/search-engine.rb
@@ -0,0 +1,10 @@
+$KCODE = 'UTF-8' unless RUBY_VERSION > '1.8.7'
+
+require 'rubygems'
+
+require 'active_support'
+
+this = File.dirname __FILE__
+require File.join(this, '/search/engine')
+require File.join(this, '/search/serializer')
+require File.join(this, '/search/convenience')
37 client/lib/search/convenience.rb
@@ -0,0 +1,37 @@
+module Search
+ # Use this module to extend the hash the serializer returns.
+ #
+ module Convenience
+
+ # Are there no allocations?
+ #
+ def empty?
+ allocations.empty?
+ end
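+ # Returns the topmost ids, collected across the allocations in order.
+ # Note: the size check uses >, so up to limit + 1 ids are returned.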
+ #
+ def ids limit = 20
+ ids = []
+ allocations.each { |allocation| allocation[4].each { |id| break if ids.size > limit; ids << id } }
+ ids
+ end
+ # Removes the ids from each allocation.
+ #
+ def clear_ids
+ allocations.each { |allocation| allocation[4].clear }
+ end
+
+ # Caching readers.
+ #
+ def allocations
+ @allocations || @allocations = self[:allocations]
+ end
+ def allocations_size
+ @allocations_size || @allocations_size = allocations.size
+ end
+ def total
+ @total || @total = self[:total]
+ end
+
+ end
+end
96 client/lib/search/engine.rb
@@ -0,0 +1,96 @@
+require 'net/http'
+
+module Search
+ # Frontend for the search client.
+ #
+ # Configure a search by passing the options in the initializer:
+ # * host
+ # * port
+ # * path
+ #
+ # TODO Rewrite so that we connect through a plain TCP socket instead of
+ # issuing an HTTP request. Or use EventMachine.
+ #
+ module Engine
+
+ class Base
+
+ attr_accessor :host, :port, :path
+
+ def initialize options = {}
+ options = default_configuration.merge options
+
+ @host = options[:host]
+ @port = options[:port]
+ @path = options[:path]
+ end
+ def default_configuration
+ {}
+ end
+ def self.default_configuration options = {}
+ define_method :default_configuration do
+ options
+ end
+ end
+ def default_params
+ {}
+ end
+ def self.default_params options = {}
+ options.stringify_keys! if options.respond_to?(:stringify_keys!)
+ define_method :default_params do
+ options
+ end
+ end
+
+ # Merges the given params, overriding the defaults.
+ #
+ def defaultize params = {}
+ default_params.merge params
+ end
+
+ # Searches the index. Use this method.
+ #
+ # Returns an empty hash if the query is blank, otherwise the result of send_search. Extend a hash result with Convenience.
+ #
+ def search params = {}
+ return {} if params[:query].blank?
+
+ send_search params
+ end
+
+ # Sends a search to the configured address.
+ #
+ def send_search params = {}
+ params = defaultize params
+ Net::HTTP.get self.host, "#{self.path}?#{params.to_query}", self.port
+ end
+
+ end
+
+ class Full < Base
+ default_configuration :host => 'localhost', :port => 4000, :path => '/searches/full'
+
+ # Full needs to deserialize the results.
+ #
+ def send_search params = {}
+ Serializer.deserialize super(params)
+ end
+
+ end
+
+ class Live < Base
+ default_configuration :host => 'localhost', :port => 4000, :path => '/searches/live'
+ end
+
+ end
+end
+
+# Extend hash with to_query method.
+#
+class Hash
+ def to_query namespace = nil
+ collect do |key, value|
+ value.to_query(namespace ? "#{namespace}[#{key}]" : key)
+ end.sort * '&'
+ end
+end
25 client/lib/search/serializer.rb
@@ -0,0 +1,25 @@
+module Search
+
+ # This class handles serialization and deserialization.
+ #
+ class Serializer
+
+ # Serialize the Results.
+ #
+ # Note: This code is executed on the search engine side.
+ #
+ def self.serialize serializable_results
+ Marshal.dump serializable_results.serialize
+ end
+
+ # Create new search results from serialized ones.
+ #
+ # Note: This code is executed on the client side.
+ #
+ def self.deserialize serialized_results
+ Marshal.load serialized_results
+ end
+
+ end
+
+end
113 client/spec/search/convenience_spec.rb
@@ -0,0 +1,113 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+
+describe Search::Convenience do
+
+ before(:each) do
+ @convenience = {
+ :allocations => [[nil, nil, nil, nil, [1,2,3,4,5,6,7,8]],
+ [nil, nil, nil, nil, [9,10,11,12,13,14,15,16]],
+ [nil, nil, nil, nil, [17,18,19,20,21,22,23]]],
+ :offset => 123,
+ :total => 12345,
+ :duration => 0.12345
+ }.extend Search::Convenience
+ end
+
+ # describe 'replace_ids_with' do
+ # before(:each) do
+ # @results = Search::Results.new [
+ # [nil, nil, nil, [1,2,3,4,5,6,7,8]],
+ # [nil, nil, nil, [9,10,11,12,13,14,15,16]],
+ # [nil, nil, nil, [17,18,19,20,21,22,23]]
+ # ], [], nil, 123, true, true, 0.123, 1234
+ # end
+ # it 'should replace the ids' do
+ # new_ids = (11..31).to_a # +10
+ # @results.replace_ids_with new_ids
+ # @results.ids.should == (11..31).to_a
+ # end
+ # end
+
+ describe 'clear_ids' do
+ it 'should clear all ids' do
+ @convenience.clear_ids
+
+ @convenience.ids.should == []
+ end
+ end
+
+ describe 'ids' do
+ it 'should return the top default ids' do
+ @convenience.ids.should == (1..21).to_a
+ end
+ it 'should return the top limit entries' do
+ @convenience.ids(7).should == (1..8).to_a
+ end
+ end
+
+ describe 'allocations_size' do
+ it 'should just add up the allocations of both types' do
+ @convenience.allocations_size.should == 3
+ end
+ end
+
+ # describe 'render?' do
+ # context 'no ids' do
+ # before(:each) do
+ # @convenience.stub! :empty? => true
+ # end
+ # it 'should not render' do
+ # @convenience.render?.should == false
+ # end
+ # end
+ # context 'less results than the treshold' do
+ # before(:each) do
+ # @convenience.stub! :empty? => false
+ # @convenience.stub! :total => 7
+ # end
+ # it 'should render' do
+ # @convenience.render?.should == true
+ # end
+ # end
+ # context 'too many, but just in one allocation' do
+ # before(:each) do
+ # @convenience.stub! :empty? => false
+ # @convenience.stub! :total => 100
+ # @convenience.stub! :allocations_size => 1
+ # end
+ # it 'should render' do
+ # @convenience.render?.should == true
+ # end
+ # end
+ # context 'too many' do
+ # before(:each) do
+ # @convenience.stub! :empty? => false
+ # @convenience.stub! :total => 100
+ # @convenience.stub! :allocations_size => 2
+ # end
+ # it 'should not render' do
+ # @convenience.render?.should == false
+ # end
+ # end
+ # end
+
+ describe 'empty?' do
+ context 'allocations empty' do
+ before(:each) do
+ @convenience.stub! :allocations => stub(:allocations, :empty? => true)
+ end
+ it 'should be true' do
+ @convenience.empty?.should == true
+ end
+ end
+ context 'allocations not empty' do
+ before(:each) do
+ @convenience.stub! :allocations => stub(:allocations, :empty? => false)
+ end
+ it 'should be false' do
+ @convenience.empty?.should == false
+ end
+ end
+ end
+
+end
184 client/spec/search/engine_spec.rb
@@ -0,0 +1,184 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+
+describe Search::Engine do
+
+ describe 'defaultize' do
+ context 'no default params' do
+ before(:each) do
+ @base = Search::Engine::Base.new
+ end
+ it 'should return unchanged' do
+ @base.defaultize( :a => :b ).should == { :a => :b }
+ end
+ end
+ context 'default params' do
+ before(:each) do
+ Search::Engine::Base.default_params 'c' => 'd'
+ @base = Search::Engine::Base.new
+ end
+ after(:each) do
+ Search::Engine::Base.default_params
+ end
+ it 'should return changed' do
+ @base.defaultize( 'a' => 'b' ).should == { 'a' => 'b', 'c' => 'd' }
+ end
+ it 'should override the default' do
+ @base.defaultize( 'c' => 'b' ).should == { 'c' => 'b' }
+ end
+ end
+ end
+
+ describe 'Base' do
+ before(:each) do
+ @base = Search::Engine::Base.new
+ end
+ it 'should have a default_configuration method' do
+ lambda { @base.default_configuration }.should_not raise_error
+ end
+ it 'should return an empty configuration hash' do
+ @base.default_configuration.should == {}
+ end
+ it 'should have a default_params method' do
+ lambda { @base.default_params }.should_not raise_error
+ end
+ it 'should return an empty params hash' do
+ @base.default_params.should == {}
+ end
+ end
+
+ describe "Full" do
+ before(:each) do
+ @full = Search::Engine::Full.new
+ end
+ describe "defaults" do
+ it "should set host to 'localhost'" do
+ @full.host.should == 'localhost'
+ end
+ it "should set port to 4000" do
+ @full.port.should == 4000
+ end
+ it "should set path to '/searches/full'" do
+ @full.path.should == '/searches/full'
+ end
+ end
+
+ describe "cattr_accessors" do
+ before(:each) do
+ @full = Search::Engine::Full.new :host => :some_host, :port => :some_port, :path => :some_path
+ end
+ it "should have a writer for the host" do
+ @full.host = :some_host
+ @full.host.should == :some_host
+ end
+ it "should have a writer for the port" do
+ @full.port = :some_port
+ @full.port.should == :some_port
+ end
+ it "should have a writer for the path" do
+ @full.path = :some_path
+ @full.path.should == :some_path
+ end
+ it "should have a reader for the host" do
+ lambda { @full.host }.should_not raise_error
+ end
+ it "should have a reader for the port" do
+ lambda { @full.port }.should_not raise_error
+ end
+ it "should have a reader for the path" do
+ lambda { @full.path }.should_not raise_error
+ end
+ end
+
+ describe "search" do
+ describe "with nil as search term" do
+ before(:each) do
+ @query = nil
+ end
+ it "should return a Search::Results for bla" do
+ @full.search(:query => @query).should be_kind_of(Hash)
+ end
+ it "should return an empty Search::Results" do
+ @full.search(:query => @query).should be_empty
+ end
+ end
+ describe "with '' as search term" do
+ before(:each) do
+ @query = ''
+ end
+ it "should return a Search::Results" do
+ @full.search(:query => @query).should be_kind_of(Hash)
+ end
+ it "should return an empty Search::Results" do
+ @full.search(:query => @query).should be_empty
+ end
+ end
+ end
+ end
+
+ describe "Live" do
+ before(:each) do
+ @live = Search::Engine::Live.new
+ end
+ describe "defaults" do
+ it "should set host to 'localhost'" do
+ @live.host.should == 'localhost'
+ end
+ it "should set port to 4000" do
+ @live.port.should == 4000
+ end
+ it "should set path to '/searches/live'" do
+ @live.path.should == '/searches/live'
+ end
+ end
+
+ describe "cattr_accessors" do
+ it "should have a writer for the host" do
+ @live.host = :some_host
+ @live.host.should == :some_host
+ end
+ it "should have a writer for the port" do
+ @live.port = :some_port
+ @live.port.should == :some_port
+ end
+ it "should have a writer for the path" do
+ @live.path = :some_path
+ @live.path.should == :some_path
+ end
+ it "should have a reader for the host" do
+ lambda { @live.host }.should_not raise_error
+ end
+ it "should have a reader for the port" do
+ lambda { @live.port }.should_not raise_error
+ end
+ it "should have a reader for the path" do
+ lambda { @live.path }.should_not raise_error
+ end
+ end
+
+ describe "search" do
+ describe "with nil as search term" do
+ before(:each) do
+ @query = nil
+ end
+ it "should return a Search::Results" do
+ @live.search(:query => @query).should be_kind_of(Hash)
+ end
+ it "should return an empty Search::Results" do
+ @live.search(:query => @query).should be_empty
+ end
+ end
+ describe "with '' as search term" do
+ before(:each) do
+ @query = ''
+ end
+ it "should return a Search::Results" do
+ @live.search(:query => @query).should be_kind_of(Hash)
+ end
+ it "should return an empty Search::Results" do
+ @live.search(:query => @query).should be_empty
+ end
+ end
+ end
+ end
+
+end
39 client/spec/search/serializer_spec.rb
@@ -0,0 +1,39 @@
+require File.dirname(__FILE__) + '/../spec_helper'
+
+describe Search::Serializer do
+
+ describe "serialize-deserialize" do
+ it "should serialize and deserialize certain values" do
+ results = stub :results
+ results.stub! :serialize => {}
+
+ deserialized = Search::Serializer.deserialize Search::Serializer.serialize(results)
+
+ deserialized.should == {}
+ end
+ end
+
+ describe "serialize" do
+ it "should serialize" do
+ results = stub :results, :serialize => {
+ :allocations => [[nil, nil, nil, [1,2,3,4,5,6,7,8]],
+ [nil, nil, nil, [9,10,11,12,13,14,15,16]],
+ [nil, nil, nil, [17,18,19,20,21,22,23]]],
+ :offset => 123,
+ :total => 12345,
+ :duration => 0.12345
+ }
+
+ Search::Serializer.serialize(results).should == "\x04\b{\t:\x10allocations[\b[\t000[\ri\x06i\ai\bi\ti\ni\vi\fi\r[\t000[\ri\x0Ei\x0Fi\x10i\x11i\x12i\x13i\x14i\x15[\t000[\fi\x16i\x17i\x18i\x19i\x1Ai\ei\x1C:\voffseti\x01{:\ntotali\x0290:\rdurationf\x0F0.12345\x00\xF2|"
+ end
+ end
+
+ describe "deserialize" do
+ it "should deserialize" do
+ results = Search::Serializer.deserialize "\x04\b{\t:\x10allocations[\b[\t000[\ri\x06i\ai\bi\ti\ni\vi\fi\r[\t000[\ri\x0Ei\x0Fi\x10i\x11i\x12i\x13i\x14i\x15[\t000[\fi\x16i\x17i\x18i\x19i\x1Ai\ei\x1C:\voffseti\x01{:\ntotali\x0290:\rdurationf\x0F0.12345\x00\xF2|"
+
+ results.should be_kind_of(Hash)
+ end
+ end
+
+end
7 client/spec/spec.opts
@@ -0,0 +1,7 @@
+--colour
+--format
+progress
+--loadby
+mtime
+--reverse
+--backtrace
9 client/spec/spec_helper.rb
@@ -0,0 +1,9 @@
+# This file is copied to ~/spec when you run 'ruby script/generate rspec'
+# from the project root directory.
+ENV['SEARCH_ENV'] = 'test'
+require File.expand_path(File.dirname(__FILE__) + "/../lib/search-engine")
+require 'spec'
+
+def in_the(object, &block)
+ object.instance_eval &block
+end
2 server/.bundle/config
@@ -0,0 +1,2 @@
+---
+BUNDLE_WITHOUT: ""
14 server/Gemfile
@@ -0,0 +1,14 @@
+source :gemcutter
+
+# Gems required by Picky.
+#
+gem 'bundler', '>=0.9.26'
+gem 'activesupport', '2.3.8', :require => 'active_support'
+gem 'activerecord', '2.3.8', :require => 'active_record'
+gem 'rack', '1.2.1'
+gem 'rack-mount', '0.6.9'
+gem 'rsolr', '0.12.1'
+gem 'sunspot', '1.1.0'
+gem 'text', '0.2.0'
+gem 'rack_fast_escape', '2009.06.24'
+gem 'rspec'
165 server/LICENSE
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
13 server/README.textile
@@ -0,0 +1,13 @@
+h1. Picky
+
+h2. Introduction
+
+"If a Unicorn kills itself, there is no blood. Only rainbows."
+
+h2. What it is
+
+ * Classifying Small Text Search Engine (TODO Explain)
+
+h2. What it is not
+
+ * Full Text Search Engine
50 server/Rakefile
@@ -0,0 +1,50 @@
+require 'rake'
+require 'rake/gempackagetask'
+
+task :default => :spec
+
+# Gem.
+#
+desc "Create the picky search engine client"
+spec = Gem::Specification.new do |s|
+ s.name = 'picky'
+ s.version = '0.0.0'
+ s.author = 'Florian Hanke'
+ s.email = 'florian.hanke+picky@gmail.com'
+ s.homepage = 'http://floere.github.com/picky'
+ s.description = 'Fast Combinatorial Ruby Search Engine'
+ s.platform = Gem::Platform::RUBY
+ s.summary = 'Picky the Search Engine'
+ s.files = [] # Dir["lib/**/*.rb"]
+ s.test_files = [] # Dir["spec/**/*_spec.rb"]
+ s.has_rdoc = false
+ s.extra_rdoc_files = ['README.textile']
+
+ s.add_dependency 'bundler', '>=0.9.26'
+ s.add_dependency 'activesupport', '2.3.8'
+ s.add_dependency 'activerecord', '2.3.8'
+ s.add_dependency 'rack', '1.2.1'
+ s.add_dependency 'rack-mount', '0.6.9'
+ s.add_dependency 'rsolr' '0.12.1'
+ s.add_dependency 'sunspot', '1.1.0'
+ s.add_dependency 'text', '0.2.0'
+ s.add_dependency 'rack_fast_escape', '2009.06.24'
+
+ s.add_development_dependency 'rspec'
+end
+Rake::GemPackageTask.new(spec) do |pkg|
+ pkg.need_tar = true
+end
+
+# Specs.
+#
+require 'spec'
+require 'spec/rake/spectask'
+
+spec_root = File.join(File.dirname(__FILE__), 'spec')
+
+desc "Run all specs in spec directory (excluding plugin specs)"
+Spec::Rake::SpecTask.new(:spec) do |t|
+ t.spec_opts = ['--options', "\"#{File.join(spec_root, 'spec.opts')}\""]
+ t.spec_files = FileList[File.join(spec_root, '**', '*_spec.rb')]
+end
31 server/TODO.textile
@@ -0,0 +1,31 @@
+h1. TODOs
+
+ * FIXMEs already in the code.
+ * TODOs already in the code.
+ * Covering a very simple case.
+ * Controller Mini-FW.
+ * Documentation.
+ * Website.
+ * Covering a more complex case based on possible feedback.
+ * 15 minute blog search video. (tongue in cheek)
+
+ * Write a tokenizer introspector with which you can see how the tokenizing configs influence a query or indexing measure.
+ * Live interface and config changes.
+
+ * Use strategies (in Indexers) to allow SQL only indexing.
+ * Use strategies to allow Non-SQL sources.
+ * Put app specific code in /app
+ * Put shared app specific code in /shared?
+ * create gemifying stuff
+ * directory structure
+
+ * Doc: Recommended for use up to 100 Mio. Datapoints.
+ * Use Ruby 1.9.3
+ * Option: trap_signals # default true
+ * Rake can generate directories and files
+ * Not for Windows
+ * picky bin
+ * picky project <name> # => Generates project.
+ * deploy server start needs :sequential => true option
+
+ * Should partial bundle weights always be 0? (They might be too high, as they have many more ids)
32 server/doc/process.textile
@@ -0,0 +1,32 @@
+h1. Processes
+
+h2. Startup Overview
+
+(In the rackup file application.ru)
+
+# Framework Inclusion: Require the gem.
+# Application Loading: Load the application.
+# Index Loading: Indexes are loaded from their respective cache files "some_index/*_index.dump" into Indexes[:some_index].
+# Rack Application: Run the Rack App.
+
+h2. Startup Details
+
+h3. Framework Inclusion
+
+Require the gem. This will load the framework.
+
+h3. Application Loading
+
+h3. Index Loading
+
+h3. Rack Application
+
+
+h2. Query Overview
+
+h2. Query Details
+
+
+h2. Indexing Overview
+
+h2. Indexing Details
10 server/lib/bundling.rb
@@ -0,0 +1,10 @@
+# TODO Remove?
+#
+begin
+ require 'bundler'
+rescue LoadError => e
+ require 'rubygems'
+ retry
+end
+Bundler.setup SEARCH_ENVIRONMENT
+Bundler.require
9 server/lib/constants.rb
@@ -0,0 +1,9 @@
+# Set constants.
+#
+
+# Use rack's environment for the search engine.
+#
+ENV['SEARCH_ENV'] ||= ENV['RACK_ENV']
+
+SEARCH_ENVIRONMENT = ENV['SEARCH_ENV'] || 'development' unless defined? SEARCH_ENVIRONMENT
+SEARCH_ROOT = Dir.pwd unless defined? SEARCH_ROOT
212 server/lib/deployment.rb
@@ -0,0 +1,212 @@
+require File.expand_path(File.join(File.dirname(__FILE__), 'constants'))
+
+module Picky
+ module Capistrano
+
+ # Include all
+ #
+ module All
+ def self.extended cap_config
+
+ cap_config.instance_eval do
+
+ # Executes a rake task on the server.
+ #
+ # Options:
+ # * env: The SEARCH_ENV. Will not set if set explicitly to false. Default: production.
+ # * All other options get passed on to the Capistrano run task.
+ #
+ def execute_rake_task name, options = {}, &block
+ env = options.delete :env
+ env = env == false ? '' : "SEARCH_ENV=#{env || 'production'}"
+ run "cd #{current_path}; rake #{name} #{env}", options, &block
+ end
+
+ end
+
+ cap_config.extend Standard
+ cap_config.extend Deploy
+ cap_config.extend Caching
+ cap_config.extend Overrides
+
+ end
+ end
+
+ # Removes unneeded Rails defaults.
+ #
+ module Overrides
+ def self.extended cap_config
+ cap_config.instance_eval do
+
+ namespace :deploy do
+ tasks.delete :check
+ tasks.delete :cold
+ tasks.delete :migrations
+ tasks.delete :migrate
+ tasks.delete :upload
+
+ namespace :web do
+ tasks.delete :enable
+ tasks.delete :disable
+ end
+ end
+
+ end
+ end
+ end
+
+ module Standard
+ def self.extended cap_config
+ cap_config.load 'standard'
+ cap_config.load 'deploy'
+ end
+ end
+
+ module Deploy
+
+ def self.extended cap_config
+ cap_config.instance_eval do
+
+ namespace :deploy do
+ %w(start stop).each do |action|
+ desc "#{action} the Servers"
+ task action.to_sym, :roles => :app do
+ execute_rake_task "server:#{action}"
+ end
+ end
+ desc "Restart the Servers sequentially"
+ task :restart, :roles => :app do
+ find_servers(:roles => :app).each do |server|
+ execute_rake_task "server:restart", :hosts => server.host
+ end
+ end
+
+ desc 'Hot deploy the code'
+ task 'hot', :roles => :app do
+ update
+ execute_rake_task 'server:usr1', :env => false # No env needed.
+ end
+
+ desc "Setup a GitHub-style deployment."
+ task :setup, :roles => :app do
+ cmd = "git clone #{repository} #{current_path}-clone-cache &&" +
+ "rm #{current_path} &&" +
+ "mv #{current_path}-clone-cache #{current_path}"
+ run cmd
+ end
+
+ desc "Deploy"
+ task :default, :roles => :app do
+ update
+ restart
+ end
+
+ desc "Update the deployed code."
+ task :update_code do # code needs to be updated with all servers
+ puts "updating code to branch #{branch}"
+ cmd = "cd #{current_path} &&" +
+ "git fetch origin &&" +
+ "(git checkout -f #{branch} || git checkout -b #{branch} origin/#{branch}) &&" +
+ "git pull;" +
+ "git branch"
+ run cmd
+ symlink
+ end
+
+ desc "Cleans up the git checkout"
+ task :cleanup, :roles => :app do
+ run "cd #{current_path} && git gc --aggressive"
+ end
+
+ desc "create the symlinks to the shared dirs"
+ task :symlink do
+ set :user, 'deploy'
+ run "rm -rf #{current_path}/log; ln -sf #{shared_path}/log #{current_path}/log"
+ run "rm -rf #{current_path}/index; ln -sf #{shared_path}/index #{current_path}/index"
+ # link database-config files
+ run "ln -sf #{shared_path}/config/base.yml #{current_path}/config/db/base.yml"
+ run "ln -sf #{shared_path}/config/source.yml #{current_path}/config/db/source.yml"
+ # link unicorn.ru
+ run "ln -sf #{shared_path}/config/unicorn.ru #{current_path}/config/unicorn.ru"
+ end
+
+ namespace :rollback do
+ desc "Rollback to last release."
+ task :default, :roles => :app do
+ set :branch, branches[-2]
+ puts "rolling back to branch #{branch}"
+ deploy.update_code
+ end
+
+ task :code, :roles => :app do
+ # implicit
+ end
+ end
+ end
+
+ end
+ end
+
+ end
+
+ module Caching
+
+ def self.extended cap_config
+ cap_config.instance_eval do
+ namespace :cache do
+ desc "check the index files if they are ready to be used"
+ task :check, :roles => :cache do
+ execute_rake_task 'cache:check'
+ end
+ end
+ namespace :cache do
+ namespace :structure do
+ desc "create the index cache structure"
+ task :create, :roles => :app do
+ execute_rake_task 'cache:structure:create'
+ end
+ end
+ end
+ namespace :solr do
+ desc "create the index cache structure"
+ task :index, :roles => :cache do
+ execute_rake_task 'solr:index'
+ end
+ %w|start stop restart|.collect(&:to_sym).each do |action|
+ desc "#{action} the solr server"
+ task action, :roles => :app do
+ execute_rake_task 'solr:start'
+ end
+ end
+ end
+ end
+ end
+
+ end
+
+ module Statistics
+
+ def self.extended cap_config
+ namespace :statistics do
+ desc 'Start the statistics server'
+ task :start, :roles => :statistics do
+ set :user, 'root'
+ run "daemonize -c #{current_path} -u deploy -v #{current_path}/script/statistics/start production"
+ end
+ desc 'Stop the statistics server'
+ task :stop, :roles => :statistics do
+ run "#{current_path}/script/statistics/stop production"
+ end
+ desc 'Restart the statistics server'
+ task :restart, :roles => :statistics do
+ stop
+ sleep 2
+ start
+ end
+ end
+ end
+
+ end
+
+ end
+end
6 server/lib/picky-tasks.rb
@@ -0,0 +1,6 @@
+all_rake_files = File.join File.dirname(__FILE__), 'tasks', '**', '*.rake'
+
+Dir[all_rake_files].each do |rakefile|
+ next if rakefile =~ /spec\.rake$/
+ load rakefile
+end
22 server/lib/picky.rb
@@ -0,0 +1,22 @@
+# Add dirname to load path. TODO Necessary?
+#
+$:.unshift File.dirname(__FILE__)
+
+# Require the constants.
+#
+# TODO Move to app?
+#
+require 'constants'
+
+# Library bundling.
+#
+require 'bundling'
+
+# Loader which handles framework and app loading.
+#
+require 'picky/loader'
+
+# Load the framework
+#
+Loader.load_framework
+puts "Loaded picky with environment '#{SEARCH_ENVIRONMENT}' in #{SEARCH_ROOT} on Ruby #{RUBY_VERSION}."
26 server/lib/picky/application.rb
@@ -0,0 +1,26 @@
+class Application
+
+ # Sets the defaults and delegates routing to
+ # the routes.
+ #
+ def self.routing default_options = nil, &block
+ routes.defaults default_options if default_options
+ routes.define_using &block
+ end
+
+ # An application simply delegates to the route set to handle a request.
+ #
+ def self.call env
+ routes.call env
+ end
+
+ #
+ #
+ def self.routes
+ @routes || reset_routes
+ end
+ def self.reset_routes
+ @routes = Routing.new
+ end
+
+end
1 server/lib/picky/cacher/README
@@ -0,0 +1 @@
+I could probably use a single generator with pluggable strategies.
17 server/lib/picky/cacher/generator.rb
@@ -0,0 +1,17 @@
+module Cacher
+
+ # A cache generator holds an index type.
+ #
+ # TODO Rename to index_type.
+ #
+ class Generator
+
+ attr_reader :index
+
+ def initialize index
+ @index = index
+ end
+
+ end
+
+end
7 server/lib/picky/cacher/partial/default.rb
@@ -0,0 +1,7 @@
+module Cacher
+ module Partial
+ # Default is Subtoken, down to 1.
+ #
+ Default = Subtoken.new :down_to => 1
+ end
+end
19 server/lib/picky/cacher/partial/none.rb
@@ -0,0 +1,19 @@
+module Cacher
+
+ module Partial
+
+ # Does not generate a partial index.
+ #
+ class None < Strategy
+
+ # Returns an empty index.
+ #
+ def generate_from index
+ {}
+ end
+
+ end
+
+ end
+
+end
7 server/lib/picky/cacher/partial/strategy.rb
@@ -0,0 +1,7 @@
+module Cacher
+ module Partial
+ # Superclass for partial strategies.
+ #
+ class Strategy; end
+ end
+end
91 server/lib/picky/cacher/partial/subtoken.rb
@@ -0,0 +1,91 @@
+module Cacher
+
+ module Partial
+
+ # The subtoken partial strategy.
+ #
+ # If given
+ # "florian"
+ # will index
+ # "floria"
+ # "flori"
+ # "flor"
+ # "flo"
+ # "fl"
+ # "f"
+ # Depending on what the given down_to value is. (Example with down_to == 1)
+ #
+ class Subtoken < Strategy
+
+ attr_reader :down_to, :starting_at
+
+ # Down to is how far it will go down in generating the subtokens.
+ #
+ # Examples:
+ # With :hello, and starting_at 0
+ # * down to == 1: [:hello, :hell, :hel, :he, :h]
+ # * down to == 4: [:hello, :hell]
+ #
+ # With :hello, and starting_at -1
+ # * down to == 1: [:hell, :hel, :he, :h]
+ # * down to == 4: [:hell]
+ #
+ def initialize options = {}
+ @down_to = options[:down_to] || 1
+ starting_at = options[:starting_at] || 0
+ @starting_at = starting_at.zero? ? 0 : starting_at - 1
+ end
+
+ # Generates a partial index ({ subtoken => ids }) from the given index.
+ #
+ def generate_from index
+ result = {}
+
+ # Generate for each key token the subtokens. Logs once every 5000 tokens.
+ #
+ i = 5000
+ index.each_key do |token|
+ i -= 1
+ if i == 0
+ puts "#{Time.now}: Generating partial tokens for token #{token}. This appears every 5000 tokens."
+ i = 5000
+ end
+ generate_for token, index, result
+ end
+
+ # Remove duplicate ids.
+ #
+ # TODO If it is unique for a subtoken, it is
+ # unique for all derived longer tokens.
+ #
+ result.each_value &:uniq! # In place, for the ids of each subtoken.
+
+ result
+ end
+
+ private
+
+ # To each shortened token of :test
+ # :test, :tes, :te, :t
+ # add all ids of :test
+ #
+ # "token" here means just text.
+ #
+ # TODO Could be improved by appending the aforegoing ids?
+ #
+ def generate_for token, index, result
+ clipped_token = starting_at.zero? ? token : token[0..starting_at].to_sym
+ clipped_token.subtokens(down_to).each do |subtoken|
+ if result[subtoken]
+ result[subtoken] += index[token] # unique
+ else
+ result[subtoken] = index[token].dup
+ end
+ end
+ end
+
+ end
+
+ end
+
+end
15 server/lib/picky/cacher/partial_generator.rb
@@ -0,0 +1,15 @@
+module Cacher
+
+ # The partial generator uses a subtoken(downto:1) generator as default.
+ #
+ class PartialGenerator < Generator
+
+ # Generate a partial index based on the given index.
+ #
+ def generate strategy = Partial::Subtoken.new(:down_to => 1)
+ strategy.generate_from self.index
+ end
+
+ end
+
+end
7 server/lib/picky/cacher/similarity/default.rb
@@ -0,0 +1,7 @@
+module Cacher
+ module Similarity
+ # Default is no similarity.
+ #
+ Default = None.new
+ end
+end
73 server/lib/picky/cacher/similarity/double_levenshtone.rb
@@ -0,0 +1,73 @@
+# encoding: utf-8
+#
+module Cacher
+
+ module Similarity
+
+ # DoubleLevenshtone means that it's a combination of
+ # * DoubleMetaphone
+ # and
+ # * Levenshtein
+ # :)
+ #
+ class DoubleLevenshtone < Strategy
+
+ attr_reader :amount
+
+ # amount is the maximum number of tokens kept per code (see #sort).
+ #
+ def initialize amount = 10
+ @amount = amount
+ end
+
+ # Encodes the given symbol using the first double metaphone code.
+ #
+ # Returns a symbol, or nil if double_metaphone returns no codes.
+ #
+ def encoded sym
+ codes = Text::Metaphone.double_metaphone sym.to_s
+ codes.first.to_sym unless codes.empty?
+ end
+
+ # Generates an index for the given index (in full index style).
+ #
+ # In the following form:
+ # [:meier, :mueller, :peter, :pater] => { :MR => [:meier], :MLR => [:mueller], :PTR => [:peter, :pater] }
+ #
+ def generate_from index
+ hash = hashify index.keys
+ sort hash
+ end
+
+ private
+
+ # Sorts the index values in place and cuts them off after amount elements.
+ #
+ def sort index
+ index.each_pair.each do |code, ary|
+ ary.sort_by_levenshtein! code
+ ary.slice! amount, ary.size # size is not perfectly correct, but anyway
+ end
+ index
+ end
+
+ # Hashifies a list of symbols. Symbols without a code are left out.
+ #
+ # Where:
+ # { encoded_sym => [syms] }
+ #
+ def hashify list
+ list.inject({}) do |total, element|
+ if code = encoded(element)
+ total[code] ||= []
+ total[code] << element
+ end
+ total
+ end
+ end
+
+ end
+
+ end
+
+end
25 server/lib/picky/cacher/similarity/none.rb
@@ -0,0 +1,25 @@
+module Cacher
+
+ module Similarity
+
+ # Similarity strategy that does nothing.
+ #
+ class None < Strategy
+
+ # Does not encode text. Just returns nil.
+ #
+ def encoded text
+ nil
+ end
+
+ # Returns an empty index.
+ #
+ def generate_from index
+ {}
+ end
+
+ end
+
+ end
+
+end
7 server/lib/picky/cacher/similarity/strategy.rb
@@ -0,0 +1,7 @@
+module Cacher
+ module Similarity
+ # Base class for all similarity strategies.
+ #
+ class Strategy; end
+ end
+end
15 server/lib/picky/cacher/similarity_generator.rb
@@ -0,0 +1,15 @@
+module Cacher
+
+ # Uses no similarity as default.
+ #
+ class SimilarityGenerator < Generator
+
+ # Generate a similarity index based on the given index.
+ #
+ def generate strategy = Similarity::None.new
+ strategy.generate_from self.index
+ end
+
+ end
+
+end
7 server/lib/picky/cacher/weights/default.rb
@@ -0,0 +1,7 @@
+module Cacher
+ module Weights
+ # Default is Logarithmic.
+ #
+ Default = Logarithmic.new
+ end
+end
39 server/lib/picky/cacher/weights/logarithmic.rb
@@ -0,0 +1,39 @@
+module Cacher
+
+ module Weights
+
+ # Uses a logarithmic weight.
+ # If for a key k we have x ids, the weight is:
+ # w(x): log(x)
+ # Special case: If x < 1, then we use 0.
+ #
+ class Logarithmic < Strategy
+
+ # Generates a weights index ({ text => rounded weight }) from the given index.
+ #
+ def generate_from index
+ index.inject({}) do |hash, text_ids|
+ text, ids = *text_ids
+ weight = weight_for ids.size
+ hash[text] ||= weight.round(2) if weight
+ hash
+ end
+ end
+
+ # Calculates the weight for the given amount of ids.
+ #
+ # Math.log would give -Infinity for 0, so anything below 1 gets the weight 0.
+ # For exactly one id the result is Math.log(1), which is 0.0.
+ #
+ # BUT: We need the value, even if 0. To designate that there is a weight!
+ #
+ def weight_for amount
+ return 0 if amount < 1
+ Math.log amount
+ end
+
+ end
+
+ end
+
+end
7 server/lib/picky/cacher/weights/strategy.rb
@@ -0,0 +1,7 @@
+module Cacher
+ module Weights
+ # Superclass for weighing strategies.
+ #
+ class Strategy; end
+ end
+end
15 server/lib/picky/cacher/weights_generator.rb
@@ -0,0 +1,15 @@
+module Cacher
+
+ # Uses a logarithmic algorithm as default.
+ #
+ class WeightsGenerator < Generator
+
+ # Generate a weights index based on the given index.
+ #
+ def generate strategy = Weights::Logarithmic.new
+ strategy.generate_from self.index
+ end
+
+ end
+
+end
13 server/lib/picky/configuration/configuration.rb
@@ -0,0 +1,13 @@
+module Configuration
+
+ def self.indexes *types
+ Indexes.new(*types).save
+ end
+ def self.type name, *fields
+ Type.new name, *fields
+ end
+ def self.field name, options = {}
+ Field.new name, options
+ end
+
+end
68 server/lib/picky/configuration/field.rb
@@ -0,0 +1,68 @@
+module Configuration
+
+ class Field
+ attr_reader :name, :indexed_field, :virtual
+ attr_accessor :type # convenience
+ def initialize name, options = {}
+ @name = name
+
+ # TODO Dup the options?
+
+ @indexer_class = options.delete(:indexer) || Indexers::Default
+ @tokenizer_class = options.delete(:tokenizer) || Tokenizers::Index # Default
+
+ @indexed_field = options.delete(:indexed_field) || name # TODO Rename to indexed_as?
+ @virtual = options.delete(:virtual) || false
+
+ # Note: Moved to Bundle.
+ #
+ # @weights = options[:weights] || Cacher::Weights::Default
+ # @partial = options[:partial] || Cacher::Partial::Default
+ # @similarity = options[:similarity] || Cacher::Similarity::Default
+
+ # TODO Replace by add.
+ #
+ # Query::Qualifiers.instance << Query::Qualifier.new(name, options.delete(:qualifiers)) if options[:qualifiers]
+ Query::Qualifiers.add(name, options[:qualifiers]) if options[:qualifiers]
+
+ # @remove = options[:remove] || false
+ # @filter = options[:filter] || true
+
+ @options = options
+ end
+ def generate
+ Index::Category.new self.name, type, @options
+ end
+ # TODO Duplicate code in bundle. Move to application.
+ #
+ # TODO Move to type, and use in bundle from there.
+ #
+ def search_index_root
+ File.join SEARCH_ROOT, 'index'
+ end
+ # TODO Move to config. Duplicate Code in field.rb.
+ #
+ def cache_directory
+ File.join search_index_root, SEARCH_ENVIRONMENT, type.name.to_s
+ end
+ def search_index_file_name
+ File.join cache_directory, "#{type.name}_#{name}_index.txt"
+ end
+ def index
+ indexer.index
+ end
+ def cache
+ generate.generate_caches
+ end
+ def indexer
+ @indexer || @indexer = @indexer_class.new(indexed_field, type, self)
+ end
+ def tokenizer
+ @tokenizer || @tokenizer = @tokenizer_class.new # TODO Make instances.
+ end
+ def virtual?
+ !!virtual
+ end
+ end
+
+end
41 server/lib/picky/configuration/indexes.rb
@@ -0,0 +1,41 @@
+module Configuration
+
+ class Indexes
+
+ attr_reader :types
+