Permalink
Browse files

first commit

  • Loading branch information...
Brian Rogoff Brian Rogoff
Brian Rogoff authored and Brian Rogoff committed Sep 16, 2011
0 parents commit 3fe68b8dfadff73065dbd1be2f1a47d968183497
Showing with 1,654 additions and 0 deletions.
  1. +1 −0 .#kdtree.ml
  2. BIN .omakedb
  3. +1 −0 .omakedb.lock
  4. +1 −0 .~lock.msa.csv#
  5. +93 −0 OMakefile
  6. +45 −0 OMakeroot
  7. +41 −0 README.md
  8. +156 −0 kd_tree.ml
  9. +23 −0 multidim.ml
  10. +112 −0 repl.ml
  11. +85 −0 spaces.ml
  12. +96 −0 two_d.ml
  13. +1,000 −0 usa_cities.csv
BIN .omakedb
Binary file not shown.
@@ -0,0 +1 @@
+*** omake: the project was last locked by Brian-Rogoffs-iMac.local:873.
@@ -0,0 +1 @@
+Brian Rogoff,bpr,bpr-laptop,26.07.2011 12:39,file:///home/bpr/.libreoffice/3;
@@ -0,0 +1,93 @@
+########################################################################
+# Phony targets are scoped, so you probably want to declare them first.
+#
+
+.PHONY: all install clean
+
+########################################################################
+# OCaml configuration.
+# Delete this section if you are not building OCaml files.
+#
+
+################################################
+# Configuration. You may want to modify any of these configuration
+# variables.
+#
+
+#
+# This project requires ocamlfind (default - false).
+#
+USE_OCAMLFIND = true
+#
+OCAMLPACKS[] =
+# pack1
+# pack2
+#
+if $(not $(OCAMLFIND_EXISTS))
+ eprintln(This project requires ocamlfind, but is was not found.)
+ eprintln(You need to install ocamlfind and run "omake --configure".)
+ exit 1
+
+#
+# Include path
+#
+OCAMLINCLUDES +=.
+
+#
+# Compile native or byte code?
+#
+# The default values are defined as follows:
+#
+NATIVE_ENABLED = $(OCAMLOPT_EXISTS)
+# BYTE_ENABLED = $(not $(OCAMLOPT_EXISTS))
+
+#
+# Various options
+#
+OCAMLFLAGS +=
+OCAMLCFLAGS +=
+OCAMLOPTFLAGS +=
+OCAML_LINK_FLAGS +=
+OCAML_BYTE_LINK_FLAGS +=
+OCAML_NATIVE_LINK_FLAGS +=
+
+################################################
+# Generated files
+#
+# Workaround for the fact that ocamldep does not pay attention to .mll
+# and .mly files.
+#
+# OCamlGeneratedFiles(parser.ml lexer.ml)
+
+################################################
+# Build an OCaml library
+#
+
+# FILES[] =
+# file1
+# file2
+#
+# LIB = main
+#
+# .DEFAULT: $(OCamlLibrary $(LIB), $(FILES))
+
+################################################
+# Build an OCaml program
+#
+
+FILES[] = multidim two_d kd_tree repl
+# file1
+# file2
+#
+PROGRAM = repl
+# OCAML_LIBS +=
+# OCAML_CLIBS +=
+OCAML_OTHER_LIBS += str
+# OCAML_LIB_FLAGS +=
+#
+.DEFAULT: $(OCamlProgram $(PROGRAM), $(FILES))
+
+.PHONY: clean
+
+clean:
+ $(RM) *~ *.cm* *.o *.annot $(PROGRAM) $(PROGRAM).opt $(PROGRAM).run OMake*.omc
@@ -0,0 +1,45 @@
+########################################################################
+# Permission is hereby granted, free of charge, to any person
+# obtaining a copy of this file, to deal in the File without
+# restriction, including without limitation the rights to use,
+# copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the File, and to permit persons to whom the
+# File is furnished to do so, subject to the following condition:
+#
+# THE FILE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+# DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE FILE OR
+# THE USE OR OTHER DEALINGS IN THE FILE.
+
+########################################################################
+# The standard OMakeroot file.
+# You will not normally need to modify this file.
+# By default, your changes should be placed in the
+# OMakefile in this directory.
+#
+# If you decide to modify this file, note that it uses exactly
+# the same syntax as the OMakefile.
+#
+
+#
+# Include the standard installed configuration files.
+# Any of these can be deleted if you are not using them,
+# but you probably want to keep the Common file.
+#
+open build/C
+open build/OCaml
+open build/LaTeX
+
+#
+# The command-line variables are defined *after* the
+# standard configuration has been loaded.
+#
+DefineCommandVars()
+
+#
+# Include the OMakefile in this directory.
+#
+.SUBDIRS: .
@@ -0,0 +1,41 @@
+# KD Trees
+
+## Introduction
+
+From http://en.wikipedia.org/wiki/K-d_tree :
+
+*The k-d tree is a binary tree in which every node is a k-dimensional
+ point. Every non-leaf node can be thought of as implicitly generating
+ a splitting hyperplane that divides the space into two parts, known
+ as subspaces. Points to the left of this hyperplane represent the
+ left sub-tree of that node and points right of the hyperplane are
+ represented by the right sub-tree. The hyperplane direction is chosen
+ in the following way: every node in the tree is associated with one
+ of the k-dimensions, with the hyperplane perpendicular to that
+ dimension's axis. So, for example, if for a particular split the "x"
+ axis is chosen, all points in the subtree with a smaller "x" value
+ than the node will appear in the left subtree and all points with
+ larger "x" value will be in the right sub tree. In such a case, the
+ hyperplane would be set by the x-value of the point, and its normal
+ would be the unit x-axis.[1]*
+
+## Algorithm
+
+The algorithm was inspired by the Wikipedia entry and by Overmars computational
+geometry book.
+
+
+Implementation: multidim.ml spaces.ml kd_tree.ml
+
+Usage:
+
+* Compile with omake.
+* Run './repl -input_file usa_cities.csv'
+* In the repl, enter lat and long separated by comma, e.g.
+
+ enter (lat,long) >>> -121.46,38.52
+ {name=Parkway-South Sacramento; population=40797; state=California; latlong=(-121.45,38.51)}
+
+Todo:
+* Fix range search
+* Implement algorithm in Scala, Clojure, and Haskell
@@ -0,0 +1,156 @@
+(* Purely functional, functorized kd-tree algorithm from Overmars (2008).
+
+ Note the use of a special comparison function to achieve the effect of having
+ all of the points in general position.
+
+ TODO: range query, versions in other languages (Scala, Clojure, Haskell)
+*)
+module type S =
+ sig
+ type real
+ type point
+ type range
+ type elt
+ (* Make t private and not abstract for debugging and fiddling at the top level *)
+ type t = private Leaf of elt | Branch of int * point * range * t * t
+ val split: int -> elt list -> (elt * range * elt list * elt list)
+ val make: elt list -> t
+
+ (* kd tree functionality *)
+
+ val nearest_neighbors: t -> point -> elt list * float
+ val range_search: t -> range -> elt list
+
+ end;;
+
+(* A static KD tree *)
+
+module Make(M: Multidim.S) :
+ (S with type elt = M.elt
+ with type real = M.real
+ with type point = M.point
+ with type range = M.range) =
+ struct
+ type elt = M.elt
+ type real = M.real
+ type range = M.range
+ type point = M.point
+ type t = Leaf of elt | Branch of int * point * range * t * t
+
+ let iota n =
+ let rec loop curr accum =
+ if curr <= 0 then accum else loop (pred curr) (curr::accum) in
+ loop n []
+
+ let list_split_at l n =
+ if n < 0 then invalid_arg "Kd_tree.list_split_at" else
+ let rec loop l n accum =
+ match l with
+ | [] -> failwith "Kd_tree.list_split_at"
+ | x::xs -> if n = 0 then (List.rev accum, l) else loop xs (n-1) (x::accum)
+ in loop l n []
+
+ let split (depth : int) (elts : elt list) : (elt * range * elt list * elt list) =
+ let axis = depth mod M.dim in
+ match elts with
+ [] -> invalid_arg "split: empty list"
+ | [elt] -> invalid_arg "split: singleton list"
+ | _ ->
+ let cmpf e0 e1 = M.axial_compare axis (M.to_point e0) (M.to_point e1) in
+ let len = List.length elts in
+ let range = List.fold_left M.range_maker M.null_range elts in
+ let sorted = List.sort cmpf elts in
+ let (lt,gte) = list_split_at sorted (len/2) in
+ (List.hd gte, range, lt, gte)
+(*
+ let median = List.nth sorted (len/2) in
+ let (lt,gte) = List.partition (fun elt-> cmpf elt median < 0) sorted in
+ (median, range, lt, gte)
+*)
+ let make elts =
+ let rec mk depth es =
+ match es with
+ [] ->
+ let err_msg = Printf.sprintf "Kd_tree.make: num_elts=%d, depth=%d, null list " (List.length elts) depth in
+ invalid_arg err_msg
+ | [elt] -> Leaf elt
+ | _ ->
+ let depth' = depth + 1 in
+ let (median, range, lt,gte) = split depth es in
+ Branch(depth mod M.dim, M.to_point median, range, mk depth' lt, mk depth' gte)
+ in
+ mk 0 elts
+
+ let nearest_neighbors root q =
+ let update elt ((curr_closest, curr_distance) as curr) =
+ let p = M.to_point elt in
+ let d_2 = M.squared_distance q p in
+ if d_2 < curr_distance then
+ ([elt],d_2)
+ else if d_2 > curr_distance then
+ curr
+ else (* d_2 = curr_distance, so merge *)
+ (elt::curr_closest,d_2)
+ in
+ let rec descend node path ((curr_elts, curr_distance) as curr) =
+ match node,path with
+ | Leaf elt,_ -> ascend path (update elt curr)
+ | Branch(axis, median, _, lt, gte),_ ->
+ let n = if M.axial_compare axis q median >= 0 then gte else lt in
+ descend n (node::path) curr
+ and ascend path curr =
+ match path with
+ [] -> curr
+ | node::nodes -> ascend nodes (check_other_side node [] curr)
+ and check_other_side node path ((_, curr_distance) as curr) =
+ match node with
+ | Branch(axis, median, _, lt, gte)
+ when M.squared_axial_distance axis q median <= curr_distance ->
+ if M.axial_compare axis q median >= 0 then
+ descend lt path curr
+ else
+ descend gte path curr
+ | _ -> curr
+ in
+ descend root [] ([], Pervasives.max_float)
+
+ let range_search t r =
+ let rec search curr accum =
+ match curr with
+ Leaf elt ->
+ if M.point_in_range r (M.to_point elt) then elt::accum else accum
+ | Branch(axis, median, range, lt, gte) ->
+ let range_intersection = M.intersect_ranges r range in
+ if range_intersection <> M.null_range then
+ let accum' = search lt accum in
+ search gte accum'
+ else
+ accum
+ in
+ search t []
+
+ end ;;
+(*
+let elts0 = [(2.,3.); (5.,4.); (9.,6.); (4.,7.); (8.,1.); (7.,2.)];;
+let kd0 = M.make elts0;;
+
+M.nearest_neighbors kd0 (0.5,0.5) ;;
+(* should be ([(2.,3.], 8.5) *)
+M.nearest_neighbors kd0 (7.,4.) ;;
+(* should be ([(5., 4.); (7., 2.)], 4.) *)
+
+let elts1 =
+ [(35., 90.);
+ (70., 80.);
+ (10., 75.);
+ (80., 40.);
+ (50., 90.);
+ (70., 30.);
+ (90., 60.);
+ (50., 25.);
+ (25., 10.);
+ (20., 50.);
+ (60., 10.)];;
+
+let kd1 = M.make elts1;;
+*)
@@ -0,0 +1,23 @@
+module type S =
+ sig
+ (* Dimension of the space we're interested in *)
+ val dim : int
+ (* The "real" or floating point number system for location and distance *)
+ type real
+ (* The n dimensional real values point *)
+ type point
+ (* *)
+ type range
+
+ (* The actual element (eg, a city) with a point *)
+ type elt
+
+ val null_range:range
+ val range_maker: range -> elt -> range
+ val intersect_ranges: range -> range -> range
+ val point_in_range: range -> point -> bool
+ val to_point : elt -> point
+ val axial_compare: int -> point -> point -> int
+ val squared_distance : point -> point -> float
+ val squared_axial_distance : int -> point -> point -> float
+ end;;
Oops, something went wrong.

0 comments on commit 3fe68b8

Please sign in to comment.