Permalink
Browse files

first cut of glob

TODO: tests for glob; Windows support
  • Loading branch information...
0 parents commit 539835d98c1ef976e6355fbd54db9409d3ed871f @jkk jkk committed May 14, 2010
Showing with 124 additions and 0 deletions.
  1. +4 −0 .gitignore
  2. +21 −0 README
  3. +5 −0 project.clj
  4. +40 −0 src/org/satta/glob.clj
  5. +54 −0 test/org/satta/glob_test.clj
@@ -0,0 +1,4 @@
+pom.xml
+*jar
+lib
+classes
21 README
@@ -0,0 +1,21 @@
+# glob
+
+Finds files based on glob patterns like "*.jpg" or "/usr/*/se*". Similar to glob in Perl, Ruby, and PHP.
+
+## Usage
+
+    (glob "*.{jpg,gif}")
+
+    (glob ".*") ; dot files are matched only when the pattern explicitly starts with a dot
+
+    (glob "/usr/*/se*") ; works on directories and subdirectories
+
+## Installation
+
+ (ns foo.bar
+ (:use org.satta.glob))
+
+## License
+
+EPL
+
@@ -0,0 +1,5 @@
+;; Leiningen project definition for clj-glob. Clojure and clojure-contrib are
+;; both taken from 1.2.0 snapshot builds; lein-swank is a development-only
+;; dependency.
+(defproject clj-glob "1.0.0-SNAPSHOT"
+ :description "Quickly locate files with glob patterns"
+ :dependencies [[org.clojure/clojure "1.2.0-master-SNAPSHOT"]
+ [org.clojure/clojure-contrib "1.2.0-SNAPSHOT"]]
+ :dev-dependencies [[leiningen/lein-swank "1.2.0-SNAPSHOT"]])
@@ -0,0 +1,40 @@
+(ns org.satta.glob
+ (:import [java.io File StringReader]))
+
+;; TODO: make Windows-friendly
+
+(defn- glob->regex
+  "Takes a glob-format string and returns a regex.
+
+  Translation, one character at a time:
+    *       becomes [^/]*  (any run of non-slash characters)
+    ?       becomes [^/]   (exactly one non-slash character)
+    { , }   become ( | )   (a comma separates alternatives only while
+            inside braces; elsewhere it is copied literally)
+    a backslash copies itself and the following character through
+            unchanged, so the escaped character is not reinterpreted here
+    . ( ) | + ^ $ @ %  are backslash-escaped so they match literally
+  Any other character, including [ and ], is copied as-is, so bracket
+  expressions such as [ch] and [c-h] behave as regex character classes.
+
+  Unless s starts with a dot, the regex is prefixed with a lookahead that
+  rejects a leading dot; the same lookahead follows every / that is not
+  itself followed by a dot or by the end of the pattern.
+
+  A trailing backslash matches a literal backslash and a trailing / matches
+  a plain slash. Both cases used to call (char -1) and throw
+  IllegalArgumentException."
+  [s]
+  (let [stream (StringReader. s)]
+    (loop [i (.read stream)
+           re ""
+           curly-depth 0]
+      (let [c (if (= i -1) nil (char i))
+            j (.read stream)
+            ;; One-character lookahead; nil at end of input, because
+            ;; (char -1) throws IllegalArgumentException.
+            next-c (if (= j -1) nil (char j))]
+        (cond
+         ;; End of input: prepend the leading-dot guard unless the pattern
+         ;; itself starts with a dot.
+         (= i -1) (re-pattern (str (if (= \. (first s)) "" "(?=[^\\.])") re))
+         ;; Escape: consume the lookahead as well, or, when the backslash is
+         ;; the final character, emit an escaped literal backslash.
+         (= c \\) (if next-c
+                    (recur (.read stream) (str re c next-c) curly-depth)
+                    (recur j (str re "\\\\") curly-depth))
+         ;; Path separator: guard the next segment against a leading dot
+         ;; unless it explicitly starts with one or nothing follows.
+         (= c \/) (recur j
+                         (str re (if (or (nil? next-c) (= \. next-c))
+                                   c
+                                   "/(?=[^\\.])"))
+                         curly-depth)
+         (= c \*) (recur j (str re "[^/]*") curly-depth)
+         (= c \?) (recur j (str re "[^/]") curly-depth)
+         (= c \{) (recur j (str re \() (inc curly-depth))
+         (= c \}) (recur j (str re \)) (dec curly-depth))
+         (and (= c \,) (< 0 curly-depth)) (recur j (str re \|) curly-depth)
+         (#{\. \( \) \| \+ \^ \$ \@ \%} c) (recur j (str re \\ c) curly-depth)
+         :else (recur j (str re c) curly-depth))))))
+
+(defn glob
+  "Returns a collection of java.io.File instances that match the given glob
+  pattern. Dot files are left out unless the pattern names them explicitly.
+
+  A pattern starting with / is resolved from the filesystem root; any other
+  pattern is resolved from the current directory. The pattern is split on /
+  and each segment is matched against entry names one directory level at a
+  time.
+
+  Examples: (glob \"*.{jpg,gif}\") (glob \".*\") (glob \"/usr/*/se*\")"
+  [pattern]
+  (let [absolute? (= \/ (first pattern))
+        root (File. (if absolute? "/" "."))
+        relative (if absolute? (subs pattern 1) pattern)
+        ;; One regex per path segment, in order from the root downwards.
+        segment-res (map glob->regex (.split relative "/"))
+        ;; Entries directly inside dir whose name fully matches re.
+        children-matching (fn [dir re]
+                            (filter (fn [entry] (re-matches re (.getName entry)))
+                                    (.listFiles dir)))
+        ;; Advance one level: replace each candidate with its matching children.
+        descend (fn [dirs re]
+                  (mapcat (fn [dir] (children-matching dir re)) dirs))]
+    (reduce descend [root] segment-res)))
@@ -0,0 +1,54 @@
+(ns org.satta.glob-test
+ (:use [org.satta.glob] :reload-all)
+ (:use [clojure.test]))
+
+;; TODO: test glob, maybe with some kind of temp file/dir scaffolding?
+
+;; glob->regex is private to org.satta.glob, so reach it through its var.
+(def glob->regex #'org.satta.glob/glob->regex)
+
+(defn- matches?
+  "Returns true when the regex built from the glob pattern matches the whole
+  path string, false otherwise."
+  [pattern path]
+  (let [re (glob->regex pattern)]
+    (boolean (re-matches re path))))
+
+(deftest glob->regex-matches-correctly
+  ;; Each row is a glob followed by a path that it must match in full.
+  (are [glob-pattern candidate] (true? (matches? glob-pattern candidate))
+       ;; literals and single-segment wildcards
+       "abcd"        "abcd"
+       "a*d"         "abcd"
+       "a??d"        "abcd"
+       "*"           "abcd"
+       "foo.bar"     "foo.bar"
+       "foo.*"       "foo.bar"
+       "*.bar"       "foo.bar"
+       "*foo.*"      "foo.bar"
+       "foo*"        "foo.bar"
+       ;; brace alternation
+       "*.{bar,baz}" "foo.bar"
+       "*.{bar,baz}" "foo.baz"
+       "{foo,bar}"   "foo"
+       "{foo,bar}"   "bar"
+       ;; patterns spanning a path separator
+       "foo/bar.*"   "foo/bar.baz"
+       "foo/*.baz"   "foo/bar.baz"
+       "*/*"         "foo/bar.baz"
+       ;; explicitly requested dot files
+       ".*.foo"      ".bar.foo"
+       ".*bar.foo"   ".bar.foo"
+       ".*/bar"      ".foo/bar"
+       ;; bracket sets and ranges
+       "foo.[ch]"    "foo.c"
+       "foo.[ch]"    "foo.h"
+       "foo.[c-h]"   "foo.c"
+       "foo.[c-h]"   "foo.e"
+       "foo.[c-h]"   "foo.f"
+       "foo.[c-h]"   "foo.h"))
+
+(deftest glob->regex-ignores-dotfiles
+  ;; Each row is a glob followed by a path that it must NOT match.
+  (are [glob-pattern candidate] (false? (matches? glob-pattern candidate))
+       ;; plain non-match: the path does not end in d
+       "a*d"       "abc"
+       ;; wildcards must not swallow a leading dot
+       "*.foo"     ".bar.foo"
+       "*.bar.foo" ".bar.foo"
+       "?bar.foo"  ".bar.foo"))
+
+(deftest glob->regex-char-range-nonmatch
+  ;; Each row is a glob followed by a path that it must NOT match.
+  (are [glob-pattern candidate] (false? (matches? glob-pattern candidate))
+       ;; characters outside the listed set
+       "foo.[ch]"  "foo.a"
+       "foo.[ch]"  "foo.d"
+       ;; characters just outside either end of the range
+       "foo.[c-h]" "foo.b"
+       "foo.[c-h]" "foo.i"))

0 comments on commit 539835d

Please sign in to comment.