Skip to content

Commit

Permalink
primitive analysis wip
Browse files Browse the repository at this point in the history
  • Loading branch information
richhickey committed Oct 7, 2012
1 parent 3cf117b commit 5729bce
Show file tree
Hide file tree
Showing 5 changed files with 342 additions and 32 deletions.
4 changes: 3 additions & 1 deletion project.clj
Expand Up @@ -5,4 +5,6 @@
:url "http://www.eclipse.org/legal/epl-v10.html"}
:main datomic.codeq.core
:dependencies [[com.datomic/datomic-free "0.8.3538"]
[org.clojure/clojure "1.5.0-alpha5"]])
[commons-codec "1.7"]
[local.repo/clojure "1.5.0-alpha6"]
#_[org.clojure/clojure "1.5.0-alpha6"]])
68 changes: 68 additions & 0 deletions src/datomic/codeq/analyzer.clj
@@ -0,0 +1,68 @@
;; Copyright (c) Metadata Partners, LLC. All rights reserved.
;; The use and distribution terms for this software are covered by the
;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this distribution.
;; By using this software in any fashion, you are agreeing to be bound by
;; the terms of this license.
;; You must not remove this notice, or any other, from this software.

(ns datomic.codeq.analyzer
(:import [java.io StringReader]
[org.apache.commons.codec.digest DigestUtils]))

(set! *warn-on-reflection* true)

(defprotocol Analyzer
(keyname [a] "keyword name for analyzer")
(revision [a] "long")
(extensions [a] "[string ...], including '.'")
(schemas [a] "map of revisions to (incremental) schema data")
(analyze [a db f src] "f is file entityid, src is string, returns tx-data"))

(defn sha
"Returns the hex string of the sha1 of s"
[^String s]
(org.apache.commons.codec.digest.DigestUtils/shaHex s))

(defn ws-minify
"Consecutive ws becomes a single space, then trim"
[s]
(let [r (java.io.StringReader. s)
sb (StringBuilder.)]
(loop [c (.read r) skip true]
(when-not (= c -1)
(let [ws (Character/isWhitespace c)]
(when (or (not ws) (not skip))
(.append sb (if ws " " (char c))))
(recur (.read r) ws))))
(-> sb str .trim)))

(defn loc
"Returns zero-based [line col endline endcol] given one-based
\"line col endline endcol\" string"
[loc-string]
(mapv dec (read-string (str "[" loc-string "]"))))

(defn line-offsets
"Returns a vector of zero-based offsets of lines. Note the offsets
are where the line would be, the last offset is not necessarily
within the string. i.e. if the last character is a newline, the last
index is the length of the string."
[^String s]
(let [nl (long \newline)]
(persistent!
(loop [ret (transient [0]), i 0]
(if (= i (.length s))
ret
(recur (if (= (.codePointAt s i) nl)
(conj! ret (inc i))
ret)
(inc i)))))))

(defn segment
"Given a string and line offsets, returns text from (zero-based)
line and col to endline/endcol (exclusive)"
[^String s line-offsets line col endline endcol]
(subs s
(+ (nth line-offsets line) col)
(+ (nth line-offsets endline) endcol)))
68 changes: 68 additions & 0 deletions src/datomic/codeq/analyzers/clj.clj
@@ -0,0 +1,68 @@
;; Copyright (c) Metadata Partners, LLC. All rights reserved.
;; The use and distribution terms for this software are covered by the
;; Eclipse Public License 1.0 (http://opensource.org/licenses/eclipse-1.0.php)
;; which can be found in the file epl-v10.html at the root of this distribution.
;; By using this software in any fashion, you are agreeing to be bound by
;; the terms of this license.
;; You must not remove this notice, or any other, from this software.

(ns datomic.codeq.analyzers.clj
(:require [datomic.api :as d]
[datomic.codeq.util :refer [cond-> index->id-fn tempid?]]
[datomic.codeq.analyzer :as az]))

(defn analyze-1
"returns [tx-data ctx]"
[db f x loc seg ret {:keys [sha->id added ns] :as ctx}]
(if loc
(let [sha (-> seg az/ws-minify az/sha)
cid (sha->id sha)
newcid (and (tempid? cid) (not (added cid)))
ret (cond-> ret newcid (conj {:db/id cid :code/sha sha :code/text seg}))
added (cond-> added newcid (conj cid))]
[ret (assoc ctx :added added)])
[ret ctx]))

(defn analyze
[a db f src]
(with-open [r (clojure.lang.LineNumberingPushbackReader. (java.io.StringReader. src))]
(let [loffs (az/line-offsets src)
eof (Object.)
ctx {:sha->id (index->id-fn db :code/sha)
:codename->id (index->id-fn db :code/name)
:added #{}}]
(loop [ret [], ctx ctx, x (read r false eof)]
(if (= eof x)
ret
(let [{:keys [line column]} (meta x)
ctx (if (and (coll? x) (= (first x) 'ns))
(assoc ctx :ns (second x))
ctx)
endline (.getLineNumber r)
endcol (.getColumnNumber r)
[loc seg] (when (and line column)
[(str line " " column " " endline " " endcol)
(az/segment src loffs (dec line) (dec column) (dec endline) (dec endcol))])
[ret ctx] (analyze-1 db f x loc seg ret ctx)]
(recur ret ctx (read r false eof))))))))

(deftype CljAnalyzer []
az/Analyzer
(keyname [a] :clj)
(revision [a] 1)
(extensions [a] [".clj"])
(schemas [a] {1 [{:db/id #db/id[:db.part/db]
:db/ident :clj/ns
:db/valueType :db.type/ref
:db/cardinality :db.cardinality/one
:db/doc "codename of ns defined by expression"
:db.install/_attribute :db.part/db}
{:db/id #db/id[:db.part/db]
:db/ident :clj/def
:db/valueType :db.type/ref
:db/cardinality :db.cardinality/one
:db/doc "codename defined by expression"
:db.install/_attribute :db.part/db}]})
(analyze [a db f src] (analyze a db f src)))

(defn impl [] (CljAnalyzer.))

0 comments on commit 5729bce

Please sign in to comment.