Permalink
Browse files

Add ability to process log files for stats

  • Loading branch information...
1 parent 623d99c commit bfa4f8fddcde775784542a3fd1e6acd34fb34d19 @xeqi xeqi committed Feb 8, 2013
Showing with 56 additions and 27 deletions.
  1. +36 −26 src/clojars/stats.clj
  2. +13 −0 test-resources/fake.access.log
  3. +7 −1 test/clojars/test/unit/stats.clj
View
@@ -1,19 +1,20 @@
(ns clojars.stats "generate usage statistics from web logs"
- (:require [clojure.java.io :as io]
- [clojure.string :as str]
- [net.cgrand.regex :as re]
- [clj-time.format :as timef]))
+ (:require [clojure.java.io :as io]
+ [clojure.string :as str]
+ [net.cgrand.regex :as re]
+ [clj-time.format :as timef])
+ (:use [clojars.config :only [configure]]))
;; Formatter for access-log timestamps such as "14/May/2012:06:40:59 +0000";
;; parse-clf passes the :time capture of a log line through it.
(def time-clf (timef/formatter "dd/MMM/YYYY:HH:mm:ss Z"))
;; net.cgrand/regex currently doesn't allow Patterns
;; but they're too handy so let's enable them anyway
(extend-type java.util.regex.Pattern
re/RegexValue
- (pattern [re] (.pattern re))
- (groupnames [re] [])
+ (pattern [re] (.pattern re))
+ (groupnames [re] [])
re/RegexFragment
- (static? [this _] true))
+ (static? [this _] true))
(def re-clf ; common log format (apache, nginx etc)
(let [field #"\S+"
@@ -38,15 +39,16 @@
[(re/* segment sep) segment :as :group] sep
[segment :as :name] sep
[segment :as :version] sep
- segment \. [#"\w+" :as :ext])))
+ segment \.
+ [#"\w+" :as :ext])))
(defn parse-path [s]
(when s
- (when-let [m (re/exec re-path s)]
- {:name (:name m)
- :group (str/replace (:group m) "/" ".")
- :version (:version m)
- :ext (:ext m)})))
+ (when-let [m (re/exec re-path s)]
+ {:name (:name m)
+ :group (str/replace (:group m) "/" ".")
+ :version (:version m)
+ :ext (:ext m)})))
(defn parse-long [s]
(when-not (#{nil "" "-"} s)
@@ -60,24 +62,32 @@
{:status (parse-long (:status m))
:method (:method m)
:size (parse-long (:size m))
- :time (when (:time m) (timef/parse time-clf (:time m)))})))
+ :time (when (:time m) (try (timef/parse time-clf (:time m))
+ (catch IllegalArgumentException e)))})))
(defn valid-download?
  "True only when the parsed log entry `m` has :status 200, :method \"GET\"
  and :ext \"jar\". A nil/false `m` is returned as-is; any mismatch gives false."
  [m]
  (and m
       ;; compare all three fields at once; a missing key shows up as nil
       (= [200 "GET" "jar"]
          ((juxt :status :method :ext) m))))
-(comment
- ;; top 10 most downloaded jars
- (with-open [rdr (io/reader "clojars.access.log")]
- (->> (line-seq rdr)
- (map parse-clf)
- (filter valid-download?)
- (map (juxt :group :name))
- (frequencies)
- (sort-by val)
- (reverse)
- (take 10)))
- )
+(def as-year-month (partial timef/unparse (timef/formatters :year-month))) ; timef/unparse pre-bound to the :year-month formatter; nothing visible in this diff calls it
+(defn compute-stats
+  "Counts downloads found in `lines`, a sequence of access-log lines.
+  Each line goes through parse-clf; only entries accepted by valid-download?
+  are kept. Returns a nested map {[group name] {version count}}."
+  [lines]
+  (let [downloads (filter valid-download? (map parse-clf lines))
+        hits      (frequencies (map (juxt :group :name :version) downloads))]
+    ;; regroup the flat [group name version] -> count table by artifact
+    (reduce-kv (fn [stats [group artifact version] total]
+                 (assoc-in stats [[group artifact] version] total))
+               {}
+               hits)))
+
+(defn process-log
+  "Opens `logfile` with io/reader, feeds its lines to compute-stats and
+  returns that result; with-open closes the reader on the way out."
+  [logfile]
+  (with-open [log (io/reader logfile)]
+    (-> log line-seq compute-stats)))
+
+(defn -main
+  "Reads log lines from *in*, runs compute-stats over them and prints the
+  resulting map with prn. `args` is accepted but not used."
+  [& args]
+  (let [lines (line-seq (java.io.BufferedReader. *in*))]
+    (prn (compute-stats lines))))
@@ -0,0 +1,13 @@
+127.0.0.2 - - [01/Jan/2012:06:43:40 +0000] "GET /repo/snowy/snowy/0.2.0/snowy-0.2.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+127.0.0.2 - - [28/Jan/2012:06:43:40 +0000] "GET /repo/snowy/snowy/0.2.0/snowy-0.2.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+127.0.0.1 - - [14/May/2012:06:40:59 +0000] "GET /repo/captain/archibald/haddock/0.1.0/haddock-0.1.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+127.0.0.2 - - [14/May/2012:06:41:59 +0000] "GET /repo/captain/archibald/haddock/0.1.0/haddock-0.1.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+127.0.0.2 - - [14/May/2012:06:42:40 +0000] "GET /repo/snowy/snowy/0.2.0/snowy-0.2.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+127.0.0.2 - - [14/May/2012:06:43:40 +0000] "GET /repo/snowy/snowy/0.2.0/snowy-0.2.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+blistering barnacles
+
+127.0.0.3 - - [14/May/2012:06:44:59 +0000] "GET /repo/captain/archibald/haddock/0.1.0/haddock-0.1.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+127.0.0.4 - - [14/May/2012:06:45:40 +0000] "GET /repo/snowy/snowy/0.3.0/snowy-0.3.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+127.0.0.2 - - [14/May/2012:06:43:40 +0000] "GET /repo/snowy/snowy/0.2.0/snowy-0.2.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
+billions of bilious [blue blistering] "barnacles in ten" thousand thundering "typhoons" "!"
+127.0.0.4 - - [14/May/2012:06:45:40 +0000] "GET /repo/snowy/snowy/0.3.0/snowy-0.3.0.jar HTTP/1.1" 200 2377 "-" "Java/1.6.0_30"
@@ -1,6 +1,7 @@
(ns clojars.test.unit.stats
(:require [clojars.stats :as stats]
[clj-time.core :as time]
+ [clojure.java.io :as io]
[clojure.test :refer :all]))
(deftest parse-path
@@ -22,4 +23,9 @@
(is (= "haddock" (:name m)))
(is (= "captain.archibald" (:group m)))
(is (= "0.1.0" (:version m)))
- (is (= "jar" (:ext m)))))
+ (is (= "jar" (:ext m)))))
+
+(deftest compute-stats
+ (let [stats (stats/process-log (io/resource "fake.access.log"))] ; fixture added in this commit under test-resources/
+ (is (= 5 (get-in stats [["snowy" "snowy"] "0.2.0"]))) ; the fixture holds five 200 GETs of snowy-0.2.0.jar
+ (is (= 3 (get-in stats [["captain.archibald" "haddock"] "0.1.0"]))))) ; and three of haddock-0.1.0.jar; the fixture also contains malformed lines

0 comments on commit bfa4f8f

Please sign in to comment.