Skip to content
This repository
Browse code

move project graph to s3

  • Loading branch information...
commit e0b2bf75969b5a04bd76e15e032defafef04b6f6 1 parent 4109cc6
Justin Kramer authored January 16, 2013
4  README.md
Source Rendered
@@ -48,13 +48,13 @@ Browsable dependency graph of Clojure projects. See it live here: http://www.clo
48 48
 
49 49
 ## Development
50 50
 
51  
-To refresh the project graph, run:
  51
+To refresh the project graph, copy `resources/config.clj.default` to `resources/config.clj` and update it with your AWS credentials and GitHub API client ID/secret. Then run:
52 52
 
53 53
 ```
54 54
 scripts/refresh.sh
55 55
 ```
56 56
 
57  
-This takes a long time - about 2 hours. The script pulls down project data from Clojars and GitHub, processes the data, and saves the result to `resources/project_graph.clj`.
  57
+This takes a long time - about 2 hours. The script pulls down project data from Clojars and GitHub, processes the data, and uploads the gzipped result to S3 as `project_graph.clj.gz`.
58 58
 
59 59
 ## License
60 60
 
6  project.clj
... ...
@@ -1,6 +1,6 @@
1 1
 (defproject clojuresphere "0.0.1-SNAPSHOT"
2 2
   :description "Browsable dependency graph of Clojure projects"
3  
-  :dependencies [[org.clojure/clojure "1.4.0"]
  3
+  :dependencies [[org.clojure/clojure "1.5.0-RC2"]
4 4
                  [org.clojure/data.zip "0.1.1"]
5 5
                  [tentacles "0.2.2"]
6 6
                  [compojure "1.1.1"]
@@ -10,5 +10,7 @@
10 10
                  [clj-http "0.5.3"]
11 11
                  [org.apache.maven/maven-artifact "3.0.4"]
12 12
                  [cheshire "4.0.2"]
13  
-                 [amalloy/ring-gzip-middleware "0.1.1"]]
  13
+                 [amalloy/ring-gzip-middleware "0.1.1"]
  14
+                 [clj-aws-s3 "0.3.3"]
  15
+                 [sundry "0.4.0"]]
14 16
   :min-lein-version "2.0.0")
282,425  resources/project_graph.clj
0 additions, 282425 deletions not shown
78  src/clojuresphere/preprocess.clj
@@ -9,7 +9,14 @@
9 9
                                             latest-coord? count-dependents]])
10 10
   (:require [clojure.xml :as xml]
11 11
             [clojure.zip :as zip]
12  
-            [clojure.java.io :as io]))
  12
+            [clojure.java.io :as io]
  13
+            [sundry.io :as sio]
  14
+            [aws.sdk.s3 :as s3]))
  15
+
  16
+(def config (sio/read (io/resource "config.clj")))
  17
+
  18
+(def aws-cred {:access-key (:aws-access-key config)
  19
+               :secret-key (:aws-secret-key config)})
13 20
 
14 21
 ;; TODO: project.clj should actually be eval'd, not just read
15 22
 (defn parse-project-data [[defproj name version & opts]]
@@ -68,12 +75,20 @@
68 75
   (Thread/sleep 1000) ;crude rate limit
69 76
   (println "Fetching repo" owner repo)
70 77
   (flush)
71  
-  (specific-repo owner repo))
  78
+  (specific-repo owner repo {:client_id (:github-client-id config)
  79
+                             :client_secret (:github-client-secret config)}))
72 80
 
73 81
 (defn fetch-repos [start-page]
74 82
   (Thread/sleep 1000) ;crude rate limit
75 83
   (println "Fetching page" start-page) ;FIXME: proper logging
76  
-  (search-repos "clojure" {:language "clojure" :start-page start-page}))
  84
+  (let [resp (search-repos
  85
+               "clojure" {:language "clojure"
  86
+                          :start-page start-page
  87
+                          :client_id (:github-client-id config)
  88
+                          :client_secret (:github-client-secret config)})]
  89
+    (if-not (sequential? resp)
  90
+      (throw (ex-info "Bad response" {:response resp}))
  91
+      resp)))
77 92
 
78 93
 (defn fetch-all-repos []
79 94
   (->> (iterate inc 1)
@@ -127,8 +142,9 @@
127 142
   (let [;; special exception for clojure itself (written in java)
128 143
         clojure-repo (first (search-repos "clojure"))
129 144
         repos (cons clojure-repo (fetch-all-repos))]
130  
-    (remove (comp #{"clojure-slick-rogue"} :name :github) ;broken
131  
-            (fetch-all-repo-projects repos))))
  145
+    (doall
  146
+      (remove (comp #{"clojure-slick-rogue"} :name :github) ;broken
  147
+              (fetch-all-repo-projects repos)))))
132 148
 
133 149
 ;; clojars
134 150
 
@@ -196,18 +212,24 @@
196 212
 
197 213
 (defn build-deps [projects]
198 214
   (reduce
199  
-   (fn [g [[name ver :as coord] [dname dver :as dep-coord]]]
200  
-     (-> g
201  
-         (update-in [name :versions ver :dependencies] (fnil conj #{}) dep-coord)
202  
-         (update-in [dname :versions dver :dependents] (fnil conj #{}) coord)))
203  
-   {}
204  
-   (for [p projects
205  
-         [dname dver] (concat (:dependencies p)
206  
-                              (:dev-dependencies p)
207  
-                              (get-in p [:profiles :dev :dependencies]))]
208  
-     (let [coord (project-coord p)
209  
-           dep-coord (lein-coord dname dver)]
210  
-       [coord dep-coord]))))
  215
+    (fn [g [[name ver :as coord] [dname dver :as dep-coord]]]
  216
+      (-> g
  217
+        (update-in [name :versions ver :dependencies] (fnil conj #{}) dep-coord)
  218
+        (update-in [dname :versions dver :dependents] (fnil conj #{}) coord)))
  219
+    {}
  220
+    (for [p projects
  221
+          :let [deps (concat (:dependencies p)
  222
+                             (:dev-dependencies p)
  223
+                             (get-in p [:profiles :dev :dependencies]))]
  224
+          dep deps
  225
+          :when (and (vector? dep)
  226
+                     (symbol? (first dep))
  227
+                     (string? (second dep))
  228
+                     (pos? (count (second dep))))]
  229
+      (let [[dname dver] dep
  230
+            coord (project-coord p)
  231
+            dep-coord (lein-coord dname dver)]
  232
+        [coord dep-coord]))))
211 233
 
212 234
 ;; For clojars info, we look at the latest stable version, but for
213 235
 ;; github, we look for the most-watched (possibly unstable) version
@@ -267,17 +289,25 @@
267 289
       (build-info projects)
268 290
       (build-counts)))
269 291
 
  292
+(defn upload-project-graph [aws-cred g]
  293
+  (let [tmp (java.io.File/createTempFile "project_graph" ".clj.gz")]
  294
+    (with-open [w (io/writer
  295
+                    (java.util.zip.GZIPOutputStream.
  296
+                      (io/output-stream tmp)))]
  297
+      (binding [*out* w]
  298
+        (prn g)))
  299
+    (s3/put-object
  300
+      aws-cred "clojuresphere.com" "project_graph.clj.gz" tmp
  301
+      {} (s3/grant :all-users :read))
  302
+    (.delete tmp)))
  303
+
270 304
 ;; See scripts/refresh.sh
271 305
 (defn -main [& args]
272 306
   (let [clojars-dir (first args)
273 307
         projects (fetch-all-projects clojars-dir)
274  
-        g (build-project-graph projects)
275  
-        out-path (str (System/getProperty "user.dir")
276  
-                      "/resources/project_graph.clj")]
277  
-    (println "Saving project graph...")
278  
-    (with-open [w (io/writer out-path)]
279  
-      (binding [*out* w]
280  
-        (pprint g)))
  308
+        g (build-project-graph projects)]
  309
+    (println "Uploading project graph...")
  310
+    (upload-project-graph aws-cred g)
281 311
     (println "Done")))
282 312
 
283 313
 
25  src/clojuresphere/project_model.clj
... ...
@@ -1,17 +1,24 @@
1 1
 (ns clojuresphere.project-model
2  
-  (:use [clojuresphere.util :only [read-resource]]
3  
-        [clojure.java.io :as io])
4  
-  (:import [org.apache.maven.artifact.versioning DefaultArtifactVersion]))
  2
+  (:use [clojuresphere.util :only [read-resource]])
  3
+  (:import [org.apache.maven.artifact.versioning DefaultArtifactVersion])
  4
+  (:require [clojure.java.io :as io]
  5
+            [sundry.io :as sio]
  6
+            [clj-http.client :as http]))
5 7
 
6 8
 ;; we don't need no stinkin database
7 9
 
8  
-(def graph-data-file "project_graph.clj")
9  
-(defonce graph (read-resource graph-data-file))
  10
+(def graph-url "https://s3.amazonaws.com/clojuresphere.com/project_graph.clj.gz")
  11
+
  12
+(let [resp (http/get graph-url {:as :stream})]
  13
+  (defonce graph
  14
+    (sundry.io/read
  15
+      (java.util.zip.GZIPInputStream.
  16
+        (:body resp))))
  17
+  (def last-updated (get-in resp [:headers "last-modified"])))
  18
+
10 19
 (def project-count (count (filter #(or (:github %) (:clojars %)) (vals graph))))
11 20
 (def github-count (count (filter :github (vals graph))))
12 21
 (def clojars-count (count (filter :clojars (vals graph))))
13  
-(def last-updated (-> graph-data-file
14  
-                      io/resource io/file .lastModified (java.util.Date.)))
15 22
 
16 23
 (def sorted-pids
17 24
   {:dependents (->> graph (sort-by (comp :all :dependent-counts val) >) keys vec)
@@ -34,8 +41,8 @@
34 41
        (group-by year-quarter)
35 42
        (into (sorted-map))))
36 43
 
37  
-(def first-year (first (key (first creates-per-quarter))))
38  
-(def last-year (first (key (last creates-per-quarter))))
  44
+(def first-year (ffirst (first creates-per-quarter)))
  45
+(def last-year (ffirst (last creates-per-quarter)))
39 46
 (def quarterly-counts
40 47
   (reductions + (map count (vals creates-per-quarter))))
41 48
 

0 notes on commit e0b2bf7

Please sign in to comment.
Something went wrong with that request. Please try again.