Permalink
Browse files

checkin latest cron tasks

  • Loading branch information...
1 parent 6509d3d commit fe8827b683dadc3ca4b5f42258bc820c3f8792a6 @igrigorik committed Nov 18, 2012
Showing with 12 additions and 2 deletions.
  1. +0 −2 crawler/s3.cron
  2. +12 −0 crawler/tasks.cron
View
@@ -1,2 +0,0 @@
-2 * * * * gzip -9 /home/archiver/githubarchive/crawler/data/*.json && /usr/local/bin/s3cmd -c /home/archiver/.s3cfg --acl-public sync /home/archiver/githubarchive/crawler/data/*.gz s3://data.githubarchive.org/
-0 0 * * * find /home/archiver/githubarchive/crawler/data/* -mtime +30 -exec rm {} \;
View
@@ -0,0 +1,12 @@
+# sync data to cloud storage: at minute 5 of every hour gzip all crawler/data/*.json and copy the previous hour's archive (named by date -d "1 hour ago") to the gs:// bucket as public-read; at minute 8 s3cmd-sync every crawler/data/*.gz to the s3:// bucket. The % signs are written as \% because cron gives an unescaped % special meaning.
+5 * * * * gzip -9 /home/archiver/githubarchive/crawler/data/*.json && /usr/bin/gsutil cp -a public-read /home/archiver/githubarchive/crawler/data/`date +"\%Y-\%m-\%d-\%-k" -d "1 hour ago"`.json.gz gs://data.githubarchive.org
+8 * * * * /usr/local/bin/s3cmd -c /home/archiver/.s3cfg --acl-public sync /home/archiver/githubarchive/crawler/data/*.gz s3://data.githubarchive.org/
+
+# import data into bigquery: at minute 15 of every hour, in a bash login shell, wget the previous hour's .json.gz from data.githubarchive.org into /tmp and pass it to sync.rb with -f ... --no-sync; stdout and stderr are appended to /var/log/bigquery.log. NOTE(review): the file name is computed by two separate date calls and the /tmp download is not removed by this entry - confirm whether sync.rb cleans it up.
+15 * * * * /bin/bash -l -c 'wget -nv -P /tmp http://data.githubarchive.org/`date +"\%Y-\%m-\%d-\%-k" -d "1 hour ago"`.json.gz && cd /home/archiver/githubarchive/bigquery && ruby sync.rb -f /tmp/`date +"\%Y-\%m-\%d-\%-k" -d "1 hour ago"`.json.gz --no-sync' >> /var/log/bigquery.log 2>&1
+
+# run daily GH report: every day at 20:00 (cron's local time) run reports/daily.rb in a bash login shell. HOMINID_KEY and HOMINID_LIST are set to empty values in this checked-in copy - presumably real values are filled in on the host; confirm before deploying.
+0 20 * * * /bin/bash -l -c 'cd /home/archiver/githubarchive/reports && HOMINID_KEY= HOMINID_LIST= ruby daily.rb'
+
+# keep last 30 days worth of data: every day at 00:00 run rm on each entry found under crawler/data/* whose modification time is more than 30 days old (find -mtime +30).
+0 0 * * * find /home/archiver/githubarchive/crawler/data/* -mtime +30 -exec rm {} \;

0 comments on commit fe8827b

Please sign in to comment.