Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

- store the timestamp of the last processed logfile in S3. Some refactorings and removed obsolete code. Updated aws-sdk to latest version
  • Loading branch information...
commit 1835cbb34fc0cb04706e67d44e5cc2525300368c 1 parent 281a597
@ingorichter ingorichter authored
View
48 downloadStats/downloadStatsUpdater.js
@@ -31,11 +31,11 @@ var fs = require("fs"),
request = require("request-json"),
temporary = require("temporary"),
LogfileProcessor = require("./logfileProcessor").LogfileProcessor,
- program = require('commander'),
+ programArgs = require('commander'),
Promise = require("bluebird"),
writeFile = Promise.promisify(require("fs").writeFile);
-program
+programArgs
.version('0.0.1')
.option('-d, --download', 'Download logfiles from S3')
.option('-e, --extract', 'Extract Extension download data from downloaded logfiles')
@@ -51,40 +51,32 @@ process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";
// read the config. This file must exist. Otherwise follow these setup instructions
// https://github.com/adobe/brackets-registry to create it.
var config = JSON.parse(fs.readFileSync(path.resolve(__dirname, "../config/config.json")));
-var lastProcessedTimestamp = {};
var httpPort = config.port || 4040; // default port for registry webapp
var protocol = config.insecure ? "http" : "https";
-try {
- lastProcessedTimestamp = JSON.parse(fs.readFileSync(path.resolve(__dirname, "lastProcessedLogfile.json")));
-
- if (!lastProcessedTimestamp.ts) {
- lastProcessedTimestamp.ts = 0;
- }
-} catch (Exception) {
- lastProcessedTimestamp.ts = 0;
-}
+// Constants
+var DOWNLOAD_STATS_FILENAME = "downloadStats.json",
+ RECENT_DOWNLOAD_STATS_FILENAME = "recentDownloadStats.json";
/**
* This is a helper log function that can be turned on and off by providing `-v`
* when running this script from the command line.
*/
function log() {
- if (program.verbose) {
+ if (programArgs.verbose) {
console.log(Array.prototype.slice.apply(arguments).join(' '));
}
}
-// Constants
-var DOWNLOAD_STATS_FILENAME = "downloadStats.json",
- RECENT_DOWNLOAD_STATS_FILENAME = "recentDownloadStats.json";
-
// create temp folder for logfiles
-var tempFolder = config.tempFolder || program.tempFolder;
+var tempFolder = programArgs.tempFolder || config.tempFolder;
if (tempFolder) {
try {
fs.mkdirSync(tempFolder);
} catch (e) {
+ // we don't care if the temp directory already exists,
+ // since this should only happen during testing.
+ // Usually we will use a generated temp dir that is unique
if (e.code !== "EEXIST") {
// tell us what went wrong
console.error(e.toString());
@@ -101,11 +93,9 @@ function downloadLogFiles(progress) {
log("Downloading logfiles from S3");
var logfileProcessor = new LogfileProcessor(config);
- var promise = logfileProcessor.downloadLogfiles(tempFolder, lastProcessedTimestamp.ts);
+ var promise = logfileProcessor.downloadLogfiles(tempFolder);
promise.then(function (timestampLastProcessedLogfile) {
- writeFile(path.resolve(__dirname, "lastProcessedLogfile.json"), JSON.stringify({ts: Date.parse(timestampLastProcessedLogfile)})).then(function () {
- deferred.resolve();
- });
+ deferred.resolve();
});
if (progress) {
@@ -144,7 +134,9 @@ function doItAll(progress) {
extractExtensionDownloadData(progress).then(function (downloadStats) {
writeFile(DOWNLOAD_STATS_FILENAME, JSON.stringify(downloadStats)).then(function () {
// posting works only from localhost
- var client = request.newClient(protocol + "://localhost:" + httpPort);
+ var url = protocol + "://localhost:" + httpPort;
+
+ var client = request.newClient(url);
client.sendFile("/stats", path.resolve(__dirname, DOWNLOAD_STATS_FILENAME), null, function (err, res, body) {
if (err) {
console.error(err);
@@ -162,10 +154,10 @@ function doItAll(progress) {
}
// Let's get to work
-if (program.download) {
- downloadLogFiles(program.progress);
-} else if (program.extract) {
- extractExtensionDownloadData(program.progress);
+if (programArgs.download) {
+ downloadLogFiles(programArgs.progress);
+} else if (programArgs.extract) {
+ extractExtensionDownloadData(programArgs.progress);
} else {
- doItAll(program.progress);
+ doItAll(programArgs.progress);
}
View
264 downloadStats/logfileProcessor.js
@@ -31,7 +31,6 @@ var AWS = require("aws-sdk"),
path = require("path"),
readline = require("readline"),
FileQueue = require("filequeue"),
- eachline = require("eachline"),
Promise = require("bluebird"),
_ = require("lodash"),
readFile = Promise.promisify(require("fs").readFile);
@@ -86,13 +85,75 @@ function formatDownloadDate(date) {
LogfileProcessor.prototype = {
/**
+ * Write the timestamp of the last processed logfile to S3.
+ *
+ * @param {JSON} - lastProcessedTimestamp {ts: timestamp}
+ *
+ * @return {Promise} - promise that resolves with the `lastProcessedTimestamp`
+ * once the object was written to S3. Will be rejected with err in case of any error.
+ */
+ setLastProcessedTimestamp: function (lastProcessedTimestamp) {
+ var self = this,
+ writeTSPromise = Promise.defer();
+
+ var s3 = new AWS.S3.Client({
+ sslEnabled: true
+ });
+
+ s3.putObject({
+ Bucket: self.bucketName,
+ Key: "logfileProcessing/lastProcessedLogfile.json",
+ ACL: "public-read",
+ ContentType: "application/json",
+ Body: new Buffer(JSON.stringify(lastProcessedTimestamp))
+ }, function (err, data) {
+ if (err) {
+ writeTSPromise.reject(err);
+ } else {
+ writeTSPromise.resolve(lastProcessedTimestamp);
+ }
+ });
+
+ return writeTSPromise.promise;
+ },
+
+ getLastProcessedTimestamp: function () {
+ var s3 = new AWS.S3.Client({
+ sslEnabled: true
+ });
+
+ var self = this,
+ readTSPromise = Promise.defer();
+
+ s3.getObject({
+ Bucket: self.bucketName,
+ Key: "logfileProcessing/lastProcessedLogfile.json"
+ }, function (err, data) {
+ if (err) {
+ if (err.code === "NoSuchKey") {
+ // return default: read all logs
+ readTSPromise.resolve(0);
+ } else {
+ readTSPromise.reject(err);
+ }
+ } else {
+ readTSPromise.resolve(data);
+ }
+ });
+
+ return readTSPromise.promise;
+ },
+
+ /**
* Download the S3 logfiles into the directory tempFolderName. The lastProcessedTimestamp indicates the last processed logfile.
* All previous logfiles will be skipped and not downloaded for further processing.
* @param {String} - tempFolderName temp location to store the logfiles
* @param {String} - lastProcessedTimestamp timestamp of the logfile last processed. Should be either 0 (include all)
- * or something much greater
+ * or something greater. If undefined, we will retrieve all logfiles from S3.
+ *
+ * @return {Promise} - resolved when all logfiles have been downloaded from S3.
*/
- downloadLogfiles: function (tempFolderName, lastProcessedTimestamp) {
+ _downloadLogfiles: function (tempFolderName, lastProcessedTimestamp) {
var self = this;
if (!lastProcessedTimestamp) {
@@ -150,6 +211,31 @@ LogfileProcessor.prototype = {
return globalPromise.promise;
},
+ downloadLogfiles: function (tempFolderName, lastProcessedTimestamp) {
+ var self = this,
+ downloadLogfilePromise = Promise.defer();
+
+ self.getLastProcessedTimestamp().then(function (timestamp) {
+ var promise = self._downloadLogfiles(tempFolderName, timestamp);
+ promise.then(function (timestampLastProcessedLogfile) {
+ var lastProcessedTimestamp = {ts: Date.parse(timestampLastProcessedLogfile)};
+ self.setLastProcessedTimestamp(lastProcessedTimestamp).then(function () {
+ downloadLogfilePromise.resolve(timestampLastProcessedLogfile);
+ }, function (err) {
+ downloadLogfilePromise.reject(err);
+ });
+ });
+
+ promise.progressed(function (value) {
+ downloadLogfilePromise.progress(value);
+ });
+ }, function () {
+ downloadLogfilePromise.reject("Error downloading last timestamp");
+ });
+
+ return downloadLogfilePromise.promise;
+ },
+
/**
* Process all the logfiles in tempFolderName. We are extracting the name and version of the extension (derived from the zip filename).
*
@@ -157,99 +243,101 @@ LogfileProcessor.prototype = {
* @return {JSON} - {"extensionname": downloads: {versions: {"version": downloadsPerVersion}}}
*/
extractDownloadStats: function (tempFolderName) {
- var deferred = Promise.defer();
-
- var fq = new FileQueue(100);
-
- fs.readdir(tempFolderName, function (err, files) {
- var result = {};
-
- files.forEach(function (file) {
-// fq.readFile(path.resolve(tempFolderName, file), function (err, content) {
-// var lines = content.toString().split('\n');
-// lines.forEach(function (line) {
-// var matchResult = line.match(AwsLogFileParserRegex);
-// if (matchResult) {
-// var uri = matchResult[8];
-// // we are only interested in the Extension zip files
-// if (uri.lastIndexOf(".zip") > -1) {
-// var m = uri.match(/(\S+)\/(\S+)\-(.*)\.zip/);
-// if (m) {
-// var extensionName = m[1],
-// version = m[3];
-//
-// if (!result[extensionName]) {
-// result[extensionName] = {downloads : { versions: {} }};
-//
-// result[extensionName].downloads.versions[version] = 1;
-// } else {
-// var downloadsForVersion = result[extensionName].downloads.versions[version];
-//
-// if (downloadsForVersion && !isNaN(downloadsForVersion)) {
-// downloadsForVersion++;
-// result[extensionName].downloads.versions[version] = downloadsForVersion;
-// } else {
-// result[extensionName].downloads.versions[version] = 1;
-// }
-// }
-// }
-// }
-// }
-//
-// deferred.progress(".");
-// });
-// });
- var content = fs.readFileSync(path.resolve(tempFolderName, file));
-
- var lines = content.toString().split('\n');
- lines.forEach(function (line) {
- var matchResult = line.match(AWSLogFileParserRegex);
- if (matchResult) {
- var uri = matchResult[8],
- date = matchResult[3],
- downloadDate = formatDownloadDate(date);
-
- // we are only interested in the Extension zip files
- if (uri.lastIndexOf(".zip") > -1) {
- var m = uri.match(/(\S+)\/(\S+)\-(.*)\.zip/);
- if (m) {
- var extensionName = m[1],
- version = m[3];
-
- if (!result[extensionName]) {
- result[extensionName] = {downloads : { versions: {}, recent: {}}};
-
- result[extensionName].downloads.versions[version] = 1;
- } else {
- var downloadsForVersion = result[extensionName].downloads.versions[version];
-
- if (downloadsForVersion && !isNaN(downloadsForVersion)) {
- downloadsForVersion++;
- result[extensionName].downloads.versions[version] = downloadsForVersion;
- } else {
- result[extensionName].downloads.versions[version] = 1;
- }
- }
-
- // count the recent downloads
- var recentDownloads = result[extensionName].downloads.recent;
-
- if (recentDownloads[downloadDate]) {
- recentDownloads[downloadDate]++;
- } else {
- recentDownloads[downloadDate] = 1;
- }
+ var globalPromise = Promise.defer(),
+ result = {};
+
+ function _readLogfileHelper(fq, fileName) {
+ var fileReadPromise = Promise.defer();
+
+ fq.readFile(fileName, function (err, fileContent) {
+ if (err) {
+ fileReadPromise.reject(err);
+ } else {
+ fileReadPromise.resolve(fileContent);
+ }
+ });
+
+ return fileReadPromise.promise;
+ }
+
+ function parseLogfile(content) {
+ var readContentPromise = Promise.defer();
+
+ var lines = content.toString().split('\n');
+ lines.forEach(function (line, index) {
+ var matchResult = line.match(AWSLogFileParserRegex);
+ if (matchResult) {
+ var uri = matchResult[8],
+ date = matchResult[3],
+ downloadDate = formatDownloadDate(date);
+
+// // we are only interested in the Extension zip files
+ var m = uri.match(/(\S+)\/(\S+)\-(.*)\.zip/);
+ if (m) {
+ var extensionName = m[1],
+ version = m[3];
+
+ if (!result[extensionName]) {
+ result[extensionName] = {downloads : { versions: {}, recent: {}}};
+
+ result[extensionName].downloads.versions[version] = 1;
+ } else {
+ var downloadsForVersion = result[extensionName].downloads.versions[version];
+
+ if (downloadsForVersion && !isNaN(downloadsForVersion)) {
+ downloadsForVersion++;
+ result[extensionName].downloads.versions[version] = downloadsForVersion;
+ } else {
+ result[extensionName].downloads.versions[version] = 1;
}
}
+
+ // count the recent downloads
+ var recentDownloads = result[extensionName].downloads.recent;
+
+ if (recentDownloads[downloadDate]) {
+ recentDownloads[downloadDate]++;
+ } else {
+ recentDownloads[downloadDate] = 1;
+ }
}
+ }
+
+ if (index === (lines.length - 1)) {
+ readContentPromise.resolve(result);
+ } else {
+ readContentPromise.progress(".");
+ }
+ });
+
+ return readContentPromise.promise;
+ }
+
+ fs.readdir(tempFolderName, function (err, allFiles) {
+ var fq = new FileQueue(150);
+
+ var allPromises = allFiles.map(function (file) {
+ var deferred = Promise.defer();
- deferred.progress(".");
+ var _readLogfileHelperPromise = _readLogfileHelper(fq, path.resolve(tempFolderName, file));
+
+ _readLogfileHelperPromise.then(function (content) {
+ parseLogfile(content).then(function () {
+ deferred.resolve();
+ });
+ }).done(function () {
+ globalPromise.progress(".");
});
+
+ return deferred.promise;
+ });
+
+ Promise.settle(allPromises).then(function () {
+ globalPromise.resolve(result);
});
- deferred.resolve(result);
});
- return deferred.promise;
+ return globalPromise.promise;
}
};
View
4 lib/repository.js
@@ -147,8 +147,8 @@ function addDownloadDataToPackage(name, version, newDownloads, recentDownloads)
var updated = false;
logger.debug("Extension package with name " + name + " found");
-
var packageVersions = registry[name].versions;
+
packageVersions.forEach(function (versionInfo) {
// we found the version
if (versionInfo.version === version) {
@@ -172,7 +172,7 @@ function addDownloadDataToPackage(name, version, newDownloads, recentDownloads)
var recentDownloadsUpdated = updateRecentDownloadsForPackage(name, recentDownloads);
// save changes to registry if there were any updates
- if (updated) {
+ if (updated || recentDownloadsUpdated) {
storage.saveRegistry(registry);
}
}
View
28 lib/routes.js
@@ -181,22 +181,6 @@ function _respondUnauthorized(req, res, htmlTemplate, data) {
_respond(req, res, htmlTemplate, data);
}
-/**
- * Remove the "recentDownloads" object from the registry. This is needed to return a proper registry when
- * _rss and _registryList is called.
- *
- * @param {object} registry The extension registry json
- * @return {object} clone of registry, where the "recentDownloads" object was removed.
- */
-function _removeRecentDownloadObject(registry) {
- var registryClone = JSON.parse(JSON.stringify(registry));
-
- if (registryClone.hasOwnProperty("recentDownloads")) {
- delete registryClone.recentDownloads;
- }
-
- return registryClone;
-}
///////////////////////////////
// Handlers for specific routes
///////////////////////////////
@@ -204,14 +188,14 @@ function _removeRecentDownloadObject(registry) {
function _index(req, res) {
_respond(req, res, "index", {
user: registry_utils.formatUserId.call({owner: req.user}),
- registry: registry_utils.sortRegistry(_removeRecentDownloadObject(repository.getRegistry())),
+ registry: registry_utils.sortRegistry(repository.getRegistry()),
repositoryBaseURL: config.repositoryBaseURL,
helpURL: config.helpURL
});
}
function _rss(req, res) {
- var items = registry_utils.sortRegistry(_removeRecentDownloadObject(repository.getRegistry()));
+ var items = registry_utils.sortRegistry(repository.getRegistry());
//max of 10
items = items.splice(0, 10);
@@ -222,7 +206,7 @@ function _rss(req, res) {
config.rss.description = config.rss.description || "";
config.rss.feedURL = config.rss.feedURL || "";
config.rss.siteURL = config.rss.siteURL || "";
-
+
var feed = new RSS({
title: config.rss.title,
description: config.rss.description,
@@ -233,11 +217,11 @@ function _rss(req, res) {
items.forEach(function (itm) {
var author = "";
var title = itm.metadata.title || itm.metadata.name || "";
-
+
if (itm.metadata.author && itm.metadata.author.name) {
author = itm.metadata.author.name;
}
-
+
feed.item({
title: title + " v" + itm.metadata.version,
description: itm.metadata.description,
@@ -255,7 +239,7 @@ function _rss(req, res) {
function _registryList(req, res) {
_respond(req, res, "registryList", {
layout: false,
- registry: registry_utils.sortRegistry(_removeRecentDownloadObject(repository.getRegistry())),
+ registry: registry_utils.sortRegistry(repository.getRegistry()),
repositoryBaseURL: config.repositoryBaseURL,
helpURL: config.helpURL
});
View
2  package.json
@@ -26,7 +26,7 @@
"passport-github": "~0.1.5",
"brackets-extensibility": ">0.31.0",
"semver": "2.x",
- "aws-sdk": "~2.0.0-rc5",
+ "aws-sdk": "~2.0.0-rc9",
"replify": "~1.1.4",
"clone": "~0.1.9",
"rss": "0.2.1",
View
13 spec/logfileProcessor.spec.js
@@ -53,7 +53,7 @@ describe("LogfileProcessor", function () {
it("should return the information for 1 Extension", function (done) {
var lfp = new logfileProcessor.LogfileProcessor(config);
- lfp.extractDownloadStats(testLogfileDirectory + "/one-extension").then(function (downloadStats) {
+ lfp.extractDownloadStats(path.join(testLogfileDirectory, "one-extension")).then(function (downloadStats) {
expect(downloadStats["select-parent"].downloads.versions["1.0.0"]).toBe(1);
done();
@@ -62,7 +62,7 @@ describe("LogfileProcessor", function () {
it("should return the information for 1 Extension and multiple versions", function (done) {
var lfp = new logfileProcessor.LogfileProcessor(config);
- lfp.extractDownloadStats(testLogfileDirectory + "/one-extension-multiple-versions").then(function (downloadStats) {
+ lfp.extractDownloadStats(path.join(testLogfileDirectory, "one-extension-multiple-versions")).then(function (downloadStats) {
expect(downloadStats["select-parent"].downloads.versions["1.0.0"]).toBe(1);
expect(downloadStats["select-parent"].downloads.versions["1.0.2"]).toBe(1);
expect(downloadStats["select-parent"].downloads.versions["1.0.3"]).toBe(1);
@@ -70,6 +70,15 @@ describe("LogfileProcessor", function () {
done();
});
});
+
+ it("should return no information for extension with rar extension", function (done) {
+ var lfp = new logfileProcessor.LogfileProcessor(config);
+ lfp.extractDownloadStats(path.join(testLogfileDirectory, "one-invalid-extension-log")).then(function (downloadStats) {
+ expect(downloadStats).toEqual({});
+
+ done();
+ });
+ });
});
describe("Create recent download stats", function () {
View
11 spec/routes.spec.js
@@ -43,7 +43,6 @@ var _index = routes.__get__("_index"),
_upload = routes.__get__("_upload"),
_rss = routes.__get__("_rss"),
_stats = routes.__get__("_stats"),
- _removeRecentDownloadObject = routes.__get__("_removeRecentDownloadObject"),
lastVersionDate = registry_utils.lastVersionDate,
formatUserId = registry_utils.formatUserId,
ownerLink = registry_utils.ownerLink;
@@ -449,16 +448,6 @@ describe("routes", function () {
expect(registry["snippets-extension"].versions[0].downloads).toBe(6);
expect(registry["snippets-extension"].totalDownloads).toBe(6);
});
-
- it("should remove the recentDownloads object from the registry", function () {
- var registry = JSON.parse(fs.readFileSync(path.join(path.dirname(module.filename), "testRegistry", "registryWithRecentDownloadData.json")));
-
- var modifiedRegistry = _removeRecentDownloadObject(registry);
-
- expect(Object.keys(modifiedRegistry).length).toBe(1);
- expect(modifiedRegistry["snippets-extension"]).not.toBeNull();
- expect(modifiedRegistry.recentDownloads).toBeUndefined();
- });
});
});
View
123 spec/testRegistry/registryWithRecentDownloadData.json
@@ -1,123 +0,0 @@
-{
- "snippets-extension": {
- "metadata": {
- "name": "snippets-extension",
- "title": "Brackets Snippets",
- "homepage": "https://github.com/jrowny/brackets-snippets",
- "author": {
- "name": "Jonathan Rowny"
- },
- "version": "1.0.0",
- "engines": {
- "brackets": ">=0.24"
- },
- "description": "A simple brackets snippets extension."
- },
- "owner": "irichter",
- "versions": [
- {
- "version": "0.3.0",
- "published": "2014-01-10T17:27:25.996Z",
- "brackets": ">=0.24"
- }
- ]
- },
- "recentDownloads": {
- "startDate": "2014-01-08T08:46:14.663Z",
- "endDate": "2014-01-15T08:46:15.579Z",
- "mostDownloadedExtensions": [
- {
- "extensionName": "basic-valid-extension",
- "totalDownloads": 43
- },
- {
- "extensionName": "stylussupport",
- "totalDownloads": 36
- },
- {
- "extensionName": "snippets-extension",
- "totalDownloads": 29
- },
- {
- "extensionName": "everyscrub",
- "totalDownloads": 23
- },
- {
- "extensionName": "select-parent",
- "totalDownloads": 21
- },
- {
- "extensionName": "keywords-extension",
- "totalDownloads": 16
- },
- {
- "extensionName": "com.adrocknaphobia.inlineimage",
- "totalDownloads": 15
- },
- {
- "extensionName": "long-desc-extension",
- "totalDownloads": 14
- },
- {
- "extensionName": "test-quickly",
- "totalDownloads": 8
- },
- {
- "extensionName": "malicious-script-extension",
- "totalDownloads": 8
- },
- {
- "extensionName": "couzteau.edgecode.spellcheck",
- "totalDownloads": 6
- },
- {
- "extensionName": "jrowny.brackets.snippets",
- "totalDownloads": 4
- },
- {
- "extensionName": "incompatible-newer",
- "totalDownloads": 4
- },
- {
- "extensionName": "incompatible-version",
- "totalDownloads": 3
- },
- {
- "extensionName": "testquickly",
- "totalDownloads": 2
- },
- {
- "extensionName": "pflynn.brackets.commands.guide",
- "totalDownloads": 2
- },
- {
- "extensionName": "extensions/basic-valid-extension",
- "totalDownloads": 1
- },
- {
- "extensionName": "bogus-top-dir",
- "totalDownloads": 1
- },
- {
- "extensionName": "one-level-extension",
- "totalDownloads": 1
- },
- {
- "extensionName": "long-desc-extension3",
- "totalDownloads": 1
- },
- {
- "extensionName": "hoverpreview",
- "totalDownloads": 1
- },
- {
- "extensionName": "long-desc-extension2",
- "totalDownloads": 1
- },
- {
- "extensionName": "gruehle-hover-preview",
- "totalDownloads": 1
- }
- ]
- }
-}
Please sign in to comment.
Something went wrong with that request. Please try again.