Skip to content

Commit

Permalink
Add support for extracting features from JavaScript files
Browse files Browse the repository at this point in the history
  • Loading branch information
pavol-bielik committed Jan 22, 2015
1 parent d36067c commit 874b20e
Show file tree
Hide file tree
Showing 8 changed files with 1,290 additions and 2 deletions.
111 changes: 111 additions & 0 deletions bin/js_features.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
//#! /usr/bin/env nodejs
// -*- js -*-

"use strict";

var UglifyJS = require("../tools/node");
var sys = require("util");
var yargs = require("yargs");
var fs = require("fs");

// Command-line definition for the feature extractor, built with yargs.
// Three boolean flags (print_ast, skip_minified, json_formatting) and one
// string option (features) are declared; the parsed result ends up in ARGS.
var ARGS = yargs
.usage("$0 input1.js \n")
.describe("print_ast", "Prints a dot file describing the internal abstract syntax")
.describe("json_formatting", "Prints the JSON nicelly formatted")
.describe("skip_minified", "Whether to skip processing minified files")
.describe("features", "Comma separated list of features: \n" +
"ASTREL - relations in AST, \n" +
"FNAMES - function names to internal calls")
// At least one positional argument (the input file) must be supplied.
.demand(1)
// When --features is not given, both feature kinds are requested.
.default('features', 'ASTREL,FNAMES')
.boolean("print_ast")
.boolean("skip_minified")
.boolean("json_formatting")
.string("features")
// Help text is wrapped at 80 columns.
.wrap(80)
.argv
;

// Map dashed option spellings (e.g. "print-ast") onto the underscore names
// that the rest of this script reads.
normalize(ARGS);

if (ARGS.h || ARGS.help) {
    // console.log/console.error replace util.puts/util.error, which no longer
    // exist in current Node.js releases; the output is the same line of text.
    console.log(yargs.help());
    process.exit(0);
}

// Positional arguments are the input files; only the first one is used.
var files = ARGS._.slice();
if (files.length > 1) {
    console.error("WARNING: expected only single input file. Processing file '" + files[0] + "' while the rest is ignored.");
}

// A bare "--features" with no value may surface as `true` or as an empty
// string depending on how the option parser treats string options.
if (ARGS.features === true || ARGS.features === "") {
    console.error("ERROR: empty set of features.");
    process.exit(1);
}

// Warn about unknown feature names. The original comma-separated string is
// still what gets handed to processFile below.
var features = ARGS.features.split(",");
for (var i = 0; i < features.length; i++) {
    if (features[i] !== "FNAMES" && features[i] !== "ASTREL") {
        console.error("WARNING: ignoring not suppored feature '" + features[i] + "'.");
    }
}

// Honour the warning above: process the first input file only.
processFile(files[0], ARGS.print_ast, ARGS.features, ARGS.json_formatting, ARGS.skip_minified);

/**
 * Removes a leading interpreter ("#!") line from a script's source text.
 *
 * @param {string} code - Full source text of the file.
 * @returns {string} The text after the first line when `code` starts with
 *   "#!", an empty string when the "#!" line is the whole input, and `code`
 *   unchanged otherwise.
 */
function stripInterpreter(code){
    if (code.slice(0, 2) !== "#!") {
        return code;
    }

    var lineEnd = code.indexOf("\n");
    if (lineEnd === -1) {
        // The interpreter line has no terminating newline, so it is the entire
        // file; slicing from index 0 would hand the "#!" line back unchanged.
        return "";
    }

    return code.slice(lineEnd + 1);
}

/**
 * Reads one JavaScript file, runs feature extraction on it and prints the
 * resulting JSON to stdout.
 *
 * Nothing is printed when the file cannot be read (an error is reported on
 * stderr instead), when the extractor returns null/undefined, or when the
 * extracted result is the empty feature set.
 *
 * @param {string} file - Path of the file to process.
 * @param {boolean} print_ast - Forwarded to UglifyJS.extractFeatures.
 * @param {string} features - Forwarded to UglifyJS.extractFeatures.
 * @param {boolean} json_formatting - When falsy, all whitespace is removed
 *   from the output before printing.
 * @param {boolean} skip_minified - Forwarded to UglifyJS.extractFeatures.
 * @throws {SyntaxError} When the extractor output is not valid JSON.
 */
function processFile(file, print_ast, features, json_formatting, skip_minified) {
    var code;
    try {
        code = fs.readFileSync(file, "utf-8");
    } catch (ex) {
        // console.error replaces util.error, which no longer exists in
        // current Node.js releases.
        console.error("ERROR: can't read file: " + file);
        return;
    }

    // If the file is a script with an interpreter line, the UglifyJS parser
    // will fail to parse it, so that line is removed first.
    code = stripInterpreter(code);

    var output = UglifyJS.extractFeatures(code, file, print_ast, features, skip_minified);
    if (output == null) {
        return;
    }

    // NOTE(review): this strips whitespace everywhere, including inside JSON
    // string values - confirm that is intended before relying on such values.
    var compact = removeWhitespace(output);
    if (!json_formatting) {
        output = compact;
    }

    // Validate the JSON; JSON.parse throws on malformed output.
    JSON.parse(output);

    // Stay silent when no features were extracted.
    if (compact !== '{"query":[],"assign":[]}') {
        console.log(output);
    }
}

/* ------------------------ */

/**
 * Renames, in place, every own enumerable key of `o` that contains a dash so
 * that each dash becomes an underscore (e.g. "print-ast" -> "print_ast").
 *
 * @param {Object} o - Object whose keys are rewritten; it is mutated.
 */
function normalize(o) {
    // Iterate over a snapshot of the keys: the object is modified inside the
    // loop, and Object.keys does not depend on `o` having a usable
    // hasOwnProperty method (it may be shadowed by a parsed option name).
    Object.keys(o).forEach(function (key) {
        if (key.indexOf("-") === -1) {
            return;
        }
        o[key.replace(/-/g, "_")] = o[key];
        delete o[key];
    });
}

/**
 * Returns `input` with every whitespace character removed.
 *
 * @param {string} input - Text to compact.
 * @returns {string} The text without any whitespace.
 */
function removeWhitespace(input){
    var whitespaceRuns = /\s+/g;
    var compacted = input.replace(whitespaceRuns, "");
    return compacted;
}
61 changes: 61 additions & 0 deletions extract_features.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/python

import multiprocessing
import os
import sys
import shutil

def PrintUsage():
    """Print the command-line usage text to stdout and exit with status 1."""
    # print(...) with a single argument produces the same output under
    # Python 2 and Python 3, unlike the bare print statement.
    print("""
Usage:
extract_features.py --filelist <file>
OR
extract_features.py --dir <directory>
""")
    # sys.exit instead of the site-provided exit() helper, which is intended
    # for interactive sessions.
    sys.exit(1)

def GetJSFilesInDir(d):
    """Yield the path of every file under directory `d` whose name ends in '.js'.

    The tree is traversed with os.walk, so files in nested directories are
    included. Paths are built by joining the containing directory and the
    file name.
    """
    for dirpath, _subdirs, filenames in os.walk(d):
        candidates = (os.path.join(dirpath, name) for name in filenames)
        for path in candidates:
            if not path.endswith('.js'):
                continue
            yield path


# Directory into which each worker process appends extractor output.
# ExtractFeaturesForFileList assigns it before any worker runs.
TMP_DIR = ""

def ExtractFeaturesForFile(f):
    """Run bin/js_features.js on file `f`, appending its stdout to a per-process file.

    The output file is TMP_DIR/<pid of the calling process>, so every worker
    process accumulates its results in its own file.

    Failures to open the output file or to start nodejs are reported on stderr
    and do not raise, so a single bad file does not abort a whole batch.
    """
    import subprocess

    out_path = "%s/%d" % (TMP_DIR, os.getpid())
    try:
        with open(out_path, "a") as out:
            # An argument list (no shell) passes the file name through
            # verbatim, so quotes or shell metacharacters in a path can neither
            # break the command nor inject another one.
            subprocess.call(
                ["nodejs", "bin/js_features.js", "--skip_minified", f],
                stdout=out)
    except (IOError, OSError) as e:
        sys.stderr.write("ERROR: feature extraction failed for '%s': %s\n" % (f, e))

def _SetWorkerTmpDir(path):
    """Pool initializer: record the shared output directory in a worker process."""
    global TMP_DIR
    TMP_DIR = path


def ExtractFeaturesForFileList(files):
    """Extract features for every file in `files` in parallel and print them to stdout.

    Each worker process appends its results to its own file inside a fresh
    temporary directory; once all workers have finished, the contents of those
    files are copied to stdout. The temporary directory is always removed.
    """
    import tempfile

    global TMP_DIR
    # mkdtemp creates a new, uniquely named directory instead of reusing a
    # predictable path that might already exist.
    TMP_DIR = tempfile.mkdtemp(prefix="feature_extractor")
    pool = None
    try:
        # The initializer hands TMP_DIR to each worker explicitly, so workers
        # do not depend on inheriting this module's global from the parent.
        pool = multiprocessing.Pool(multiprocessing.cpu_count(),
                                    initializer=_SetWorkerTmpDir,
                                    initargs=(TMP_DIR,))
        pool.map(ExtractFeaturesForFile, files)
        pool.close()
        pool.join()
        pool = None

        # Copy the raw bytes; sys.stdout.buffer exists on Python 3 only.
        sys.stdout.flush()
        out = getattr(sys.stdout, "buffer", sys.stdout)
        for name in os.listdir(TMP_DIR):
            with open(os.path.join(TMP_DIR, name), "rb") as part:
                shutil.copyfileobj(part, out)
        out.flush()
    finally:
        if pool is not None:
            # Reached only when mapping failed; stop any remaining workers.
            pool.terminate()
        shutil.rmtree(TMP_DIR)


if __name__ == '__main__':
    # Both modes take a flag followed by one value, so fewer than three argv
    # entries cannot be a valid invocation.
    if len(sys.argv) < 3:
        PrintUsage()

    # Process command line arguments
    if sys.argv[1] == "--filelist":
        with open(sys.argv[2], 'r') as filelist:
            # Drop empty entries (e.g. from the trailing newline) so that no
            # extraction is attempted for an empty file name.
            files = [name for name in filelist.read().split('\n') if name]
    elif sys.argv[1] == "--dir":
        files = list(GetJSFilesInDir(sys.argv[2]))
    else:
        PrintUsage()
    # Remove files that say they are minified.
    files = [f for f in files if not f.endswith('.min.js')]
    ExtractFeaturesForFileList(files)

Loading

0 comments on commit 874b20e

Please sign in to comment.