Skip to content

Commit

Permalink
Adding base classes for re-factored module analysis.
Browse files Browse the repository at this point in the history
Started to fill out interfaces and abstract classes for the new
AMD-compatible module analysis package.
  • Loading branch information
James Thomas committed Apr 20, 2012
1 parent 0f3588b commit 9135e16
Show file tree
Hide file tree
Showing 10 changed files with 235 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/main/java/org/dtk/analysis/ModuleFormat.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
package org.dtk.analysis;

/**
 * Module formats distinguished during module analysis.
 */
public enum ModuleFormat {

    /** Source uses the AMD module format. */
    AMD,

    /** Source does not use the AMD module format. */
    NON_AMD;
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
package org.dtk.analysis.exceptions;

/**
 * Fatal error occurred during module analysis of source files.
 * Module analysis was unable to complete successfully.
 *
 * @author James Thomas
 */
public class FatalAnalysisError extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an error with neither a detail message nor a cause.
     */
    public FatalAnalysisError() {
        super();
    }

    /**
     * Creates an error with a detail message.
     *
     * @param message description of what went wrong
     */
    public FatalAnalysisError(String message) {
        super(message);
    }

    /**
     * Creates an error with a detail message and the underlying cause,
     * so the original failure is kept in the stack trace.
     *
     * @param message description of what went wrong
     * @param cause underlying failure that aborted the analysis
     */
    public FatalAnalysisError(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an error wrapping the underlying cause.
     *
     * @param cause underlying failure that aborted the analysis
     */
    public FatalAnalysisError(Throwable cause) {
        super(cause);
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
package org.dtk.analysis.exceptions;

/**
 * Source contents are not available for the module identifier given.
 * This module source was not retrieved during analysis, despite being
 * identified.
 *
 * @author James Thomas
 */
public class ModuleSourceNotAvailable extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception with neither a detail message nor a cause.
     */
    public ModuleSourceNotAvailable() {
        super();
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of the failure, e.g. the module identifier
     */
    public ModuleSourceNotAvailable(String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and the underlying cause,
     * so the original failure is kept in the stack trace.
     *
     * @param message description of the failure, e.g. the module identifier
     * @param cause underlying failure
     */
    public ModuleSourceNotAvailable(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception wrapping the underlying cause.
     *
     * @param cause underlying failure
     */
    public ModuleSourceNotAvailable(Throwable cause) {
        super(cause);
    }
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
package org.dtk.analysis.exceptions;

/**
 * Module identifier provided wasn't encountered during module analysis.
 *
 * @author James Thomas
 */
public class UnknownModuleIdentifier extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception with neither a detail message nor a cause.
     */
    public UnknownModuleIdentifier() {
        super();
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of the failure, e.g. the unknown identifier
     */
    public UnknownModuleIdentifier(String message) {
        super(message);
    }

    /**
     * Creates an exception with a detail message and the underlying cause,
     * so the original failure is kept in the stack trace.
     *
     * @param message description of the failure, e.g. the unknown identifier
     * @param cause underlying failure
     */
    public UnknownModuleIdentifier(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception wrapping the underlying cause.
     *
     * @param cause underlying failure
     */
    public UnknownModuleIdentifier(Throwable cause) {
        super(cause);
    }
}
7 changes: 7 additions & 0 deletions src/main/java/org/dtk/analysis/page/ParsePhase.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
package org.dtk.analysis.page;

/**
 * Phases a page parse can be in while its script tags are scanned.
 */
public enum ParsePhase {

    /** The Dojo script has not been found yet. */
    PRE_DOJO,

    /** The Dojo script has been found. */
    POST_DOJO,

    /** Presumably marks a parse that failed; exact usage to be confirmed. */
    ERROR;
}
151 changes: 151 additions & 0 deletions src/main/java/org/dtk/analysis/page/WebPage.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
package org.dtk.analysis.page;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.http.ParseException;
import org.dtk.analysis.ModuleAnalysis;
import org.dtk.analysis.ModuleFormat;
import org.dtk.analysis.exceptions.FatalAnalysisError;
import org.dtk.analysis.script.AMDScriptParser;
import org.dtk.analysis.script.NonAMDScriptParser;
import org.dtk.analysis.script.ScriptDependencyParser;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Base class for analysing web pages for module dependencies. Each instance
 * is initialised with a Document representing the parsed HTML page. During the
 * parsing phase, the class searches through each script tag present within the
 * page looking for module dependencies and module paths configuration.
 *
 * At the start, each script tag is checked to ascertain whether it contains the
 * Dojo loader. Once this has been detected, each subsequent script has its source
 * retrieved and scanned for module dependencies contained within.
 *
 * All module dependencies discovered are maintained in an internal Map, arranged
 * by package.
 *
 * @author james
 *
 */
public abstract class WebPage implements ModuleAnalysis {

    /** Parsed HTML page being analysed. */
    protected Document document;

    /** Discovered module identifiers, keyed by package name. */
    protected Map<String, List<String>> discoveredModules = new HashMap<String, List<String>>();

    /** Current phase of the scan; switches to POST_DOJO once the Dojo script is seen. */
    protected ParsePhase parsePhase = ParsePhase.PRE_DOJO;

    /** Module format used to select the script dependency parser. */
    protected ModuleFormat moduleFormat = ModuleFormat.NON_AMD;

    /**
     * Stores the document and immediately parses it.
     *
     * NOTE: {@link #parse()} runs from this constructor and invokes the abstract
     * hooks of this class. Those hooks therefore execute before any subclass
     * field initialisers or constructor body have run, so implementations must
     * not depend on subclass instance state.
     *
     * @param document parsed HTML page to analyse
     */
    public WebPage(Document document) {
        this.document = document;
        parse();
    }

    /**
     * Returns the modules discovered while parsing the page.
     *
     * @return the internal (live, mutable) map of package name to module identifiers
     * @throws FatalAnalysisError declared for the interface contract; this
     *         implementation does not throw it
     */
    @Override
    public Map<String, List<String>> getModules() throws FatalAnalysisError {
        return discoveredModules;
    }

    /**
     * Walks every script tag in document order. Tags seen before the Dojo
     * script are only checked for being the Dojo script; tags seen afterwards
     * are scanned for module dependencies.
     */
    protected void parse() {
        Elements scriptTags = findAllScriptTags();

        for (Element scriptTag : scriptTags) {
            if (hasFoundDojoScript()) {
                parsePostDojoScript(scriptTag);
            } else {
                // Before we've found Dojo, just look for the actual Dojo script.
                parsePreDojoScript(scriptTag);
            }
        }
    }

    /**
     * Moves the parse into the POST_DOJO phase when the given tag is the
     * Dojo script.
     *
     * @param script script tag element
     */
    protected void parsePreDojoScript(Element script) {
        if (isDojoScript(script)) {
            parsePhase = ParsePhase.POST_DOJO;
        }
    }

    /**
     * Decides whether the given script tag is the Dojo script.
     *
     * @param script script tag element
     * @return true if this tag is the Dojo script
     */
    abstract protected boolean isDojoScript(Element script);

    /**
     * Retrieves the script contents, extracts the module dependencies it
     * declares and records each one under its package.
     *
     * Scripts whose contents could not be retrieved (null contents) and
     * parsers that report no dependency list (null result) are skipped.
     *
     * @param script {@code <script>} tag element
     */
    protected void parsePostDojoScript(Element script) {
        String scriptContents = retrieveScriptContents(script);

        // If there was a problem retrieving this script source, don't try
        // to parse the result.
        if (scriptContents == null) {
            return;
        }

        List<String> moduleDependencies = analyseModuleDependencies(scriptContents);

        // A parser may hand back null instead of an empty list; treat that
        // as "no dependencies" rather than failing in the loop below.
        if (moduleDependencies == null) {
            return;
        }

        for (String moduleIdentifier : moduleDependencies) {
            String absoluteModuleIdentifier = getAbsoluteModuleIdentifer(moduleIdentifier, script);
            String packageName = getPackageIdentifier(absoluteModuleIdentifier);

            updateDiscoveredModules(packageName, absoluteModuleIdentifier);
        }
    }

    /**
     * Converts a module identifier found in a script into its absolute form.
     *
     * @param moduleIdentifer identifier as found in the script
     * @param script script tag element the identifier was found in
     * @return absolute module identifier
     */
    abstract protected String getAbsoluteModuleIdentifer(String moduleIdentifer, Element script);

    /**
     * Derives the package identifier for a module identifier.
     *
     * @param moduleIdentifer module identifier
     * @return package identifier used as the key for discovered modules
     */
    abstract protected String getPackageIdentifier(String moduleIdentifer);

    /**
     * Retrieves the source of the given script tag.
     *
     * @param script script tag element
     * @return script source, or null if it could not be retrieved
     */
    abstract protected String retrieveScriptContents(Element script);

    /**
     * Runs the parser matching the current module format over the script.
     *
     * @param scriptContents script source to analyse
     * @return module dependencies reported by the parser
     */
    protected List<String> analyseModuleDependencies(String scriptContents) {
        ScriptDependencyParser scriptParser = getScriptParser(scriptContents);
        return scriptParser.getModuleDependencies();
    }

    /**
     * Records a module under its package, ignoring duplicates.
     *
     * @param packageName package the module belongs to
     * @param moduleIdentifier module identifier to record
     */
    protected void updateDiscoveredModules(String packageName, String moduleIdentifier) {
        List<String> modules = getPackageModules(packageName);

        if (!modules.contains(moduleIdentifier)) {
            modules.add(moduleIdentifier);
        }
    }

    /**
     * Returns the module list for a package, creating and registering an
     * empty list on first use.
     *
     * @param packageName package to look up
     * @return mutable list of modules recorded for the package
     */
    protected List<String> getPackageModules(String packageName) {
        List<String> modules = discoveredModules.get(packageName);

        if (modules == null) {
            modules = new ArrayList<String>();
            discoveredModules.put(packageName, modules);
        }

        return modules;
    }

    /**
     * Creates the dependency parser matching the current module format.
     *
     * @param scriptContents script source the parser will operate on
     * @return non-AMD parser when the format is NON_AMD, AMD parser otherwise
     */
    protected ScriptDependencyParser getScriptParser(String scriptContents) {
        if (moduleFormat == ModuleFormat.NON_AMD) {
            return new NonAMDScriptParser(scriptContents);
        }

        return new AMDScriptParser(scriptContents);
    }

    /**
     * @return true once the Dojo script has been encountered
     */
    protected boolean hasFoundDojoScript() {
        return parsePhase == ParsePhase.POST_DOJO;
    }

    /**
     * @return every {@code <script>} element in the document
     */
    protected Elements findAllScriptTags() {
        return this.document.getElementsByTag("script");
    }
}
16 changes: 16 additions & 0 deletions src/main/java/org/dtk/analysis/script/AMDScriptParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package org.dtk.analysis.script;

import java.util.List;

/**
 * Dependency parser for script source in the AMD module format.
 *
 * Dependency extraction is not implemented yet; no dependencies are reported.
 */
public class AMDScriptParser extends BaseScriptParser implements ScriptDependencyParser {

    /**
     * @param scriptSource script source to analyse
     */
    public AMDScriptParser(String scriptSource) {
        super(scriptSource);
    }

    /**
     * Returns the module dependencies found in the script source.
     *
     * @return an empty, immutable list until parsing is implemented; never null,
     *         so callers can iterate the result directly
     */
    @Override
    public List<String> getModuleDependencies() {
        return java.util.Collections.<String>emptyList();
    }

}
9 changes: 9 additions & 0 deletions src/main/java/org/dtk/analysis/script/BaseScriptParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package org.dtk.analysis.script;

/**
 * Common base for script parsers: holds the script source to be analysed.
 */
public abstract class BaseScriptParser {

    /** Script source handed to the constructor; may be read by subclasses. */
    protected String scriptSource;

    /**
     * Stores the script source for later analysis.
     *
     * @param scriptSource script source to analyse
     */
    public BaseScriptParser(String scriptSource) {
        super();
        this.scriptSource = scriptSource;
    }
}
17 changes: 17 additions & 0 deletions src/main/java/org/dtk/analysis/script/NonAMDScriptParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package org.dtk.analysis.script;

import java.util.List;

/**
 * Dependency parser for script source that is not in the AMD module format.
 *
 * Dependency extraction is not implemented yet; no dependencies are reported.
 */
public class NonAMDScriptParser extends BaseScriptParser implements ScriptDependencyParser {

    /**
     * @param scriptSource script source to analyse
     */
    public NonAMDScriptParser(String scriptSource) {
        super(scriptSource);
    }

    /**
     * Returns the module dependencies found in the script source.
     *
     * @return an empty, immutable list until parsing is implemented; never null,
     *         so callers can iterate the result directly
     */
    @Override
    public List<String> getModuleDependencies() {
        // TODO implement dependency extraction for non-AMD scripts.
        return java.util.Collections.<String>emptyList();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package org.dtk.analysis.script;

import java.util.List;

/**
 * Contract for parsers that extract module dependencies from script source.
 */
public interface ScriptDependencyParser {

    /**
     * Returns the module dependencies found by this parser.
     *
     * @return list of module identifiers
     */
    List<String> getModuleDependencies();
}

0 comments on commit 9135e16

Please sign in to comment.