-
Notifications
You must be signed in to change notification settings - Fork 48
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
@W-9729358@ Make CPD engine production ready #522
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,3 @@ | ||
module.exports = { | ||
"CpdViolationMessage": "%s: %s of %s duplication segments detected. %s line(s), %s tokens.", | ||
"CpdViolationMessage": "%s: %s of %s duplication segments detected. %s line(s), %s tokens." | ||
}; |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7,38 +7,87 @@ import {Config} from '../util/Config'; | |
import {ENGINE, LANGUAGE, Severity} from '../../Constants'; | ||
import * as engineUtils from '../util/CommonEngineUtils'; | ||
import CpdWrapper from './CpdWrapper'; | ||
import {FILE_EXTS_TO_LANGUAGE} from '../../Constants'; | ||
import {uxEvents} from '../ScannerEvents'; | ||
import crypto = require('crypto'); | ||
import * as EnvVariable from '../util/EnvironmentVariable'; | ||
|
||
|
||
Messages.importMessagesDirectory(__dirname); | ||
const messages = Messages.loadMessages('@salesforce/sfdx-scanner', 'CpdEngine'); | ||
const eventMessages = Messages.loadMessages("@salesforce/sfdx-scanner", "EventKeyTemplates"); | ||
|
||
// CPD supported languages: [apex, java, vf, xml] | ||
// Lookup table from file extension to the language CPD should use for that file.
// Keys are written lower-case and without the leading dot.
const FileExtToLanguage: Map<string, LANGUAGE> = new Map([ | ||
// apex | ||
['cls', LANGUAGE.APEX], | ||
['trigger', LANGUAGE.APEX], | ||
// java | ||
['java', LANGUAGE.JAVA], | ||
// vf | ||
['component', LANGUAGE.VISUALFORCE], | ||
['page', LANGUAGE.VISUALFORCE], | ||
// xml | ||
['xml', LANGUAGE.XML], | ||
]); | ||
|
||
|
||
// exported for visibility in tests | ||
// Fixed metadata describing the single copy/paste rule that this engine reports.
export const CpdRuleName = 'copy-paste-detected'; | ||
export const CpdRuleDescription = 'Identify duplicate code blocks.'; | ||
export const CpdRuleCategory = 'Copy/Paste Detected'; | ||
export const CpdInfoUrl = 'https://pmd.github.io/latest/pmd_userdocs_cpd.html#refactoring-duplicates'; | ||
export const CpdViolationSeverity = Severity.LOW; | ||
// De-duplicated list of the languages that appear as values in FileExtToLanguage.
export const CpdLanguagesSupported: LANGUAGE[] = [...new Set (FileExtToLanguage.values())]; | ||
|
||
export class CpdEngine extends AbstractRuleEngine { | ||
|
||
public readonly ENGINE_ENUM: ENGINE = ENGINE.CPD; | ||
public readonly ENGINE_NAME: string = ENGINE.CPD.valueOf(); | ||
private readonly ENGINE_ENUM: ENGINE = ENGINE.CPD; | ||
private readonly ENGINE_NAME: string = ENGINE.CPD.valueOf(); | ||
|
||
|
||
private minimumTokens: number; | ||
private logger: Logger; | ||
private config: Config; | ||
private initialized: boolean; | ||
private cpdCatalog: Catalog; | ||
private validCPDLanguages: LANGUAGE[]; | ||
|
||
|
||
private initialized = false; | ||
jfeingold35 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
/**
 * @returns the engine name stored in ENGINE_NAME, i.e. the value of ENGINE.CPD
 */
public getName(): string { | ||
return this.ENGINE_NAME; | ||
} | ||
|
||
/**
 * Prepares the engine for use: creates a child logger, loads the shared config
 * and resolves the minimum-token threshold.
 * Calls made after the first successful one return immediately because the
 * `initialized` flag is already set.
 */
public async init(): Promise<void> { | ||
if (this.initialized) { | ||
return; | ||
} | ||
this.logger = await Logger.child(this.getName()); | ||
this.config = await Controller.getConfig(); | ||
// The environment variable takes precedence over the config file. Because the
// fallback uses `||`, an env value that is unset, NaN or 0 falls through to the
// configured value.
this.minimumTokens = EnvVariable.getEnvVariableAsNumber(this.ENGINE_ENUM, EnvVariable.CONFIG_NAME.MINIMUM_TOKENS) || ( await this.config.getMinimumTokens(this.ENGINE_ENUM) ); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Environment variable would override config value for MinimumTokens. |
||
this.initialized = true; | ||
} | ||
|
||
/**
 * Fetches the target patterns for this engine from the config object.
 * Reads `this.config`, which is assigned in init().
 * @returns promise of the target patterns returned by the config for ENGINE_ENUM
 */
public getTargetPatterns(): Promise<TargetPattern[]> { | ||
return this.config.getTargetPatterns(this.ENGINE_ENUM); | ||
} | ||
|
||
public getCatalog(): Promise<Catalog>{ | ||
return Promise.resolve(this.cpdCatalog); | ||
const cpdCatalog = { | ||
rules: [{ | ||
engine: this.ENGINE_NAME, | ||
sourcepackage: this.ENGINE_NAME, | ||
name: CpdRuleName, | ||
description: CpdRuleDescription, | ||
categories: [CpdRuleCategory], | ||
rulesets: [], | ||
languages: CpdLanguagesSupported, | ||
defaultEnabled: true | ||
}], | ||
categories: [{ | ||
engine: this.ENGINE_NAME, | ||
name: CpdRuleCategory, | ||
paths: [] | ||
}], | ||
rulesets: [] | ||
}; | ||
return Promise.resolve(cpdCatalog); | ||
} | ||
|
||
/* eslint-disable-next-line no-unused-vars, @typescript-eslint/no-unused-vars */ | ||
|
@@ -72,34 +121,47 @@ export class CpdEngine extends AbstractRuleEngine { | |
return results; | ||
} | ||
|
||
private sortPaths(targets: RuleTarget[]): Map<LANGUAGE, string[]> { | ||
const languageToPaths = new Map(); | ||
|
||
private sortPaths(targets: RuleTarget[]): Map<LANGUAGE, string[]> { | ||
const languageToPaths = new Map<LANGUAGE, string[]>(); | ||
const unmatchedPaths: string[] = []; | ||
for (const target of targets) { | ||
for (const path of target.paths) { | ||
const i = path.lastIndexOf("."); | ||
if (i === -1) { | ||
uxEvents.emit('warning-always', `Target: '${path}' was not processed by CPD, no file extension found.`); | ||
continue; | ||
if (!this.matchPathToLanguage(path, languageToPaths)) { | ||
// If language could not be identified, note down the path | ||
unmatchedPaths.push(path); | ||
} | ||
const ext = path.substr(i).toLowerCase(); | ||
if (FILE_EXTS_TO_LANGUAGE.has(ext)) { | ||
if (this.validCPDLanguages.includes(FILE_EXTS_TO_LANGUAGE.get(ext))) { | ||
const language = FILE_EXTS_TO_LANGUAGE.get(ext); | ||
if (languageToPaths.has(language)) { | ||
languageToPaths.get(language).push(path); | ||
} else { | ||
languageToPaths.set(language, [path]); | ||
} | ||
} else { | ||
uxEvents.emit('warning-always', `Target: '${path}' was not processed by CPD, language '${FILE_EXTS_TO_LANGUAGE.get(ext)}' not supported.`); | ||
} | ||
} | ||
} | ||
|
||
// Let user know about file paths that could not be matched | ||
if (unmatchedPaths.length > 0) { | ||
uxEvents.emit('info-verbose', eventMessages.getMessage('info.unmatchedPathExtensionCpd', [unmatchedPaths.join(",")])); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Simplified logic in |
||
} | ||
|
||
return languageToPaths; | ||
} | ||
|
||
/** | ||
* Identify the language of a file using the file extension | ||
* @param path to be examined | ||
* @param languageToPaths map with entries of language to paths matched so far | ||
* @returns true if the language was identified and false if not | ||
*/ | ||
private matchPathToLanguage(path: string, languageToPaths: Map<LANGUAGE, string[]>): boolean { | ||
const ext = path.slice(path.lastIndexOf(".") + 1); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this lowercase There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Missed this. Will fix it. |
||
if (ext) { | ||
const language = FileExtToLanguage.get(ext.toLowerCase()); | ||
if (language && CpdLanguagesSupported.includes(language)) { | ||
if (languageToPaths.has(language)) { | ||
languageToPaths.get(language).push(path); | ||
} else { | ||
uxEvents.emit('warning-always', `Target: '${path}' was not processed by CPD, file extension '${ext}' not supported.`); | ||
languageToPaths.set(language, [path]); | ||
} | ||
return true; | ||
} | ||
} | ||
return languageToPaths; | ||
return false; | ||
} | ||
|
||
private runLanguage(language: string, targetPaths: string[]): Promise<string>{ | ||
|
@@ -116,7 +178,7 @@ export class CpdEngine extends AbstractRuleEngine { | |
|
||
|
||
protected processStdOut(stdout: string): RuleResult[] { | ||
let violations: RuleResult[] = []; | ||
let ruleResults: RuleResult[] = []; | ||
|
||
this.logger.trace(`Output received from CPD: ${stdout}`); | ||
|
||
|
@@ -129,15 +191,15 @@ export class CpdEngine extends AbstractRuleEngine { | |
|
||
const duplications = cpdJson.elements[0].elements; | ||
if (duplications) { | ||
violations = this.jsonToRuleResults(duplications); | ||
ruleResults = this.jsonToRuleResults(duplications); | ||
} | ||
} | ||
|
||
if (violations.length > 0) { | ||
if (ruleResults.length > 0) { | ||
this.logger.trace('Found rule violations.'); | ||
} | ||
|
||
return violations; | ||
return ruleResults; | ||
} | ||
|
||
// eslint-disable-next-line @typescript-eslint/no-explicit-any | ||
|
@@ -160,11 +222,11 @@ export class CpdEngine extends AbstractRuleEngine { | |
column: occ.attributes.column, | ||
endLine: occ.attributes.endline, | ||
endColumn: occ.attributes.endcolumn, | ||
ruleName: this.cpdCatalog.rules[0].name, | ||
severity: Severity.LOW, | ||
ruleName: CpdRuleName, | ||
severity: CpdViolationSeverity, | ||
message: messages.getMessage("CpdViolationMessage", [codeFragmentID, occCount, occurences.length, duplication.attributes.lines, duplication.attributes.tokens]), | ||
category: this.cpdCatalog.categories[0].name, | ||
url: 'https://pmd.github.io/latest/pmd_userdocs_cpd.html#refactoring-duplicates', | ||
category: CpdRuleCategory, | ||
url: CpdInfoUrl | ||
}; | ||
occCount++; | ||
|
||
|
@@ -186,52 +248,6 @@ export class CpdEngine extends AbstractRuleEngine { | |
return ruleResults; | ||
} | ||
|
||
|
||
public async init(): Promise<void> { | ||
if (this.initialized) { | ||
return; | ||
} | ||
this.logger = await Logger.child(this.getName()); | ||
this.config = await Controller.getConfig(); | ||
this.initialized = true; | ||
this.logger = await Logger.child(this.getName()) | ||
|
||
this.cpdCatalog = { | ||
rules: [{ | ||
engine: this.ENGINE_ENUM.valueOf(), | ||
sourcepackage: this.ENGINE_ENUM.valueOf(), | ||
name: 'copy-paste-detected', | ||
description: 'Identify duplicate code blocks.', | ||
categories: ['Copy/Paste Detected'], | ||
rulesets: [], | ||
languages: await this.getLanguages(), | ||
defaultEnabled: true | ||
}], | ||
categories: [{ | ||
engine: this.ENGINE_ENUM.valueOf(), | ||
name: 'Copy/Paste Detected', | ||
paths: [] | ||
}], | ||
rulesets: [] | ||
}; | ||
|
||
this.minimumTokens = await this.config.getMinimumTokens(this.ENGINE_ENUM); | ||
this.initialized = true; | ||
this.validCPDLanguages = [LANGUAGE.APEX, LANGUAGE.JAVA, LANGUAGE.ECMASCRIPT, LANGUAGE.VISUALFORCE, LANGUAGE.XML]; | ||
} | ||
|
||
private async getLanguages(): Promise<string[]> { | ||
const languages: Set<string> = new Set(); | ||
for (const pattern of await this.config.getTargetPatterns(this.ENGINE_ENUM)){ | ||
const ext = pattern.substr(pattern.lastIndexOf(".")).toLowerCase(); | ||
if (FILE_EXTS_TO_LANGUAGE.has(ext)) { | ||
languages.add(FILE_EXTS_TO_LANGUAGE.get(ext)); | ||
} | ||
} | ||
return Array.from(languages); | ||
} | ||
|
||
|
||
public matchPath(path: string): boolean { | ||
this.logger.trace(`Engine CPD does not support custom rules: ${path}`); | ||
return false; | ||
|
@@ -251,6 +267,4 @@ export class CpdEngine extends AbstractRuleEngine { | |
return severity; | ||
} | ||
|
||
|
||
|
||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
/** | ||
* Builds expected env variables in this format: | ||
* SFDX_SCANNER_<ENGINE>_<VARIABLE_NAME> (underscore-separated, engine and variable name upper-cased) | ||
*/ | ||
|
||
import { ENGINE } from "../../Constants"; | ||
|
||
const PREFIX = 'SFDX_SCANNER'; | ||
const SEPARATOR = '_'; | ||
|
||
/**
 * Config settings that can be supplied through an environment variable.
 * The string value is upper-cased when the variable name is built, so
 * 'Minimum_Tokens' contributes the suffix MINIMUM_TOKENS.
 */
export enum CONFIG_NAME { | ||
MINIMUM_TOKENS = 'Minimum_Tokens' | ||
} | ||
Comment on lines
+11
to
+13
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. New enum to track config values that can be fetched through environment variables. I've also attempted to standardize the access in this new module. |
||
|
||
/**
 * Builds the environment variable name for an engine/config pair by joining
 * PREFIX, the upper-cased engine value and the upper-cased config name with
 * SEPARATOR, e.g. SFDX_SCANNER_<ENGINE>_MINIMUM_TOKENS.
 * @param engine engine whose value forms the middle segment of the name
 * @param configName config setting whose value forms the last segment
 * @returns the assembled variable name
 */
function getEnvVariableName(engine: ENGINE, configName: CONFIG_NAME): string { | ||
return `${PREFIX}${SEPARATOR}${engine.toUpperCase()}${SEPARATOR}${configName.toUpperCase()}`; | ||
} | ||
|
||
/**
 * Looks up the environment variable for an engine/config pair.
 * @param engine engine used to build the variable name
 * @param configName config setting used to build the variable name
 * @returns the raw value from process.env, which is undefined when the variable is not set
 */
export function getEnvVariableAsString(engine: ENGINE, configName: CONFIG_NAME): string { | ||
const envVariableName = getEnvVariableName(engine, configName); | ||
return process.env[envVariableName]; | ||
} | ||
|
||
/**
 * Reads the environment variable for an engine/config pair and parses it as a
 * base-10 integer.
 *
 * Contract:
 * - Returns undefined when the variable is unset or is an empty string.
 * - Otherwise every non-digit character is removed before parsing, so
 *   "100 tokens" yields 100.
 * - Returns NaN when the value contains no digits at all (e.g. "abc"), because
 *   the cleaned string is empty.
 *
 * NOTE(review): stripping non-digits also drops signs and decimal points, so
 * "-5" parses as 5 and "1.5" parses as 15. Confirm this is acceptable for
 * every setting read through this function.
 *
 * @param engine engine used to build the variable name
 * @param configName config setting used to build the variable name
 * @returns the parsed integer, NaN, or undefined as described above
 */
export function getEnvVariableAsNumber(engine: ENGINE, configName: CONFIG_NAME): number { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this will currently return NaN when the environment variable isn't set or isn't a number. Do we want it to return NaN if it's set, but not a number, but null if it isn't set? Whatever we choose, can you add documentation describing the contract when the value isn't set or the value can't be parsed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good catch. We don't have a way to control what value the user may set. I see a suggestion on online forums to clean up the string before parsing:
where There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jbartolotta-sfdc Let me know what you think about the fix. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rmohan20 - this change looks good. |
||
const envVariable = getEnvVariableAsString(engine, configName); | ||
if (envVariable) { | ||
// Clean up variable if it has any non-digit values | ||
return parseInt(envVariable.replace(/\D/g, ""), 10); | ||
} | ||
return undefined; | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As discussed, the supported languages will be derived from the file-extension-to-language mapping.