Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions extensions/ql-vscode/src/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -711,3 +711,10 @@ const QUERIES_PANEL = new Setting("queriesPanel", ROOT_SETTING);
/**
 * Reports whether the `queriesPanel` setting currently holds a truthy value.
 *
 * @returns `true` when the setting value is truthy, `false` otherwise
 *   (including when it is unset).
 */
export function showQueriesPanel(): boolean {
  const configured = QUERIES_PANEL.getValue<boolean>();
  return Boolean(configured);
}

// `llmGeneration` is nested under the `dataExtensions` setting group.
const DATA_EXTENSIONS = new Setting("dataExtensions", ROOT_SETTING);
const LLM_GENERATION = new Setting("llmGeneration", DATA_EXTENSIONS);

/**
 * Reports whether the `llmGeneration` setting currently holds a truthy value.
 *
 * @returns `true` when the setting value is truthy, `false` otherwise
 *   (including when it is unset).
 */
export function showLlmGeneration(): boolean {
  const configured = LLM_GENERATION.getValue<boolean>();
  return Boolean(configured);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { Credentials } from "../common/authentication";
import { OctokitResponse } from "@octokit/types";

/**
 * Categories a method classification can have in auto-model requests and responses.
 * Each member carries the string constant stored in `Classification.type`.
 */
export enum ClassificationType {
// Fallback used when a modeled method type has no matching category.
Unknown = "CLASSIFICATION_TYPE_UNKNOWN",
Neutral = "CLASSIFICATION_TYPE_NEUTRAL",
Source = "CLASSIFICATION_TYPE_SOURCE",
Sink = "CLASSIFICATION_TYPE_SINK",
Summary = "CLASSIFICATION_TYPE_SUMMARY",
}

/**
 * A classification attached to a `Method` in an auto-model request or response.
 */
export interface Classification {
// Category of the classification (source, sink, summary, neutral or unknown).
type: ClassificationType;
// Kind string; copied to and from the `kind` of a modeled method.
kind: string;
// Free-text explanation; sent as an empty string for methods modeled locally.
explanation: string;
}

/**
 * One entry of an auto-model request or response. When building a request, one entry is
 * created per argument position of an external API method.
 */
export interface Method {
// Package name of the method.
package: string;
// Name of the type that declares the method.
type: string;
// Method name.
name: string;
// Parameter list of the method, e.g. `()` for a method without parameters.
signature: string;
// Labels of usages of the method (at most ten are sent per entry in a request).
usages: string[];
// Present on already-modeled samples and on predictions; left undefined on candidates.
classification?: Classification;
// Input specification, e.g. `Argument[0]`.
input?: string;
// Output specification — presumably in the same notation as `input`; TODO confirm.
output?: string;
}

/**
 * Payload posted to the auto-model endpoint.
 */
export interface ModelRequest {
// Language of the database the methods were found in.
language: string;
// Methods without an existing model, for which a prediction is requested.
candidates: Method[];
// Methods that already have a model, sent along with their classification.
samples: Method[];
}

/**
 * Response body returned by the auto-model endpoint.
 */
export interface ModelResponse {
language: string;
// Methods with predicted classifications; entries without a classification are ignored when parsed.
predicted: Method[];
}

/**
 * Posts a modeling request to the auto-model endpoint and returns the response body.
 *
 * @param credentials Provides the Octokit client used to send the request.
 * @param request The candidates and samples to send.
 * @returns The `data` of the endpoint's response.
 */
export async function autoModel(
  credentials: Credentials,
  request: ModelRequest,
): Promise<ModelResponse> {
  const client = await credentials.getOctokit();

  const { data }: OctokitResponse<ModelResponse> = await client.request(
    "POST /repos/github/codeql/code-scanning/codeql/auto-model",
    { data: request },
  );

  return data;
}
218 changes: 218 additions & 0 deletions extensions/ql-vscode/src/data-extensions-editor/auto-model.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
import { ExternalApiUsage } from "./external-api-usage";
import { ModeledMethod, ModeledMethodType } from "./modeled-method";
import {
Classification,
ClassificationType,
Method,
ModelRequest,
} from "./auto-model-api";

/**
 * Builds the auto-model request for a set of external API usages.
 *
 * One entry is produced per argument position of each method. Methods without an
 * existing model (or whose model has type "none") become candidates; all other methods
 * become samples that carry their current classification. Methods with an empty
 * parameter list (`()`) contribute no entries.
 *
 * @param language Language to put in the request.
 * @param externalApiUsages External API methods and their usages; not modified.
 * @param modeledMethods Existing models, keyed by method signature.
 * @returns A request holding at most 20 candidates and 100 samples, most-used methods first.
 */
export function createAutoModelRequest(
  language: string,
  externalApiUsages: ExternalApiUsage[],
  modeledMethods: Record<string, ModeledMethod>,
): ModelRequest {
  const candidateEntries: Method[] = [];
  const sampleEntries: Method[] = [];

  // Work on a copy ordered by descending usage count, so the most used methods
  // are the ones that survive the truncation below.
  const mostUsedFirst = externalApiUsages
    .slice()
    .sort((left, right) => right.usages.length - left.usages.length);

  mostUsedFirst.forEach((apiUsage) => {
    const existingModel: ModeledMethod = modeledMethods[
      apiUsage.signature
    ] ?? {
      type: "none",
    };
    const isUnmodeled = existingModel.type === "none";

    // The parameter list is a comma-separated string; "()" means no parameters.
    const parameterCount =
      apiUsage.methodParameters === "()"
        ? 0
        : apiUsage.methodParameters.split(",").length;

    const destination = isUnmodeled ? candidateEntries : sampleEntries;

    for (let position = 0; position < parameterCount; position += 1) {
      destination.push({
        package: apiUsage.packageName,
        type: apiUsage.typeName,
        name: apiUsage.methodName,
        signature: apiUsage.methodParameters,
        classification: isUnmodeled
          ? undefined
          : toMethodClassification(existingModel),
        usages: apiUsage.usages.slice(0, 10).map((usage) => usage.label),
        input: `Argument[${position}]`,
      });
    }
  });

  return {
    language,
    samples: sampleEntries.slice(0, 100),
    candidates: candidateEntries.slice(0, 20),
  };
}

/**
 * Converts the predictions returned by the auto-model endpoint into modeled methods,
 * keyed by full method signature.
 *
 * The current model is simplified: it only predicts sinks. It cannot really tell that a
 * method is neutral (the method could still be a source or a summary), but to keep this
 * simple and still give the user some output, any method with no argument predicted as
 * a sink is modeled as neutral.
 *
 * When several arguments of the same method are predicted as sinks, only the first one
 * (in input order) is used.
 *
 * @param predicted Predicted entries from the response.
 * @returns One modeled method per signature that has at least one classified prediction.
 */
export function parsePredictedClassifications(
  predicted: Method[],
): Record<string, ModeledMethod> {
  // Group the classified predictions per method, in order of first appearance.
  const groupedBySignature = new Map<string, Method[]>();
  for (const prediction of predicted) {
    if (!prediction.classification) {
      // Unclassified entries are dropped silently: the user can't act on that
      // information, so any logging of it belongs on the service side.
      continue;
    }

    const key = toFullMethodSignature(prediction);
    const bucket = groupedBySignature.get(key);
    if (bucket === undefined) {
      groupedBySignature.set(key, [prediction]);
    } else {
      bucket.push(prediction);
    }
  }

  const result: Record<string, ModeledMethod> = {};

  groupedBySignature.forEach((predictionsForMethod, key) => {
    // Sorting by input guarantees a deterministic pick: given "Argument[1]" and
    // "Argument[3]", "Argument[1]" always comes first.
    const sinkPredictions = predictionsForMethod
      .filter(
        (prediction) =>
          prediction.classification?.type === ClassificationType.Sink,
      )
      .sort((left, right) =>
        compareInputOutput(left.input ?? "", right.input ?? ""),
      );

    if (sinkPredictions.length === 0) {
      // For now, a method with no argument predicted as a sink is modeled as neutral.
      result[key] = {
        type: "neutral",
        kind: "",
        input: "",
        output: "",
      };
      return;
    }

    const firstSink = sinkPredictions[0];
    result[key] = {
      type: "sink",
      kind: firstSink.classification?.kind ?? "",
      input: firstSink.input ?? "",
      output: firstSink.output ?? "",
    };
  });

  return result;
}

/**
 * Maps a modeled method type to the classification type used by the auto-model API.
 * Any type other than source, sink, summary or neutral maps to `Unknown`.
 */
function toMethodClassificationType(
  type: ModeledMethodType,
): ClassificationType {
  if (type === "source") {
    return ClassificationType.Source;
  }
  if (type === "sink") {
    return ClassificationType.Sink;
  }
  if (type === "summary") {
    return ClassificationType.Summary;
  }
  if (type === "neutral") {
    return ClassificationType.Neutral;
  }
  return ClassificationType.Unknown;
}

/**
 * Builds the classification sent to the auto-model API for an already-modeled method.
 * The explanation is always left empty.
 */
function toMethodClassification(modeledMethod: ModeledMethod): Classification {
  const { type, kind } = modeledMethod;

  return {
    type: toMethodClassificationType(type),
    kind,
    explanation: "",
  };
}

/**
 * Formats a method as `<package>.<type>#<name><signature>`.
 */
function toFullMethodSignature(method: Method): string {
  const { package: packageName, type: typeName, name, signature } = method;
  return `${packageName}.${typeName}#${name}${signature}`;
}

const argumentRegex = /^Argument\[(\d+)]$/;

// Argument[this] is before ReturnValue
const nonNumericArgumentOrder = ["Argument[this]", "ReturnValue"];

/**
 * Comparator for inputs/outputs of the form `Argument[<number>]`, `Argument[this]`
 * or `ReturnValue`.
 *
 * Ordering: numeric arguments first (ascending by index), then `Argument[this]`, then
 * `ReturnValue`, then any unrecognized value (ordered among themselves with
 * `localeCompare`).
 *
 * @returns 0 when equal, a negative number when `a` sorts before `b`, and a positive
 *   number when `a` sorts after `b`.
 */
export function compareInputOutput(a: string, b: string): number {
  if (a === b) {
    return 0;
  }

  const aNumeric = argumentRegex.exec(a);
  const bNumeric = argumentRegex.exec(b);

  // Both are numeric arguments: order by index
  if (aNumeric !== null && bNumeric !== null) {
    return parseInt(aNumeric[1], 10) - parseInt(bNumeric[1], 10);
  }

  // Exactly one is a numeric argument: it goes first
  if (aNumeric !== null) {
    return -1;
  }
  if (bNumeric !== null) {
    return 1;
  }

  // Neither is a numeric argument: fall back to the fixed order
  const aRank = nonNumericArgumentOrder.indexOf(a);
  const bRank = nonNumericArgumentOrder.indexOf(b);
  const aIsKnown = aRank !== -1;
  const bIsKnown = bRank !== -1;

  if (aIsKnown && bIsKnown) {
    return aRank - bRank;
  }
  if (!aIsKnown && !bIsKnown) {
    return a.localeCompare(b);
  }

  // Exactly one is unknown: the unknown one is sorted last
  return aIsKnown ? -1 : 1;
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,12 @@ import { createDataExtensionYaml, loadDataExtensionYaml } from "./yaml";
import { ExternalApiUsage } from "./external-api-usage";
import { ModeledMethod } from "./modeled-method";
import { ExtensionPackModelFile } from "./shared/extension-pack";
import { autoModel } from "./auto-model-api";
import {
createAutoModelRequest,
parsePredictedClassifications,
} from "./auto-model";
import { showLlmGeneration } from "../config";

function getQlSubmoduleFolder(): WorkspaceFolder | undefined {
const workspaceFolder = workspace.workspaceFolders?.find(
Expand Down Expand Up @@ -127,6 +133,13 @@ export class DataExtensionsEditorView extends AbstractWebview<
case "generateExternalApi":
await this.generateModeledMethods();

break;
case "generateExternalApiFromLlm":
await this.generateModeledMethodsFromLlm(
msg.externalApiUsages,
msg.modeledMethods,
);

break;
default:
assertNever(msg);
Expand All @@ -149,6 +162,7 @@ export class DataExtensionsEditorView extends AbstractWebview<
viewState: {
extensionPackModelFile: this.modelFile,
modelFileExists: await pathExists(this.modelFile.filename),
showLlmButton: showLlmGeneration(),
},
});
}
Expand Down Expand Up @@ -367,6 +381,29 @@ export class DataExtensionsEditorView extends AbstractWebview<
await this.clearProgress();
}

/**
 * Requests predicted models for the given external API usages from the auto-model
 * endpoint and posts them to the webview as an `addModeledMethods` message.
 *
 * @param externalApiUsages External API usages to build the request from.
 * @param modeledMethods Existing models, keyed by method signature.
 */
private async generateModeledMethodsFromLlm(
  externalApiUsages: ExternalApiUsage[],
  modeledMethods: Record<string, ModeledMethod>,
): Promise<void> {
  const language = this.databaseItem.language;
  const modelRequest = createAutoModelRequest(
    language,
    externalApiUsages,
    modeledMethods,
  );

  const { predicted } = await autoModel(this.app.credentials, modelRequest);
  const predictedModeledMethods = parsePredictedClassifications(predicted);

  await this.postMessage({
    t: "addModeledMethods",
    modeledMethods: predictedModeledMethods,
    overrideNone: true,
  });
}

/*
* Progress in this class is a bit weird. Most of the progress is based on running the query.
* Query progress is always between 0 and 1000. However, we still have some steps that need
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,5 @@ import { ExtensionPackModelFile } from "./extension-pack";
/**
 * View state sent to the data extensions editor webview.
 */
export interface DataExtensionEditorViewState {
// The model file the editor is working on.
extensionPackModelFile: ExtensionPackModelFile;
// Whether the model file exists on disk.
modelFileExists: boolean;
// Whether the button for LLM-based generation should be shown.
showLlmButton: boolean;
}
9 changes: 8 additions & 1 deletion extensions/ql-vscode/src/pure/interface-types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,12 @@ export interface GenerateExternalApiMessage {
t: "generateExternalApi";
}

/**
 * Message sent from the data extensions editor to request LLM-generated models.
 */
export interface GenerateExternalApiFromLlmMessage {
t: "generateExternalApiFromLlm";
// External API usages to generate models for.
externalApiUsages: ExternalApiUsage[];
// Existing models, keyed by method signature.
modeledMethods: Record<string, ModeledMethod>;
}

export type ToDataExtensionsEditorMessage =
| SetExtensionPackStateMessage
| SetExternalApiUsagesMessage
Expand All @@ -556,4 +562,5 @@ export type FromDataExtensionsEditorMessage =
| OpenExtensionPackMessage
| JumpToUsageMessage
| SaveModeledMethods
| GenerateExternalApiMessage;
| GenerateExternalApiMessage
| GenerateExternalApiFromLlmMessage;
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ DataExtensionsEditor.args = {
"/home/user/vscode-codeql-starter/codeql-custom-queries-java/sql2o/models/sql2o.yml",
},
modelFileExists: true,
showLlmButton: true,
},
initialExternalApiUsages: [
{
Expand Down
Loading