-
Notifications
You must be signed in to change notification settings - Fork 226
Download databases from GitHub #1229
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
45d805d
bc727d5
18bfd67
9c21b66
b5a0ee6
d7a1e42
ed7b285
1bf8cc6
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,8 @@ import { | |
| } from './commandRunner'; | ||
| import { logger } from './logging'; | ||
| import { tmpDir } from './helpers'; | ||
| import { Credentials } from './authentication'; | ||
| import { REPO_REGEX } from './pure/helpers-pure'; | ||
|
|
||
| /** | ||
| * Prompts a user to fetch a database from a remote location. Database is assumed to be an archive file. | ||
|
|
@@ -46,6 +48,7 @@ export async function promptImportInternetDatabase( | |
|
|
||
| const item = await databaseArchiveFetcher( | ||
| databaseUrl, | ||
| {}, | ||
| databaseManager, | ||
| storagePath, | ||
| progress, | ||
|
|
@@ -61,6 +64,79 @@ export async function promptImportInternetDatabase( | |
|
|
||
| } | ||
|
|
||
| /** | ||
| * Prompts a user to fetch a database from GitHub. | ||
| * User enters a GitHub repository and then the user is asked which language | ||
| * to download (if there is more than one) | ||
| * | ||
| * @param databaseManager the DatabaseManager | ||
| * @param storagePath where to store the unzipped database. | ||
| */ | ||
| export async function promptImportGithubDatabase( | ||
| databaseManager: DatabaseManager, | ||
| storagePath: string, | ||
| credentials: Credentials, | ||
| progress: ProgressCallback, | ||
| token: CancellationToken, | ||
| cli?: CodeQLCliServer | ||
| ): Promise<DatabaseItem | undefined> { | ||
| progress({ | ||
| message: 'Choose repository', | ||
| step: 1, | ||
| maxStep: 2 | ||
| }); | ||
| const githubRepo = await window.showInputBox({ | ||
| title: 'Enter a GitHub repository in the format <owner>/<repo> (e.g. github/codeql)', | ||
| placeHolder: '<owner>/<repo>', | ||
| ignoreFocusOut: true, | ||
| }); | ||
| if (!githubRepo) { | ||
| return; | ||
| } | ||
|
|
||
| if (!REPO_REGEX.test(githubRepo)) { | ||
| throw new Error(`Invalid GitHub repository: ${githubRepo}`); | ||
| } | ||
|
|
||
| const databaseUrl = await convertGithubNwoToDatabaseUrl(githubRepo, credentials, progress); | ||
| if (!databaseUrl) { | ||
| return; | ||
| } | ||
|
|
||
| const octokit = await credentials.getOctokit(); | ||
| /** | ||
| * The 'token' property of the token object returned by `octokit.auth()`. | ||
| * The object is undocumented, but looks something like this: | ||
| * { | ||
| * token: 'xxxx', | ||
| * tokenType: 'oauth', | ||
| * type: 'token', | ||
| * } | ||
| * We only need the actual token string. | ||
| */ | ||
| const octokitToken = (await octokit.auth() as { token: string })?.token; | ||
| if (!octokitToken) { | ||
| // Just print a generic error message for now. Ideally we could show more debugging info, like the | ||
| // octokit object, but that would expose a user token. | ||
| throw new Error('Unable to get GitHub token.'); | ||
| } | ||
| const item = await databaseArchiveFetcher( | ||
| databaseUrl, | ||
| { 'Accept': 'application/zip', 'Authorization': `Bearer ${octokitToken}` }, | ||
| databaseManager, | ||
| storagePath, | ||
| progress, | ||
| token, | ||
| cli | ||
| ); | ||
| if (item) { | ||
| await commands.executeCommand('codeQLDatabases.focus'); | ||
| void showAndLogInformationMessage('Database downloaded and imported successfully.'); | ||
| return item; | ||
| } | ||
| return; | ||
| } | ||
|
|
||
| /** | ||
| * Prompts a user to fetch a database from lgtm. | ||
| * User enters a project url and then the user is asked which language | ||
|
|
@@ -94,6 +170,7 @@ export async function promptImportLgtmDatabase( | |
| if (databaseUrl) { | ||
| const item = await databaseArchiveFetcher( | ||
| databaseUrl, | ||
| {}, | ||
| databaseManager, | ||
| storagePath, | ||
| progress, | ||
|
|
@@ -140,6 +217,7 @@ export async function importArchiveDatabase( | |
| try { | ||
| const item = await databaseArchiveFetcher( | ||
| databaseUrl, | ||
| {}, | ||
| databaseManager, | ||
| storagePath, | ||
| progress, | ||
|
|
@@ -166,13 +244,15 @@ export async function importArchiveDatabase( | |
| * or in the local filesystem. | ||
| * | ||
| * @param databaseUrl URL from which to grab the database | ||
| * @param requestHeaders Headers to send with the request | ||
| * @param databaseManager the DatabaseManager | ||
| * @param storagePath where to store the unzipped database. | ||
| * @param progress callback to send progress messages to | ||
| * @param token cancellation token | ||
| */ | ||
| async function databaseArchiveFetcher( | ||
| databaseUrl: string, | ||
| requestHeaders: { [key: string]: string }, | ||
|
shati-patel marked this conversation as resolved.
|
||
| databaseManager: DatabaseManager, | ||
| storagePath: string, | ||
| progress: ProgressCallback, | ||
|
|
@@ -193,7 +273,7 @@ async function databaseArchiveFetcher( | |
| if (isFile(databaseUrl)) { | ||
| await readAndUnzip(databaseUrl, unzipPath, cli, progress); | ||
| } else { | ||
| await fetchAndUnzip(databaseUrl, unzipPath, cli, progress); | ||
| await fetchAndUnzip(databaseUrl, requestHeaders, unzipPath, cli, progress); | ||
| } | ||
|
|
||
| progress({ | ||
|
|
@@ -292,6 +372,7 @@ async function readAndUnzip( | |
|
|
||
| async function fetchAndUnzip( | ||
| databaseUrl: string, | ||
| requestHeaders: { [key: string]: string }, | ||
| unzipPath: string, | ||
| cli?: CodeQLCliServer, | ||
| progress?: ProgressCallback | ||
|
|
@@ -310,7 +391,10 @@ async function fetchAndUnzip( | |
| step: 1, | ||
| }); | ||
|
|
||
| const response = await checkForFailingResponse(await fetch(databaseUrl), 'Error downloading database'); | ||
| const response = await checkForFailingResponse( | ||
| await fetch(databaseUrl, { headers: requestHeaders }), | ||
| 'Error downloading database' | ||
| ); | ||
| const archiveFileStream = fs.createWriteStream(archivePath); | ||
|
|
||
| const contentLength = response.headers.get('content-length'); | ||
|
|
@@ -381,6 +465,37 @@ export async function findDirWithFile( | |
| return; | ||
| } | ||
|
|
||
| export async function convertGithubNwoToDatabaseUrl( | ||
| githubRepo: string, | ||
| credentials: Credentials, | ||
| progress: ProgressCallback): Promise<string | undefined> { | ||
| try { | ||
| // TODO: In future, we could accept GitHub URLs in addition to NWOs. | ||
| // Similar to "looksLikeLgtmUrl". | ||
| if (!REPO_REGEX.test(githubRepo)) { | ||
| throw new Error('Invalid repository format. Must be in the format <owner>/<repo> (e.g. github/codeql)'); | ||
| } | ||
|
|
||
| const [owner, repo] = githubRepo.split('/'); | ||
|
|
||
| const octokit = await credentials.getOctokit(); | ||
| const response = await octokit.request('GET /repos/:owner/:repo/code-scanning/codeql/databases', { owner, repo }); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty sure we're ok here, but can you make sure that we can handle misplaced cApS? Remember the
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Also, does this support anonymous downloading of public repos?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The API currently does not allow anonymous requests because it is only staff shipped. Even in the private beta it will still require logging in. Once we get to public beta it will likely start accepting anonymous requests for public repos. However for the extension I'd argue we still want to always make the user log in. It'll simplify our lives because we only have to handle one case. And I would expect the requirement to log in is not a blocked to users here.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the explanation. I think that's fine for now. It does seem like a small barrier for users who are not using any other github features. This is something we can discuss later when the API endpoint starts accepting anonymous requests.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Good point, I hadn't thought of that 💪🏽 From a bit of manual testing, it looks like the API is case-insensitive (e.g.
Not yet. The entire MRVA API is still staff-only, so we need to pass in credentials for now 🔒 |
||
|
|
||
| const languages = response.data.map((db: any) => db.language); | ||
|
|
||
| const language = await promptForLanguage(languages, progress); | ||
| if (!language) { | ||
| return; | ||
| } | ||
|
|
||
| return `https://api.github.com/repos/${owner}/${repo}/code-scanning/codeql/databases/${language}`; | ||
|
|
||
| } catch (e) { | ||
| void logger.log(`Error: ${e.message}`); | ||
| throw new Error(`Unable to get database for '${githubRepo}'`); | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * The URL pattern is https://lgtm.com/projects/{provider}/{org}/{name}/{irrelevant-subpages}. | ||
| * There are several possibilities for the provider: in addition to GitHub.com (g), | ||
|
|
@@ -506,7 +621,7 @@ async function promptForLanguage( | |
| maxStep: 2 | ||
| }); | ||
| if (!languages.length) { | ||
| return; | ||
| throw new Error('No databases found'); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is also a change to downloading LGTM databases. I can't remember if there was a reason why we didn't throw when there were no languages found for an LGTM project. Can you just make sure that nothing awkward happens when downloading LGTM dbs?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wasn't able to find an LGTM project that doesn't have any languages, so I suspect that's a sufficiently rare case that we've never needed to handle it previously 😅 |
||
| } | ||
| if (languages.length === 1) { | ||
| return languages[0]; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Minor: Move this up into the
if(item)case, so it's more obvious what's going on. Thereturnat the end of the function will handle all the fall-through cases where we didn't return early.