Skip to content

Commit

Permalink
Lazy / stateless installation (#1675)
Browse files Browse the repository at this point in the history
* Throw error when calling install if core version is managed by a workflow_settings.yaml

* Make the compile method stateless

* Make installation happen in a temporary directory, remove package-lock usage

* Fix lint

* Make installation happen in the temporary directory, add test for weird files

* Fix lint

* Fix tests

* Fix tests
  • Loading branch information
Ekrekr committed Feb 20, 2024
1 parent 800e36d commit d898ca1
Show file tree
Hide file tree
Showing 17 changed files with 238 additions and 112 deletions.
4 changes: 4 additions & 0 deletions cli/api/BUILD
Expand Up @@ -22,19 +22,23 @@ ts_library(
"//sqlx:lexer",
"//cli/vm:vm",
"@npm//@google-cloud/bigquery",
"@npm//@types/fs-extra",
"@npm//@types/glob",
"@npm//@types/js-beautify",
"@npm//@types/js-yaml",
"@npm//@types/long",
"@npm//@types/node",
"@npm//@types/semver",
"@npm//@types/tmp",
"@npm//deepmerge",
"@npm//fs-extra",
"@npm//glob",
"@npm//js-beautify",
"@npm//js-yaml",
"@npm//promise-pool-executor",
"@npm//protobufjs",
"@npm//semver",
"@npm//tmp",
"@npm//sql-formatter",
],
)
62 changes: 55 additions & 7 deletions cli/api/commands/compile.ts
@@ -1,26 +1,74 @@
import * as fs from "fs-extra";
import * as path from "path";
import * as tmp from "tmp";
import { promisify } from "util";

import { ChildProcess, fork } from "child_process";
import { ChildProcess, exec, fork } from "child_process";
import { MISSING_CORE_VERSION_ERROR } from "df/cli/api/commands/install";
import { readDataformCoreVersionFromWorkflowSettings } from "df/cli/api/utils";
import { coerceAsError } from "df/common/errors/errors";
import { decode64 } from "df/common/protos";
import { setOrValidateTableEnumType } from "df/core/utils";
import { dataform } from "df/protos/ts";

export class CompilationTimeoutError extends Error {}

/**
 * Compiles the Dataform project at `compileConfig.projectDir` and returns the compiled graph.
 *
 * The `@dataform/core` version is taken from one of two places:
 * - `workflow_settings.yaml`: the project is copied into a temporary directory, a `package.json`
 *   depending on that version is written there, packages are installed with
 *   `npm i --ignore-scripts`, and compilation runs against the copy. The temporary directory is
 *   removed afterwards, whether or not compilation succeeded.
 * - `package.json`: compilation runs directly against the project directory.
 *
 * @param compileConfig Compilation options. The object passed in is not modified.
 * @returns The compiled graph built from the child process response.
 * @throws Error with `MISSING_CORE_VERSION_ERROR` if neither `workflow_settings.yaml` provides a
 *   core version nor a `package.json` exists; Error if `workflow_settings.yaml` provides a core
 *   version while `node_modules`, `package.json` or `package-lock.json` exists in the project.
 */
export async function compile(
  compileConfig: dataform.ICompileConfig = {}
): Promise<dataform.CompiledGraph> {
  const resolvedProjectPath = path.resolve(compileConfig.projectDir);
  const packageJsonPath = path.join(resolvedProjectPath, "package.json");
  const packageLockJsonPath = path.join(resolvedProjectPath, "package-lock.json");
  const projectNodeModulesPath = path.join(resolvedProjectPath, "node_modules");

  const workflowSettingsDataformCoreVersion = readDataformCoreVersionFromWorkflowSettings(
    resolvedProjectPath
  );

  if (!workflowSettingsDataformCoreVersion) {
    if (!fs.existsSync(packageJsonPath)) {
      throw new Error(MISSING_CORE_VERSION_ERROR);
    }
    // The project manages its own packages; compile it in place.
    return compileInChildProcess(compileConfig);
  }

  // When the core version comes from workflow_settings.yaml, npm artifacts in the project
  // directory are rejected rather than silently ignored or overwritten.
  for (const npmPath of [projectNodeModulesPath, packageJsonPath, packageLockJsonPath]) {
    if (fs.existsSync(npmPath)) {
      throw new Error(`'${npmPath}' unexpected; remove it and try again`);
    }
  }

  // For stateless package installation, a temporary directory is used in order to avoid
  // interfering with user's project directories. It is only created once it is actually needed.
  const temporaryProjectPath = tmp.dirSync().name;
  try {
    fs.copySync(resolvedProjectPath, temporaryProjectPath);

    // Serialize rather than interpolate so that the version string is always valid JSON.
    fs.writeFileSync(
      path.join(temporaryProjectPath, "package.json"),
      JSON.stringify(
        { dependencies: { "@dataform/core": workflowSettingsDataformCoreVersion } },
        null,
        2
      )
    );

    await promisify(exec)("npm i --ignore-scripts", {
      cwd: temporaryProjectPath
    });

    // Pass a copy so the caller's config never ends up pointing at a deleted directory.
    return await compileInChildProcess({ ...compileConfig, projectDir: temporaryProjectPath });
  } finally {
    // Always clean up, including when the copy, the install or the compilation fails.
    fs.removeSync(temporaryProjectPath);
  }
}

/**
 * Runs compilation through `CompileChildProcess.forkProcess()`, decodes the response with
 * `decode64`, and applies `setOrValidateTableEnumType` to every table of the resulting graph.
 */
async function compileInChildProcess(
  compileConfig: dataform.ICompileConfig
): Promise<dataform.CompiledGraph> {
  const encodedResponse = await CompileChildProcess.forkProcess().compile(compileConfig);
  const response = decode64(dataform.CoreExecutionResponse, encodedResponse);
  const compiledGraph = dataform.CompiledGraph.create(response.compile.compiledGraph);
  compiledGraph.tables.forEach(setOrValidateTableEnumType);
  return compiledGraph;
}

export class CompileChildProcess {
Expand Down
15 changes: 2 additions & 13 deletions cli/api/commands/init.ts
Expand Up @@ -3,7 +3,6 @@ import { dump as dumpYaml } from "js-yaml";
import * as path from "path";

import { CREDENTIALS_FILENAME } from "df/cli/api/commands/credentials";
import { install } from "df/cli/api/commands/install";
import { version } from "df/core/version";
import { dataform } from "df/protos/ts";

Expand All @@ -15,17 +14,11 @@ node_modules/
export interface IInitResult {
filesWritten: string[];
dirsCreated: string[];
installedNpmPackages: boolean;
}

export interface IInitOptions {
skipInstall?: boolean;
}

export async function init(
projectDir: string,
projectConfig: dataform.IProjectConfig,
options: IInitOptions = {}
projectConfig: dataform.IProjectConfig
): Promise<IInitResult> {
const workflowSettingsYamlPath = path.join(projectDir, "workflow_settings.yaml");
const packageJsonPath = path.join(projectDir, "package.json");
Expand Down Expand Up @@ -87,12 +80,8 @@ export async function init(
fs.mkdirSync(includesDir);
dirsCreated.push(includesDir);

// Install packages.
await install(projectDir, options.skipInstall);

return {
filesWritten,
dirsCreated,
installedNpmPackages: !options.skipInstall
dirsCreated
};
}
47 changes: 11 additions & 36 deletions cli/api/commands/install.ts
@@ -1,55 +1,30 @@
import * as fs from "fs";
import { load as loadYaml, YAMLException } from "js-yaml";
import * as path from "path";
import { promisify } from "util";

import * as childProcess from "child_process";
import { dataform } from "df/protos/ts";
import { readDataformCoreVersionFromWorkflowSettings } from "df/cli/api/utils";

export async function install(projectPath: string, skipInstall?: boolean) {
if (skipInstall) {
return;
}
export const MISSING_CORE_VERSION_ERROR =
"dataformCoreVersion must be specified either in workflow_settings.yaml or via a package.json";

export async function install(projectPath: string) {
const resolvedProjectPath = path.resolve(projectPath);
const workflowSettingsPath = path.join(resolvedProjectPath, "workflow_settings.yaml");
const packageJsonPath = path.join(resolvedProjectPath, "package.json");
const packageLockJsonPath = path.join(resolvedProjectPath, "package-lock.json");

let installCommand = "npm i --ignore-scripts";

// Core's readWorkflowSettings method cannot be used for this because Core assumes that
// `require` can read YAML files directly.
const dataformCoreVersion = readDataformCoreVersionIfPresent(workflowSettingsPath);

const dataformCoreVersion = readDataformCoreVersionFromWorkflowSettings(resolvedProjectPath);
if (dataformCoreVersion) {
// If there are other packages already in the package.json, specifying a specific package to
// install will trigger the other packages to be installed too.
installCommand += ` @dataform/core@${dataformCoreVersion}`;
}

if (!dataformCoreVersion && !fs.existsSync(packageJsonPath)) {
throw new Error(
"dataformCoreVersion must be specified either in workflow_settings.yaml or via a package.json"
"Package installation is only supported when specifying @dataform/core version in " +
"'package.json'"
);
}

await promisify(childProcess.exec)(installCommand, { cwd: resolvedProjectPath });
}

function readDataformCoreVersionIfPresent(workflowSettingsPath: string): string {
if (!fs.existsSync(workflowSettingsPath)) {
return "";
if (!fs.existsSync(packageJsonPath)) {
throw new Error(MISSING_CORE_VERSION_ERROR);
}

const workflowSettingsContent = fs.readFileSync(workflowSettingsPath, "utf-8");
let workflowSettingsAsJson = {};
try {
workflowSettingsAsJson = loadYaml(workflowSettingsContent);
} catch (e) {
if (e instanceof YAMLException) {
throw Error(`${path} is not a valid YAML file: ${e}`);
}
throw e;
}
return dataform.WorkflowSettings.create(workflowSettingsAsJson).dataformCoreVersion;
await promisify(childProcess.exec)("npm i --ignore-scripts", { cwd: resolvedProjectPath });
}
27 changes: 27 additions & 0 deletions cli/api/utils.ts
@@ -1,3 +1,30 @@
import * as fs from "fs-extra";
import { load as loadYaml, YAMLException } from "js-yaml";
import * as path from "path";

import { dataform } from "df/protos/ts";

/**
 * Serializes `obj` as JSON indented by four spaces and terminated with a newline.
 */
export function prettyJsonStringify(obj: object) {
  const indentWidth = 4;
  const serialized = JSON.stringify(obj, null, indentWidth);
  return `${serialized}\n`;
}

/**
 * Reads the `dataformCoreVersion` field from the `workflow_settings.yaml` file located directly
 * inside `resolvedProjectPath`.
 *
 * The YAML file is parsed here directly with js-yaml.
 *
 * @param resolvedProjectPath Path of the project directory to look in.
 * @returns `undefined` when `workflow_settings.yaml` does not exist; otherwise the
 *   `dataformCoreVersion` of the `dataform.WorkflowSettings` built from the file's content
 *   (presumably an empty string when the field is not set - confirm against the proto defaults).
 * @throws Error naming the settings file when its content is not valid YAML; any other error
 *   raised while parsing is rethrown unchanged.
 */
export function readDataformCoreVersionFromWorkflowSettings(
  resolvedProjectPath: string
): string | undefined {
  const workflowSettingsPath = path.join(resolvedProjectPath, "workflow_settings.yaml");
  if (!fs.existsSync(workflowSettingsPath)) {
    return;
  }

  const workflowSettingsContent = fs.readFileSync(workflowSettingsPath, "utf-8");
  let workflowSettingsAsJson = {};
  try {
    workflowSettingsAsJson = loadYaml(workflowSettingsContent);
  } catch (e) {
    if (e instanceof YAMLException) {
      // Report the offending file's path; interpolating the imported `path` module here would
      // print "[object Object]" instead.
      throw new Error(`${workflowSettingsPath} is not a valid YAML file: ${e}`);
    }
    throw e;
  }
  return dataform.WorkflowSettings.create(workflowSettingsAsJson).dataformCoreVersion;
}
3 changes: 0 additions & 3 deletions cli/console.ts
Expand Up @@ -109,9 +109,6 @@ export function printInitResult(result: IInitResult) {
writeStdOut(successOutput("Files successfully written:"));
result.filesWritten.forEach(file => writeStdOut(file, 1));
}
if (result.installedNpmPackages) {
writeStdOut(successOutput("NPM packages successfully installed."));
}
}

export function printInitCredsResult(writtenFilePath: string) {
Expand Down
27 changes: 5 additions & 22 deletions cli/index.ts
Expand Up @@ -166,8 +166,6 @@ const jobPrefixOption: INamedOption<yargs.Options> = {
}
};

const skipInstallOptionName = "skip-install";

const testConnectionOptionName = "test-connection";

const watchOptionName = "watch";
Expand Down Expand Up @@ -232,28 +230,13 @@ export function runCli() {
}
}
],
options: [
trackOption,
{
name: skipInstallOptionName,
option: {
describe: "Whether to skip installing NPM packages.",
default: false
}
}
],
options: [trackOption],
processFn: async argv => {
print("Writing project files...\n");
const initResult = await init(
argv[projectDirOption.name],
{
defaultDatabase: argv[ProjectConfigOptions.defaultDatabase.name],
defaultLocation: argv[ProjectConfigOptions.defaultLocation.name]
},
{
skipInstall: argv[skipInstallOptionName]
}
);
const initResult = await init(argv[projectDirOption.name], {
defaultDatabase: argv[ProjectConfigOptions.defaultDatabase.name],
defaultLocation: argv[ProjectConfigOptions.defaultLocation.name]
});
printInitResult(initResult);
return 0;
}
Expand Down

0 comments on commit d898ca1

Please sign in to comment.