Skip to content

Commit

Permalink
feat: Model extract jobs.
Browse files Browse the repository at this point in the history
  • Loading branch information
amanda-tarafa committed Feb 10, 2021
1 parent 0326717 commit a07ebae
Show file tree
Hide file tree
Showing 11 changed files with 459 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,33 @@
ReturnType="BigQueryJob">

<AdditionalParameters>
<Parameter Name="destinationUris" Type="IEnumerable&lt;string&gt;" Comment="The Google Cloud Storage URIs (possibly including a wildcard) to extract the data to. Must not be null or empty." />
<Parameter Name="destinationUris" Type="IEnumerable&lt;string&gt;" Comment="The Google Cloud Storage URIs (possibly including a wildcard) to extract the data to. Must not be null or empty." />
</AdditionalParameters>

<Comments>
<summary>
Creates a job to extract data from the specified BigQuery table to Google Cloud Storage.
See [the BigQuery documentation](https://cloud.google.com/bigquery/docs/exporting-data) for more information on extract jobs.
See [the BigQuery documentation](https://cloud.google.com/bigquery/docs/exporting-data) for more information on extract jobs.
</summary>
<returns>The job created for the extract operation.</returns>
</Comments>
</Method>

<!-- CreateModelExtractJob(single uri) can't be generated yet, as it has an implementation. -->

<Method Name="CreateModelExtractJob"
RegionLabel="CreateModelExtractJob(multiple uris)"
TargetType="Model"
ReturnType="BigQueryJob">

<AdditionalParameters>
<Parameter Name="destinationUris" Type="IEnumerable&lt;string&gt;" Comment="The Google Cloud Storage URIs (possibly including a wildcard) to extract the model to. Must not be null or empty." />
</AdditionalParameters>

<Comments>
<summary>
Creates a job to extract the specified BigQuery model to Google Cloud Storage.
See [the BigQuery documentation](https://cloud.google.com/bigquery-ml/docs/exporting-models) for more information on model extract jobs.
</summary>
<returns>The job created for the extract operation.</returns>
</Comments>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
// limitations under the License.

using Google.Cloud.ClientTesting;
using System;
using System.Collections.Generic;
using System.Text;
using Google.Cloud.Storage.V1;
using Xunit;

namespace Google.Cloud.BigQuery.V2.IntegrationTests
Expand All @@ -26,12 +24,19 @@ public class BigQueryMLFixture : CloudProjectFixtureBase, ICollectionFixture<Big
public string DatasetId { get; }
public string ModelId { get; }

public string StorageBucketName { get; }

public StorageClient StorageClient { get; }

/// <summary>
/// Sets up the shared state for the BigQuery ML integration tests: generates the
/// dataset and model IDs, creates the Storage client, calls <c>CreateData</c>, and
/// finally creates a Storage bucket in the fixture's project.
/// </summary>
public BigQueryMLFixture()
{
    // IDs must be assigned before CreateData() runs.
    DatasetId = IdGenerator.FromDateTime(prefix: "testml_");
    ModelId = CreateModelId();
    StorageClient = StorageClient.Create();
    CreateData();

    // The bucket is created last, once the data setup has succeeded.
    var bucketName = IdGenerator.FromDateTime(prefix: "bigquerytestsml-");
    StorageBucketName = bucketName;
    StorageClient.CreateBucket(ProjectId, bucketName);
}

private void CreateData()
Expand Down Expand Up @@ -60,5 +65,7 @@ UNION ALL
}

/// <summary>
/// Generates a new GUID-based model ID using the "model_" prefix and "_" as the separator.
/// </summary>
/// <returns>The generated model ID.</returns>
internal string CreateModelId()
{
    return IdGenerator.FromGuid(prefix: "model_", separator: "_");
}

/// <summary>
/// Generates a new GUID-based name with the "file-" prefix, used by tests as a
/// Storage object prefix.
/// </summary>
/// <returns>The generated prefix name.</returns>
internal string GenerateStoragePrefixName()
{
    return IdGenerator.FromGuid(prefix: "file-");
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -154,5 +154,38 @@ public async Task GetModelAsync()

Assert.Equal(modelId, model.Reference.ModelId);
}

[Fact]
public void CreateModelExtractJob()
{
    var client = BigQueryClient.Create(_fixture.ProjectId);
    var model = client.GetModel(_fixture.DatasetId, _fixture.ModelId);

    // Each run extracts under its own freshly generated prefix in the fixture's bucket.
    var bucket = _fixture.StorageBucketName;
    var prefix = _fixture.GenerateStoragePrefixName();
    var targetUri = $"gs://{bucket}/{prefix}";

    var job = client.CreateModelExtractJob(model.Reference, targetUri);
    var completedJob = job.PollUntilCompleted();
    completedJob.ThrowOnAnyError();

    // The layout of an exported model is not known here (it spans several files),
    // so only check that at least one object exists under the prefix.
    var extractedObjects = _fixture.StorageClient.ListObjects(bucket, prefix);
    Assert.True(extractedObjects.Any());
}

[Fact]
public async Task CreateModelExtractJobAsync()
{
    var client = BigQueryClient.Create(_fixture.ProjectId);

    // Use the async overloads throughout so the test never blocks a thread
    // on network calls while running inside an async method.
    var modelReference = await client.GetModelAsync(_fixture.DatasetId, _fixture.ModelId);
    var destinationBucket = _fixture.StorageBucketName;
    var destinationPrefix = _fixture.GenerateStoragePrefixName();
    var destinationUri = $"gs://{destinationBucket}/{destinationPrefix}";

    var job = await client.CreateModelExtractJobAsync(modelReference.Reference, destinationUri);
    var completedJob = await job.PollUntilCompletedAsync();
    completedJob.ThrowOnAnyError();

    // We don't know the format of the extracted model (there are several files),
    // so just check that at least one object exists under the prefix.
    Assert.True(_fixture.StorageClient.ListObjects(destinationBucket, destinationPrefix).Any());
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -862,6 +862,27 @@ public void CreateExtractJobEquivalents()
client => new BigQueryTable(client, GetTable(tableReference)).CreateExtractJob(new[] { uri }, options));
}

[Fact]
public void CreateModelExtractJobEquivalents()
{
    var dataset = "dataset";
    var model = "model";
    var jobRef = GetJobReference("job");
    var modelRef = GetModelReference(dataset, model);
    var destination = "gs://bucket/object";
    var jobOptions = new CreateModelExtractJobOptions();
    var expectedJob = new BigQueryJob(new DerivedBigQueryClient(), new Job { JobReference = jobRef });

    // The first lambda is the underlying call (model reference + multiple URIs);
    // every following overload is passed to VerifyEquivalent as an equivalent of it.
    VerifyEquivalent(expectedJob,
        client => client.CreateModelExtractJob(MatchesWhenSerialized(modelRef), new[] { destination }, jobOptions),
        // Single-URI overloads on the client.
        client => client.CreateModelExtractJob(ProjectId, dataset, model, destination, jobOptions),
        client => client.CreateModelExtractJob(dataset, model, destination, jobOptions),
        client => client.CreateModelExtractJob(modelRef, destination, jobOptions),
        // Multiple-URI overloads on the client.
        client => client.CreateModelExtractJob(ProjectId, dataset, model, new[] { destination }, jobOptions),
        client => client.CreateModelExtractJob(dataset, model, new[] { destination }, jobOptions),
        // Overloads exposed on BigQueryModel.
        client => new BigQueryModel(client, GetModel(modelRef)).CreateModelExtractJob(destination, jobOptions),
        client => new BigQueryModel(client, GetModel(modelRef)).CreateModelExtractJob(new[] { destination }, jobOptions));
}

[Fact]
public void CreateCopyJobEquivalents()
{
Expand Down Expand Up @@ -1601,6 +1622,28 @@ public void CreateExtractJobAsyncEquivalents()
client => new BigQueryTable(client, GetTable(tableReference)).CreateExtractJobAsync(new[] { uri }, options, token));
}

[Fact]
public void CreateModelExtractJobAsyncEquivalents()
{
    var dataset = "dataset";
    var model = "model";
    var jobRef = GetJobReference("job");
    var modelRef = GetModelReference(dataset, model);
    var destination = "gs://bucket/object";
    var jobOptions = new CreateModelExtractJobOptions();
    var cancellation = new CancellationTokenSource().Token;
    var expectedJob = new BigQueryJob(new DerivedBigQueryClient(), new Job { JobReference = jobRef });

    // The first lambda is the underlying call (model reference + multiple URIs);
    // every following overload is passed to VerifyEquivalentAsync as an equivalent of it.
    VerifyEquivalentAsync(expectedJob,
        client => client.CreateModelExtractJobAsync(MatchesWhenSerialized(modelRef), new[] { destination }, jobOptions, cancellation),
        // Single-URI overloads on the client.
        client => client.CreateModelExtractJobAsync(ProjectId, dataset, model, destination, jobOptions, cancellation),
        client => client.CreateModelExtractJobAsync(dataset, model, destination, jobOptions, cancellation),
        client => client.CreateModelExtractJobAsync(modelRef, destination, jobOptions, cancellation),
        // Multiple-URI overloads on the client.
        client => client.CreateModelExtractJobAsync(ProjectId, dataset, model, new[] { destination }, jobOptions, cancellation),
        client => client.CreateModelExtractJobAsync(dataset, model, new[] { destination }, jobOptions, cancellation),
        // Overloads exposed on BigQueryModel.
        client => new BigQueryModel(client, GetModel(modelRef)).CreateModelExtractJobAsync(destination, jobOptions, cancellation),
        client => new BigQueryModel(client, GetModel(modelRef)).CreateModelExtractJobAsync(new[] { destination }, jobOptions, cancellation));
}

[Fact]
public void CreateCopyJobAsyncEquivalents()
{
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

using Google.Apis.Bigquery.v2.Data;
using Xunit;

namespace Google.Cloud.BigQuery.V2.Tests
{
/// <summary>
/// Unit tests for <see cref="CreateModelExtractJobOptions"/>.
/// </summary>
public class CreateModelExtractJobOptionsTest
{
    /// <summary>
    /// Checks that the destination format set on the options is written to the
    /// extract configuration as the "ML_XGBOOST_BOOSTER" string.
    /// </summary>
    [Fact]
    public void ModifyRequest()
    {
        var request = new JobConfigurationExtract();
        var options = new CreateModelExtractJobOptions { DestinationFormat = ModelFormat.XGBoostBooster };

        options.ModifyRequest(request);

        Assert.Equal("ML_XGBOOST_BOOSTER", request.DestinationFormat);
    }
}
}
Loading

0 comments on commit a07ebae

Please sign in to comment.