Spark: Support dropping views #9421

Merged (1 commit) on Jan 16, 2024
@@ -117,8 +117,8 @@ case class ResolveViews(spark: SparkSession) extends Rule[LogicalPlan] with LookupCatalog
  }

  private def qualifyFunctionIdentifiers(
-    plan: LogicalPlan,
-    catalogAndNamespace: Seq[String]): LogicalPlan = plan transformExpressions {
+      plan: LogicalPlan,
+      catalogAndNamespace: Seq[String]): LogicalPlan = plan transformExpressions {
Comment from the PR author:
formatting was off here

    case u@UnresolvedFunction(Seq(name), _, _, _, _) =>
      if (!isBuiltinFunction(name)) {
        u.copy(nameParts = catalogAndNamespace :+ name)
@@ -143,11 +143,11 @@ case class ResolveViews(spark: SparkSession) extends Rule[LogicalPlan] with LookupCatalog
  }

  private def isCatalog(name: String): Boolean = {
-    spark.sessionState.catalogManager.isCatalogRegistered(name)
+    catalogManager.isCatalogRegistered(name)
  }

  private def isBuiltinFunction(name: String): Boolean = {
-    spark.sessionState.catalogManager.v1SessionCatalog.isBuiltinFunction(FunctionIdentifier(name))
+    catalogManager.v1SessionCatalog.isBuiltinFunction(FunctionIdentifier(name))
  }


@@ -0,0 +1,65 @@ (new file: RewriteViewCommands.scala)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.analysis

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.plans.logical.DropView
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.views.DropIcebergView
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.spark.sql.connector.catalog.CatalogManager
import org.apache.spark.sql.connector.catalog.CatalogPlugin
import org.apache.spark.sql.connector.catalog.LookupCatalog
import org.apache.spark.sql.connector.catalog.ViewCatalog

/**
 * ResolveSessionCatalog exits early for some v2 view commands,
 * so they are pre-substituted here and then handled in ResolveViews.
 */
case class RewriteViewCommands(spark: SparkSession) extends Rule[LogicalPlan] with LookupCatalog {

  protected lazy val catalogManager: CatalogManager = spark.sessionState.catalogManager

  override def apply(plan: LogicalPlan): LogicalPlan = plan.resolveOperatorsUp {
    case DropView(ResolvedView(resolved), ifExists) =>
      DropIcebergView(resolved, ifExists)
  }

  private def isTempView(nameParts: Seq[String]): Boolean = {
    catalogManager.v1SessionCatalog.isTempView(nameParts)
  }

  private def isViewCatalog(catalog: CatalogPlugin): Boolean = {
    catalog.isInstanceOf[ViewCatalog]
  }

  object ResolvedView {
    def unapply(unresolved: UnresolvedIdentifier): Option[ResolvedIdentifier] = unresolved match {
      case UnresolvedIdentifier(nameParts, true) if isTempView(nameParts) =>
        None

      case UnresolvedIdentifier(CatalogAndIdentifier(catalog, ident), _) if isViewCatalog(catalog) =>
        Some(ResolvedIdentifier(catalog, ident))

      case _ =>
        None
    }
  }
}
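To make the rewrite concrete, here is a minimal sketch of the rule applied in isolation (the catalog and view names are made up, and the plan shapes assume Spark's DropView and UnresolvedIdentifier nodes used above; this is an illustration, not code from the PR):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.analysis.RewriteViewCommands
import org.apache.spark.sql.catalyst.analysis.UnresolvedIdentifier
import org.apache.spark.sql.catalyst.plans.logical.DropView

val spark = SparkSession.active

// What the delegate parser yields for: DROP VIEW ice.db.v
val parsed = DropView(UnresolvedIdentifier(Seq("ice", "db", "v"), allowTemp = true), ifExists = false)

// If "ice" is registered as a ViewCatalog, the rule substitutes
// DropIcebergView(ResolvedIdentifier(<ice catalog>, <db.v identifier>), ifExists = false);
// a name that currently resolves to a temp view is left alone for the session catalog.
val rewritten = RewriteViewCommands(spark).apply(parsed)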
@@ -35,6 +35,7 @@ import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.catalyst.FunctionIdentifier
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
+import org.apache.spark.sql.catalyst.analysis.RewriteViewCommands
import org.apache.spark.sql.catalyst.analysis.UnresolvedRelation
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.parser.ParserInterface
@@ -122,7 +123,7 @@ class IcebergSparkSqlExtensionsParser(delegate: ParserInterface) extends ParserInterface
    if (isIcebergCommand(sqlTextAfterSubstitution)) {
      parse(sqlTextAfterSubstitution) { parser => astBuilder.visit(parser.singleStatement()) }.asInstanceOf[LogicalPlan]
    } else {
-      delegate.parsePlan(sqlText)
+      RewriteViewCommands(SparkSession.active).apply(delegate.parsePlan(sqlText))
    }
  }

@@ -0,0 +1,30 @@ (new file: DropIcebergView.scala)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.catalyst.plans.logical.views

import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.plans.logical.UnaryCommand

case class DropIcebergView(
    child: LogicalPlan,
    ifExists: Boolean) extends UnaryCommand {
  override protected def withNewChildInternal(newChild: LogicalPlan): DropIcebergView =
    copy(child = newChild)
}
@@ -0,0 +1,48 @@ (new file: DropV2ViewExec.scala)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.spark.sql.execution.datasources.v2

import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.analysis.NoSuchViewException
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.catalog.ViewCatalog


case class DropV2ViewExec(
    catalog: ViewCatalog,
    ident: Identifier,
    ifExists: Boolean) extends LeafV2CommandExec {

  override lazy val output: Seq[Attribute] = Nil

  override protected def run(): Seq[InternalRow] = {
    val dropped = catalog.dropView(ident)
    if (!dropped && !ifExists) {
      throw new NoSuchViewException(ident)
    }

    Nil
  }

  override def simpleString(maxFields: Int): String = {
    s"DropV2View: ${ident}"
  }
}
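DropV2ViewExec leans on ViewCatalog.dropView returning whether a view was actually removed, which is what makes IF EXISTS cheap to support. A behavior sketch, with hypothetical catalog and view names:

// "ice" is a hypothetical catalog backed by a ViewCatalog
spark.sql("DROP VIEW ice.db.v")           // throws NoSuchViewException when dropView returns false
spark.sql("DROP VIEW IF EXISTS ice.db.v") // the same false result is swallowed; the command is a no-op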
@@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.Strategy
import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.analysis.ResolvedIdentifier
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.expressions.GenericInternalRow
import org.apache.spark.sql.catalyst.expressions.PredicateHelper
@@ -43,6 +44,7 @@ import org.apache.spark.sql.catalyst.plans.logical.RenameTable
import org.apache.spark.sql.catalyst.plans.logical.ReplacePartitionField
import org.apache.spark.sql.catalyst.plans.logical.SetIdentifierFields
import org.apache.spark.sql.catalyst.plans.logical.SetWriteDistributionAndOrdering
+import org.apache.spark.sql.catalyst.plans.logical.views.DropIcebergView
import org.apache.spark.sql.catalyst.plans.logical.views.ResolvedV2View
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.catalog.TableCatalog
@@ -102,6 +104,9 @@ case class ExtendedDataSourceV2Strategy(spark: SparkSession) extends Strategy with PredicateHelper
      }
      RenameV2ViewExec(oldCatalog, oldIdent, newIdent.identifier()) :: Nil

+    case DropIcebergView(ResolvedIdentifier(viewCatalog: ViewCatalog, ident), ifExists) =>
Comment from a reviewer:
I'm not sure what happens if this isn't a ViewCatalog, but the new rewrite rule should ensure that it always is.

+      DropV2ViewExec(viewCatalog, ident, ifExists) :: Nil

    case _ => Nil
  }
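To spell out the reviewer's point: RewriteViewCommands is the only producer of DropIcebergView, and its ResolvedView extractor only matches identifiers that resolved against a ViewCatalog. A sketch of the two planning outcomes (names are hypothetical, shapes assumed from the rules above):

// DropIcebergView(ResolvedIdentifier(iceViewCatalog, ident), ifExists = true)
//   is planned as DropV2ViewExec(iceViewCatalog, ident, ifExists = true) :: Nil
// DropIcebergView(ResolvedIdentifier(someTableCatalog, ident), _)
//   would not match the typed pattern and falls through to `case _ => Nil`,
//   i.e. this strategy simply declines to plan it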

@@ -19,6 +19,7 @@
package org.apache.iceberg.spark.extensions;

import static org.assertj.core.api.Assertions.assertThat;
+import static org.assertj.core.api.Assertions.assertThatNoException;
import static org.assertj.core.api.Assertions.assertThatThrownBy;

import java.util.List;
@@ -43,6 +44,7 @@
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException;
+import org.apache.spark.sql.catalyst.catalog.SessionCatalog;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -780,6 +782,106 @@ public void renameViewTargetAlreadyExistsAsTable() {
String.format("Cannot create view default.%s because it already exists", target));
}

  @Test
  public void dropView() {
    String viewName = "viewToBeDropped";
    String sql = String.format("SELECT id FROM %s", tableName);

    ViewCatalog viewCatalog = viewCatalog();

    TableIdentifier identifier = TableIdentifier.of(NAMESPACE, viewName);
    viewCatalog
        .buildView(identifier)
        .withQuery("spark", sql)
        .withDefaultNamespace(NAMESPACE)
        .withDefaultCatalog(catalogName)
        .withSchema(schema(sql))
        .create();

    assertThat(viewCatalog.viewExists(identifier)).isTrue();

    sql("DROP VIEW %s", viewName);
    assertThat(viewCatalog.viewExists(identifier)).isFalse();
  }

  @Test
  public void dropNonExistingView() {
    assertThatThrownBy(() -> sql("DROP VIEW non_existing"))
        .isInstanceOf(AnalysisException.class)
        .hasMessageContaining("The view %s.%s cannot be found", NAMESPACE, "non_existing");
  }

  @Test
  public void dropViewIfExists() {
    String viewName = "viewToBeDropped";
    String sql = String.format("SELECT id FROM %s", tableName);

    ViewCatalog viewCatalog = viewCatalog();

    TableIdentifier identifier = TableIdentifier.of(NAMESPACE, viewName);
    viewCatalog
        .buildView(identifier)
        .withQuery("spark", sql)
        .withDefaultNamespace(NAMESPACE)
        .withDefaultCatalog(catalogName)
        .withSchema(schema(sql))
        .create();

    assertThat(viewCatalog.viewExists(identifier)).isTrue();

    sql("DROP VIEW IF EXISTS %s", viewName);
    assertThat(viewCatalog.viewExists(identifier)).isFalse();

    assertThatNoException().isThrownBy(() -> sql("DROP VIEW IF EXISTS %s", viewName));
  }

  /** The purpose of this test is mainly to make sure that normal view deletion isn't messed up */
  @Test
  public void dropGlobalTempView() {
    String globalTempView = "globalViewToBeDropped";
sql("CREATE GLOBAL TEMPORARY VIEW %s AS SELECT id FROM %s", globalTempView, tableName);
assertThat(v1SessionCatalog().getGlobalTempView(globalTempView).isDefined()).isTrue();

sql("DROP VIEW global_temp.%s", globalTempView);
assertThat(v1SessionCatalog().getGlobalTempView(globalTempView).isDefined()).isFalse();
}

/** The purpose of this test is mainly to make sure that normal view deletion isn't messed up */
@Test
public void dropTempView() {
String tempView = "tempViewToBeDropped";
sql("CREATE TEMPORARY VIEW %s AS SELECT id FROM %s", tempView, tableName);
assertThat(v1SessionCatalog().getTempView(tempView).isDefined()).isTrue();

sql("DROP VIEW %s", tempView);
assertThat(v1SessionCatalog().getTempView(tempView).isDefined()).isFalse();
}

/** The purpose of this test is mainly to make sure that normal view deletion isn't messed up */
@Test
public void dropV1View() {
String v1View = "v1ViewToBeDropped";
sql("USE spark_catalog");
sql("CREATE NAMESPACE IF NOT EXISTS %s", NAMESPACE);
sql("CREATE TABLE %s (id INT, data STRING)", tableName);
sql("CREATE VIEW %s AS SELECT id FROM %s", v1View, tableName);
sql("USE %s", catalogName);
assertThat(
v1SessionCatalog()
.tableExists(new org.apache.spark.sql.catalyst.TableIdentifier(v1View)))
.isTrue();

sql("DROP VIEW spark_catalog.%s.%s", NAMESPACE, v1View);
assertThat(
v1SessionCatalog()
.tableExists(new org.apache.spark.sql.catalyst.TableIdentifier(v1View)))
.isFalse();
}

private SessionCatalog v1SessionCatalog() {
return spark.sessionState().catalogManager().v1SessionCatalog();
}

private String viewName(String viewName) {
return viewName + new Random().nextInt(1000000);
}
@@ -581,8 +581,11 @@ public View alterView(Identifier ident, ViewChange... changes)

  @Override
  public boolean dropView(Identifier ident) {
-    throw new UnsupportedOperationException(
-        "Dropping a view is not supported by catalog: " + catalogName);
+    if (null != asViewCatalog) {
+      return asViewCatalog.dropView(buildIdentifier(ident));
+    }
+
+    return false;
  }

@Override
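A closing note on how this last change composes with DropV2ViewExec (my reading of the diff, assuming asViewCatalog is null when the wrapped Iceberg catalog has no view support): returning false instead of throwing lets the physical node decide between an error and a no-op.

// Mirrors DropV2ViewExec.run() against the new SparkCatalog behavior:
val dropped = catalog.dropView(ident)  // false: view absent, or no view support at all
if (!dropped && !ifExists) {
  throw new NoSuchViewException(ident) // plain DROP VIEW surfaces an error
}
// with IF EXISTS the false result is ignored and the command succeeds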