From 1c2727e47a39e01c6ffb8f4a391d421a51e6f93e Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Tue, 29 Jul 2014 15:22:42 -0700
Subject: [PATCH 1/3] Deprecate LocalHiveContext

---
 .../sbt_app_hive/src/main/scala/HiveApp.scala      |  4 ++--
 docs/sql-programming-guide.md                      |  6 +++---
 .../spark/examples/sql/hive/HiveFromSpark.scala    |  4 ++--
 python/pyspark/java_gateway.py                     |  2 +-
 python/pyspark/sql.py                              |  8 +++++++-
 .../org/apache/spark/sql/hive/HiveContext.scala    |  7 +++++--
 .../org/apache/spark/sql/hive/TestHive.scala       | 15 ++++++++++++---
 7 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
index 7257d17d10116..a21410f3b9813 100644
--- a/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
+++ b/dev/audit-release/sbt_app_hive/src/main/scala/HiveApp.scala
@@ -22,7 +22,7 @@ import scala.collection.mutable.{ListBuffer, Queue}
 import org.apache.spark.SparkConf
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
 
 case class Person(name: String, age: Int)
 
@@ -34,7 +34,7 @@ object SparkSqlExample {
       case None => new SparkConf().setAppName("Simple Sql App")
     }
     val sc = new SparkContext(conf)
-    val hiveContext = new LocalHiveContext(sc)
+    val hiveContext = new HiveContext(sc)
     import hiveContext._
 
     hql("DROP TABLE IF EXISTS src")
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 156e0aebdebe6..a047d32b6ee6c 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -487,9 +487,9 @@ Configuration of Hive is done by placing your `hive-site.xml` file in `conf/`.
 
 When working with Hive one must construct a `HiveContext`, which inherits from `SQLContext`, and
 adds support for finding tables in the MetaStore and writing queries using HiveQL. Users who do
-not have an existing Hive deployment can also experiment with the `LocalHiveContext`,
-which is similar to `HiveContext`, but creates a local copy of the `metastore` and `warehouse`
-automatically.
+not have an existing Hive deployment can still create a `HiveContext`. When not configured by
+`hive-site.xml`, the context automatically creates `metastore_db` and `warehouse` in the current
+directory.
 
 {% highlight scala %}
 // sc is an existing SparkContext.
diff --git a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
index 66a23fac39999..dc5290fb4f10e 100644
--- a/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/sql/hive/HiveFromSpark.scala
@@ -19,7 +19,7 @@ package org.apache.spark.examples.sql.hive
 
 import org.apache.spark.{SparkConf, SparkContext}
 import org.apache.spark.sql._
-import org.apache.spark.sql.hive.LocalHiveContext
+import org.apache.spark.sql.hive.HiveContext
 
 object HiveFromSpark {
   case class Record(key: Int, value: String)
@@ -31,7 +31,7 @@ object HiveFromSpark {
     // A local hive context creates an instance of the Hive Metastore in process, storing
     // the warehouse data in the current directory. This location can be overridden by
     // specifying a second parameter to the constructor.
-    val hiveContext = new LocalHiveContext(sc)
+    val hiveContext = new HiveContext(sc)
     import hiveContext._
 
     hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index 2c129679f47f3..b1951040a1056 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -86,7 +86,7 @@ def run(self):
     java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
     java_import(gateway.jvm, "org.apache.spark.sql.SQLContext")
     java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
-    java_import(gateway.jvm, "org.apache.spark.sql.hive.LocalHiveContext")
+    java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
     java_import(gateway.jvm, "org.apache.spark.sql.hive.TestHiveContext")
     java_import(gateway.jvm, "scala.Tuple2")
 
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index a6b3277db3266..560b9e448afd0 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -15,12 +15,14 @@
 # limitations under the License.
 #
 
+import warnings
+
 from pyspark.rdd import RDD, PipelinedRDD
 from pyspark.serializers import BatchedSerializer, PickleSerializer
 
 from py4j.protocol import Py4JError
 
-__all__ = ["SQLContext", "HiveContext", "LocalHiveContext", "TestHiveContext", "SchemaRDD", "Row"]
+__all__ = ["SQLContext", "HiveContext", "TestHiveContext", "SchemaRDD", "Row"]
 
 
 class SQLContext:
@@ -278,6 +280,10 @@ class LocalHiveContext(HiveContext):
         130091
         """
 
+    def __init__(self, sparkContext, sqlContext=None):
+        HiveContext.__init__(self, sparkContext, sqlContext)
+        warnings.warn("LocalHiveContext is deprecated. Use HiveContext instead.", DeprecationWarning)
+
     def _get_hive_ctx(self):
         return self._jvm.LocalHiveContext(self._jsc.sc())
 
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index 84d43eaeea51d..b2983d269866b 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -43,9 +43,12 @@ import org.apache.spark.sql.execution.{Command => PhysicalCommand}
 import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
 
 /**
- * Starts up an instance of hive where metadata is stored locally. An in-process metadata data is
- * created with data stored in ./metadata. Warehouse data is stored in in ./warehouse.
+ * DEPRECATED: Use HiveContext instead.
  */
+@deprecated("""
+  Use HiveContext instead. It will still create a local metastore if one is not specified.
+  However, note that the default directory is ./metastore_db, not ./metastore
+  """)
 class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
 
   lazy val metastorePath = new File("metastore").getCanonicalPath
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
index 9386008d02d51..c50e8c4b5c5d3 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/TestHive.scala
@@ -53,15 +53,24 @@ object TestHive
  * hive metastore seems to lead to weird non-deterministic failures. Therefore, the execution of
  * test cases that rely on TestHive must be serialized.
  */
-class TestHiveContext(sc: SparkContext) extends LocalHiveContext(sc) {
+class TestHiveContext(sc: SparkContext) extends HiveContext(sc) {
   self =>
 
   // By clearing the port we force Spark to pick a new one. This allows us to rerun tests
   // without restarting the JVM.
   System.clearProperty("spark.hostPort")
 
-  override lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
-  override lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+  lazy val warehousePath = getTempFilePath("sparkHiveWarehouse").getCanonicalPath
+  lazy val metastorePath = getTempFilePath("sparkHiveMetastore").getCanonicalPath
+
+  /** Sets up the system initially or after a RESET command */
+  protected def configure() {
+    set("javax.jdo.option.ConnectionURL",
+      s"jdbc:derby:;databaseName=$metastorePath;create=true")
+    set("hive.metastore.warehouse.dir", warehousePath)
+  }
+
+  configure() // Must be called before initializing the catalog below.
 
   /** The location of the compiled hive distribution */
   lazy val hiveHome = envVarToFile("HIVE_HOME")

From 626e056385b9613863273d56366fec2c4493e121 Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Wed, 30 Jul 2014 17:21:24 -0700
Subject: [PATCH 2/3] Don't remove from imports yet

---
 python/pyspark/java_gateway.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py
index b1951040a1056..2c129679f47f3 100644
--- a/python/pyspark/java_gateway.py
+++ b/python/pyspark/java_gateway.py
@@ -86,7 +86,7 @@ def run(self):
     java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
     java_import(gateway.jvm, "org.apache.spark.sql.SQLContext")
     java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
-    java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
+    java_import(gateway.jvm, "org.apache.spark.sql.hive.LocalHiveContext")
     java_import(gateway.jvm, "org.apache.spark.sql.hive.TestHiveContext")
     java_import(gateway.jvm, "scala.Tuple2")
 

From e5ec497247d02628d221deca0d99ccae8ba50579 Mon Sep 17 00:00:00 2001
From: Michael Armbrust
Date: Wed, 30 Jul 2014 20:01:16 -0700
Subject: [PATCH 3/3] Add deprecation version

---
 .../src/main/scala/org/apache/spark/sql/hive/HiveContext.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
index e1c90eb830f4b..27b444daba2d4 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveContext.scala
@@ -47,7 +47,7 @@ import org.apache.spark.sql.hive.execution.DescribeHiveTableCommand
 @deprecated("""
   Use HiveContext instead. It will still create a local metastore if one is not specified.
   However, note that the default directory is ./metastore_db, not ./metastore
-  """)
+  """, "1.1")
 class LocalHiveContext(sc: SparkContext) extends HiveContext(sc) {
 
   lazy val metastorePath = new File("metastore").getCanonicalPath
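
For anyone migrating application code off LocalHiveContext, a minimal before/after sketch of
the change this series asks for follows. It assumes a Spark 1.1-era build with Hive support;
the object name, app name, and queries are illustrative (modeled on the HiveApp.scala and
HiveFromSpark.scala hunks above), not part of the patch itself.

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.hive.HiveContext

    object HiveContextMigrationSketch {
      def main(args: Array[String]) {
        val sc = new SparkContext(new SparkConf().setAppName("Migration Sketch"))

        // Before this series:
        //   val hiveContext = new LocalHiveContext(sc)  // wrote ./metastore and ./warehouse
        // After it, a plain HiveContext serves the same purpose: with no
        // hive-site.xml on the classpath it still creates a local metastore,
        // but in ./metastore_db (the Derby default) rather than ./metastore.
        val hiveContext = new HiveContext(sc)
        import hiveContext._

        hql("CREATE TABLE IF NOT EXISTS src (key INT, value STRING)")
        hql("SELECT COUNT(*) FROM src").collect().foreach(println)

        sc.stop()
      }
    }

As the deprecation message notes, the one behavioral difference to watch for is the metastore
directory name: code or scripts that looked for ./metastore must be pointed at ./metastore_db,
or configure an explicit location via hive-site.xml.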