/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.sql.test

import scala.concurrent.duration._

import org.scalatest.{BeforeAndAfterEach, Suite}
import org.scalatest.concurrent.Eventually

import org.apache.spark.{DebugFilesystem, SparkConf}
import org.apache.spark.sql.{SparkSession, SQLContext}
import org.apache.spark.sql.internal.SQLConf

/**
 * Helper trait for SQL test suites where all tests share a single [[TestSparkSession]].
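 *
 * A minimal usage sketch (the suite class and assertion below are illustrative, not part of
 * this trait):
 * {{{
 *   class MyQuerySuite extends org.apache.spark.SparkFunSuite with SharedSparkSession {
 *     test("range count") {
 *       // `spark` is provided by SharedSparkSession
 *       assert(spark.range(10).count() === 10)
 *     }
 *   }
 * }}}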
 */
trait SharedSparkSession
  extends SQLTestUtilsBase
  with BeforeAndAfterEach
  with Eventually { self: Suite =>

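  /**
   * The [[SparkConf]] used to build the [[TestSparkSession]]. Subclasses can override this to
   * add or change settings, for example (the partition count here is arbitrary):
   * {{{
   *   override protected def sparkConf =
   *     super.sparkConf.set(SQLConf.SHUFFLE_PARTITIONS.key, "4")
   * }}}
   */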
  protected def sparkConf = {
    new SparkConf()
      .set("spark.hadoop.fs.file.impl", classOf[DebugFilesystem].getName)
      .set("spark.unsafe.exceptionOnMemoryLeak", "true")
      .set(SQLConf.CODEGEN_FALLBACK.key, "false")
  }

  /**
   * The [[TestSparkSession]] to use for all tests in this suite.
   *
   * By default, the underlying [[org.apache.spark.SparkContext]] will be run in local
   * mode with the default test configurations.
   */
  private var _spark: TestSparkSession = null

  /**
   * The [[TestSparkSession]] to use for all tests in this suite.
   */
  protected implicit def spark: SparkSession = _spark

  /**
   * The [[TestSQLContext]] to use for all tests in this suite.
   */
  protected implicit def sqlContext: SQLContext = _spark.sqlContext

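  /**
   * Creates the [[TestSparkSession]] for this suite. Override this to construct the session
   * differently, e.g. from a customized [[SparkConf]].
   */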
  protected def createSparkSession: TestSparkSession = {
    new TestSparkSession(sparkConf)
  }

  /**
   * Initialize the [[TestSparkSession]]. Generally this is just called from beforeAll;
   * however, in suites written in styles other than FunSuite, code that sits between test
   * group constructs and the actual tests often needs the session to be available already.
   * The difference is purely semantic, but it makes more sense to call 'initializeSession'
   * between a 'describe' and an 'it' call than it does to call 'beforeAll'.
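   *
   * For example, in a FunSpec-style suite (the spec text and query below are illustrative):
   * {{{
   *   describe("aggregation") {
   *     initializeSession()
   *     val df = spark.range(100)
   *     it("sums a range") {
   *       // ids 0..99 sum to 4950
   *       assert(df.selectExpr("sum(id)").head().getLong(0) === 4950)
   *     }
   *   }
   * }}}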
   */
  protected def initializeSession(): Unit = {
    if (_spark == null) {
      _spark = createSparkSession
    }
  }

  /**
   * Make sure the [[TestSparkSession]] is initialized before any tests are run.
   */
  protected override def beforeAll(): Unit = {
    initializeSession()
    // Ensure we have initialized the context before calling parent code
    super.beforeAll()
  }

  /**
   * Stop the underlying [[org.apache.spark.SparkContext]], if any.
   */
  protected override def afterAll(): Unit = {
    super.afterAll()
    if (_spark != null) {
      _spark.sessionState.catalog.reset()
      _spark.stop()
      _spark = null
    }
  }

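  // Reset DebugFilesystem's open-stream tracking before each test, so that any leaked
  // streams detected afterwards can be attributed to the test that opened them.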
  protected override def beforeEach(): Unit = {
    super.beforeEach()
    DebugFilesystem.clearOpenStreams()
  }

  protected override def afterEach(): Unit = {
    super.afterEach()
    // Clear all persistent datasets after each test
    spark.sharedState.cacheManager.clearCache()
    // Files can be closed from other threads, so wait a bit.
    // Normally this doesn't take more than 1s.
    eventually(timeout(10.seconds), interval(2.seconds)) {
      DebugFilesystem.assertNoOpenStreams()
    }
  }
}