/
JavaXmlSuite.java
93 lines (78 loc) · 2.94 KB
/
JavaXmlSuite.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
/*
* Copyright 2014 Databricks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.databricks.spark.xml;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.HashMap;
import java.util.Map;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SparkSession;
/**
 * Java API smoke tests for spark-xml: verifies that the {@code XmlReader} builder,
 * the {@code DataFrameReader.format("xml")} path, and the XML write path all
 * round-trip the bundled books fixture with the expected record count.
 */
public final class JavaXmlSuite {

    /** Number of {@code <book>} records in the fixture file. */
    private static final int NUM_BOOKS = 12;
    private static final String BOOKS_FILE = "src/test/resources/books.xml";
    private static final String BOOKS_FILE_TAG = "book";

    private SparkSession spark;
    private Path tempDir;

    @Before
    public void setUp() throws IOException {
        // Local 2-core session; UI disabled to avoid port binding in CI.
        spark = SparkSession.builder().
            master("local[2]").
            appName("XmlSuite").
            config("spark.ui.enabled", false).
            getOrCreate();
        spark.sparkContext().setLogLevel("WARN");
        tempDir = Files.createTempDirectory("JavaXmlSuite");
        tempDir.toFile().deleteOnExit();
    }

    @After
    public void tearDown() {
        // Guard against NPE when setUp failed before the session was created.
        if (spark != null) {
            spark.stop();
            spark = null;
        }
    }

    /** Fresh empty subdirectory under the per-test temp dir, for write tests. */
    private Path getEmptyTempDir() throws IOException {
        return Files.createTempDirectory(tempDir, "test");
    }

    @Test
    public void testXmlParser() {
        Dataset<Row> df = (new XmlReader()).withRowTag(BOOKS_FILE_TAG).xmlFile(spark, BOOKS_FILE);
        String prefix = XmlOptions.DEFAULT_ATTRIBUTE_PREFIX();
        long result = df.select(prefix + "id").count();
        // JUnit contract is assertEquals(expected, actual).
        Assert.assertEquals(NUM_BOOKS, result);
    }

    @Test
    public void testLoad() {
        Map<String, String> options = new HashMap<>();
        options.put("rowTag", BOOKS_FILE_TAG);
        Dataset<Row> df = spark.read().options(options).format("xml").load(BOOKS_FILE);
        long result = df.select("description").count();
        Assert.assertEquals(NUM_BOOKS, result);
    }

    @Test
    public void testSave() throws IOException {
        Path booksPath = getEmptyTempDir().resolve("booksFile");
        Dataset<Row> df = (new XmlReader()).withRowTag(BOOKS_FILE_TAG).xmlFile(spark, BOOKS_FILE);
        // Write a projection out as XML, then read it back and re-count.
        df.select("price", "description").write().format("xml").save(booksPath.toString());

        Dataset<Row> newDf = (new XmlReader()).xmlFile(spark, booksPath.toString());
        long result = newDf.select("price").count();
        Assert.assertEquals(NUM_BOOKS, result);
    }
}