/
defs.jl
152 lines (112 loc) · 4.64 KB
/
defs.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# Extend these Base/Statistics generics with Spark-aware methods elsewhere in
# the package (e.g. column aggregations), rather than shadowing them.
import Base: min, max, minimum, maximum, sum, count
import Statistics: mean

# JavaCall proxy types for the JVM classes this package wraps.
# Naming convention: `J` + the Java/Scala class name.

# --- Spark core / RDD API ---
const JSparkConf = @jimport org.apache.spark.SparkConf
const JRuntimeConfig = @jimport org.apache.spark.sql.RuntimeConfig
const JSparkContext = @jimport org.apache.spark.SparkContext
const JJavaSparkContext = @jimport org.apache.spark.api.java.JavaSparkContext
const JRDD = @jimport org.apache.spark.rdd.RDD
const JJavaRDD = @jimport org.apache.spark.api.java.JavaRDD

# --- Spark SQL sessions, readers/writers, streaming ---
const JSparkSession = @jimport org.apache.spark.sql.SparkSession
# NOTE: `$` is the JVM name separator for the nested Builder class.
const JSparkSessionBuilder = @jimport org.apache.spark.sql.SparkSession$Builder
const JDataFrameReader = @jimport org.apache.spark.sql.DataFrameReader
const JDataFrameWriter = @jimport org.apache.spark.sql.DataFrameWriter
const JDataStreamReader = @jimport org.apache.spark.sql.streaming.DataStreamReader
const JDataStreamWriter = @jimport org.apache.spark.sql.streaming.DataStreamWriter
const JStreamingQuery = @jimport org.apache.spark.sql.streaming.StreamingQuery

# --- Datasets, rows, columns, schema types ---
const JDataset = @jimport org.apache.spark.sql.Dataset
const JRelationalGroupedDataset = @jimport org.apache.spark.sql.RelationalGroupedDataset
# const JRowFactory = @jimport org.apache.spark.sql.RowFactory
const JGenericRow = @jimport org.apache.spark.sql.catalyst.expressions.GenericRow
const JGenericRowWithSchema = @jimport org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
const JRow = @jimport org.apache.spark.sql.Row
const JColumn = @jimport org.apache.spark.sql.Column
const JDataType = @jimport org.apache.spark.sql.types.DataType
const JMetadata = @jimport org.apache.spark.sql.types.Metadata
const JStructType = @jimport org.apache.spark.sql.types.StructType
const JStructField = @jimport org.apache.spark.sql.types.StructField
const JSQLFunctions = @jimport org.apache.spark.sql.functions
const JWindow = @jimport org.apache.spark.sql.expressions.Window
const JWindowSpec = @jimport org.apache.spark.sql.expressions.WindowSpec

# --- Java boxed primitives and SQL date/time types (for value conversion) ---
const JInteger = @jimport java.lang.Integer
const JLong = @jimport java.lang.Long
const JFloat = @jimport java.lang.Float
const JDouble = @jimport java.lang.Double
const JBoolean = @jimport java.lang.Boolean
const JDate = @jimport java.sql.Date
const JTimestamp = @jimport java.sql.Timestamp

# --- Java / Scala collection types used when marshalling arguments ---
const JMap = @jimport java.util.Map
const JHashMap = @jimport java.util.HashMap
const JList = @jimport java.util.List
const JArrayList = @jimport java.util.ArrayList
# const JWrappedArray = @jimport scala.collection.mutable.WrappedArray
const JArraySeq = @jimport scala.collection.mutable.ArraySeq
const JSeq = @jimport scala.collection.immutable.Seq
toString(jobj::JavaObject) = jcall(jobj, "toString", JString, ())
###############################################################################
# Type Definitions #
###############################################################################
"""
    SparkSessionBuilder

Builder for [`SparkSession`](@ref).
"""
struct SparkSessionBuilder
    jbuilder::JSparkSessionBuilder   # wrapped JVM SparkSession.Builder
end
"""
    SparkSession

The entry point to programming Spark with the Dataset and DataFrame API.
"""
struct SparkSession
    jspark::JSparkSession   # wrapped JVM SparkSession
end
"""
    RuntimeConfig

User-facing configuration API, accessible through `SparkSession.conf`.
"""
struct RuntimeConfig
    jconf::JRuntimeConfig   # wrapped JVM RuntimeConfig
end
"""
    DataFrame

A distributed collection of data grouped into named columns.
"""
struct DataFrame
    jdf::JDataset   # wrapped JVM Dataset (a DataFrame is Dataset[Row] on the JVM side)
end
"""
    GroupedData

A set of methods for aggregations on a `DataFrame`, created by
`DataFrame.groupBy()`.
"""
struct GroupedData
    # here we use PySpark's type name, not the underlying Scala's name
    jgdf::JRelationalGroupedDataset   # wrapped JVM RelationalGroupedDataset
end
"""
    Column

A column in a DataFrame.
"""
struct Column
    jcol::JColumn   # wrapped JVM Column expression
end
"""
    Row

A row in a DataFrame.
"""
struct Row
    jrow::JRow   # wrapped JVM Row
end
"""
    StructType

Struct type, consisting of a list of [`StructField`](@ref).
"""
struct StructType
    jst::JStructType   # wrapped JVM StructType (schema)
end
"""
    StructField

A field in [`StructType`](@ref).
"""
struct StructField
    jsf::JStructField   # wrapped JVM StructField
end
"""
    Window

Utility functions for defining a window in DataFrames.
"""
struct Window
    jwin::JWindow   # wrapped JVM Window (static utility class)
end
"""
    WindowSpec

A window specification that defines the partitioning, ordering, and frame
boundaries.
"""
struct WindowSpec
    jwin::JWindowSpec   # wrapped JVM WindowSpec
end
"""
    DataFrameReader

Interface used to load a `DataFrame` from external storage systems.
"""
struct DataFrameReader
    jreader::JDataFrameReader   # wrapped JVM DataFrameReader
end
"""
    DataFrameWriter

Interface used to write a `DataFrame` to external storage systems.
"""
struct DataFrameWriter
    jwriter::JDataFrameWriter   # wrapped JVM DataFrameWriter
end
"""
    DataStreamReader

Interface used to load a streaming `DataFrame` from external storage systems.
"""
struct DataStreamReader
    jreader::JDataStreamReader   # wrapped JVM DataStreamReader
end
"""
    DataStreamWriter

Interface used to write a streaming `DataFrame` to external storage systems.
"""
struct DataStreamWriter
    jwriter::JDataStreamWriter   # wrapped JVM DataStreamWriter
end
"""
    StreamingQuery

A handle to a query that is executing continuously in the background as new
data arrives.
"""
struct StreamingQuery
    jquery::JStreamingQuery   # wrapped JVM StreamingQuery handle
end