-
Notifications
You must be signed in to change notification settings - Fork 2
/
BigQueryTypes.scala
82 lines (70 loc) · 3.19 KB
/
BigQueryTypes.scala
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
package org.datatools.cc2db.types
import org.datatools.cc2db.conversions._
import com.google.cloud.bigquery.Field.Mode
import com.google.cloud.bigquery.{Field, StandardSQLTypeName}
import org.datatools.cc2db.types.basic._
/** Type class that converts generic SqlTypes into BigQuery-specific fields.
* In BigQuery, a table is made from a list of fields; for example,
* a case class is converted into SqlTypes and then into a List of BigQuery fields.
*
* @tparam A the type we want to obtain a schema from
*/
trait BigQueryTypes[A] {
/** @return a list of [[Field]]s that represents [[A]]
*/
def getFields: List[Field]
}
object BigQueryTypes {

  /** Summoner method. Allows the syntax `BigQueryTypes[A]` to obtain the implicit instance for `A`. */
  def apply[A](implicit instance: BigQueryTypes[A]): BigQueryTypes[A] = instance

  /** Factory constructor - allows easier construction of instances.
    *
    * @param fs the fields that the created instance returns from `getFields`
    * @return a [[BigQueryTypes]] instance backed by `fs`
    */
  def instance[A](fs: List[Field]): BigQueryTypes[A] =
    new BigQueryTypes[A] {
      def getFields: List[Field] = fs
    }

  /** Instance derivation via SqlTypeConversion.
    * Automatically converts camelCase names into snake_case in the process.
    * TODO: pass a function as a parameter, we should be able to decide if we want snake_case or other things from outside
    */
  implicit def fieldsFromSqlTypeConversion[A: SqlTypeConversion]: BigQueryTypes[A] =
    instance(getSchema(SqlTypeConversion[A].getType))

  /** Converts the records of a struct into BigQuery fields, one field per record, in order.
    * Record names are converted to snake_case. The mode of the struct itself is ignored here;
    * only the modes of its records end up in the resulting fields.
    *
    * NOTE: only `SqlStruct` is matched; any other `SqlType` falls through the match
    * and raises a `scala.MatchError`.
    */
  private def getSchema(sqlType: SqlType): List[Field] = sqlType match {
    case SqlStruct(records, _) =>
      records.map { case (recordName, recordType) =>
        getSchemaWithName(snakify(recordName), recordType)
      }
  }

  /** Basic SqlTypes conversions to BigQuery Fields.
    *
    * @param name    the (already converted) name of the field
    * @param sqlType the type to represent; structs are converted recursively into nested fields
    */
  private def getSchemaWithName(name: String, sqlType: SqlType): Field = {
    // Every non-struct type differs only in the BigQuery type name and the mode.
    def scalar(typeName: StandardSQLTypeName, mode: SqlTypeMode): Field =
      Field.newBuilder(name, typeName).setMode(sqlModeToBigQueryMode(mode)).build()

    sqlType match {
      case SqlInt(mode)       => scalar(StandardSQLTypeName.INT64, mode)
      case SqlFloat(mode)     => scalar(StandardSQLTypeName.FLOAT64, mode)
      case SqlDecimal(mode)   => scalar(StandardSQLTypeName.NUMERIC, mode)
      case SqlBool(mode)      => scalar(StandardSQLTypeName.BOOL, mode)
      case SqlString(mode)    => scalar(StandardSQLTypeName.STRING, mode)
      case SqlTimestamp(mode) => scalar(StandardSQLTypeName.TIMESTAMP, mode)
      case SqlStruct(subType, mode) =>
        // The nested records become sub-fields; the struct's own mode is set on the enclosing field.
        Field
          .newBuilder(name, StandardSQLTypeName.STRUCT, getSchema(SqlStruct(subType)): _*)
          .setMode(sqlModeToBigQueryMode(mode))
          .build()
    }
  }

  /** Maps the generic mode of a type onto the equivalent BigQuery field mode. */
  private def sqlModeToBigQueryMode(sqlTypeMode: SqlTypeMode): Mode = sqlTypeMode match {
    case Nullable => Mode.NULLABLE
    case Repeated => Mode.REPEATED
    case Required => Mode.REQUIRED
  }

  /** Turn a string of format "FooBar" into snake case "foo_bar".
    * An underscore is inserted before the last capital of an upper-case run that is followed
    * by a lower-case letter ("HTTPServer" -> "http_server") and between a lower-case letter
    * or digit and a capital ("myField2Name" -> "my_field2_name").
    * TODO move this somewhere else
    */
  private def snakify(name: String): String =
    name
      .replaceAll("([A-Z]+)([A-Z][a-z])", "$1_$2")
      .replaceAll("([a-z\\d])([A-Z])", "$1_$2")
      // Locale.ROOT keeps the result independent of the JVM default locale
      // (e.g. under a Turkish locale "I" would otherwise become a dotless "ı").
      .toLowerCase(java.util.Locale.ROOT)
}