# Read, Insert, and Update Operations on RethinkDB Using Spark with Scala
___

### Importing Packages

In [15]:
import org.apache.spark.sql.SparkSession
import com.rethinkdb.RethinkDB
import com.rethinkdb.net.Connection
import play.api.libs.json._

lastException: Throwable = null


### Adding RethinkDB Dependency Jars

In [16]:
%AddJar https://repo1.maven.org/maven2/com/rethinkdb/rethinkdb-driver/2.3.3/rethinkdb-driver-2.3.3.jar
%AddJar https://repo1.maven.org/maven2/com/typesafe/play/play-json_2.11/2.6.0-M7/play-json_2.11-2.6.0-M7.jar
%AddJar https://repo1.maven.org/maven2/com/googlecode/json-simple/json-simple/1.1.1/json-simple-1.1.1.jar

Using cached version of rethinkdb-driver-2.3.3.jar
Using cached version of play-json_2.11-2.6.0-M7.jar
Using cached version of json-simple-1.1.1.jar


### Establishing Rethink Connection

In [17]:
// Bind the driver's `RethinkDB.r` handle to a short name; the cells below use it to
// build the connection (`r.connection()`), documents (`r.hashMap`) and table queries (`r.table`).
val r: RethinkDB = RethinkDB.r

r = com.rethinkdb.RethinkDB@19e1d37


com.rethinkdb.RethinkDB@19e1d37

In [18]:
// Connect to the RethinkDB server at 192.168.2.16:28015 with "MetaData" passed to `.db(...)`.
// The resulting connection is handed to every `run(...)` call in the cells below.
val rethinkClient: Connection =
  r.connection()
    .hostname("192.168.2.16")
    .port(28015)
    .db("MetaData")
    .connect()

rethinkClient = com.rethinkdb.net.Connection@7535ba13


com.rethinkdb.net.Connection@7535ba13

### 1) Inserting Data into the `sys_cntl` Table

In [111]:
// Insert one control record into "sys_cntl": the document id is "UPLOAD_BATCH_NO" and
// all counter settings live in a nested "payload" object. The cell evaluates to the
// write summary rendered as a String.
{
  // Nested object stored under the document's "payload" field.
  val payload = r.hashMap("CNTL_KEY", "UPLOAD_BATCH_NO")
    .`with`("CREATED_DATE", "")
    .`with`("CREATED_USER", "Admin")
    .`with`("ID", 1)
    .`with`("INC_VALUE", 1)
    .`with`("INIT_VALUE", 1)
    .`with`("MAX_VALUE", "99999999")
    .`with`("MODIFIED_DATE", "")
    .`with`("MODIFIED_USER", "Admin")
    .`with`("SEQ_NO", 3368)

  // Top-level document: the id plus the payload built above.
  val document = r.hashMap("id", "UPLOAD_BATCH_NO").`with`("payload", payload)

  // `.toString` stays chained directly on `run(...)`, exactly as in the original expression.
  r.table("sys_cntl").insert(document).run(rethinkClient).toString
}


{deleted=0, inserted=1, unchanged=0, replaced=0, errors=0, skipped=0}

### 2) Reading the JSON value using the Key

In [112]:
// Look up the document with id "UPLOAD_BATCH_NO", take its "payload" field, convert it
// with `toJson`, and keep the result as a String for Spark to parse in the next step.
val jsonstr: String =
  r.table("sys_cntl")
    .get("UPLOAD_BATCH_NO")
    .getField("payload")
    .toJson
    .run(rethinkClient)
    .toString

jsonstr = {"CNTL_KEY":"UPLOAD_BATCH_NO","CREATED_DATE":"","CREATED_USER":"Admin","ID":1,"INC_VALUE":1,"INIT_VALUE":1,"MAX_VALUE":"99999999","MODIFIED_DATE":"","MODIFIED_USER":"Admin","SEQ_NO":3368}


{"CNTL_KEY":"UPLOAD_BATCH_NO","CREATED_DATE":"","CREATED_USER":"Admin","ID":1,"INC_VALUE":1,"INIT_VALUE":1,"MAX_VALUE":"99999999","MODIFIED_DATE":"","MODIFIED_USER":"Admin","SEQ_NO":3368}

#### Convert String(JSON) to Spark DataFrame

In [113]:
// Parse the single JSON document held in `jsonstr` into a DataFrame by wrapping it in a
// one-element Dataset[String]. Declared as `val` because the reference is never reassigned,
// and without a trailing `.toDF()` because `spark.read.json` already returns a DataFrame.
val sysCntlDf = spark.read.json(Seq(jsonstr).toDS)

sysCntlDf = [CNTL_KEY: string, CREATED_DATE: string ... 8 more fields]


[CNTL_KEY: string, CREATED_DATE: string ... 8 more fields]

### Show

In [114]:
// Print the DataFrame built from the "payload" JSON as a text table to check the parsed columns and values.
sysCntlDf.show()

+---------------+------------+------------+---+---------+----------+---------+-------------+-------------+------+
|       CNTL_KEY|CREATED_DATE|CREATED_USER| ID|INC_VALUE|INIT_VALUE|MAX_VALUE|MODIFIED_DATE|MODIFIED_USER|SEQ_NO|
+---------------+------------+------------+---+---------+----------+---------+-------------+-------------+------+
|UPLOAD_BATCH_NO|            |       Admin|  1|        1|         1| 99999999|             |        Admin|  3368|
+---------------+------------+------------+---+---------+----------+---------+-------------+-------------+------+



### 3) Updating Data

In [115]:
// Set payload.SEQ_NO to 3369 on the "UPLOAD_BATCH_NO" document. Only SEQ_NO is supplied in
// the update map; the cell evaluates to the write summary rendered as a String.
r.table("sys_cntl")
  .get("UPLOAD_BATCH_NO")
  .update(r.hashMap("payload", r.hashMap("SEQ_NO", 3369)))
  .run(rethinkClient)
  .toString

{deleted=0, inserted=0, unchanged=0, replaced=1, errors=0, skipped=0}

#### Verifying the Data

In [148]:
// Re-read the "payload" field of the updated document (no `toJson` this time, so the
// result is rendered through the returned object's own `toString`) to check SEQ_NO.
r.table("sys_cntl")
  .get("UPLOAD_BATCH_NO")
  .getField("payload")
  .run(rethinkClient)
  .toString

{MODIFIED_USER=Admin, INIT_VALUE=1, CNTL_KEY=UPLOAD_BATCH_NO, CREATED_DATE=, SEQ_NO=3369, INC_VALUE=1, CREATED_USER=Admin, ID=1, MAX_VALUE=99999999, MODIFIED_DATE=}

### Closing Spark Session

In [149]:
// Release the RethinkDB connection opened earlier in the notebook before stopping Spark,
// so the connection is not left open. `finally` ensures the Spark session is still
// stopped even if closing the connection throws.
try {
  rethinkClient.close()
} finally {
  spark.stop()
}