# Read / Write Operations to Oracle using Spark with Scala

___

### Importing Packages

In [1]:
import org.apache.spark.sql.{SparkSession,SaveMode}

### Adding Oracle Dependency

In [2]:
%AddJar http://www.datanucleus.org/downloads/maven2/oracle/ojdbc6/11.2.0.3/ojdbc6-11.2.0.3.jar

Starting download from http://www.datanucleus.org/downloads/maven2/oracle/ojdbc6/11.2.0.3/ojdbc6-11.2.0.3.jar
Finished download of ojdbc6-11.2.0.3.jar


### Creating Spark Session

In [3]:
// Obtain the active SparkSession, or create one if none exists yet.
val spark: SparkSession = SparkSession.builder().getOrCreate()

spark = org.apache.spark.sql.SparkSession@2a2f77db


org.apache.spark.sql.SparkSession@2a2f77db

### Oracle Server Credentials

In [4]:
// Oracle connection settings used by the JDBC read and write cells below.
// These bindings are never reassigned in this notebook, so they are immutable vals.
val url = "jdbc:oracle:thin:@192.168.2.11:1521/ORA12C"
val dbtable = "LIMA_TEST.SI_ATTRIB"
// NOTE(review): a SYSDBA login with a hard-coded password should not live in a notebook.
// The values can be overridden through the ORACLE_USER / ORACLE_PASSWORD environment
// variables; the literals below are only the fallback defaults.
val user = sys.env.getOrElse("ORACLE_USER", "sys as sysdba")
val password = sys.env.getOrElse("ORACLE_PASSWORD", "oracle")

url = jdbc:oracle:thin:@192.168.2.11:1521/ORA12C
dbtable = LIMA_TEST.SI_ATTRIB
user = sys as sysdba
password = oracle


oracle

### Reading File

In [6]:
// Load the tab-delimited file with a header row, letting Spark infer column types.
// Kept as a `var` because later cells reassign it after renaming columns.
var filedf = spark.read
  .format("csv")
  .option("header", "true")
  .option("inferSchema", "true")
  .option("delimiter", "\t")
  .load("../Resources/SI_attr.tab")

filedf = [Sales_Item_Id: string, Sales_Item_Description: string ... 4 more fields]


[Sales_Item_Id: string, Sales_Item_Description: string ... 4 more fields]

### Show

In [7]:
// Preview the first five rows of the file DataFrame.
filedf.show(numRows = 5)

+------------------+----------------------+-----+-----------------------+----------------+--------------------------------+
|     Sales_Item_Id|Sales_Item_Description|  GIC|Product_Lifecycle_State|Purchasing_Group|Average Standard Production Cost|
+------------------+----------------------+-----+-----------------------+----------------+--------------------------------+
|    IE2:15HP-RFU-7|           7GHZ HP ODU|07599|               OBSOLETE|            null|                             0.0|
|    IE2:15HP-RFU-8|  1500HP RF UNIT, F...|07599|               OBSOLETE|            null|                             0.0|
|IE2:15HP-SHORT-112|    1500HP SHORT, FGHZ|07599|               OBSOLETE|            null|                             0.0|
|IE2:15HP-SHORT-137|     1500P SHORT, FGHZ|07599|               OBSOLETE|            null|                             0.0|
| IE2:15HP-TERM-112|  1500HP 50 OHM TER...|09208|               OBSOLETE|            null|                             0.0|
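+------------------+----------------------+-----+-----------------------+----------------+--------------------------------+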

### Schema

In [8]:
// Print the inferred schema of the file DataFrame.
filedf.printSchema()

root
 |-- Sales_Item_Id: string (nullable = true)
 |-- Sales_Item_Description: string (nullable = true)
 |-- GIC: string (nullable = true)
 |-- Product_Lifecycle_State: string (nullable = true)
 |-- Purchasing_Group: string (nullable = true)
 |-- Average Standard Production Cost: double (nullable = true)



### Reading table from ORACLE

In [9]:
// Read the target Oracle table over JDBC using the connection settings defined above.
val jdbcDF = spark.read
  .format("jdbc")
  .options(Map(
    "url" -> url,
    "dbtable" -> dbtable,
    "user" -> user,
    "password" -> password,
    "driver" -> "oracle.jdbc.OracleDriver"
  ))
  .load()

jdbcDF = [SALES_ITEM_ID: string, SALES_ITEM_DESCRIPTION: string ... 4 more fields]


[SALES_ITEM_ID: string, SALES_ITEM_DESCRIPTION: string ... 4 more fields]

### Checking Schema of the table

In [10]:
// Print the schema Spark derived for the Oracle table.
jdbcDF.printSchema()

root
 |-- SALES_ITEM_ID: string (nullable = true)
 |-- SALES_ITEM_DESCRIPTION: string (nullable = true)
 |-- GIC: string (nullable = true)
 |-- PRODUCT_LIFECYCLE_STATE: string (nullable = true)
 |-- PURCHASING_GROUP: string (nullable = true)
 |-- AVERAGESTANDARDPRODUCTIONCOST: string (nullable = true)



### Renaming DataFrame columns to match the table columns

In [13]:
// The file header contains spaces; rename it to the space-free Oracle column name.
filedf = filedf.withColumnRenamed(
  existingName = "Average Standard Production Cost",
  newName = "AVERAGESTANDARDPRODUCTIONCOST"
)

filedf = [Sales_Item_Id: string, Sales_Item_Description: string ... 4 more fields]


[Sales_Item_Id: string, Sales_Item_Description: string ... 4 more fields]

### Converting all file DataFrame column names to upper case

In [14]:
// Upper-case every column name so the DataFrame lines up with the Oracle table's columns.
// Locale.ROOT keeps the conversion independent of the JVM default locale
// (for example, a Turkish locale would turn 'i' into 'İ' and break the name match).
filedf = filedf.toDF(filedf.columns.map(_.toUpperCase(java.util.Locale.ROOT)): _*)

filedf = [SALES_ITEM_ID: string, SALES_ITEM_DESCRIPTION: string ... 4 more fields]


[SALES_ITEM_ID: string, SALES_ITEM_DESCRIPTION: string ... 4 more fields]

### Checking Table Count before writing

In [15]:
// Row count of the Oracle table as read through jdbcDF, taken before the write cell runs.
jdbcDF.count()

0

### Writing to Oracle

In [16]:
// Append the file DataFrame's rows to the Oracle table over JDBC.
// The driver class is oracle.jdbc.OracleDriver, the same public class the read cells use,
// rather than the legacy oracle.jdbc.driver.OracleDriver name.
filedf.write
  .format("jdbc")
  .options(Map(
    "url" -> url,
    "dbtable" -> dbtable,
    "user" -> user,
    "password" -> password,
    "driver" -> "oracle.jdbc.OracleDriver"
  ))
  .mode(SaveMode.Append)
  .save()

### Checking Table Count after writing

In [17]:
// Re-create the JDBC DataFrame over the same table for the post-write row count.
val jdbcDF = spark.read
  .format("jdbc")
  .options(Map(
    "url" -> url,
    "dbtable" -> dbtable,
    "user" -> user,
    "password" -> password,
    "driver" -> "oracle.jdbc.OracleDriver"
  ))
  .load()

jdbcDF = [SALES_ITEM_ID: string, SALES_ITEM_DESCRIPTION: string ... 4 more fields]


[SALES_ITEM_ID: string, SALES_ITEM_DESCRIPTION: string ... 4 more fields]

In [18]:
// Row count of the Oracle table after the append, for comparison with the pre-write count.
jdbcDF.count()

361560

### Closing Spark Session

In [19]:
// Stop the Spark session now that the read/write checks are finished.
spark.stop()