### Importing Vegas-Viz, Spark and Log4j

In [13]:
import $ivy.`org.vegas-viz::vegas:0.3.11`

import $ivy.`org.vegas-viz::vegas-spark:0.3.11`

import $ivy.`org.apache.spark::spark-sql:2.4.0`
import $ivy.`org.apache.spark::spark-mllib:2.4.0`
import org.apache.log4j.Logger
import org.apache.log4j.Level

import org.apache.spark.sql.SparkSession
import vegas._

import vegas.render.WindowRenderer._

import vegas.sparkExt._
import org.apache.spark.ml.feature.VectorAssembler

import $ivy.$

import $ivy.$

import $ivy.$
import $ivy.$
import org.apache.log4j.Logger
import org.apache.log4j.Level

import org.apache.spark.sql.SparkSession
import vegas._

import vegas.render.WindowRenderer._

import vegas.sparkExt._
import org.apache.spark.ml.feature.VectorAssembler

### Creating SparkSession

In [14]:
// Raise the threshold of the "org" logger (an ancestor of org.apache.spark in the log4j
// hierarchy) to ERROR *before* the session is built. Doing it afterwards, as the cell
// originally did, only silences messages logged once Spark has already started up.
Logger.getLogger("org").setLevel(Level.ERROR)

// Create (or reuse) a SparkSession running locally with the "local[*]" master.
val sparkSession = SparkSession.builder
  .appName("Stock Prediction Analysis")
  .master("local[*]")
  .getOrCreate()

sparkSession: SparkSession = org.apache.spark.sql.SparkSession@421fb836

### Reading CSV file to create DataFrame

In [15]:
// Load every CSV file matching ./Output/*.csv into one DataFrame. The "header" option
// takes column names from the first line; "inferSchema" asks Spark to infer column types.
val inputDataFrame = sparkSession.read
  .option("header", true)
  .option("inferSchema", true)
  .csv("./Output/*.csv")

inputDataFrame: org.apache.spark.sql.package.DataFrame = [Open: double, High: double ... 5 more fields]

In [16]:
// Print the first five rows as a quick check of the loaded columns and values.
inputDataFrame.show(numRows = 5)

+---------+---------+-------+--------+------+-------------------+---------------------+
|     Open|     High|    Low|   Close|Volume|               Date|Predicted Close Price|
+---------+---------+-------+--------+------+-------------------+---------------------+
|  1733.58|  1733.58|1733.58| 1733.58| 375.0|2020-12-24 11:13:00|              1734.36|
|1733.6266|1733.6266|1733.08|  1733.4|1318.0|2020-12-24 11:14:00|              1733.98|
|  1733.03|  1733.03|1733.03| 1733.03| 655.0|2020-12-24 11:15:00|              1733.81|
|1733.2177| 1734.165| 1732.7|1734.165|1142.0|2020-12-24 11:16:00|              1734.33|
|  1733.59| 1734.165|1733.16| 1733.84|1400.0|2020-12-24 11:17:00|              1734.48|
+---------+---------+-------+--------+------+-------------------+---------------------+
only showing top 5 rows



### Visualising Predicted Close Price Change with respect to Time


In [17]:
// Line chart of the "Predicted Close Price" column against "Date".
// X treats each timestamp as an ordinal value and draws grid lines;
// Y is quantitative with its scale domain pinned to 1700 - 1800.
Vegas("Change of Predicted Close Price with respect to Time", width = 1200, height = 200)
  .withDataFrame(inputDataFrame)
  .mark(Line)
  .encodeX(field = "Date", dataType = Ordinal, axis = Axis(grid = true))
  .encodeY(
    field = "Predicted Close Price",
    dataType = Quant,
    scale = Scale(domainValues = List(1700.0, 1800.0))
  )
  .show

### Visualising Relation Between Predicted Close Price And Close

In [18]:
// Bin "Predicted Close Price" into at most 20 buckets along X and plot the mean of the
// actual "Close" column for each bucket along Y.
// The Y data type is passed explicitly (Quant) instead of being omitted, matching the
// other quantitative encodings and keeping the generated field definition fully typed.
Vegas("Relation Between Predicted Close Price And Close", width = 600)
  .withDataFrame(inputDataFrame)
  .mark(Line)
  .encodeX(
    "Predicted Close Price",
    Quant,
    axis = Axis(title = "Predicted Close Price", grid = true),
    bin = Bin(maxbins = 20)
  )
  .encodeY(
    "Close",
    Quant,
    scale = Scale(domainValues = List(1700, 1800.0)),
    aggregate = AggOps.Mean
  )
  .show

### Visualising Relation of Predicted Close Price And Close Price with respect to Time
### Red Color - Close Price And Green Color - Predicted Close Price

In [19]:
// Two line layers drawn over the same "Date" axis, both with the Y scale domain
// pinned to 1720 - 1750:
//   layer 1 (configMark color "red")   -> the "Close" column
//   layer 2 (configMark color "green") -> the "Predicted Close Price" column
// The second layer also carries a shape encoding whose legend is titled "Predicted Price"
// and oriented to the left.
Vegas.layered("Relation Between Predicted Close Price And Close")
  .withDataFrame(inputDataFrame)
  .withLayers(
    Layer()
      .mark(Line)
      .encodeX(field = "Date", dataType = Ordinal, axis = Axis(grid = true))
      .encodeY(
        field = "Close",
        dataType = Quant,
        scale = Scale(domainValues = List(1720.0, 1750.0)),
        axis = Axis(title = "Close Price")
      )
      .configMark(color = "red"),
    Layer()
      .mark(Line)
      .encodeX(field = "Date", dataType = Ordinal, axis = Axis(grid = true))
      .encodeY(
        field = "Predicted Close Price",
        dataType = Quant,
        scale = Scale(domainValues = List(1720.0, 1750.0))
      )
      .encodeShape(
        field = "Predicted Close Price",
        dataType = Quant,
        legend = Legend(orient = "left", title = "Predicted Price")
      )
      .configMark(color = "green")
  )
  .show

### Creating a Features column

In [20]:
// Configure an assembler that packs the Open, High, Low and Volume columns into a
// single vector column called "features".
val vectorAssembler = new VectorAssembler()
  .setOutputCol("features")
  .setInputCols(Array("Open", "High", "Low", "Volume"))

vectorAssembler: VectorAssembler = vecAssembler_569bc355ba43

In [21]:
// Run the assembler over the input data; the result keeps the original columns and
// adds the assembler's output column.
val vectorizedDataFrame: org.apache.spark.sql.DataFrame =
  vectorAssembler.transform(inputDataFrame)

vectorizedDataFrame: org.apache.spark.sql.package.DataFrame = [Open: double, High: double ... 6 more fields]

In [22]:
// Print the first five rows to confirm the new vector column is present.
vectorizedDataFrame.show(numRows = 5)

+---------+---------+-------+--------+------+-------------------+---------------------+--------------------+
|     Open|     High|    Low|   Close|Volume|               Date|Predicted Close Price|            features|
+---------+---------+-------+--------+------+-------------------+---------------------+--------------------+
|  1733.58|  1733.58|1733.58| 1733.58| 375.0|2020-12-24 11:13:00|              1734.36|[1733.58,1733.58,...|
|1733.6266|1733.6266|1733.08|  1733.4|1318.0|2020-12-24 11:14:00|              1733.98|[1733.6266,1733.6...|
|  1733.03|  1733.03|1733.03| 1733.03| 655.0|2020-12-24 11:15:00|              1733.81|[1733.03,1733.03,...|
|1733.2177| 1734.165| 1732.7|1734.165|1142.0|2020-12-24 11:16:00|              1734.33|[1733.2177,1734.1...|
|  1733.59| 1734.165|1733.16| 1733.84|1400.0|2020-12-24 11:17:00|              1734.48|[1733.59,1734.165...|
+---------+---------+-------+--------+------+-------------------+---------------------+--------------------+
only showing top 5 rows

### Finding Relation Between Open, High, Low, Volume and Close

In [23]:
// Scatter plot with one point per row: the assembled "features" column on X (nominal,
// axis hidden) against the "Close" column on Y, with the Y domain pinned to 1720 - 1750.
Vegas("Visualising Relation Between Open,High,Low,Volume,Close")
  .withDataFrame(vectorizedDataFrame)
  .encodeX(
    field = "features",
    dataType = Nominal,
    axis = Axis(title = "Features"),
    hideAxis = true
  )
  .encodeY(
    field = "Close",
    dataType = Quant,
    scale = Scale(domainValues = List(1720.0, 1750.0))
  )
  .mark(Point)
  .show

### Relation Between Open, High, Low, Volume, Close and Predicted Close

In [24]:
// Two layers drawn over the assembled "features" column (nominal X, axis hidden), both
// with the Y scale domain pinned to 1720 - 1750:
//   layer 1: points, configMark color "red"  -> the "Close" column
//   layer 2: line, configMark color "green"  -> the "Predicted Close Price" column
// The chart description previously misspelled "Predicted" as "Predcited"; corrected here.
Vegas.layered("Visualising Relation Between Open,High,Low,Volume,Close And Predicted Close")
    .withDataFrame(vectorizedDataFrame)
    .withLayers(
        Layer()
        .mark(Point)
        .encodeX("features",Nominal,axis = Axis(grid = true),hideAxis = true)
        .encodeY("Close",Quant,scale = Scale(domainValues = List(1720, 1750.0)),axis = Axis(title = "Close Price"))
        .configMark(color="red"),
        Layer()
        .mark(Line)
        .encodeX("features",Nominal,axis = Axis(grid = true),hideAxis = true)
        .encodeY("Predicted Close Price",Quant,scale = Scale(domainValues = List(1720, 1750.0)))
        .configMark(color="green")
        )
    .show
