In [None]:
%%configure -f 
{ "numExecutors":16, "executorMemory":"4G", "executorCores":1, "driverMemory":"4G", "driverCores":4,
 "conf":{"spark.streaming.receiver.writeAheadLog.enable":"true"}, 
 "jars": ["wasb:///powerbi/adal4j-1.1.2.jar", "wasb:///powerbi/microsoft-spark-powerbi-connector_2.10-0.6.0.jar",
          "wasb:///streaming/spark-streaming-eventhubs_2.10-1.0.0.jar"] }

In [None]:
// Case class defining the event structure: a single String field holding the event payload.
// createStreamingContext wraps each received event (decoded from its raw bytes) in this class
// before the per-batch event count is sent to PowerBI.

case class EventContent(EventDetails: String)

// Event Hubs settings.
// Recommendation: run at least twice as many executors as the event hub has partitions. The executor count
// is set in the configure statement above; this example uses 16 executors for an event hub with 8 partitions.

val eventhubsNamespace: String                   = "***Enter Service Bus namespace here***"
val eventhubsName: String                        = "***Enter Eventhubs name here***"
val eventhubsReceivePolicyName: String           = "***Enter Eventhubs receive policy name***"
val eventhubsReceivePolicyKey: String            = "Enter Eventhubs receive policy key***"
val eventhubsConsumerGroup: String               = "$default"
val eventhubsPartitionCount: String              = "8"
val eventhubsCheckpointIntervalInSeconds: String = "15"
val eventhubsCheckpointDirectory: String         = "/EventCheckpoint-15-8-16"
val eventhubsDataDirectory: String               = "/EventCount-15-8-16/EventCount15"

// Streaming settings (all values are in seconds).

val batchingIntervalInSeconds: Int  = 15
val windowingIntervalInSeconds: Int = 15
val runDurationInSeconds: Int       = 3600

// PowerBI authentication settings.

val powerbiClientId: String        = "***Enter PowerBI Client Id here***"
val powerbiAccountUsername: String = "***Enter PowerBI Account Username here***"
val powerbiAccountPassword: String = "***Enter PowerBI Account Password here***"

// PowerBI data settings.

val powerbiDatasetName: String = "Eventhubs Spark Streaming Metrics"


In [None]:
//Method to create the streaming context and process the streaming events. The number of events processed per batch (using count)
//and the cumulative number of events processed since start (using updateStateByKey) are the two metrics sent to PowerBI.   

import org.apache.spark.streaming.{Seconds, StreamingContext}
import org.apache.spark.streaming.dstream.DStream
import org.apache.spark.streaming.eventhubs.EventHubsUtils

import com.microsoft.spark.powerbi.authentication._
import com.microsoft.spark.powerbi.models._
import com.microsoft.spark.powerbi.common._

/**
  * Builds the streaming context that reads events from Event Hubs and publishes two metrics to PowerBI:
  * the number of events per window and the cumulative number of events seen since the stream started.
  * The cumulative count is also printed and saved as text files under `eventhubsDataDirectory`.
  *
  * @param powerbiDatasetDetails PowerBI dataset that receives the metrics
  * @param powerbiTableList      tables of that dataset; the first one receives the per-batch count and the
  *                              last one receives the cumulative count
  * @param powerBIAuthentication authentication object handed to the PowerBI output operations
  * @return the configured (not yet started) streaming context, with checkpointing enabled
  * @throws IllegalArgumentException if `powerbiTableList` is empty
  */
def createStreamingContext(powerbiDatasetDetails: PowerBIDatasetDetails, powerbiTableList: List[table],
                           powerBIAuthentication: PowerBIAuthentication): StreamingContext = {

  import java.nio.charset.StandardCharsets

  // Fail fast, before any streaming resources are created, instead of hitting head/last on an empty list later.
  require(powerbiTableList.nonEmpty, "powerbiTableList must contain at least one PowerBI table")

  // Single key under which every event is counted, so updateStateByKey keeps one running total.
  val streamLengthKey: String = "StreamLength"

  // State update function: new running total = events in this batch + previous total (0 on the first batch).
  val streamLength: (Seq[Long], Option[Long]) => Option[Long] =
    (values, state) => Some(values.sum + state.getOrElse(0L))

  val eventHubsParameters = Map[String, String](
    "eventhubs.namespace" -> eventhubsNamespace,
    "eventhubs.name" -> eventhubsName,
    "eventhubs.policyname" -> eventhubsReceivePolicyName,
    "eventhubs.policykey" -> eventhubsReceivePolicyKey,
    "eventhubs.consumergroup" -> eventhubsConsumerGroup,
    "eventhubs.partition.count" -> eventhubsPartitionCount,
    "eventhubs.checkpoint.interval" -> eventhubsCheckpointIntervalInSeconds,
    "eventhubs.checkpoint.dir" -> eventhubsCheckpointDirectory
  )

  val streamingContext = new StreamingContext(sc, Seconds(batchingIntervalInSeconds))

  val eventHubsStream = EventHubsUtils.createUnionStream(streamingContext, eventHubsParameters)

  val eventHubsWindowedStream: DStream[Array[Byte]] = eventHubsStream.window(Seconds(windowingIntervalInSeconds))

  import com.microsoft.spark.powerbi.extensions.DStreamExtensions._

  // Save the number of events received per batching interval to PowerBI.
  // The payload is decoded explicitly as UTF-8 so the result does not depend on the JVM's default charset.

  eventHubsWindowedStream
    .map(m => EventContent(new String(m, StandardCharsets.UTF_8)))
    .countTimelineToPowerBI(powerbiDatasetDetails, powerbiTableList.head, powerBIAuthentication)

  val batchEventCountDStream = eventHubsWindowedStream.count()

  batchEventCountDStream.print()

  // Count the number of events received so far and save it to PowerBI and default storage

  import com.microsoft.spark.powerbi.extensions.PairedDStreamExtensions._

  val totalEventCountMappedDStream = eventHubsWindowedStream.map(m => (streamLengthKey, 1L))
  val totalEventCountDStream: DStream[(String, Long)] =
    totalEventCountMappedDStream.updateStateByKey[Long](streamLength)

  totalEventCountDStream.stateTimelineToPowerBI(powerbiDatasetDetails, powerbiTableList.last, powerBIAuthentication)

  totalEventCountDStream.print()

  // Save the cumulative count of events

  totalEventCountDStream.saveAsTextFiles(eventhubsDataDirectory)

  // Checkpointing is required by updateStateByKey; the same directory is used by the Event Hubs receiver.
  streamingContext.checkpoint(eventhubsCheckpointDirectory)

  streamingContext
}

In [None]:
//Initialize PowerBI Authentication and create or get PowerBI dataset and table

// Creates the PowerBI authentication object from the PowerBIURLs authority/resource constants and the
// client id, account username and account password configured in this notebook.
def initializeAuthentication(): PowerBIAuthentication = {
  new PowerBIAuthentication(
    PowerBIURLs.Authority,
    PowerBIURLs.Resource,
    powerbiClientId,
    powerbiAccountUsername,
    powerbiAccountPassword
  )
}

// Authentication object shared by the dataset setup below and by the streaming job.
val powerbiAuthentication: PowerBIAuthentication = initializeAuthentication()

// Table for the per-batch event count: a timestamp column plus a 64-bit integer count column.
val batchEventCountColumns = Map(
  "Timestamp" -> PowerBIDataTypes.DateTime.toString,
  "Batch Event Count" -> PowerBIDataTypes.Int64.toString
)

val batchEventCountTable = PowerBIUtils.defineTable("BatchEventCountTable", batchEventCountColumns)

// Table for the cumulative event count, with the same two-column layout.
val cumulativeEventCountColumns = Map(
  "Timestamp" -> PowerBIDataTypes.DateTime.toString,
  "Cumulative Event Count" -> PowerBIDataTypes.Int64.toString
)

val cumulativeEventCountTable = PowerBIUtils.defineTable("CumulativeEventCountTable", cumulativeEventCountColumns)

// Order matters: createStreamingContext sends the per-batch count to the first table and the
// cumulative count to the last one.
val powerbiTableList: List[table] = List(batchEventCountTable, cumulativeEventCountTable)

// Get or create the dataset with both tables, using a freshly obtained access token.
val powerbiDatasetDetails = PowerBIUtils.getOrCreateDataset(
  powerbiDatasetName,
  powerbiTableList,
  PowerBIOptions.basicFIFO,
  powerbiAuthentication.getAccessToken()
)

println(powerbiDatasetDetails)

In [None]:
 //Create or recreate streaming context using the same checkpoint directory specified as Eventhubs receiver parameter

// Restore the context from the checkpoint directory if one exists there; otherwise build a new one
// with createStreamingContext.
val streamingContext = StreamingContext.getOrCreate(
  eventhubsCheckpointDirectory,
  () => createStreamingContext(powerbiDatasetDetails, powerbiTableList, powerbiAuthentication)
)

streamingContext.start()

// The timeout is in milliseconds and typed Long; multiplying by 1000L keeps the arithmetic in Long so that
// large run durations (more than about 24.8 days) cannot overflow Int.
streamingContext.awaitTerminationOrTimeout(runDurationInSeconds * 1000L)