In [5]:
case class OrderLine (
    sku : String,
    productName : String,
    thumbnailImage: String,
    quantity : Double,
    unitPrice : Double,
    totalPrice : Double
)

case class Order (
    customerId : java.util.UUID ,
    orderId : java.util.UUID ,
    date : java.util.Date ,
    OrderLines_ : List[OrderLine] ,
    totalPrice : Double
)

case class RecommendedProduct (
    sku : String,
    productName : String,
    regularPrice : Double,
    thumbnailImage : String
);

case class ProductRecommendations (
    sku : String,
    productName : String,
    recommendedProducts : List[RecommendedProduct]
);



In [11]:
val orders = sc.cassandraTable[Order]("retail_ks","orders").cache

In [12]:
orders.count

Long = 2106

In [13]:
val orderlines = orders.flatMap ( order => order.OrderLines_).map( ol => (ol.sku, (ol.productName, ol.thumbnailImage, ol.quantity, ol. unitPrice, ol.totalPrice)))

In [19]:
orderlines.take(10)

Array[(String, (String, String, Double, Double, Double))] = Array((4204502,(Insignia™ - 40" Class (40" Diag.) - LED - 1080p - Smart - HDTV Roku TV - Black,http://images.bestbuy.com/BestBuy_US/images/pac/products/1313/1313521268/1313521268_s.gif,73.0,329.99,24089.27)), (1654884,(CorLiving - Full-Motion TV Wall Mount for Most 23" - 42" Flat-Panel TVs - Black,http://images.bestbuy.com/BestBuy_US/images/products/1654/1654884_s.gif,9.0,39.99,359.91)), (5957224,(Cambridge Audio - Topaz CD10 CD Player - Black,http://images.bestbuy.com/BestBuy_US/images/products/5957/5957224_s.gif,9.0,349.99,3149.91)), (2750462,(BIC America - 5-1/4" 2-Way Center-Channel Speaker - Black,http://images.bestbuy.com/BestBuy_US/images/products/2750/2750462_s.gif,9.0,89.99,809.91)), (5009300,(Bose® - Solo Sound...

In [15]:
val soldproducts = orderlines.reduceByKey( (a,b) => (a._1, a._2, a._3+b._3, a._4, a._5+b._5)).
    map( { case ( sku, (productName, thumbnailImage, count, unitPrice, value) ) => (sku, productName, thumbnailImage, count, value) })

In [16]:
val Top50CountSellingProducts = soldproducts.sortBy( { case ( sku, productName, thumbnailImage, count, value ) => -value } ).
    zipWithIndex.
    filter{case (_, idx) => idx < 50}.
    keys

In [20]:
Top50CountSellingProducts.take(10)

Array[(String, String, String, Double, Double)] = Array((7739048,Samsung - 78" Class (78" Diag.) - LED - Curved - 2160p - Smart - 3D - 4K Ultra HD TV - Black,http://images.bestbuy.com/BestBuy_US/images/products/7739/7739048_s.gif,1190.0,1.18999762E7), (4920300,LG - 65" Class (64.5" Diag.) - OLED - 2160p - Smart - 3D - 4K Ultra HD TV - Black,http://images.bestbuy.com/BestBuy_US/images/products/4920/4920300_s.gif,1229.0,9831975.42), (3429088,LG - 65" Class (64.5" Diag.) - OLED - Curved - 2160p - Smart - 3D - 4K Ultra HD TV - Black,http://images.bestbuy.com/BestBuy_US/images/products/3429/3429088_s.gif,892.0,6243982.159999999), (5034600,Samsung - 65" Class - (64.5" Diag.) - LED - Curved - 4K SUHD (2160p) - Smart - 4K Ultra HD TV - Black,http://images.bestbuy.com/BestBuy_US/images/pr...

In [18]:
Top50CountSellingProducts.saveToCassandra("retail_ks","top50_selling_products", 
    SomeColumns("sku", "product_name" , "thumbnail_image" , "sale_count" , "sale_value"))

In [10]:
val ProductCoOccurance = orders.
    flatMap( order => order.OrderLines_.
        map(  ol => ( ol.sku , order.OrderLines_.
            map(ol => (ol.sku, (ol.productName, ol.thumbnailImage, ol.quantity, ol. unitPrice, ol.totalPrice))).filter ( ol3 => ol3._1!=`ol`.sku) 
                ) 
            )
            ).reduceByKey(_++_).mapValues (
                _.groupBy( { case (sku,(pn, ti, q, up, tp)) => sku } ).values.toList.
                    map( listOfProduct => listOfProduct.reduce( (a,b) => (a._1, (a._2._1, a._2._2, a._2._3+b._2._3, a._2._4, a._2._5+b._2._5))) ).
                    sortBy(-_._2._5).
                    map( lop => RecommendedProduct(lop._1, lop._2._1, lop._2._4  , lop._2._2  ))  
            ).
            map( pco => ProductRecommendations(pco._1, "", pco._2))

In [11]:
ProductCoOccurance

org.apache.spark.rdd.RDD[productRecommendations] = MapPartitionsRDD[15] at map at <console>:39

In [12]:
ProductCoOccurance.collect.foreach(println)

productRecommendations(a4a70900-24e1-11df-8924-001ff3591713,,List(recommendedProduct(a4a70900-24e1-11df-8924-001ff3591714,Product3,5.0,http), recommendedProduct(a4a70900-24e1-11df-8924-001ff3591712,Product1,10.0,http), recommendedProduct(a4a70900-24e1-11df-8924-001ff3591715,Product4,1.0,http)))
productRecommendations(a4a70900-24e1-11df-8924-001ff3591714,,List(recommendedProduct(a4a70900-24e1-11df-8924-001ff3591713,Product2,1000.0,http), recommendedProduct(a4a70900-24e1-11df-8924-001ff3591712,Product1,5.0,http), recommendedProduct(a4a70900-24e1-11df-8924-001ff3591715,Product4,1.0,http)))
productRecommendations(a4a70900-24e1-11df-8924-001ff3591712,,List(recommendedProduct(a4a70900-24e1-11df-8924-001ff3591713,Product2,1000.0,http), recommendedProduct(a4a70900-24e1-11df-8924-001ff3591714,Product3,5.0,http)))
productRecommendations(a4a70900-24e1-11df-8924-001ff3591715,,List(recommendedProduct(a4a70900-24e1-11df-8924-001ff3591714,Product3,5.0,http), recommendedProduct(a4a70900-24e1-11df-8924

In [13]:
ProductCoOccurance.saveToCassandra("retail_ks","product_recommendations")

Name: java.lang.IllegalArgumentException
Message: Failed to get converter for field "recommendedProducts" of type scala.List[recommendedProduct] in productRecommendations mapped to column "recommended_products" of "retail_ks.product_recommendations" 
StackTrace: com.datastax.spark.connector.writer.MappedToGettableDataConverter$$anon$1$$anonfun$5.apply(MappedToGettableDataConverter.scala:155)
com.datastax.spark.connector.writer.MappedToGettableDataConverter$$anon$1$$anonfun$5.apply(MappedToGettableDataConverter.scala:148)
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
scala.collection.immutable.Range.foreach(Range.scala:141)
scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
scala.collection.AbstractTraversable.map(Traversable.scala:105)
com.datastax.spark.connector.writer.MappedToGettableDataConverter$$anon$1.<init>(MappedToGettableDataConverter.scala:14

In [15]:
sc.cassandraTable[productRecommendations]("retail_ks","product_recommendations").collect

Array[productRecommendations] = Array(productRecommendations(a4a70900-24e1-11df-8924-001ff3591712,Product1,List(recommendedProduct(a4a70900-24e1-11df-8924-001ff3591713,Product2,100.0,http), recommendedProduct(a4a70900-24e1-11df-8924-001ff3591714,Product3,100.0,http))))