In [None]:
%%cql SELECT * FROM music.tracks_by_album LIMIT 5

### Note: The newest versions of the kernel already provide a `sqlContext`

### Create a DataFrame on a Cassandra table

In [None]:
// Load the Cassandra table music.tracks_by_album as a DataFrame using the
// "org.apache.spark.sql.cassandra" data source format.
val df = sqlContext.read.
  format("org.apache.spark.sql.cassandra").
  option("keyspace", "music").
  option("table", "tracks_by_album").
  load()

### Explain the query plan and view some data

In [None]:
// Print the column names and types of the DataFrame.
df.printSchema()

In [None]:
// Print the query plan Spark will use to read this DataFrame.
df.explain()

In [None]:
// Display a sample of rows from the DataFrame.
df.show()

In [None]:
// List the distinct values of the album_year column.
df.select("album_year").distinct().show()

In [None]:
// Count the rows for each album_year.
df.groupBy("album_year").count().show()

### Group By Decade
You can use various Spark SQL functions. Let's use *floor* to round each album year down to the start of its decade.

In [None]:
import org.apache.spark.sql.functions._

In [None]:
// Bucket album_year into decades (floor(year / 10) * 10) and count rows per bucket.
df.groupBy(floor(df.col("album_year") / 10) * 10).count().show()

### Clean it up

In [None]:
// Same decade grouping as before, but with the grouping key cast to int and
// given the readable column name "decade".
val tmp = df.groupBy(
  (floor(df.col("album_year") / 10) * 10).cast("int").as("decade")
).count()
tmp.show()

In [None]:
// Keep the decade column and expose the aggregate "count" column as "album_count".
val count_by_decade = tmp.select(
  tmp.col("decade"),
  tmp.col("count").as("album_count")
)
count_by_decade.show()

### Save to a new table

In [None]:
%%cql CREATE TABLE IF NOT EXISTS steve.albums_by_decade (decade int PRIMARY KEY, album_count int)

In [None]:
import org.apache.spark.sql.SaveMode

In [None]:
// Write the per-decade counts to the Cassandra table steve.albums_by_decade.
// NOTE(review): SaveMode.Overwrite presumably replaces the table's existing contents, and
// some connector versions may require an explicit "confirm.truncate" option for this
// mode — confirm against the connector version in use.
count_by_decade.write.
  format("org.apache.spark.sql.cassandra").
  option("table", "albums_by_decade").
  option("keyspace", "steve").
  mode(SaveMode.Overwrite).
  save()

### Check on it

In [None]:
%%cql SELECT * FROM steve.albums_by_decade