In [1]:
# CREATE CASSANDRA CONNECTION
import cassandra
from cassandra.cluster import Cluster
# Open a connection to the Cassandra node on localhost and start a session.
# A failure is reported on stdout instead of being raised.
try:
    cluster = Cluster(contact_points=['127.0.0.1'])
    session = cluster.connect()
except Exception as err:
    print(err)

In [2]:
# Make sure the `udacity` keyspace exists (SimpleStrategy, one replica).
# IF NOT EXISTS makes the statement safe to re-run.
try:
    session.execute(
        "CREATE KEYSPACE IF NOT EXISTS udacity "
        "WITH REPLICATION = "
        "{'class': 'SimpleStrategy', 'replication_factor':1}"
    )
except Exception as err:
    print(err)

In [3]:
# Select `udacity` as the session's keyspace; the statements that follow
# refer to their tables without a keyspace prefix.
try:
    session.set_keyspace('udacity')
except Exception as err:
    print(err)

Imagine we would like to start creating a new Music Library of albums. We are going to work with one of the queries from 2.0.
- We want to ask 1 question of our data 
    - Give me every album in my music library that was released in a given year
    - select * from music_library WHERE YEAR=1970

How should the data be modeled? What should be our Primary Key and Partition Key? Since our query filters on YEAR, let's start with that. Is partitioning our data by year a good idea? In this case our data is very small, but if we had a larger data set of albums, partitioning by YEAR might be a fine choice. We would need to validate this against our dataset, because we want an even spread of the data across partitions. 

Table Name: music_library
- column 1: Year
- column 2: Artist Name
- column 3: Album Name
- column 4: City
- PRIMARY KEY(year)

In [4]:
# First modelling attempt: `year` on its own is the whole primary key.
query = (
    "CREATE TABLE IF NOT EXISTS music_library "
    "(year int, artist_name text, album_name text, city text, "
    "PRIMARY KEY (year))"
)
try:
    session.execute(query)
except Exception as err:
    print(err)

In [7]:
# Parameterised insert statement; each %s is bound by the driver.
query = (
    "INSERT INTO music_library (year, artist_name, album_name, city)"
    " VALUES (%s, %s, %s, %s)"
)

# One tuple per album, in column order: (year, artist_name, album_name, city).
# NOTE(review): "San Die" and the trailing space in "Los Angeles " look like
# typos; they are kept exactly as they were -- confirm before correcting.
albums = [
    (1970, "The Beatles", "Let it Be", "Liverpool"),
    (1965, "The Beatles", "Rubber Soul", "Oxford"),
    (1965, "The Who", "My Generation", "London"),
    (1966, "The Monkees", "The Monkees", "Los Angeles "),
    (1970, "The Carpenters", "Close To You", "San Die"),
]

# Every insert is attempted on its own, so one failure is printed and the
# remaining rows are still written.
for album in albums:
    try:
        session.execute(query, album)
    except Exception as err:
        print(err)

In [8]:
# THE PROBLEM ABOVE IS WITH THE PRIMARY KEY BEING YEAR IT WILL OVERWRITE PREVIOUS DATA WITH THAT KEY
# Two albums were inserted for 1965, but because `year` is the entire primary
# key the second insert replaced the first, so only one row is returned here.
query = "select * from music_library WHERE YEAR=1965"
try:
    rows = session.execute(query)
except Exception as e:
    print(e)
else:
    # Iterate only when the query succeeded. Previously a failed query fell
    # through to the loop, where `rows` was either undefined (NameError) or
    # still bound to the result of an earlier cell.
    for row in rows:
        print(row.year, row.artist_name, row.album_name, row.city)

1965 The Who My Generation London


Try again. Focus on making the PRIMARY KEY unique. Looking at our dataset, do we have anything that is unique for each row? 
- We have a couple of options (City and Album Name), but on their own they will not support the query we need, which looks for albums released in a particular year. 
- Let's make a composite key of the `YEAR` AND `ALBUM NAME`. This is assuming that an album name is unique to the year it was released. 

In [9]:
# Second modelling attempt: composite primary key of `year` and `album_name`,
# so two albums released in the same year become separate rows.
query = (
    "CREATE TABLE IF NOT EXISTS music_library1"
    "(year int, artist_name text, album_name text, city text, "
    "PRIMARY KEY (year, album_name))"
)
try:
    session.execute(query)
except Exception as err:
    print(err)

In [10]:
# Parameterised insert statement; each %s is bound by the driver.
query = (
    "INSERT INTO music_library1 (year, artist_name, album_name, city)"
    " VALUES (%s, %s, %s, %s)"
)

# One tuple per album, in column order: (year, artist_name, album_name, city).
# NOTE(review): "San Die" and the trailing space in "Los Angeles " look like
# typos; they are kept exactly as they were -- confirm before correcting.
albums = [
    (1970, "The Beatles", "Let it Be", "Liverpool"),
    (1965, "The Beatles", "Rubber Soul", "Oxford"),
    (1965, "The Who", "My Generation", "London"),
    (1966, "The Monkees", "The Monkees", "Los Angeles "),
    (1970, "The Carpenters", "Close To You", "San Die"),
]

# Every insert is attempted on its own, so one failure is printed and the
# remaining rows are still written.
for album in albums:
    try:
        session.execute(query, album)
    except Exception as err:
        print(err)

In [11]:
# CAN NOW QUERY BOTH RECORDS BECAUSE THERE IS UNIQUE PRIMARY KEY
# With PRIMARY KEY (year, album_name) each album is its own row, so albums
# released in the same year no longer overwrite one another.
# NOTE: this lookup restricts only `album_name` (the clustering column) and
# not the partition key `year`, which is why ALLOW FILTERING is required.
# The query this table was modelled for is the one that filters on `year`.
query = "select * from music_library1 WHERE album_name='Let it Be' ALLOW FILTERING"
try:
    rows = session.execute(query)
except Exception as e:
    print(e)
else:
    # Iterate only when the query succeeded. Previously a failed query fell
    # through to the loop and silently printed the stale `rows` left behind
    # by the earlier music_library query (or raised NameError).
    for row in rows:
        print(row.year, row.artist_name, row.album_name, row.city)

1970 The Beatles Let it Be Liverpool


In [52]:
# Clean up: drop both demo tables. Each drop is attempted independently and
# a failure is printed rather than raised.
for drop_statement in ('Drop table music_library', 'Drop table music_library1'):
    try:
        session.execute(drop_statement)
    except Exception as err:
        print(err)

In [12]:
# CLOSE THE SESSION AND CLUSTER CONNECTION
# The session is shut down first, then the cluster object it was created from.
session.shutdown()
cluster.shutdown()