# Lesson 3 Exercise 1: Three Queries Three Tables
<img src="../../../images/cassandra_logo.png" width="250" height="250">

In [1]:
from src.utils.cassandra.generic_commands import CassandraCommands

#### Create a connection to the database

In [2]:
# Create the helper object that every later cell uses to talk to Cassandra.
cassandra = CassandraCommands()

CassandraCommands class initiated


In [3]:
# Connect to the Cassandra instance listening on localhost (127.0.0.1).
cassandra.connect('127.0.0.1')

Connection setup with cassandra at 127.0.0.1


#### Create and connect to keyspace

In [4]:
# Create the keyspace that will hold the three tables of this exercise.
cassandra.create_keyspace('data_engeneering_nano_degree')

Keyspace named data_engeneering_nano_degree created


In [5]:
# Connect to the exercise keyspace; the table statements and queries below
# do not qualify table names with a keyspace themselves.
cassandra.connect_keyspace('data_engeneering_nano_degree')

Connected to keyspace: data_engeneering_nano_degree


#### Let's imagine we would like to start creating a Music Library of albums. 

We want to ask 3 questions of the data
##### 1. Give every album in the music library that was released in a given year
`select * from music_library WHERE YEAR=1970`
##### 2. Give every album in the music library that was created by a given artist  
`select * from artist_library WHERE artist_name='The Beatles'`
##### 3. Give all the information from the music library about a given album
`select * from album_library WHERE album_name='Close To You'`


#### Because we want to do three different queries, we will need different tables that partition the data differently (please note that these images do not reflect the data inserted)
<img src="../../../images/non_relational_table_1.png" width="350" height="350">
<img src="../../../images/non_relational_table_2.png" width="350" height="350">
<img src="../../../images/non_relational_table_3.png" width="550" height="550">

#### Create the tables

In [7]:
# Optional reset: uncomment these calls to drop the three tables before
# re-running the create/insert cells below.
# cassandra.drop_table('music_library')
# cassandra.drop_table('artist_library')
# cassandra.drop_table('album_library')

In [8]:
# Table for query 1 (albums released in a given year): `year` is the
# partition key and `album_name` the clustering column.
cassandra.create_table(
    table_name='music_library',
    schema='(year int, artist_name varchar, album_name varchar, PRIMARY KEY (year, album_name))',
)

Table named music_library created


In [9]:
# Table for query 2 (albums by a given artist): `artist_name` is the
# partition key and `year` the clustering column.
# NOTE(review): with PRIMARY KEY (artist_name, year) two albums by the same
# artist in the same year would share one row key - confirm whether
# album_name should also be a clustering column.
cassandra.create_table(
    table_name='artist_library',
    schema='(year int, artist_name varchar, album_name varchar, PRIMARY KEY (artist_name, year))',
)

Table named artist_library created


In [10]:
# Table for query 3 (everything about a given album): `album_name` is the
# partition key and `artist_name` the clustering column.
cassandra.create_table(
    table_name='album_library',
    schema='(year int, artist_name varchar, album_name varchar, PRIMARY KEY (album_name, artist_name))',
)

Table named album_library created


#### Insert data into the tables

In [13]:
# Music library: each tuple follows the column order given in `columns`,
# i.e. (year, artist_name, album_name).
cassandra.insert_rows(
    table_name='music_library',
    columns='(year, artist_name, album_name)',
    rows=[
        (1970, 'The Beatles', 'Let it Be'),
        (1965, 'The Beatles', 'Rubber Soul'),
        (1965, 'The Who', 'My Generation'),
        (1966, 'The Monkees', 'The Monkees'),
        (1970, 'The Carpenters', 'Close To You'),
    ],
)

In [15]:
# Artist library: same five albums, with each tuple ordered as
# (artist_name, year, album_name) to match `columns`.
cassandra.insert_rows(
    table_name='artist_library',
    columns='(artist_name, year, album_name)',
    rows=[
        ('The Beatles', 1970, 'Let it Be'),
        ('The Beatles', 1965, 'Rubber Soul'),
        ('The Who', 1965, 'My Generation'),
        ('The Monkees', 1966, 'The Monkees'),
        ('The Carpenters', 1970, 'Close To You'),
    ],
)

In [17]:
# Album library: same five albums, with each tuple ordered as
# (artist_name, album_name, year) to match `columns`.
cassandra.insert_rows(
    table_name='album_library',
    columns='(artist_name, album_name, year)',
    rows=[
        ('The Beatles', 'Let it Be', 1970),
        ('The Beatles', 'Rubber Soul', 1965),
        ('The Who', 'My Generation', 1965),
        ('The Monkees', 'The Monkees', 1966),
        ('The Carpenters', 'Close To You', 1970),
    ],
)

It might have felt unnatural to insert duplicate data into the tables. If I just normalized these tables, I wouldn't have to keep extra copies! While this is true, remember there are no `JOINS` in Apache Cassandra. For the benefit of high availability and scalability, denormalization must be how this is done.


#### Validate the data

In [19]:
# Query 1: every album released in 1970. The filter is on `year`, the
# partition key of music_library.
cassandra.custom_query(query='select * from music_library WHERE YEAR=1970')

[Row(year=1970, album_name='Close To You', artist_name='The Carpenters'),
 Row(year=1970, album_name='Let it Be', artist_name='The Beatles')]

In [22]:
# Query 2: every album by The Beatles. The filter is on `artist_name`, the
# partition key of artist_library.
cassandra.custom_query(query="select * from artist_library WHERE artist_name='The Beatles'")

[Row(artist_name='The Beatles', year=1965, album_name='Rubber Soul'),
 Row(artist_name='The Beatles', year=1970, album_name='Let it Be')]

In [24]:
# Query 3: all stored information about the album 'Close To You'. The filter
# is on `album_name`, the partition key of album_library.
cassandra.custom_query(query="select * from album_library WHERE album_name='Close To You'")

[Row(album_name='Close To You', artist_name='The Carpenters', year=1970)]

In [None]:
# Template cell for an ad-hoc query: replace the ##### placeholders with a
# table name and a WHERE condition before running it.
# NOTE(review): `session` is not defined in any visible cell of this notebook;
# the other cells go through `cassandra.custom_query` - confirm which handle
# is intended here.
query = "select * from ##### WHERE #####'"
try:
    rows = session.execute(query)
except Exception as e:
    # Report the failure and stop: `rows` was never bound on this path.
    print(e)
else:
    # Iterate only after a successful execute(); running this loop after a
    # failed query would raise NameError and hide the real error above.
    for row in rows:
        print(row.artist_name, row.year, row.album_name)

#### Close the session and cluster connection

In [25]:
# Close the session and the cluster connection now that the exercise is done.
cassandra.close_connection()

Closed session
Closed cluster connection
