# Query a Table with Apache Cassandra

In [1]:
# Import Apache Cassandra

import cassandra

## Let's create a connection to the databasse


In [2]:
from cassandra.cluster import Cluster

try:
    cluster = Cluster(['127.0.0.1']) # If you have a locally installed Apache Cassandra instance
    session = cluster.connect()
except Exception as e:
    print(e)

('Unable to connect to any servers', {'127.0.0.1:9042': ConnectionRefusedError(10061, "Tried connecting to [('127.0.0.1', 9042)]. Last error: No connection could be made because the target machine actively refused it")})


### Let's test the connection

We are trying to do a select * on a table we have not yet created yet, we should expect to see a nicely handled error

In [3]:
# try:
#     session.execute("""select * from songs""")
# except Exception as e:
#     print(e)

### Let's create a keyspace to do our work in


In [4]:
try:
    session.execute("""
    create keyspace if not exists udacity
    with replication =
    { 'class' : 'SimpleStrategy', 'replication_factor' : 1}"""
                   )
except Exception as e:
    print(e)

### Connect to the keyspace. Compare this to how we had create a new session in PostgreSQL.

In [5]:
try:
    session.set_keyspace('udacity')
except Exception as e:
    print(e)

#### Unlike RDBM, We can't model our data and create our table with out more information NoSQL 

## What queries will I be performing on this data?


##### In this case I would like to be able to get every album that was released in a particular year.

```bash
select * from songs WHERE year=1970 and artist_name='The Beatles'
```

### Because of this I need to be able to do a WHERE on YEAR. YEAR will become my partition key, and artist name will be my clustering column to make each Primary key unique. Remember there are no duplicate in Apache Cassandra.

In [6]:
query = "CREATE TABLE IF NOT EXISTS songs"
query = query + "(year int, song_title text, artist_name text, album_name text, single boolean, PRIMARY KEY (year, artist_name))"

try:
    session.execute(query)
except Exception as e:
    print(e)

### Let's check if the table was created

```bash
select count (*)
```

This query shouldn't be tried on a large datasets, this is for demo sake

In [7]:
query = "select count(*) from songs"
try:
    count = session.execute(query)
except Exception as e:
    print(e)
    
print(count.one())

Row(count=0)


### Let's insert two rows

Note the syntax here

In [8]:
query = "INSERT INTO songs (year, song_title, artist_name, album_name, single)"
query = query + "VALUES (%s, %s, %s, %s, %s)"

try:
    session.execute(query, (1970, "Think for Yourself", "The Beatles", "Rubber Soul", False))
except Exception as e:
    print(e)
    

try:
    session.execute(query, (1970, "Across the Universe", "The Beatles", "Let it Be", False))
except Exception as e:
    print(e)

### Validate if the data was inserted 



In [9]:
query = 'SELECT * from songs'

try:
    rows = session.execute(query)
except Exception as e:
    print(e)
    
for row in rows:  # the for loop is for printing, it will not be required if executing in cqlsh
    print(row.year, row.album_name, row.artist_name)

1970 Let it Be The Beatles


### Let's Validate our data model with our original query

select * from music_library WHERE year=1970

In [10]:
query = "select * from songs WHERE year=1970 and artist_name='The Beatles'"

try:
    rows = session.execute(query)
except Exception as e:
    print(e)
    
for row in rows:  # the for loop is for printing, it will not be required if executing in cqlsh
    print(row.year, row.album_name, row.artist_name)

1970 Let it Be The Beatles


### For the sake of the demo, let's drop the table

In [11]:
query = "drop table songs"

try:
    rows = session.execute(query)
except Exception as e:
    print(e)

### And Finally close the session and cluster connection

In [12]:
session.shutdown()
cluster.shutdown()