#### Import Python packages 

In [None]:
# Import Python packages 
import cassandra
import numpy as np
import pandas as pd
from cassandra.cluster import Cluster

# Part II. Cassandra Coding. 

In Part I we prepared the original source data. Now we are ready to work with the CSV file `event_datafile_new.csv`, located in `data/`. The file contains the following columns:

- artist 
- firstName of user
- gender of user
- item number in session
- last name of user
- length of the song
- level (paid or free song)
- location of the user
- sessionId
- song title
- userId

The image below is a screenshot of the denormalized data in `event_datafile_new.csv`:

<img src="images/image_event_datafile_new.jpg">

### Load data to pandas.DataFrame

We will insert our data into the Cassandra tables using pandas.

In [None]:
# Load the denormalized event data from the CSV file into a pandas.DataFrame
file = 'event_datafile_new.csv'
filepath = '../data/{}'.format(file)
df = pd.read_csv(filepath)

### Data Model

We will create three tables: Sessions, Users and Songs. In NoSQL databases such as Cassandra, it is common practice to create one table per query. Each table is modeled differently, especially with regard to its primary key and clustering columns. Here are the queries each table is designed to answer:

1. __Sessions:__ Give me the artist, song title and song's length in the music app history that was heard during sessionId = 338 and itemInSession = 4
2. __Users:__ Give me only the following: name of artist, song (sorted by itemInSession) and user (first and last name) for userid = 10, sessionid = 182
3. __Songs:__ Give me every user name (first and last) in my music app history who listened to the song 'All Hands Against His Own'




### Create a Cluster

In [2]:
# Connect to the Cassandra instance on the local machine (127.0.0.1).
# `Cluster` comes from the notebook's import cell, so that every dependency
# is declared in one place at the top of the notebook.
cluster = Cluster()

# A session is required to establish the connection and execute queries
session = cluster.connect()

### Create a Keyspace

In [None]:
# Create the `sparkify_db` keyspace unless it already exists.
# A failure is printed rather than raised, so the notebook keeps running.
try:
    session.execute(
        """
    CREATE KEYSPACE IF NOT EXISTS sparkify_db 
    WITH REPLICATION = 
    { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }"""
    )
except Exception as err:
    print(err)

### Set Keyspace

In [None]:
# Use `sparkify_db` as the keyspace for all following statements on this session.
# Errors are reported by printing them; they are not re-raised.
try:
    session.set_keyspace("sparkify_db")
except Exception as err:
    print(err)

In [None]:
# Read the event data from csv into a pandas.DataFrame
# (same load as in the "Load data to pandas.DataFrame" section above).
file = 'event_datafile_new.csv'
filepath = ''.join(['../data/', file])
df = pd.read_csv(filepath)

### Create Tables «Sessions», «Users» & «Songs»

#### 1. Create Tables

In [None]:
# Table for query 1 (lookup by session and item within the session).
# session_id is the partition key and item_in_session the clustering column,
# so the pair (session_id, item_in_session) identifies a single row.
# IF NOT EXISTS makes the cell safe to re-run.
session.execute("""
CREATE TABLE IF NOT EXISTS sessions
(
  artist text, 
  song text, 
  length decimal, 
  session_id int, 
  item_in_session int,
  PRIMARY KEY (session_id, item_in_session)
)
""")

In [None]:
# Table for query 2 (songs of one user within one session).
# (user_id, session_id) form the composite partition key, so both must be
# given in the WHERE clause; item_in_session is the clustering column, which
# orders the rows inside a partition (ascending by default in CQL).
# `user` holds the full name (first + last) as a single text value.
session.execute("""
CREATE TABLE IF NOT EXISTS users
(
  artist text, 
  song text, 
  user_id int,
  user text,
  session_id int,
  item_in_session int,
  PRIMARY KEY ((user_id, session_id), item_in_session)
)
""")

In [None]:
# Table for query 3 (all users who listened to a given song).
# song is the partition key; artist and user_id are clustering columns, so a
# row is identified by (song, artist, user_id). Repeated listens by the same
# user to the same song/artist therefore map to one row.
session.execute("""
CREATE TABLE IF NOT EXISTS songs
(
  artist text, 
  song text, 
  user_id int,
  user text,
  PRIMARY KEY ((song), artist, user_id)
)
""")

#### 2. Insert Data Into The Tables

In [None]:
# Keep only the columns stored in `sessions`, in the same order as the
# column list of the INSERT statement.
sessions = df[['artist', 'song', 'length', 'sessionId', 'itemInSession']]

query = """
    INSERT INTO sessions (artist, song, length, session_id, item_in_session)
    VALUES (%s, %s, %s, %s, %s)"""

# One INSERT per event row
for _, record in sessions.iterrows():
    session.execute(query, tuple(record.values))

In [None]:
# Build the frame for `users`: merge first and last name into a single `user`
# column (appended last) and drop the two source columns, which leaves the
# columns in the same order as the INSERT statement below.
users = (
    df.loc[:, ['artist', 'song', 'firstName', 'lastName', 'userId', 'sessionId', 'itemInSession']]
    .assign(user=lambda frame: frame['firstName'] + ' ' + frame['lastName'])
    .drop(columns=['firstName', 'lastName'])
)

query = """
INSERT INTO users (artist, song, user_id, session_id, item_in_session, user)
VALUES (%s, %s, %s, %s, %s, %s)"""

# One INSERT per event row
for _, record in users.iterrows():
    session.execute(query, tuple(record.values))

In [None]:
# Build the frame for `songs`: the full user name goes into `user` (appended
# last) and the separate name columns are removed, matching the column order
# of the INSERT statement below.
songs = (
    df.loc[:, ['artist', 'song', 'firstName', 'lastName', 'userId']]
    .assign(user=lambda frame: frame['firstName'] + ' ' + frame['lastName'])
    .drop(columns=['firstName', 'lastName'])
)

query = """
INSERT INTO songs (artist, song, user_id, user)
VALUES (%s, %s, %s, %s)"""

# One INSERT per event row
for _, record in songs.iterrows():
    session.execute(query, tuple(record.values))

#### 3. Select Data From The Tables

In [None]:
# Query 1: artist, song title and song length for session 338, item 4
query = """
SELECT artist, song, length
FROM sessions 
WHERE 
  session_id=338
  AND item_in_session=4
"""
try:
    matches = session.execute(query)
except Exception as err:
    # Report the failure and skip printing results
    print(err)
else:
    for match in matches:
        print(match.artist, match.song, match.length)

In [None]:
# Query 2: artist, song and user name for user 10 in session 182
query = """
SELECT artist, song, user
FROM users 
WHERE 
  user_id = 10
  AND session_id = 182
"""
try:
    matches = session.execute(query)
except Exception as err:
    # Report the failure and skip printing results
    print(err)
else:
    for match in matches:
        print(match.artist, match.song, match.user)

In [None]:
# Query 3: every user who listened to 'All Hands Against His Own'
query = """
SELECT user
FROM songs 
WHERE 
  song = 'All Hands Against His Own'
"""
try:
    matches = session.execute(query)
except Exception as err:
    # Report the failure and skip printing results
    print(err)
else:
    for match in matches:
        print(match.user)

### Drop the tables before closing out the sessions

In [None]:
# Remove the `sessions` table. IF EXISTS keeps the cell idempotent: running it
# again (or after a partial run in which the table was never created) is a
# no-op instead of an error. Any other failure is still printed.
query = "DROP TABLE IF EXISTS sessions"
try:
    session.execute(query)
except Exception as e:
    print(e)

In [None]:
# Remove the `users` table. IF EXISTS keeps the cell idempotent: running it
# again (or after a partial run in which the table was never created) is a
# no-op instead of an error. Any other failure is still printed.
query = "DROP TABLE IF EXISTS users"
try:
    session.execute(query)
except Exception as e:
    print(e)

In [None]:
# Remove the `songs` table. IF EXISTS keeps the cell idempotent: running it
# again (or after a partial run in which the table was never created) is a
# no-op instead of an error. Any other failure is still printed.
query = "DROP TABLE IF EXISTS songs"
try:
    session.execute(query)
except Exception as e:
    print(e)

### Close the session and cluster connection

In [None]:
# Release resources: close the session first, then the cluster object it was
# created from. No queries can be executed on `session` after this cell.
session.shutdown()
cluster.shutdown()