## Cassandra DB

In [166]:
from cassandra.cluster import Cluster
import uuid
import datetime
from decimal import Decimal 

In [167]:
uuid.uuid4()

UUID('cf33f87d-76d9-4776-b62a-262ae0bc6680')

In [168]:
cluster = Cluster(["127.0.0.1"],port=9042)

In [169]:
session = cluster.connect()

### 1. Data Model Creation:

Design a data model for an e-commerce platform to handle products, orders, and user information.

Define appropriate column families and primary keys to ensure efficient querying.

In [5]:
create_keyspace = """
    CREATE KEYSPACE ecommerce
    WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1};
"""

session.execute(create_keyspace)

<cassandra.cluster.ResultSet at 0x2a0c714c5b0>

In [6]:
session.execute("USE ecommerce")

<cassandra.cluster.ResultSet at 0x2a0c714c5e0>

In [102]:
session.execute(""" 
CREATE TABLE products_orders (
    order_id UUID PRIMARY KEY,
    order_date timestamp,
    product_id UUID,
    product_name text,
    unit_price decimal,
    user_id UUID,
    user_name text,
    total decimal,
    amount int)
    """)

<cassandra.cluster.ResultSet at 0x2a0c7337220>

### 2. Data Insertion and Retrieval:

Insert sample data into the Cassandra database, including user information and product details.

In [108]:
sample_orders = [
    {
        'order_id': uuid.uuid4(),
        'order_date': datetime.datetime(2023, 8, 17, 10, 0, 0),
        'product_id': uuid.uuid4(),
        'product_name': 'Laptop',
        'unit_price': Decimal('999.99'),
        'user_id': uuid.uuid4(),
        'user_name': 'John Doe',
        'total': Decimal('1499.98'),
        'amount': 2
    },
    {
        'order_id': uuid.uuid4(),
        'order_date': datetime.datetime(2023, 8, 18, 15, 30, 0),
        'product_id': uuid.uuid4(),
        'product_name': 'T-Shirt',
        'unit_price': Decimal('19.99'),
        'user_id': uuid.uuid4(),
        'user_name': 'Jane Smith',
        'total': Decimal('39.98'),
        'amount': 2
    },
    {
        'order_id': uuid.uuid4(),
        'order_date': datetime.datetime(2023, 8, 19, 12, 0, 0),
        'product_id': uuid.uuid4(),
        'product_name': 'Book',
        'unit_price': Decimal('14.99'),
        'user_id': uuid.uuid4(),
        'user_name': 'Alice Johnson',
        'total': Decimal('29.98'),
        'amount': 2
    },
    {
        'order_id': uuid.uuid4(),
        'order_date': datetime.datetime(2023, 8, 20, 9, 0, 0),
        'product_id': uuid.uuid4(),
        'product_name': 'Smartphone',
        'unit_price': Decimal('699.99'),
        'user_id': uuid.uuid4(),
        'user_name': 'Bob Williams',
        'total': Decimal('1399.98'),
        'amount': 2
    },
    {
        'order_id': uuid.uuid4(),
        'order_date': datetime.datetime(2023, 8, 21, 14, 0, 0),
        'product_id': uuid.uuid4(),
        'product_name': 'Shoes',
        'unit_price': Decimal('59.99'),
        'user_id': uuid.uuid4(),
        'user_name': 'Eve Davis',
        'total': Decimal('119.98'),
        'amount': 2
    }
]

In [109]:
insert_query = """
    INSERT INTO products_orders (
        order_id, order_date, product_id, product_name,
        unit_price, user_id, user_name, total, amount
    )
    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

In [110]:
orders_stmt = session.prepare(insert_query)

In [111]:
for order in sample_orders:
    session.execute(
        orders_stmt,
        (
            order['order_id'], order['order_date'], order['product_id'], order['product_name'],
            order['unit_price'], order['user_id'], order['user_name'], order['total'], order['amount']
        ))

In [112]:
result = session.execute("""SELECT * FROM products_orders""")
for row in result:
    print(row.order_id, row.order_date, row.product_id, row.product_name,
        row.unit_price, row.user_id, row.user_name, row.total, row.amount)

10b047ba-5319-46a8-b479-f0b54db00744 2023-08-19 12:00:00 0996f9cd-a479-4689-a852-df5cdb02148c Book 14.99 3f8fb574-381f-4858-9a6b-41f830b177b4 Alice Johnson 29.98 2
94a7216f-687e-4608-9e49-bc348ffa1bbe 2023-08-17 10:00:00 86b6fb06-9295-4ca4-8077-9c38a23da3b3 Laptop 999.99 b9f1bdc4-2578-4fc6-bdd1-0478616d5eba John Doe 1499.98 2
bed13552-7793-4462-8039-7899e8628f77 2023-08-20 09:00:00 02c47bde-b4f6-42f1-a3b6-9a5d53d7c5d3 Smartphone 699.99 7eaa35ca-976b-4cc7-982f-5096486682aa Bob Williams 1399.98 2
9eb5d7b5-4dba-4a8f-859c-8c25a9351096 2023-08-21 14:00:00 7e591a10-8848-45d9-a641-b1a03ac5d062 Shoes 59.99 00e235ff-10c6-4b86-97be-e402d6cd467c Eve Davis 119.98 2
d0cf8cc0-ab81-4892-a15f-c9414b121ef5 2023-08-18 15:30:00 88ff1188-e4f1-4d8c-8977-f718692a214b T-Shirt 19.99 65a97b0d-c7a1-4631-b0a0-3a8252d535a4 Jane Smith 39.98 2


Retrieve a user's order history using CQL (Cassandra Query Language).

In [121]:
user_id = '65a97b0d-c7a1-4631-b0a0-3a8252d535a4'  


select_query = f"""
    SELECT order_date, user_name, product_name, total, amount
    FROM products_orders
    WHERE user_id = {user_id} ALLOW FILTERING
"""

In [122]:
result_set = session.execute(select_query)

In [123]:
for row in result_set:
    print("Order Date: ", row.order_date)
    print("User: ", row.user_name)
    print("Product: ", row.product_name)
    print("Total: ", row.total)
    print("Amount: ", row.amount)
    print("----------------------")

Order Date:  2023-08-18 15:30:00
User:  Jane Smith
Product:  T-Shirt
Total:  39.98
Amount:  2
----------------------


### 3. Time-Series Data:
    
Design a schema to handle time-series data, such as IoT sensor readings.


In [170]:
session.execute("""CREATE KEYSPACE IF NOT EXISTS sample_data 
                WITH replication = {'class': 'SimpleStrategy', 'replication_factor': 1}""")

<cassandra.cluster.ResultSet at 0x2a0c80083a0>

In [171]:
session.execute("USE sample_data")

<cassandra.cluster.ResultSet at 0x2a0c7250880>

In [204]:
create_table_query = """
    CREATE TABLE IF NOT EXISTS latest_events_by_device (
    device_id  UUID,
    current_time  TIMESTAMP,
    state      TEXT,
    value      TEXT,
    PRIMARY KEY((device_id))
)
"""
session.execute(create_table_query)

<cassandra.cluster.ResultSet at 0x2a0c740d060>

Insert and retrieve time-series data efficiently, using appropriate time-based partitioning.

In [173]:
# Sample data for insertion
sample_data = [
    {'device_id': uuid.uuid4(), 'current_time': datetime.datetime.now(), 'state': 'active', 'value': '23.5'},
    {'device_id': uuid.uuid4(), 'current_time': datetime.datetime.now(), 'state': 'inactive', 'value': '18.2'},
    {'device_id': uuid.uuid4(), 'current_time': datetime.datetime.now(), 'state': 'active', 'value': '29.7'},
    {'device_id': uuid.uuid4(), 'current_time': datetime.datetime.now(), 'state': 'active', 'value': '14.9'},
    {'device_id': uuid.uuid4(), 'current_time': datetime.datetime.now(), 'state': 'inactive', 'value': '10.3'}
]

# Prepare the insert query
insert_query = """
    INSERT INTO latest_events_by_device (device_id, current_time, state, value)
    VALUES (?, ?, ?, ?)
"""

In [174]:
sample_stmt = session.prepare(insert_query)

In [175]:
# Insert data dynamically
for event in sample_data:
    session.execute(
        sample_stmt, (event['device_id'], event['current_time'], event['state'], event['value']))

In [198]:
# show the result 
result = session.execute("""SELECT * FROM latest_events_by_device""")
for row in result:
    print(row.device_id, row.current_time, row.state, row.value)

b14d7478-be6c-48dc-b2e8-a6e80867b309 2023-08-20 20:46:55.932000 inactive 18.2
1833292a-e61b-455e-b974-070505817715 2023-08-20 20:58:15.214000 active 14.9
6542812b-6a9a-4be4-bc77-d7de75eea7da 2023-08-20 20:58:15.214000 active 29.7
b0958966-dc6e-4a0f-b3ec-dc2c53d1be19 2023-08-20 20:46:55.932000 inactive 10.3
242be36a-669f-4890-8bbe-3bbfd04c5285 2023-08-20 20:46:55.932000 active 29.7
eb6a0a7f-bcd4-4e4e-bc7b-45178c947393 2023-08-20 20:58:15.214000 inactive 18.2
95d1a5c3-1084-40e6-b0ec-3abace252959 2023-08-20 20:58:15.214000 active 23.5
ce9a11b7-7692-45f0-9331-ab2fdbff72d3 2023-08-18 22:24:20.757000 inactive 18.2
6aec28df-4766-494e-9842-99b824672820 2023-08-20 20:46:55.932000 active 23.5
5aa9f35f-8c2a-4bd2-b8a1-96f1d8a81b6a 2023-08-18 22:24:20.757000 active 29.7
a97ff5ab-6ed5-4027-a055-faec7eb48d1a 2023-08-18 22:24:20.757000 active 23.5
a5a70044-fdf0-4b59-a5b0-e390a2630fde 2023-08-20 20:58:15.214000 inactive 10.3
902eee4c-cfd0-4bcd-9e05-641515d86ac4 2023-08-18 22:24:20.757000 inactive 10.3


In [202]:
# Query for a specific device
device_id = '242be36a-669f-4890-8bbe-3bbfd04c5285'

result = session.execute("""SELECT current_time, state, value 
                            FROM latest_events_by_device WHERE device_id = {} ALLOW FILTERING""".format(device_id))

In [203]:
for row in result:
    print(row.current_time, row.state, row.value)

2023-08-20 20:46:55.932000 active 29.7


In [205]:
session.shutdown()
cluster.shutdown()