In [1]:
# Jupyter relative import (else it interupts the module)
import os, sys
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)
    
from cassandra.auth import PlainTextAuthProvider
from cassandra.cluster import Cluster
from cassandra.cqlengine import connection, management, query
from cassandra.cqlengine.connection import log as cql_logger
from cassandra.concurrent import execute_concurrent_with_args
from config import get_settings
from db import create_session
import pandas as pd

**Scenario**: 

Image a car rental company that operates nationwide and needs to store information about each car in the fleet, including the make, model, registration number, and the time that each car is rented out.  The data is required to be held for `30 days`.  

- All over the customers that rented a Toyota Camry off the lot yesterday were over charged, so you would like to contact them. to inform them of the overcharge. 

<br>

```QUERY: Find the contact numbers of all the Toyota Camry rentals that left the lot yesterday after 8:00am```

We can construct a table to model this scenario using:

**Table**
rentals

**Columns**
car_make, car_model, rental_start_time, rental_end_time, registration, car_category, car_year, driver_license, first_name, last_name, contact_number

**Composite key of (car_make, employee_id)** where:

    car_make, car_model = partition key (tells us which node)
    rental_start_time, rental_end_time = clustering column (defines the order of the data)

In [2]:
session = create_session()

In [3]:
session = create_session()

# Create a keyspace
session.execute("""
CREATE KEYSPACE IF NOT EXISTS cql_keyspace
WITH replication = { 'class': 'SimpleStrategy', 'replication_factor': 1 } 
AND durable_writes = 'true';
""")

# Create our table
session.execute("""
CREATE TABLE IF NOT EXISTS cql_keyspace.rentals (
car_make text,
car_model text,
rental_start_time timestamp,
rental_end_time timestamp,
car_category text,
car_year int,
registration_number text,
driver_license text,
first_name text,
last_name text,
contact_number text,
PRIMARY KEY ((car_make, car_model), rental_start_time, rental_end_time)
) WITH default_time_to_live = 2592000;
""")

<cassandra.cluster.ResultSet at 0x2171dba6640>

In [4]:
from data_generator import DataGenerator

records = DataGenerator().generate_records(num_records=10000)

In [5]:
statement = session.prepare("""
INSERT INTO cql_keyspace.rentals (car_make,
car_model,
rental_start_time,
rental_end_time,
car_category,
car_year,
registration_number,
driver_license,
first_name,
last_name,
contact_number) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""")

records = DataGenerator().generate_records(num_records=10000)

parameters = [x for x in records]

execute_concurrent_with_args(session, statement, parameters, concurrency=50)



[ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB67EE0>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB6F400>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB750A0>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB676A0>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB67310>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB67A90>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB6F7C0>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171E789340>),
 ExecutionResult(success=True, result_or_exc=<cassandra.cluster.ResultSet object at 0x000002171EB5E040>),
 ExecutionResult(success=True, result_or_exc=<

In [26]:
result = list(session.execute("SELECT * FROM cql_keyspace.rentals"))
print(len(result), f".. where {10000-len(result)} cars where rented multiple times within the last 30 days.")
pd.DataFrame(result).head()

9684 .. where 316 cars where rented multiple times within the last 30 days.


Unnamed: 0,car_make,car_model,rental_start_time,rental_end_time,car_category,car_year,contact_number,driver_license,first_name,last_name,registration_number
0,Mitsubishi,F-TYPE,2023-01-14 01:54:09.632,2023-01-22 23:54:09.632,SUV,1995,,Catherine,Martinez,431649449,0204PX5G
1,Cadillac,Sierra 2500 HD Crew Cab,2023-01-19 02:54:10.059,2023-01-21 06:54:10.059,Wagon,2010,,Stephen,Underwood,456604620,970BV6L6
2,Toyota,fortwo,2023-01-29 00:54:09.645,2023-01-31 23:54:09.645,Pickup,2004,,Madison,Foley,456164718,763IKB9P
3,MAZDA,Trans Sport,2023-01-06 03:54:09.513,2023-01-09 00:54:09.513,SUV,2004,,Sandra,Hall,496683770,954LNQIS
4,Audi,X1,2023-01-06 03:54:09.313,2023-02-03 05:54:09.313,Van/Minivan,2012,,Charles,Garcia,439046452,866FJWDD


```QUERY: Find the contact details of each person that rented a Toyota Camry from yesterday after 8:00am```

In [21]:
result = session.execute("SELECT * FROM cql_keyspace.rentals WHERE car_make = 'Toyota' and car_model = 'Camry'")
pd.DataFrame(list(result)).head()

Unnamed: 0,car_make,car_model,rental_start_time,rental_end_time,car_category,car_year,contact_number,driver_license,first_name,last_name,registration_number
0,Toyota,Camry,2023-01-10 05:54:09.578,2023-01-21 01:54:09.578,SUV,2014,,Benjamin,Copeland,424349739,380Z16BN
1,Toyota,Camry,2023-01-19 01:54:09.219,2023-01-30 02:54:09.219,Coupe,1997,,Debra,Baker,428617865,351PZGS9
