In [1]:
from cassandra.cluster import Cluster, BatchStatement
import cassandra.cluster as cc
import cassandra
import uuid
import time as ttime

In [2]:
# Open a connection to the Cassandra node listening on localhost.
# `session` is the handle the following cells use to run CQL.
cluster = Cluster(contact_points=['127.0.0.1'])
session = cluster.connect()

In [3]:
# Create the `simplex` keyspace (SimpleStrategy, one replica).
# Re-running this cell is expected: when the keyspace is already present the
# driver raises AlreadyExists, which is reported and otherwise ignored.
try:
    session.execute("""
        CREATE KEYSPACE simplex WITH replication
            = {'class':'SimpleStrategy', 'replication_factor':1};
    """)
except cassandra.AlreadyExists:
    # Message previously read "exits", which describes a different event.
    print('KeySpace exists')

KeySpace exits


In [4]:
# Make `simplex` the session's current keyspace; the insert/select cells below
# refer to the table as plain `events`. A rejected request is only reported.
try:
    session.execute("USE simplex")
except cassandra.InvalidRequest:
    print("Cannot use the KEYSPACE")

In [5]:
# Create the `simplex.events` table used by the benchmark cells.
#
# `timestamp` is declared as `double`: CQL `float` is a 32-bit IEEE-754 value,
# which cannot represent an epoch-style number such as 12143151351.642 to
# better than roughly a thousand seconds. A table that already exists keeps
# whatever schema it was created with, because CREATE TABLE then raises
# AlreadyExists and nothing is altered.
# NOTE(review): `data` is still a 32-bit `float`; confirm that precision is
# sufficient for the values being stored.
try:
    session.execute("""
        CREATE TABLE simplex.events (
            id uuid PRIMARY KEY,
            data_key text,
            data float,
            timestamp double,
            event_id uuid
        );
        """)
except cassandra.AlreadyExists:
    print("Table already created")

Table already created


In [6]:
# Insert 100 sample rows, one request per row, and report the elapsed time.
# The statement is prepared once up front so the same CQL text is not sent and
# parsed again on every iteration; only the bound values change per row.
insert_event = session.prepare(
    """
    INSERT INTO events (id, data_key, data, timestamp)
    VALUES (?, ?, ?, ?)
    """
)

start = ttime.time()
for i in range(int(1e2)):
    session.execute(
        insert_event,
        (uuid.uuid4(), "pvalias" + str(i), 18.134315 + i, 12143151351.642),
    )
end = ttime.time()

# The original cell captured start/end but never showed the measurement.
print(end - start, 'seconds for 100 single inserts')

In [7]:
# Run a full-table SELECT and keep the driver's result object in `rows`.
rows = session.execute(
    'SELECT * FROM events'
)


In [9]:
%timeit list(rows)

The slowest run took 6791472.93 times longer than the fastest. This could mean that an intermediate result is being cached 
1 loops, best of 3: 1.68 µs per loop


In [10]:
# Count the rows client-side by walking a full-table SELECT.
print(sum(1 for _ in session.execute('SELECT * FROM events')))

1687952


In [None]:
# Bulk-load one million sample rows, sending them to the server 500 at a time.
#
# The flush is driven by the number of statements currently queued rather than
# by `i % 500 == 0`: that test fired on the very first row (a one-statement
# batch) and never sent the rows queued after the last multiple of 500, so the
# tail of the data set was silently dropped.
insert_event = session.prepare(
    """
    INSERT INTO events (id, data_key, data, timestamp)
    VALUES (?, ?, ?, ?)
    """
)

batch = BatchStatement()
pending = 0  # statements queued in `batch` and not yet sent
for i in range(int(1e6)):
    batch.add(
        insert_event,
        (uuid.uuid4(), "pvalias" + str(i), 18.134315 + i, 12143151351.642),
    )
    pending += 1
    if pending == 500:
        session.execute(batch)
        batch = BatchStatement()
        pending = 0

# Send whatever is left over when the row count is not a multiple of 500.
if pending:
    session.execute(batch)

In [None]:
# Re-count the rows after the bulk load by walking a full-table SELECT.
print(sum(1 for _ in session.execute('SELECT * FROM events')))

In [11]:
rws = session.execute('SELECT * FROM events')
%timeit list(rws)


The slowest run took 8460016.59 times longer than the fastest. This could mean that an intermediate result is being cached 
1 loops, best of 3: 1.37 µs per loop


In [13]:
import time

# Time how long it takes to pull every row of a full-table SELECT into a list.
# The clock starts after `execute` returns, so only the iteration is measured.
result = session.execute('SELECT * FROM events')

start = time.time()
parsed = list(result)
end = time.time()

elapsed = end - start
print(len(parsed), 'events took ', elapsed, ' seconds')


1687952 events took  14.096062421798706  seconds
