In [1]:
from cassandra.cluster import Cluster, BatchStatement
import cassandra.cluster as cc
import cassandra
import uuid
import time as ttime

In [2]:
cluster = Cluster(['127.0.0.1'])

session = cluster.connect()

In [3]:
try:
    session.execute("""
        CREATE KEYSPACE simplex WITH replication
            = {'class':'SimpleStrategy', 'replication_factor':1};
    """)
except cassandra.AlreadyExists:
    print('KeySpace exits')

KeySpace exits


In [4]:
try:
    session.execute('USE simplex')
except cassandra.InvalidRequest:
    print('Cannot use the KEYSPACE')

In [5]:
try:
    session.execute("""
        CREATE TABLE simplex.events (
            id uuid PRIMARY KEY,
            data_key text,
            data float,
            timestamp float,
            event_id uuid
        );
        """)
except cassandra.AlreadyExists:
    print("Table already created")

Table already created


In [None]:
start = ttime.time()
for i in range(int(1e5)):
    session.execute(
        """
        INSERT INTO events (id, data_key, data, timestamp)
        VALUES (%(id)s, %(data_key)s, %(data)s, %(timestamp)s)
        """,
        {'id': uuid.uuid4(), 'data_key': "pvalias"+str(i), 'data': (18.134315+i),'timestamp': 12143151351.642}
    )
end = ttime.time()

In [6]:
rows = session.execute('SELECT * FROM events')


In [7]:
print(rows)

<cassandra.cluster.ResultSet object at 0x106eec208>


In [14]:
%timeit list(rows)

The slowest run took 6.83 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 2.33 µs per loop


In [13]:
print(len(list(session.execute('SELECT * FROM events'))))

340110


In [15]:
batch = BatchStatement()
for i in range(int(1e6)):
    batch.add(
        """
        INSERT INTO events (id, data_key, data, timestamp)
        VALUES (%(id)s, %(data_key)s, %(data)s, %(timestamp)s)
        """,
        {'id': uuid.uuid4(), 'data_key': "pvalias"+str(i), 'data': (18.134315+i),'timestamp': 12143151351.642}
    )
    if i % 90 == 0:
        session.execute(batch)
        batch = BatchStatement()



In [16]:
%timeit list(rows)


The slowest run took 7.49 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 2.29 µs per loop


In [17]:
print(len(list(session.execute('SELECT * FROM events'))))

1340101


In [18]:
%timeit list(session.execute('SELECT * FROM events'))


1 loops, best of 3: 16 s per loop
