# Import libraries

In [52]:
import aerospike
import json
import gzip
import time

# Compress data

In [1]:
# Read the JSON data from the file
json_file = "daily_dataset_small_kv.json"
with open(json_file, "r") as file:
    json_data = json.load(file)

# Specify the output gzip file
gzip_file = "daily_dataset_small_kv.json.gz"

# Compress the JSON data into a gzip file
with gzip.open(gzip_file, "wt", encoding="utf-8") as file:
    json.dump(json_data, file)

In [53]:
config = {
    'hosts': [
        ( '127.0.0.1', 3000 )
    ],
    'policies': {
        'timeout': 1000 # milliseconds
    }
}

# Create client connection

In [3]:
# Set Aerospike host configuration
config = {"hosts": [("localhost", 3000)]}

# Create the Aerospike client
client = aerospike.client(config).connect()

# Data Insertion

In [4]:
# Specify Aerospike namespace and set
namespace = "test"
as_set = "energy_c"

# Open zipped file
json_file = gzip.open("/Users/jafarabdurrahmaan/Dropbox/00_Uni/Masterthesis/A_Thesis/1_Components/13_Benchmark_Study_Components/3_KVS/Aerospike/iter2/daily_dataset_small_kv.json.gz", "rt")

# Load json data
json_data = json.load(json_file)

# Measure start of execution time
start_time = time.time()

# Iterate over the two items in json data
for item in json_data:
    id = item["id"]
    energy_sum = float(item["energy_sum"])

    key = (namespace, as_set, id)

    bins = {
        "energy": {
            "id": id,
            "energy_sum": energy_sum
        }
    }

    write_policy = {"key": aerospike.POLICY_KEY_SEND}

    try:
        client.put(key=key, bins=bins, policy=write_policy)
        
    except aerospike.exception.AerospikeError as e:
        print(f"Create failed for Record with id: {id}\nError: {e.msg}, {e.code}")

# Close json file
json_file.close()

# Measure query execution time
elapsed_time = time.time() - start_time
print(f"Data insertion took: {elapsed_time} seconds")

# Close client
client.close()

Data insertion took: 3.3095030784606934 seconds


# Simple Retrieval

## First Query Workload

In [18]:
# Primary keys to query
primary_keys = ['MAC000131_2012-06-01', 'MAC000131_2012-06-02', 'MAC000131_2012-06-03']

# Measure start of execution time
start_time = time.time()

# Query and retrieve the records
for pk in primary_keys:
    key = (namespace, as_set, pk)

    try:
        _, _, record = client.get(key)
        energy = record["energy"]
        print(f"PK: {pk}, Energy: {energy}")

    except aerospike.exception.RecordNotFound:
        print(f"No record found for PK: {pk}")

# Measure stop of execution time
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time) * 1000

# Print the execution time
print(f"Query Execution Time: {execution_time:.2f} milliseconds")

# Close client
client.close()

PK: MAC000131_2012-06-01, Energy: {'energy_sum': 5.774999999999998, 'id': 'MAC000131_2012-06-01'}
PK: MAC000131_2012-06-02, Energy: {'energy_sum': 5.459000000000001, 'id': 'MAC000131_2012-06-02'}
PK: MAC000131_2012-06-03, Energy: {'energy_sum': 6.481000000000001, 'id': 'MAC000131_2012-06-03'}
Query Execution Time: 5.00 milliseconds


## Second Query Workload

In [20]:
# Primary keys to query
primary_keys = ['MAC000131_2012-06-01', 'MAC000131_2012-06-02', 'MAC000131_2012-06-03']

# Measure start of execution time
start_time = time.time()

# Query and retrieve the records
for pk in primary_keys:
    key = (namespace, as_set, pk)

    try:
        _, _, record = client.get(key)
        energy = record["energy"]
        print(f"PK: {pk}, Energy: {energy}")

    except aerospike.exception.RecordNotFound:
        print(f"No record found for PK: {pk}")

# Measure stop of execution time
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time) * 1000

# Print the execution time
print(f"Query Execution Time: {execution_time:.2f} milliseconds")

# Close client
client.close()

PK: MAC000131_2012-06-01, Energy: {'energy_sum': 5.774999999999998, 'id': 'MAC000131_2012-06-01'}
PK: MAC000131_2012-06-02, Energy: {'energy_sum': 5.459000000000001, 'id': 'MAC000131_2012-06-02'}
PK: MAC000131_2012-06-03, Energy: {'energy_sum': 6.481000000000001, 'id': 'MAC000131_2012-06-03'}
Query Execution Time: 5.30 milliseconds


## Third Query Workload

In [21]:
# Primary keys to query
primary_keys = ['MAC000131_2012-06-01', 'MAC000131_2012-06-02', 'MAC000131_2012-06-03']

# Measure start of execution time
start_time = time.time()

# Query and retrieve the records
for pk in primary_keys:
    key = (namespace, as_set, pk)

    try:
        _, _, record = client.get(key)
        energy = record["energy"]
        print(f"PK: {pk}, Energy: {energy}")

    except aerospike.exception.RecordNotFound:
        print(f"No record found for PK: {pk}")

# Measure stop of execution time
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time) * 1000

# Print the execution time
print(f"Query Execution Time: {execution_time:.2f} milliseconds")

# Close client
client.close()

PK: MAC000131_2012-06-01, Energy: {'energy_sum': 5.774999999999998, 'id': 'MAC000131_2012-06-01'}
PK: MAC000131_2012-06-02, Energy: {'energy_sum': 5.459000000000001, 'id': 'MAC000131_2012-06-02'}
PK: MAC000131_2012-06-03, Energy: {'energy_sum': 6.481000000000001, 'id': 'MAC000131_2012-06-03'}
Query Execution Time: 5.68 milliseconds


## Overall Query Execution Time for Simple Retrieval

In [22]:
print(f"Average Query Execution Time: {((5.30+5.68+5.00)/3)} seconds")

Average Query Execution Time: 5.326666666666667 seconds
