# Import libraries

In [None]:
import json
import gzip
import time
import couchdb

In [56]:
# Read json data
json_file = "/Users/jafarabdurrahmaan/Dropbox/00_Uni/Masterthesis/A_Thesis/1_Components/13_Benchmark_Study_Components/4_Document/CouchDB/iter2/daily_dataset_small.json"
with open(json_file, "r") as file:
    json_data = json.load(file)

gzip_file = "daily_dataset_small.json.gz"

with gzip.open(gzip_file, "wt", encoding="utf-8") as file:
    json.dump(json_data, file)

# Create connection

In [57]:
couch = couchdb.Server('http://admin:smart-meter-benchmark@127.0.0.1:5984/')
db = couch.create('smart-meter-benchmark_c')
db_name = 'smart-meter-benchmark_c'
db = couch[db_name]

# Data Insertion

In [58]:
# Path to the gzipped JSON file
file_path = '/Users/jafarabdurrahmaan/Dropbox/00_Uni/Masterthesis/A_Thesis/1_Components/13_Benchmark_Study_Components/4_Document/CouchDB/iter2/daily_dataset_small.json.gz'

# Start timer
start_time = time.time()

# Open and read the gzipped JSON file
with gzip.open(file_path, 'rt') as f:
    data = json.load(f)

# Extract list from JSON data
docs = data['docs']

# Upload each document to database
for doc in docs:
    doc['energy_median'] = float(doc['energy_median'])
    doc['energy_mean'] = float(doc['energy_mean'])
    doc['energy_max'] = float(doc['energy_max'])
    doc['energy_count'] = int(doc['energy_count'])
    doc['energy_std'] = float(doc['energy_std'])
    doc['energy_sum'] = float(doc['energy_sum'])
    doc['energy_min'] = float(doc['energy_min'])

    db.save(doc)

# End timer
end_time = time.time()

# Calculate execution time
ingestion_time = end_time - start_time

# Print ingestion time
print("Data insertion took:", ingestion_time, "seconds")

Data insertion took: 11.52485203742981 seconds


# Data Querying

## Simple Retrieval

### Create View

In [59]:
# Define map function
map_func = """
function (doc) {
  if (doc.LCLid === 'MAC000131' && doc.day) {
    emit(doc.day, { energy_min: doc.energy_min, energy_max: doc.energy_max });
  }
}
"""

# Create view
view_name = 'simplequery'
design_doc = {
  '_id': '_design/simplequery',
  'views': {
    view_name: {
      'map': map_func
    }
  }
}

# Save view
db.save(design_doc)

('_design/simplequery', '1-b7c94017512ad5ad31b9bfccbd0faa3e')

### First Query Workload

In [64]:
# Query the view with a time range
start_date = '2012-06-01'
end_date = '2013-06-01'

# Start timer
start_time = time.time()

# Execute query
result = db.view('simplequery/' + view_name, startkey=start_date, endkey=end_date, limit=5)

# End timer
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time)*1000

# Print results
for row in result:
    doc_id = row.id
    doc = db[doc_id]
    energy_min = doc['energy_min']
    energy_max = doc['energy_max']
    print(f"energy_min: {energy_min}, energy_max: {energy_max}")
    
# Print execution time
print(f"Query execution time: {execution_time} milliseconds")

energy_min: 0.064, energy_max: 0.312
energy_min: 0.065, energy_max: 0.299
energy_min: 0.064, energy_max: 0.7829999999999999
energy_min: 0.066, energy_max: 1.1619999
energy_min: 0.065, energy_max: 0.742
Query execution time: 0.2601146697998047 milliseconds


## Second Query Workload

In [66]:
# Query the view with a time range
start_date = '2012-06-01'
end_date = '2013-06-01'

# Start timer
start_time = time.time()

# Execute query
result = db.view('simplequery/' + view_name, startkey=start_date, endkey=end_date, limit=5)

# End timer
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time)*1000

# Print results
for row in result:
    doc_id = row.id
    doc = db[doc_id]
    energy_min = doc['energy_min']
    energy_max = doc['energy_max']
    print(f"energy_min: {energy_min}, energy_max: {energy_max}")
    
# Print execution time
print(f"Query execution time: {execution_time} milliseconds")

energy_min: 0.064, energy_max: 0.312
energy_min: 0.065, energy_max: 0.299
energy_min: 0.064, energy_max: 0.7829999999999999
energy_min: 0.066, energy_max: 1.1619999
energy_min: 0.065, energy_max: 0.742
Query execution time: 0.27298927307128906 milliseconds


## Third Query Workload

In [67]:
# Query the view with a time range
start_date = '2012-06-01'
end_date = '2013-06-01'

# Start timer
start_time = time.time()

# Execute query
result = db.view('simplequery/' + view_name, startkey=start_date, endkey=end_date, limit=5)

# End timer
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time)*1000

# Print results
for row in result:
    doc_id = row.id
    doc = db[doc_id]
    energy_min = doc['energy_min']
    energy_max = doc['energy_max']
    print(f"energy_min: {energy_min}, energy_max: {energy_max}")
    
# Print execution time
print(f"Query execution time: {execution_time} milliseconds")

energy_min: 0.064, energy_max: 0.312
energy_min: 0.065, energy_max: 0.299
energy_min: 0.064, energy_max: 0.7829999999999999
energy_min: 0.066, energy_max: 1.1619999
energy_min: 0.065, energy_max: 0.742
Query execution time: 0.2818107604980469 milliseconds


### Overall Query Execution Time for Simple Retrieval

In [72]:
print(f"Average Query Execution Time: {((0.26+0.27+0.28)/3)} milliseconds")

Average Query Execution Time: 0.27 milliseconds


# Filter and Aggregation

## Create View

In [68]:
# Define map function
map_func = """
function (doc) {
  var date = doc.day;
  if (date >= "2012-06-01" && date <= "2013-06-01") {
    emit(doc.LCLid, doc.energy_sum);
  }
}
"""

# Define reduce function
reduce_func = """
function (keys, values, rereduce) {
  return sum(values);
}
"""

# Create view
view_name = 'filteredaggregatedquery'
design_doc = {
  '_id': '_design/filteredaggregatedquery',
  'views': {
    view_name: {
      'map': map_func,
      'reduce': reduce_func
    }
  }
}

# Save view
db.save(design_doc)

('_design/filteredaggregatedquery', '1-f7df51e96909e38c5dbca611d3c4fe1a')

### First Query Workload

In [77]:
# Start timer
start_time = time.time()

# Execute query
result = db.view('filteredaggregatedquery/' + view_name, group=True, reduce=True, sorted=True)

# End timer
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time)*1000

# Print results
for row in result:
    LCLid = row.key
    energy_sum = row.value
    print(f"LCLid: {LCLid}, Sum of energy_sum: {energy_sum}")
    
# Print execution time
print(f"Query execution time: {execution_time} milliseconds")

LCLid: MAC000131, Sum of energy_sum: 3691.9949997
LCLid: MAC000132, Sum of energy_sum: 5258.8470011
LCLid: MAC000221, Sum of energy_sum: 4917.5459997
LCLid: MAC000228, Sum of energy_sum: 2707.6260003
LCLid: MAC000234, Sum of energy_sum: 4213.725
LCLid: MAC000235, Sum of energy_sum: 1528.1550001
Query execution time: 0.26106834411621094 milliseconds


### Second Query Workload

In [70]:
# Start timer
start_time = time.time()

# Execute query
result = db.view('filteredaggregatedquery/' + view_name, group=True, reduce=True, sorted=True)

# End timer
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time)*1000

# Print results
for row in result:
    LCLid = row.key
    energy_sum = row.value
    print(f"LCLid: {LCLid}, Sum of energy_sum: {energy_sum}")
    
# Print execution time
print(f"Query execution time: {execution_time} milliseconds")

LCLid: MAC000131, Sum of energy_sum: 3691.9949997
LCLid: MAC000132, Sum of energy_sum: 5258.8470011
LCLid: MAC000221, Sum of energy_sum: 4917.5459997
LCLid: MAC000228, Sum of energy_sum: 2707.6260003
LCLid: MAC000234, Sum of energy_sum: 4213.725
LCLid: MAC000235, Sum of energy_sum: 1528.1550001
Query execution time: 0.10919570922851562 milliseconds


### Third Query Workload

In [71]:
# Start timer
start_time = time.time()

# Execute query
result = db.view('filteredaggregatedquery/' + view_name, group=True, reduce=True, sorted=True)

# End timer
end_time = time.time()

# Calculate execution time
execution_time = (end_time - start_time)*1000

# Print results
for row in result:
    LCLid = row.key
    energy_sum = row.value
    print(f"LCLid: {LCLid}, Sum of energy_sum: {energy_sum}")
    
# Print execution time
print(f"Query execution time: {execution_time} milliseconds")

LCLid: MAC000131, Sum of energy_sum: 3691.9949997
LCLid: MAC000132, Sum of energy_sum: 5258.8470011
LCLid: MAC000221, Sum of energy_sum: 4917.5459997
LCLid: MAC000228, Sum of energy_sum: 2707.6260003
LCLid: MAC000234, Sum of energy_sum: 4213.725
LCLid: MAC000235, Sum of energy_sum: 1528.1550001
Query execution time: 0.16999244689941406 milliseconds


### Overall Query Execution Time for Filter and Aggregation

In [78]:
print(f"Average Query Execution Time: {((0.26+0.11+0.17)/3)} milliseconds")

Average Query Execution Time: 0.18000000000000002 milliseconds


# Metadata

In [79]:
import couchdb

# Establish a connection to the CouchDB server
couch = couchdb.Server('http://admin:smart-meter-benchmark@127.0.0.1:5984/')

# Access the desired database
db_name = 'smart-meter-benchmark_c'
db = couch[db_name]

# Get the database info
db_info = db.info()

# Print the metadata
print("Database Name:", db_info['db_name'])
print("Document Count:", db_info['doc_count'])
print("Number of nodes:", db_info['cluster']['n'])
print("File Size:", db_info['sizes']['file'])

Database Name: smart-meter-benchmark_c
Document Count: 4873
Number of nodes: 1
File Size: 18092480
