# HyperLogLog Data Type Examples

# Import to run %sh magic cell

In [1]:
import io.github.spencerpark.ijava.IJava;
import io.github.spencerpark.jupyter.kernel.magic.common.Shell;
// Register the Shell magics class with the running IJava kernel so that
// later cells can invoke the %sh line magic.
IJava.getKernelInstance()
     .getMagics()
     .registerMagics(Shell.class);

# Add Java Client POM Dependency

In [2]:
%%loadFromPOM
<dependencies>
  <!-- Aerospike Java client (JDK 8 artifact), pinned to version 8.1.1. It supplies the com.aerospike.client classes imported by the cells below. -->
  <dependency>
    <groupId>com.aerospike</groupId>
    <artifactId>aerospike-client-jdk8</artifactId>
    <version>8.1.1</version>
  </dependency>
</dependencies>

# Add required Imports

In [3]:
import com.aerospike.client.AerospikeClient;
import com.aerospike.client.policy.WritePolicy;
import com.aerospike.client.Bin;
import com.aerospike.client.Key;
import com.aerospike.client.Record;
import com.aerospike.client.Value;
import com.aerospike.client.policy.RecordExistsAction;
import com.aerospike.client.AerospikeException;
import com.aerospike.client.ResultCode;
import com.aerospike.client.Operation;
// Visual confirmation that the import statements in this cell were evaluated.
System.out.println("Client modules imported.");

Client modules imported.


In [4]:
// Start from a clean slate: ask asadm on 127.0.0.1 to truncate the "test" namespace.
%sh asadm --enable -e "manage truncate ns test --no-warn" -h "127.0.0.1"

#### Connect to the Aerospike Server
Instantiate the client object. Let us write a record and read it back.
We have a namespace **_test_** pre-defined on the server.

In [5]:
// Address of the server node the client connects to.
String seedHost = "127.0.0.1";
int seedPort = 3000;

// One client instance is created here and shared by every later cell.
AerospikeClient client = new AerospikeClient(seedHost, seedPort);
System.out.println("Initialized the client and connected to the cluster.");

Initialized the client and connected to the cluster.


# Accessing a record on the Aerospike Server
We build the Key object in Java.
We need the namespace and the record digest to find the record on the server.
The record digest is computed by the client library using the application-provided key (integer, string or byte array) and the set name. If the record is not in a set, use null for the set name.

<img src="./graphics/RecordKey.png"
     alt="Record Digest"
     style="float: left; margin-right: 10px;"
     width="600"
     height="400"/>

In [6]:
// A Key is built from the namespace, the set name and the application-provided user key.
String demoSet = "demo";
Key key = new Key("test", demoSet, "key1");

// Printing the Key shows namespace, set and user key followed by the 20-byte digest.
System.out.println("Working with record key:");
System.out.println(key.toString());

Working with record key:
test:demo:key1:ec91192d4b7f8ce35d5d78d34bca65cbaaaac960


# Generate Test Data

In [7]:
// Java imports required for HLL Data Type
import com.aerospike.client.operation.HLLOperation;
import com.aerospike.client.operation.HLLPolicy;

// Record and bin that will hold the first HyperLogLog sketch.
final String hllBin1 = "hllBin1";
final Key k_hll = new Key("test", null, "hllKey1");   // null set name: the record is not in a set

// Sizing parameters handed to HLLOperation.init().
int indexBits = 16;
int minHashBits = 34;

// HLL bins are only manipulated through operate(); init() sets up the bin before any values are added.
Operation[] ops1c = { HLLOperation.init(HLLPolicy.Default, hllBin1, indexBits, minHashBits) };

Record rec = client.operate(null, k_hll, ops1c);


In [8]:
// Feed hllBin1 with 612 batches of 10,240 distinct strings each.
// 612 * 10,240 = 6,266,880 distinct values in total.
List<Value> lData1 = new ArrayList<Value>(10240);   // sized for one batch
for (int j = 0; j < 612; j++) {
  // Build one batch; the (i, j) pair makes every string distinct.
  for (int i = 0; i < 10240; i++) {
    lData1.add(Value.get("america" + i + "british" + j));
  }

  // NOTE(review): keep this inside the loop, after the batch is filled.
  // Presumably the list contents are captured when the operation is created; confirm before hoisting.
  Operation[] ops2c = new Operation[] {
    HLLOperation.add(HLLPolicy.Default, hllBin1, lData1),
    HLLOperation.refreshCount(hllBin1)   // refresh the bin's count after each batch
  };

  // The per-batch result is not needed; the count is read once after the loop.
  client.operate(null, k_hll, ops2c);
  lData1.clear();   // reuse the same list for the next batch
}

// Read back the estimated cardinality of hllBin1.
Operation[] ops2d = new Operation[] { HLLOperation.getCount(hllBin1) };
rec = client.operate(null, k_hll, ops2d);
long c_hllBin1 = rec.getLong(hllBin1);

System.out.println("Cardinality of hllBin1 = "+ c_hllBin1+ "\n");

// Exact number of distinct values added: 612 * 10,240 = 6,266,880


Cardinality of hllBin1 = 6286805



In [9]:
// Add 81 more batches of 10,240 distinct "denmark/europe" strings to hllBin1.
lData1.clear();
for (int j = 0; j < 81; j++) {
  // Build one batch; the (i, j) pair makes every string distinct.
  for (int i = 0; i < 10240; i++) {
    lData1.add(Value.get("denmark" + i + "europe" + j));
  }

  // NOTE(review): keep this inside the loop, after the batch is filled.
  // Presumably the list contents are captured when the operation is created; confirm before hoisting.
  Operation[] ops2e = new Operation[] {
    HLLOperation.add(HLLPolicy.Default, hllBin1, lData1),
    HLLOperation.refreshCount(hllBin1)   // refresh the bin's count after each batch
  };

  // The per-batch result is not needed; the count is read once after the loop.
  client.operate(null, k_hll, ops2e);
  lData1.clear();   // reuse the same list for the next batch
}

// Read back the estimated cardinality of hllBin1.
Operation[] ops2f = new Operation[] { HLLOperation.getCount(hllBin1) };
rec = client.operate(null, k_hll, ops2f);
long c_hllBin1 = rec.getLong(hllBin1);

System.out.println("Cardinality of hllBin1 = "+ c_hllBin1+ "\n");

// 81 * 10,240 = 829,440
// Previous: 6,266,880 + 829,440 = 7,096,320


Cardinality of hllBin1 = 7097755



In [10]:
// Record and bin that will hold the second HyperLogLog sketch.
final String hllBin2 = "hllBin2";
final Key k_hll2 = new Key("test", null, "hllKey2");   // null set name: the record is not in a set

// indexBits and minHashBits are not redeclared here: the values defined for hllBin1 are reused.
Operation[] ops3a = { HLLOperation.init(HLLPolicy.Default, hllBin2, indexBits, minHashBits) };

// As with every HLL operation, initialization goes through operate().
Record rec = client.operate(null, k_hll2, ops3a);

In [11]:
// Feed hllBin2 with 3,399 batches of 10,240 distinct "finland/gambia" strings each.
lData1.clear();
for (int j = 0; j < 3399; j++) {
  // Build one batch; the (i, j) pair makes every string distinct.
  for (int i = 0; i < 10240; i++) {
    lData1.add(Value.get("finland" + i + "gambia" + j));
  }

  // NOTE(review): keep this inside the loop, after the batch is filled.
  // Presumably the list contents are captured when the operation is created; confirm before hoisting.
  Operation[] ops4a = new Operation[] {
    HLLOperation.add(HLLPolicy.Default, hllBin2, lData1),
    HLLOperation.refreshCount(hllBin2)   // refresh the bin's count after each batch
  };

  // The per-batch result is not needed; the count is read once after the loop.
  client.operate(null, k_hll2, ops4a);
  lData1.clear();   // reuse the same list for the next batch
}

// Read back the estimated cardinality of hllBin2.
Operation[] ops5a = new Operation[] { HLLOperation.getCount(hllBin2) };
rec = client.operate(null, k_hll2, ops5a);
long c_hllBin2 = rec.getLong(hllBin2);

System.out.println("Cardinality of hllBin2 = "+ c_hllBin2+ "\n");

// 3,399 * 10,240 = 34,805,760

Cardinality of hllBin2 = 34870641



In [12]:
// Add the same 81 batches of "denmark/europe" strings to hllBin2 that were added to hllBin1,
// so the two sketches share a known common subset.
lData1.clear();
for (int j = 0; j < 81; j++) {
  // Build one batch; the (i, j) pair makes every string distinct.
  for (int i = 0; i < 10240; i++) {
    lData1.add(Value.get("denmark" + i + "europe" + j));
  }

  // NOTE(review): keep this inside the loop, after the batch is filled.
  // Presumably the list contents are captured when the operation is created; confirm before hoisting.
  Operation[] ops7a = new Operation[] {
    HLLOperation.add(HLLPolicy.Default, hllBin2, lData1),
    HLLOperation.refreshCount(hllBin2)   // refresh the bin's count after each batch
  };

  // The per-batch result is not needed; the count is read once after the loop.
  client.operate(null, k_hll2, ops7a);
  lData1.clear();   // reuse the same list for the next batch
}

// Read back the estimated cardinality of hllBin2.
Operation[] ops8a = new Operation[] { HLLOperation.getCount(hllBin2) };
rec = client.operate(null, k_hll2, ops8a);
long c_hllBin2 = rec.getLong(hllBin2);

System.out.println("Cardinality of hllBin2 = "+ c_hllBin2+ "\n");

// 81 * 10,240 = 829,440
// Previous: 34,805,760 + 829,440 = 35,635,200

Cardinality of hllBin2 = 35689178



In [14]:
// Fetch hllBin1 as an HLL value so it can be passed as an argument to an operation on hllBin2.
rec = client.get(null, k_hll, hllBin1);
Value.HLLValue hllBin1val = rec.getHLLValue(hllBin1);

// Plain list construction; double-brace initialization would create an anonymous ArrayList subclass.
List<Value.HLLValue> hllList_hllBin1val = new ArrayList<Value.HLLValue>();
hllList_hllBin1val.add(hllBin1val);

// Run the intersect-count operation against hllBin2, passing hllBin1's sketch.
Operation[] ops9a = new Operation[] {
  HLLOperation.getIntersectCount(hllBin2, hllList_hllBin1val)
};
rec = client.operate(null, k_hll2, ops9a);

// Dedicated variable: the result is an intersection estimate, not the cardinality of hllBin2,
// so it must not overwrite c_hllBin2.
long c_intersect = rec.getLong(hllBin2);

System.out.println("Cardinality of Intersection = "+ c_intersect+ "\n");

//Common data between hllBin1 and hllBin2 - denmark europe: 81 * 10,240 = 829,440

Cardinality of Intersection = 816166



# Cleanup

In [15]:
// Release the client's connections; no later cell uses the client.
client.close();

// Remove the demo data: ask asadm on 127.0.0.1 to truncate the "test" namespace.
%sh asadm --enable -e "manage truncate ns test --no-warn" -h "127.0.0.1"