Skip to content

Performance drop compared to Java Client #149

@gokhanoner

Description

@gokhanoner

Reproducer:

1 - Start a Hazelcast Member
2 - Run the Java code below (Java 8 needed):

    /**
     * Reproducer: times one full entry-set iteration of the "test" map through
     * the Hazelcast Java client.
     *
     * When the map holds at most 10,000 entries it is refilled with 10,000
     * entries that all share one 1024-byte random payload. The program then
     * prints the map size, a timestamp before and after the iteration, the
     * number of entries iterated, and the accumulated hash-code sum.
     */
    public static void main(String[] args) {
        System.setProperty("hazelcast.logging.type", "none");
        HazelcastInstance hz = HazelcastClient.newHazelcastClient();
        try {
            IMap<Object, Object> test = hz.getMap("test");
            if(test.size() <= 10_000) {
                byte[] data = new byte[1024];
                ThreadLocalRandom.current().nextBytes(data);
                fillMap(test, 10_000, data);
            }
            System.out.println("Map Size = " + test.size());
            System.out.println(Instant.now());
            LongAdder tmp = new LongAdder();
            // peek() touches every key and value so that each entry is really
            // accessed while it is counted.
            // NOTE(review): on Java 9+ count() may skip the pipeline (and thus
            // peek) when the size is known from the source - presumably the
            // reason this reproducer asks for Java 8.
            long count = test.entrySet()
                    .stream()
                    .peek(e -> tmp.add(e.getValue().hashCode() + e.getKey().hashCode()))
                    .count();
            System.out.println("Entries Iterated = " + count);
            System.out.println(Instant.now());
            System.out.println(tmp.longValue());
        } finally {
            // Hazelcast.shutdownAll() only stops member instances in this JVM;
            // the client created above has to be shut down explicitly.
            hz.shutdown();
        }
    }

    /**
     * Clears {@code imap}, then stores {@code data} under each integer key
     * from 0 (inclusive) to {@code size} (exclusive).
     */
    private static void fillMap(IMap<Object, Object> imap, int size, Object data) {
        imap.clear();
        for (int key = 0; key < size; key++) {
            imap.put(key, data);
        }
    }

The result of this operation is below; on my laptop it took about 60-70 ms:

Map Size = 10000
2019-02-11T21:20:57.024Z
Entries Iterated = 10000
2019-02-11T21:20:57.088Z
10846178286848

3 - Run the Python client code below:

# -*- coding: utf-8 -*-
"""
Created on Mon Jan 14 09:11:59 2019

@author:
"""

import hazelcast, logging
from datetime import datetime

HC_ADDRESS = ['localhost:5701']
MAP_NAME = 'test'

def client_startup():
    """Create and return a Hazelcast client for the addresses in HC_ADDRESS.

    Also configures basic logging and sets the root logger to INFO so the
    client's log output is visible.
    """
    config = hazelcast.ClientConfig()
    config.network_config.addresses.extend(HC_ADDRESS)

    # basic logging setup to see client logs
    logging.basicConfig()
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    return hazelcast.HazelcastClient(config)

def client_shutdown(client):
    """Shut down the given client by calling its ``shutdown()`` method."""
    shutdown = client.shutdown
    shutdown()

def test_sync(client):
    """Time fetching the whole entry set through the blocking map proxy.

    Prints the map size, then for each of two variants (entry set wrapped in
    ``iter()`` and entry set used directly) prints a start timestamp, copies
    every entry into a list, prints a stop timestamp and the list length.
    """
    print('USING SYNC...')
    # connect to map
    blocking_map = client.get_map(MAP_NAME).blocking()

    # print map size
    print('map size: {}'.format(blocking_map.size()))

    # (banner, callable producing the iterable to drain) for each variant
    variants = (
        ('WITH ITERATOR...', lambda: iter(blocking_map.entry_set())),
        ('\nWITHOUT ITERATOR...', lambda: blocking_map.entry_set()),
    )
    for banner, fetch in variants:
        print(banner)
        print('{} start'.format(datetime.now()))
        entries = list(fetch())
        print('{} stop'.format(datetime.now()))
        print('list length: {}'.format(len(entries)))
    
def test_async(client):
    """Time fetching the whole entry set through the non-blocking map proxy.

    Each map call returns a future whose ``result()`` is awaited right away.
    Prints the map size, then for each of two variants (entry set wrapped in
    ``iter()`` and entry set used directly) prints a start timestamp, copies
    every entry into a list, prints a stop timestamp and the list length.
    """
    print('USING ASYNC...')
    # connect to map
    async_map = client.get_map(MAP_NAME)

    # print map size
    print('map size: {}'.format(async_map.size().result()))

    # (banner, callable producing the iterable to drain) for each variant
    variants = (
        ('WITH ITERATOR...', lambda: iter(async_map.entry_set().result())),
        ('\nWITHOUT ITERATOR...', lambda: async_map.entry_set().result()),
    )
    for banner, fetch in variants:
        print(banner)
        print('{} start'.format(datetime.now()))
        entries = list(fetch())
        print('{} stop'.format(datetime.now()))
        print('list length: {}'.format(len(entries)))
    
if __name__ == '__main__':
    # Script entry point: run the blocking and the future-based benchmark
    # against a single client, printing a separator before each.
    client = client_startup()
    try:
        print('\n--------------------------------------\n')
        test_sync(client)
        print('\n--------------------------------------\n')
        test_async(client)
    finally:
        # Release the client even when one of the benchmarks raises, so the
        # connection is not left open.
        client_shutdown(client)
    print('process done...')

The same operation here took around 1.5 seconds:

USING SYNC...
map size: 10000
WITH ITERATOR...
2019-02-11 13:24:37.506658 start
2019-02-11 13:24:41.732174 stop
list length: 10000

WITHOUT ITERATOR...
2019-02-11 13:24:41.732207 start
2019-02-11 13:24:43.075291 stop
list length: 10000

--------------------------------------

USING ASYNC...
map size: 10000
WITH ITERATOR...
2019-02-11 13:24:43.076840 start
2019-02-11 13:24:44.318692 stop
list length: 10000

WITHOUT ITERATOR...
2019-02-11 13:24:44.318747 start
2019-02-11 13:24:45.742133 stop
list length: 10000

4 - If you change the data from byte[] to String by changing the fillMap(test, 10_000, data) line to fillMap(test, 10_000, new String(data, StandardCharsets.UTF_8)), the Java client takes 90-100 ms to run the same code, but the Python code takes much longer, around 25 seconds:

USING SYNC...
map size: 10000
WITH ITERATOR...
2019-02-11 13:27:57.039696 start
2019-02-11 13:28:30.277160 stop
list length: 10000

WITHOUT ITERATOR...
2019-02-11 13:28:30.277190 start
2019-02-11 13:28:54.060542 stop
list length: 10000

--------------------------------------

USING ASYNC...
map size: 10000
WITH ITERATOR...
2019-02-11 13:28:54.062131 start
2019-02-11 13:29:19.383447 stop
list length: 10000

WITHOUT ITERATOR...
2019-02-11 13:29:19.383481 start
2019-02-11 13:29:43.178533 stop
list length: 10000

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions