forked from thepaul/cassandra-dtest
-
Notifications
You must be signed in to change notification settings - Fork 0
/
largecolumn_test.py
54 lines (46 loc) · 2.91 KB
/
largecolumn_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
from dtest import Tester, debug
from tools import since
"""
Check that inserting and reading large columns to the database doesn't cause off heap memory usage
that is proportional to the size of the memory read/written.
"""
@since('3.0')
class TestLargeColumn(Tester):
    """
    Check that inserting and reading large columns doesn't cause off-heap
    memory usage proportional to the size of the data read/written.
    Regression test for CASSANDRA-8670.
    """

    def stress_with_col_size(self, cluster, node, size):
        """
        Write then read 5 rows, each with a single column of `size` bytes,
        via cassandra-stress at CL=ALL so both replicas are exercised.
        """
        size = str(size)
        node.stress(['write', 'n=5', "no-warmup", "cl=ALL", "-pop", "seq=1...5", "-schema", "replication(factor=2)", "-col", "n=fixed(1)", "size=fixed(" + size + ")", "-rate", "threads=1"])
        node.stress(['read', 'n=5', "no-warmup", "cl=ALL", "-pop", "seq=1...5", "-schema", "replication(factor=2)", "-col", "n=fixed(1)", "size=fixed(" + size + ")", "-rate", "threads=1"])

    def directbytes(self, node):
        """
        Return the direct (off-heap) byte count reported by `nodetool gcstats`
        as a string — the seventh whitespace-separated field of the data row.
        """
        output = node.nodetool("gcstats", capture_output=True)
        lines = output[0].split("\n")
        assert lines[0].strip().startswith("Interval"), "Expected output from nodetool gcstats starts with a header line with first column Interval"
        fields = lines[1].split()
        # fields[6] is returned below, so there must be at least SEVEN columns.
        # (The original asserted >= 6, which would still allow an IndexError.)
        assert len(fields) >= 7, "Expected output from nodetool gcstats has at least seven fields"
        for field in fields:
            assert field.strip().isdigit() or field == 'NaN', "Expected numeric from fields from nodetool gcstats"
        return fields[6]

    def cleanup_test(self):
        """
        Write/read both small and very large columns and verify that direct
        (off-heap) memory usage does not grow in proportion to the column
        size. See CASSANDRA-8670.
        """
        cluster = self.cluster

        # Commit log segment size needs to increase for the database to be
        # willing to accept columns that large.
        # Internode compression is disabled because the regression being
        # tested occurs in NIO buffer pooling without compression.
        cluster.set_configuration_options({'commitlog_segment_size_in_mb': 128, 'internode_compression': 'none'})

        # Have Netty allocate memory on heap so it is clear whether memory
        # used for large columns is related to intracluster messaging.
        cluster.populate(2).start(jvm_args=[" -Dcassandra.netty_use_heap_allocator=true "])
        node1, node2 = cluster.nodelist()
        # Connection is made only to confirm node1 is accepting CQL clients.
        session = self.patient_cql_connection(node1)

        debug("Before stress {0}".format(self.directbytes(node1)))
        debug("Running stress")
        # Run the full stack to see how much memory is utilized for "small" columns.
        self.stress_with_col_size(cluster, node1, 1)
        debug("Ran stress once {0}".format(self.directbytes(node1)))

        # Now run the full stack to see how much memory is utilized for "large" columns.
        # (A redundant, unused `nodetool gcstats` call was removed here:
        # directbytes() below already performs it.)
        self.stress_with_col_size(cluster, node1, 1024 * 1024 * 63)
        afterStress = self.directbytes(node1)
        debug("After stress {0}".format(afterStress))

        # Any growth in memory usage should not be proportional to column size.
        # Really almost no direct memory should be used, since Netty was
        # instructed to use a heap allocator.
        assert int(afterStress) < 1024 * 1024 * 2, int(afterStress)