Skip to content
Browse files

download and build 0.6.beta2

  • Loading branch information...
1 parent aa6d24d commit 9584bdfdabdaf4f892f6b2d410f4523dbdcb84b5 @ryanking ryanking committed Mar 9, 2010
Showing with 333 additions and 233 deletions.
  1. +7 −4 Rakefile
  2. +7 −11 conf/cassandra.in.sh
  3. +319 −218 conf/storage-conf.xml
View
11 Rakefile
@@ -16,7 +16,7 @@ unless ENV['FROM_BIN_CASSANDRA_HELPER']
end
CASSANDRA_HOME = "#{ENV['HOME']}/cassandra"
-DIST_URL = "http://github.com/downloads/ryanking/cassandra/apache-cassandra-incubating-0.5.0.2010-02-21-bin.tar.gz"
+DIST_URL = "http://apache.osuosl.org/incubator/cassandra/0.6.0/apache-cassandra-0.6.0-beta2-bin.tar.gz"
DIST_FILE = DIST_URL.split('/').last
directory CASSANDRA_HOME
@@ -26,9 +26,9 @@ desc "Start Cassandra"
task :cassandra => [:java, File.join(CASSANDRA_HOME, 'server'), File.join(CASSANDRA_HOME, 'test', 'data')] do
env = ""
if !ENV["CASSANDRA_INCLUDE"]
- env << "CASSANDRA_INCLUDE=#{Dir.pwd}/conf/cassandra.in.sh "
+ env << "CASSANDRA_INCLUDE=#{File.expand_path(Dir.pwd)}/conf/cassandra.in.sh "
env << "CASSANDRA_HOME=#{CASSANDRA_HOME}/server "
- env << "CASSANDRA_CONF=#{Dir.pwd}/conf"
+ env << "CASSANDRA_CONF=#{File.expand_path(Dir.pwd)}/conf"
end
Dir.chdir(File.join(CASSANDRA_HOME, 'server')) do
@@ -39,7 +39,10 @@ end
file File.join(CASSANDRA_HOME, 'server') => File.join(CASSANDRA_HOME, DIST_FILE) do
Dir.chdir(CASSANDRA_HOME) do
sh "tar xzvf #{DIST_FILE}"
- sh "mv #{DIST_FILE.split('.')[0..2].join('.')} server"
+ sh "mv #{DIST_FILE.split('.')[0..2].join('.').sub('-bin', '')} server"
+ Dir.chdir('server') do
+ sh "ant ivy-retrieve"
+ end
end
end
View
18 conf/cassandra.in.sh
@@ -18,34 +18,30 @@
CASSANDRA_CONF=$CASSANDRA_CONF
# This can be the path to a jar file, or a directory containing the
-# compiled classes.
+# compiled classes. NOTE: This isn't needed by the startup script,
+# it's just used here in constructing the classpath.
cassandra_bin=$CASSANDRA_HOME/build/classes
# The java classpath (required)
-CLASSPATH=$CASSANDRA_CONF:$cassandra_bin
+CLASSPATH=$CASSANDRA_CONF:$CASSANDRA_BIN
-for jar in $CASSANDRA_HOME/lib/*.jar; do
+for jar in $CASSANDRA_HOME/lib/*.jar $CASSANDRA_HOME/build/lib/jars/*.jar; do
CLASSPATH=$CLASSPATH:$jar
done
-echo "CASSANDRA_HOME: $CASSANDRA_HOME"
-echo "CASSANDRA_CONF: $CASSANDRA_CONF"
-
# Arguments to pass to the JVM
JVM_OPTS=" \
-ea \
- -Xdebug \
- -Xrunjdwp:transport=dt_socket,server=y,address=8888,suspend=n \
- -Xms512M \
+ -Xms128M \
-Xmx1G \
- -XX:SurvivorRatio=8 \
-XX:TargetSurvivorRatio=90 \
-XX:+AggressiveOpts \
-XX:+UseParNewGC \
-XX:+UseConcMarkSweepGC \
- -XX:CMSInitiatingOccupancyFraction=1 \
-XX:+CMSParallelRemarkEnabled \
-XX:+HeapDumpOnOutOfMemoryError \
+ -XX:SurvivorRatio=128 \
+ -XX:MaxTenuringThreshold=0 \
-Dcom.sun.management.jmxremote.port=8080 \
-Dcom.sun.management.jmxremote.ssl=false \
-Dcom.sun.management.jmxremote.authenticate=false"
View
537 conf/storage-conf.xml
@@ -7,232 +7,333 @@
~ "License"); you may not use this file except in compliance
~ with the License. You may obtain a copy of the License at
~
- ~ http:/www.apache.org/licenses/LICENSE-2.0
+ ~ http://www.apache.org/licenses/LICENSE-2.0
~
~ Unless required by applicable law or agreed to in writing,
~ software distributed under the License is distributed on an
~ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
~ KIND, either express or implied. See the License for the
~ specific language governing permissions and limitations
~ under the License.
- -->
+-->
<Storage>
- <!--======================================================================-->
- <!-- Basic Configuration -->
- <!--======================================================================-->
- <ClusterName>Test</ClusterName>
-
- <!-- Tables and ColumnFamilies
- Think of a table as a namespace, not a relational table.
- (ColumnFamilies are closer in meaning to those.)
-
- There is an implicit table named 'system' for Cassandra internals.
- -->
- <Keyspaces>
- <Keyspace Name="Twitter">
- <KeysCachedFraction>0.01</KeysCachedFraction>
- <ColumnFamily CompareWith="UTF8Type" Name="Users" />
- <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
- <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
- <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
- <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
- <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
- </Keyspace>
-
- <Keyspace Name="Multiblog">
- <KeysCachedFraction>0.01</KeysCachedFraction>
- <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
- <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
- </Keyspace>
+ <!--======================================================================-->
+ <!-- Basic Configuration -->
+ <!--======================================================================-->
- <Keyspace Name="MultiblogLong">
- <KeysCachedFraction>0.01</KeysCachedFraction>
- <ColumnFamily CompareWith="LongType" Name="Blogs"/>
- <ColumnFamily CompareWith="LongType" Name="Comments"/>
- </Keyspace>
- <Keyspace Name="CassandraObject">
+ <!--
+ ~ The name of this cluster. This is mainly used to prevent machines in
+ ~ one logical cluster from joining another.
+ -->
+ <ClusterName>Test</ClusterName>
+
+ <!--
+ ~ Turn on to make new [non-seed] nodes automatically migrate the right data
+ ~ to themselves. (If no InitialToken is specified, they will pick one
+ ~ such that they will get half the range of the most-loaded node.)
+ ~ If a node starts up without bootstrapping, it will mark itself bootstrapped
+ ~ so that you can't subsequently accidentally bootstrap a node with
+ ~ data on it. (You can reset this by wiping your data and commitlog
+ ~ directories.)
+ ~
+ ~ Off by default so that new clusters and upgraders from 0.4 don't
+ ~ bootstrap immediately. You should turn this on when you start adding
+ ~ new nodes to a cluster that already has data on it. (If you are upgrading
+ ~ from 0.4, start your cluster with it off once before changing it to true.
+ ~ Otherwise, no data will be lost but you will incur a lot of unnecessary
+ ~ I/O before your cluster starts up.)
+ -->
+ <AutoBootstrap>false</AutoBootstrap>
+
+ <!--
+ ~ Keyspaces and ColumnFamilies:
+ ~ A ColumnFamily is the Cassandra concept closest to a relational
+ ~ table. Keyspaces are separate groups of ColumnFamilies. Except in
+ ~ very unusual circumstances you will have one Keyspace per application.
+
+ ~ There is an implicit keyspace named 'system' for Cassandra internals.
+ -->
+ <Keyspaces>
+ <Keyspace Name="Twitter">
<KeysCachedFraction>0.01</KeysCachedFraction>
- <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomersByLastName" />
- <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
- <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
- <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
- <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
- <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
- </Keyspace>
- </Keyspaces>
-
- <!-- Partitioner: any IPartitioner may be used, including your own
- as long as it is on the classpath. Out of the box,
- Cassandra provides
- org.apache.cassandra.dht.RandomPartitioner and
- org.apache.cassandra.dht.OrderPreservingPartitioner.
- Range queries require using OrderPreservingPartitioner or a subclass.
-
- Achtung! Changing this parameter requires wiping your data directories,
- since the partitioner can modify the sstable on-disk format.
- -->
- <Partitioner>org.apache.cassandra.dht.OrderPreservingPartitioner</Partitioner>
-
- <!-- If you are using the OrderPreservingPartitioner and you know your key
- distribution, you can specify the token for this node to use.
- (Keys are sent to the node with the "closest" token, so distributing
- your tokens equally along the key distribution space will spread
- keys evenly across your cluster.) This setting is only checked the
- first time a node is started.
-
- This can also be useful with RandomPartitioner to force equal
- spacing of tokens around the hash space, especially for
- clusters with a small number of nodes. -->
- <InitialToken></InitialToken>
-
-
- <!-- EndPointSnitch: Setting this to the class that implements IEndPointSnitch
- which will see if two endpoints are in the same data center or on the same rack.
- Out of the box, Cassandra provides
- org.apache.cassandra.locator.EndPointSnitch
- -->
- <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
-
- <!-- Strategy: Setting this to the class that implements IReplicaPlacementStrategy
- will change the way the node picker works.
- Out of the box, Cassandra provides
- org.apache.cassandra.locator.RackUnawareStrategy
- org.apache.cassandra.locator.RackAwareStrategy
- (place one replica in a different datacenter, and the
- others on different racks in the same one.)
- -->
- <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
-
- <!-- Number of replicas of the data-->
- <ReplicationFactor>1</ReplicationFactor>
-
- <!-- Directories: Specify where Cassandra should store different data on disk
- Keep the data disks and the CommitLog disks separate for best performance
- -->
- <CommitLogDirectory>data/commitlog</CommitLogDirectory>
- <DataFileDirectories>
- <DataFileDirectory>data/data</DataFileDirectory>
- </DataFileDirectories>
- <CalloutLocation>data/callouts</CalloutLocation>
- <BootstrapFileDirectory>data/bootstrap</BootstrapFileDirectory>
- <StagingFileDirectory>data/staging</StagingFileDirectory>
-
- <!-- Addresses of hosts that are deemed contact points. Cassandra nodes use
- this list of hosts to find each other and learn the topology of the ring.
- You must change this if you are running multiple nodes!
- -->
- <Seeds>
- <Seed>127.0.0.1</Seed>
- </Seeds>
-
-
- <!-- Miscellaneous -->
-
- <!-- time to wait for a reply from other nodes before failing the command -->
- <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
- <!-- size to allow commitlog to grow to before creating a new segment -->
- <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
-
-
- <!-- Local hosts and ports -->
-
- <!-- Address to bind to and tell other nodes to connect to.
- You _must_ change this if you want multiple nodes to be able
- to communicate!
-
- Leaving it blank leaves it up to InetAddress.getLocalHost().
- This will always do the Right Thing *if* the node is properly
- configured (hostname, name resolution, etc), and the Right
- Thing is to use the address associated with the hostname (it
- might not be). -->
- <ListenAddress>localhost</ListenAddress>
- <!-- TCP port, for commands and data -->
- <StoragePort>7000</StoragePort>
- <!-- UDP port, for membership communications (gossip) -->
- <ControlPort>7001</ControlPort>
-
- <!-- The address to bind the Thrift RPC service to. Unlike
- ListenAddress above, you *can* specify 0.0.0.0 here if you want
- Thrift to listen on all interfaces.
-
- Leaving this blank has the same effect it does for ListenAddress,
- (i.e. it will be based on the configured hostname of the node).
- -->
- <ThriftAddress>localhost</ThriftAddress>
- <!-- Thrift RPC port (the port clients connect to). -->
- <ThriftPort>9160</ThriftPort>
-
-
- <!--======================================================================-->
- <!-- Memory, Disk, and Performance -->
- <!--======================================================================-->
-
- <!-- Add column indexes to a row after its contents reach this size -->
- <ColumnIndexSizeInKB>256</ColumnIndexSizeInKB>
-
- <!--
- The maximum amount of data to store in memory before flushing to
- disk. Note: There is one memtable per column family, and this threshold
- is based solely on the amount of data stored, not actual heap memory
- usage (there is some overhead in indexing the columns).
- -->
- <MemtableSizeInMB>32</MemtableSizeInMB>
-
- <!--
- The maximum number of columns in millions to store in memory
- before flushing to disk. This is also a per-memtable setting.
- Use with MemtableSizeInMB to tune memory usage.
- -->
- <MemtableObjectCountInMillions>0.01</MemtableObjectCountInMillions>
-
- <!-- Unlike most systems, in Cassandra writes are faster than
- reads, so you can afford more of those in parallel.
- A good rule of thumb is 2 concurrent reads per processor core.
- You especially want more concurrentwrites if you are using
- CommitLogSync + CommitLogSyncDelay. -->
- <ConcurrentReads>8</ConcurrentReads>
- <ConcurrentWrites>32</ConcurrentWrites>
-
- <!-- CommitLogSync may be either "periodic" or "batch."
- When in batch mode, Cassandra won't ack writes until the commit log
- has been fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
- milliseconds for other writes, before performing the sync.
-
- This is less necessary in Cassandra
- than in traditional databases since replication reduces the
- odds of losing data from a failure after writing the log
- entry but before it actually reaches the disk. So the other
- option is "timed," where wirtes may be acked immediately
- and the CommitLog is simply synced every CommitLogSyncPeriodInMS
- milliseconds.
- -->
- <CommitLogSync>periodic</CommitLogSync>
- <!-- Interval at which to perform syncs of the CommitLog in periodic
- mode. Usually the default of 1000ms is fine; increase it
- only if the CommitLog PendingTasks backlog in jmx shows that
- you are frequently scheduling a second sync while the first
- has not yet been processed.
- -->
- <CommitLogSyncPeriodInMS>1000</CommitLogSyncPeriodInMS>
- <!-- Delay (in microseconds) during which additional commit log
- entries may be written before fsync in batch mode. This will increase
- latency slightly, but can vastly improve throughput where
- there are many writers. Set to zero to disable
- (each entry will be synced individually).
- Reasonable values range from a minimal 0.1 to 10 or even more
- if throughput matters more than latency.
- -->
- <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
-
- <!-- Time to wait before garbage-collection deletion markers.
- Set this to a large enough value that you are confident
- that the deletion marker will be propagated to all replicas
- by the time this many seconds has elapsed, even in the
- face of hardware failures. The default value is ten days.
- -->
- <GCGraceSeconds>864000</GCGraceSeconds>
+ <ColumnFamily CompareWith="UTF8Type" Name="Users" />
+ <ColumnFamily CompareWith="UTF8Type" Name="UserAudits" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="UserRelationships" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Usernames" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Statuses" />
+ <ColumnFamily CompareWith="UTF8Type" Name="StatusAudits" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="StatusRelationships" />
+ <ColumnFamily CompareWith="UTF8Type" ColumnType="Super" Name="Index" />
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+
+ <Keyspace Name="Multiblog">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Blogs"/>
+ <ColumnFamily CompareWith="TimeUUIDType" Name="Comments"/>
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+
+ <Keyspace Name="MultiblogLong">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+ <ColumnFamily CompareWith="LongType" Name="Blogs"/>
+ <ColumnFamily CompareWith="LongType" Name="Comments"/>
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+
+ <Keyspace Name="CassandraObject">
+ <KeysCachedFraction>0.01</KeysCachedFraction>
+ <ColumnFamily CompareWith="UTF8Type" Name="Customers" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomerRelationships" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="CustomersByLastName" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Invoices" />
+ <ColumnFamily CompareWith="UTF8Type" CompareSubcolumnsWith="TimeUUIDType" ColumnType="Super" Name="InvoiceRelationships" />
+ <ColumnFamily CompareWith="UTF8Type" Name="InvoicesByNumber" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Payments" />
+ <ColumnFamily CompareWith="UTF8Type" Name="Appointments" />
+
+ <ReplicaPlacementStrategy>org.apache.cassandra.locator.RackUnawareStrategy</ReplicaPlacementStrategy>
+ <ReplicationFactor>1</ReplicationFactor>
+ <EndPointSnitch>org.apache.cassandra.locator.EndPointSnitch</EndPointSnitch>
+ </Keyspace>
+ </Keyspaces>
+
+ <!--
+ ~ Authenticator: any IAuthenticator may be used, including your own as long
+ ~ as it is on the classpath. Out of the box, Cassandra provides
+ ~ org.apache.cassandra.auth.AllowAllAuthenticator and
+ ~ org.apache.cassandra.auth.SimpleAuthenticator
+ ~ (SimpleAuthenticator uses access.properties and passwd.properties by
+ ~ default).
+ ~
+ ~ If you don't specify an authenticator, AllowAllAuthenticator is used.
+ -->
+ <Authenticator>org.apache.cassandra.auth.AllowAllAuthenticator</Authenticator>
+
+ <!--
+ ~ Partitioner: any IPartitioner may be used, including your own as long
+ ~ as it is on the classpath. Out of the box, Cassandra provides
+ ~ org.apache.cassandra.dht.RandomPartitioner,
+ ~ org.apache.cassandra.dht.OrderPreservingPartitioner, and
+ ~ org.apache.cassandra.dht.CollatingOrderPreservingPartitioner.
+ ~ (CollatingOPP collates according to EN,US rules, not naive byte
+ ~ ordering. Use this as an example if you need locale-aware collation.)
+ ~ Range queries require using an order-preserving partitioner.
+ ~
+ ~ Achtung! Changing this parameter requires wiping your data
+ ~ directories, since the partitioner can modify the sstable on-disk
+ ~ format.
+ -->
+ <Partitioner>org.apache.cassandra.dht.RandomPartitioner</Partitioner>
+
+ <!--
+ ~ If you are using an order-preserving partitioner and you know your key
+ ~ distribution, you can specify the token for this node to use. (Keys
+ ~ are sent to the node with the "closest" token, so distributing your
+ ~ tokens equally along the key distribution space will spread keys
+ ~ evenly across your cluster.) This setting is only checked the first
+ ~ time a node is started.
+
+ ~ This can also be useful with RandomPartitioner to force equal spacing
+ ~ of tokens around the hash space, especially for clusters with a small
+ ~ number of nodes.
+ -->
+ <InitialToken></InitialToken>
+
+ <!--
+ ~ Directories: Specify where Cassandra should store different data on
+ ~ disk. Keep the data disks and the CommitLog disks separate for best
+ ~ performance
+ -->
+ <CommitLogDirectory>/var/lib/cassandra/commitlog</CommitLogDirectory>
+ <DataFileDirectories>
+ <DataFileDirectory>/var/lib/cassandra/data</DataFileDirectory>
+ </DataFileDirectories>
+ <CalloutLocation>/var/lib/cassandra/callouts</CalloutLocation>
+ <StagingFileDirectory>/var/lib/cassandra/staging</StagingFileDirectory>
+
+
+ <!--
+ ~ Addresses of hosts that are deemed contact points. Cassandra nodes
+ ~ use this list of hosts to find each other and learn the topology of
+ ~ the ring. You must change this if you are running multiple nodes!
+ -->
+ <Seeds>
+ <Seed>127.0.0.1</Seed>
+ </Seeds>
+
+
+ <!-- Miscellaneous -->
+
+ <!-- Time to wait for a reply from other nodes before failing the command -->
+ <RpcTimeoutInMillis>5000</RpcTimeoutInMillis>
+ <!-- Size to allow commitlog to grow to before creating a new segment -->
+ <CommitLogRotationThresholdInMB>128</CommitLogRotationThresholdInMB>
+
+
+ <!-- Local hosts and ports -->
+
+ <!--
+ ~ Address to bind to and tell other nodes to connect to. You _must_
+ ~ change this if you want multiple nodes to be able to communicate!
+ ~
+ ~ Leaving it blank leaves it up to InetAddress.getLocalHost(). This
+ ~ will always do the Right Thing *if* the node is properly configured
+ ~ (hostname, name resolution, etc), and the Right Thing is to use the
+ ~ address associated with the hostname (it might not be).
+ -->
+ <ListenAddress>localhost</ListenAddress>
+ <!-- internal communications port -->
+ <StoragePort>7000</StoragePort>
+
+ <!--
+ ~ The address to bind the Thrift RPC service to. Unlike ListenAddress
+ ~ above, you *can* specify 0.0.0.0 here if you want Thrift to listen on
+ ~ all interfaces.
+ ~
+ ~ Leaving this blank has the same effect it does for ListenAddress,
+ ~ (i.e. it will be based on the configured hostname of the node).
+ -->
+ <ThriftAddress>localhost</ThriftAddress>
+ <!-- Thrift RPC port (the port clients connect to). -->
+ <ThriftPort>9160</ThriftPort>
+ <!--
+ ~ Whether or not to use a framed transport for Thrift. If this option
+ ~ is set to true then you must also use a framed transport on the
+ ~ client-side, (framed and non-framed transports are not compatible).
+ -->
+ <ThriftFramedTransport>false</ThriftFramedTransport>
+
+
+ <!--======================================================================-->
+ <!-- Memory, Disk, and Performance -->
+ <!--======================================================================-->
+
+ <!--
+ ~ Access mode. mmapped i/o is substantially faster, but only practical on
+ ~ a 64bit machine (which notably does not include EC2 "small" instances)
+ ~ or relatively small datasets. "auto", the safe choice, will enable
+ ~ mmapping on a 64bit JVM. Other values are "mmap", "mmap_index_only"
+ ~ (which may allow you to get part of the benefits of mmap on a 32bit
+ ~ machine by mmapping only index files) and "standard".
+ ~ (The buffer size settings that follow only apply to standard,
+ ~ non-mmapped i/o.)
+ -->
+ <DiskAccessMode>auto</DiskAccessMode>
+
+ <!--
+ ~ Buffer size to use when performing contiguous column slices. Increase
+ ~ this to the size of the column slices you typically perform.
+ ~ (Name-based queries are performed with a buffer size of
+ ~ ColumnIndexSizeInKB.)
+ -->
+ <SlicedBufferSizeInKB>64</SlicedBufferSizeInKB>
+
+ <!--
+ ~ Buffer size to use when flushing memtables to disk. (Only one
+ ~ memtable is ever flushed at a time.) Increase (decrease) the index
+ ~ buffer size relative to the data buffer if you have few (many)
+ ~ columns per key. Bigger is only better _if_ your memtables get large
+ ~ enough to use the space. (Check in your data directory after your
+ ~ app has been running long enough.) -->
+ <FlushDataBufferSizeInMB>32</FlushDataBufferSizeInMB>
+ <FlushIndexBufferSizeInMB>8</FlushIndexBufferSizeInMB>
+
+ <!--
+ ~ Add column indexes to a row after its contents reach this size.
+ ~ Increase if your column values are large, or if you have a very large
+ ~ number of columns. The competing concerns are: Cassandra has to
+ ~ deserialize this much of the row to read a single column, so you want
+ ~ it to be small - at least if you do many partial-row reads - but all
+ ~ the index data is read for each access, so you don't want to generate
+ ~ that wastefully either.
+ -->
+ <ColumnIndexSizeInKB>64</ColumnIndexSizeInKB>
+
+ <!--
+ ~ Flush memtable after this much data has been inserted, including
+ ~ overwritten data. There is one memtable per column family, and
+ ~ this threshold is based solely on the amount of data stored, not
+ ~ actual heap memory usage (there is some overhead in indexing the
+ ~ columns).
+ -->
+ <MemtableThroughputInMB>64</MemtableThroughputInMB>
+ <!--
+ ~ Throughput setting for Binary Memtables. Typically these are
+ ~ used for bulk load so you want them to be larger.
+ -->
+ <BinaryMemtableThroughputInMB>256</BinaryMemtableThroughputInMB>
+ <!--
+ ~ The maximum number of columns in millions to store in memory per
+ ~ ColumnFamily before flushing to disk. This is also a per-memtable
+ ~ setting. Use with MemtableThroughputInMB to tune memory usage.
+ -->
+ <MemtableOperationsInMillions>0.3</MemtableOperationsInMillions>
+ <!--
+ ~ The maximum time to leave a dirty memtable unflushed.
+ ~ (While any affected columnfamilies have unflushed data from a
+ ~ commit log segment, that segment cannot be deleted.)
+ ~ This needs to be large enough that it won't cause a flush storm
+ ~ of all your memtables flushing at once because none has hit
+ ~ the size or count thresholds yet. For production, a larger
+ ~ value such as 1440 is recommended.
+ -->
+ <MemtableFlushAfterMinutes>60</MemtableFlushAfterMinutes>
+
+ <!--
+ ~ Unlike most systems, in Cassandra writes are faster than reads, so
+ ~ you can afford more of those in parallel. A good rule of thumb is 2
+ ~ concurrent reads per processor core. Increase ConcurrentWrites to
+ ~ the number of clients writing at once if you enable batch CommitLogSync
+ ~ with a CommitLogSyncBatchWindowInMS delay. -->
+ <ConcurrentReads>8</ConcurrentReads>
+ <ConcurrentWrites>32</ConcurrentWrites>
+
+ <!--
+ ~ CommitLogSync may be either "periodic" or "batch." When in batch
+ ~ mode, Cassandra won't ack writes until the commit log has been
+ ~ fsynced to disk. It will wait up to CommitLogSyncBatchWindowInMS
+ ~ milliseconds for other writes, before performing the sync.
+
+ ~ This is less necessary in Cassandra than in traditional databases
+ ~ since replication reduces the odds of losing data from a failure
+ ~ after writing the log entry but before it actually reaches the disk.
+ ~ So the other option is "periodic," where writes may be acked immediately
+ ~ and the CommitLog is simply synced every CommitLogSyncPeriodInMS
+ ~ milliseconds.
+ -->
+ <CommitLogSync>periodic</CommitLogSync>
+ <!--
+ ~ Interval at which to perform syncs of the CommitLog in periodic mode.
+ ~ Usually the default of 10000ms is fine; increase it if your i/o
+ ~ load is such that syncs are taking excessively long times.
+ -->
+ <CommitLogSyncPeriodInMS>10000</CommitLogSyncPeriodInMS>
+ <!--
+ ~ Delay (in milliseconds) during which additional commit log entries
+ ~ may be written before fsync in batch mode. This will increase
+ ~ latency slightly, but can vastly improve throughput where there are
+ ~ many writers. Set to zero to disable (each entry will be synced
+ ~ individually). Reasonable values range from a minimal 0.1 to 10 or
+ ~ even more if throughput matters more than latency.
+ -->
+ <!-- <CommitLogSyncBatchWindowInMS>1</CommitLogSyncBatchWindowInMS> -->
+
+ <!--
+ ~ Time to wait before garbage-collecting deletion markers. Set this to
+ ~ a large enough value that you are confident that the deletion marker
+ ~ will be propagated to all replicas by the time this many seconds has
+ ~ elapsed, even in the face of hardware failures. The default value is
+ ~ ten days.
+ -->
+ <GCGraceSeconds>864000</GCGraceSeconds>
</Storage>

0 comments on commit 9584bdf

Please sign in to comment.
Something went wrong with that request. Please try again.