Skip to content

Commit

Permalink
datastax_pssh and S3 store/restore capabilities
Browse files Browse the repository at this point in the history
  • Loading branch information
joaquincasares committed Jun 15, 2012
1 parent 7eb6036 commit 0380747
Show file tree
Hide file tree
Showing 9 changed files with 302 additions and 2 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Expand Up @@ -6,6 +6,8 @@ Changes
and smoke testing scripts.
* 1.17-0: Notify users when `ssh-keyscan` fails and workaround
OpsCenter 2.1.0 bug with installing agents.
* 1.18-0: Added datastax_pssh and s3 store/restore functionality.
See [New Features](https://github.com/joaquincasares/cassandralauncher/tree/master/docs/new_features.md) for details.

Automated Features
==================
Expand Down
46 changes: 45 additions & 1 deletion cassandralauncher/cassandralauncher.py
Expand Up @@ -110,8 +110,14 @@ def install_datastax_ssh(user):
# Send files to the cluster
for ip in public_ips:
scp_send(user, ip, tmp_file.name, 'nodelist')

scp_send(user, ip, datastax_ssh)
exe_ssh_cmd(create_ssh_cmd(user, ip), 'chmod +x datastax_ssh; sudo mv datastax_ssh /usr/bin/datastax_ssh; sudo mkdir /etc/cassandralauncher; sudo mv nodelist /etc/cassandralauncher/nodelist')
scp_send(user, ip, datastax_ssh.replace('datastax_ssh', 'datastax_pssh'))

exe_ssh_cmd(create_ssh_cmd(user, ip), 'chmod +x datastax_ssh; sudo mv datastax_ssh /usr/bin/')
exe_ssh_cmd(create_ssh_cmd(user, ip), 'chmod +x datastax_pssh; sudo mv datastax_pssh /usr/bin/')

exe_ssh_cmd(create_ssh_cmd(user, ip), 'sudo mkdir /etc/cassandralauncher; sudo mv nodelist /etc/cassandralauncher/nodelist')

def upload_smoke_tests(user):
# Find the datastax_ssh original file
Expand Down Expand Up @@ -145,6 +151,35 @@ def install_hosts_appending(user):
exe_ssh_cmd(create_ssh_cmd(user, ip), "sudo su -c 'cat hosts_file >> /etc/hosts'")
exe_ssh_cmd(create_ssh_cmd(user, ip), 'rm hosts_file')

def setup_s3_store_and_restore(user):
    """Install the S3 store/restore helper scripts on every node in the cluster.

    An ``s3cfg`` template is located, its ``$ACCESS_KEY`` / ``$SECRET_KEY``
    placeholders are filled in from the launcher options, and -- only when the
    ``send_s3_credentials`` option is set -- the result is sent to each node as
    ``.s3cfg`` and restricted to mode 400.  The ``datastax_s3_store`` and
    ``datastax_s3_restore`` scripts are always sent and moved into /usr/bin/.

    :param user: remote login name passed through to scp_send/create_ssh_cmd.
    :raises IOError: if no template exists at any searched location (the last
        candidate is opened without an existence check).
    """
    launcher_dir = os.path.dirname(__file__)

    # Template search order: next to the launcher, then the user's home
    # directory, then /etc/cassandralauncher as the unchecked last resort.
    preferred = [
        os.path.join(launcher_dir, 's3cfg'),
        os.path.join(os.path.expanduser('~'), '.s3cfg'),
    ]
    fallback = os.path.join('/etc', 'cassandralauncher', 's3cfg')
    s3cfg_default = next((path for path in preferred if os.path.exists(path)), fallback)

    with open(s3cfg_default) as f:
        s3cfg = f.read()

    s3cfg = s3cfg.replace('$ACCESS_KEY', check_cascading_options('aws_access_key_id'))
    s3cfg = s3cfg.replace('$SECRET_KEY', check_cascading_options('aws_secret_access_key'))

    # These values are the same for every node, so resolve them once up front.
    # NOTE(review): if check_cascading_options returns the raw config string,
    # 'False' would be truthy here -- confirm it yields a real boolean.
    send_credentials = check_cascading_options('send_s3_credentials', optional=True, ignore_command_line=True)
    helper_scripts = ['datastax_s3_store', 'datastax_s3_restore']

    # The temporary file is removed when the `with` block exits, so every
    # transfer that reads it has to happen inside the block.
    with tempfile.NamedTemporaryFile() as tmp_file:
        tmp_file.write(s3cfg)
        tmp_file.flush()

        for ip in public_ips:
            if send_credentials:
                scp_send(user, ip, tmp_file.name, '.s3cfg')
                exe_ssh_cmd(create_ssh_cmd(user, ip), 'chmod 400 .s3cfg')

            for script in helper_scripts:
                scp_send(user, ip, os.path.join(launcher_dir, script), script)
            exe_ssh_cmd(create_ssh_cmd(user, ip), 'chmod +x datastax_s3*; sudo mv datastax_s3* /usr/bin/')


#################################

#################################
Expand Down Expand Up @@ -367,6 +402,12 @@ def running_log(reservation, demotime):
'Prompt': 'AWS Secret Access Key',
'Help': 'AWS Secret Access Key'
},
'send_s3_credentials': {
'Section': 'S3',
'Prompt': 'Send S3 Credentials',
'Action': 'store_true',
'Help': 'Specify if S3 Credentials get uploaded'
},
'clustername': {
'Section': 'Cassandra',
'Prompt': 'Cluster Name',
Expand Down Expand Up @@ -715,6 +756,9 @@ def main():
print 'Setting up the hosts file for the cluster...'
install_hosts_appending(user)

print 'Setting up datastax_s3_store and datastax_s3_restore capabilities...'
setup_s3_store_and_restore(user)

install_opsc_agents(user)

end_time = int(time.time() - start_time)
Expand Down
6 changes: 6 additions & 0 deletions cassandralauncher/clusterlauncher.conf
Expand Up @@ -30,6 +30,12 @@ pem_home = ~/.ssh
placement = us-east-1c


[S3]
# send_s3_credentials: Send a preconfigured .s3cfg with your aws_access_key_id and aws_secret_access_key properly set under 400 permissions. For use with S3 store/restore capabilities.

send_s3_credentials = False


[Rax]
# rax_user: Found here: https://manage.rackspacecloud.com/APIAccess.do
# rax_api_key: Found here: https://manage.rackspacecloud.com/APIAccess.do
Expand Down
74 changes: 74 additions & 0 deletions cassandralauncher/datastax_pssh
@@ -0,0 +1,74 @@
#!/usr/bin/env python
### Script provided by DataStax.

import os
import random
import subprocess
import shlex
import sys


# Menu entries offered by the prompt further down; each is shown numbered by
# its position in this list.
# Entry 0 is a placeholder: choosing it asks for a free-form command instead.
# The datastax_s3_store / datastax_s3_restore entries are only run after the
# user passes an extra confirmation step in the menu loop.
commands = [
'Enter a custom command...',
'nodetool -h localhost ring',
'datastax_s3_store',
'datastax_s3_restore',
'sudo /etc/init.d/cassandra start',
'sudo /etc/init.d/cassandra stop',
'sudo /etc/init.d/cassandra restart'
]

# Function to execute commands and print traces of the command and output for debugging/logging purposes
def exe(command, log=True):
    """Run `command` without a shell, wait for it to finish, and return its output.

    :param command: command line as a single string; it is tokenised with
        shlex.split, so shell features (globs, pipes, redirects) are not
        interpreted locally.
    :param log: when true (the default) the captured stdout and stderr are
        echoed to this process's stdout.
    :returns: the ``(stdout, stderr)`` tuple from ``Popen.communicate()``.
    """
    # Executes command and wait for completion
    process = subprocess.Popen(shlex.split(command), stderr=subprocess.PIPE, stdout=subprocess.PIPE)
    read = process.communicate()

    # Prints output to stdout, unless the caller asked for a quiet run
    if log:
        print(read[0])
        print(read[1])

    return read

def datastax_ssh(command):
    """Run `command` through parallel-ssh on the hosts in /etc/cassandralauncher/nodelist.

    :param command: the command line to execute remotely, exactly as the user
        typed or selected it.
    """
    # Function-scope import so the script keeps working on Python 2, where
    # the quoting helper lives in `pipes` rather than `shlex`.
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote

    # exe() tokenises its argument with shlex.split.  Quoting the command
    # makes it survive that step as one argument, so the user's own quotes,
    # pipes and spacing are preserved, and an unbalanced quote in a custom
    # command no longer raises ValueError locally.
    exe('parallel-ssh --hosts /etc/cassandralauncher/nodelist --user ubuntu --print %s' % quote(command))

def _choose_command():
    """Show the menu until the user settles on a command, then return it.

    Entry 0 asks for a free-form command.  The datastax_s3_* entries require
    the user to echo back a random number first; a wrong answer ends the
    program with exit status 1.

    :returns: the command line to run across the cluster.
    """
    needs_confirmation = ('datastax_s3_store', 'datastax_s3_restore')

    while True:
        print("Choose a command to run across the cluster:")
        for i, command in enumerate(commands):
            print(" %s. %s" % (i, command))

        # Only a non-numeric answer is treated as a typo here.  A bare
        # `except:` would also swallow the SystemExit raised by the failed
        # confirmation below and the EOFError raised on a closed stdin.
        try:
            index = int(raw_input(""))
        except ValueError:
            index = None
        else:
            print('')

        # Reject out-of-range numbers explicitly; negative values would
        # otherwise silently index the menu from the end.
        if index is None or not 0 <= index < len(commands):
            print("Invalid selection. Please try again.")
            print('')
            continue

        if index == 0:
            custom = raw_input("Please input your command: ")
            print('')
            if custom.strip():
                return custom
            # A blank custom command sends the user back to the menu.
            continue

        chosen = commands[index]
        if chosen in needs_confirmation:
            number = random.randint(0, 100)
            answer = raw_input("Enter the number '%s' to verify you wish to run `%s` clusterwide: " % (number, chosen))
            if answer != str(number):
                sys.exit(1)

        print("Performing command: %s..." % chosen)
        return chosen


try:
    print("Welcome to DataStax' Parallel SSH Utility!")
    print('')

    datastax_ssh(_choose_command())
# Leave quietly, without a traceback, on Ctrl-C or when stdin is closed
except (KeyboardInterrupt, EOFError):
    print('')
58 changes: 58 additions & 0 deletions cassandralauncher/datastax_s3_restore
@@ -0,0 +1,58 @@
#!/usr/bin/env python

import ConfigParser
import os
import re
import shlex
import subprocess
import yaml

# s3cmd configuration file that this script parses further down for the
# access key and the optional bucket/data-directory settings.
configfile = '/home/ubuntu/.s3cfg'

def exe(command, wait=True):
    """Launch `command` as a subprocess, capturing its stdout and stderr.

    The command is converted to a string and tokenised with shlex.split, so
    no shell is involved.

    :param wait: when true, block until the process finishes and return the
        ``(stdout, stderr)`` tuple; otherwise return the running Popen object.
    """
    argv = shlex.split(str(command))
    child = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if not wait:
        return child
    return child.communicate()

def s3_restore():
    """Stop this node, replace its data directory with the copy in S3, and restart it.

    Reads the module-level settings ``root_data_dir``, ``bucket_name``,
    ``access_key``, ``cluster_name`` and ``initial_token``.  Anything the
    clearing or download step writes to stderr is printed; the exit status of
    the service commands is not checked.
    """
    data_dir = os.path.join(root_data_dir, 'data/')

    print("Stopping the node...")
    # Both service names are tried -- presumably only one exists on a given
    # install -- and their results are ignored.
    exe('sudo service dse stop')
    exe('sudo service cassandra stop')

    print("Clearing current data...")
    # exe() runs the command without a shell, so a `<dir>/*` glob would reach
    # rm as a literal asterisk and delete nothing.  find needs no glob
    # expansion: it removes everything below the directory but keeps the
    # directory itself.
    response = exe('sudo find %s -mindepth 1 -delete' % data_dir)
    if response[1]:
        print(response[1])

    print("Downloading data...")
    response = exe('sudo s3cmd sync --delete-removed s3://%s-%s/%s/%s/ %s' % (
        bucket_name, access_key,
        cluster_name, initial_token,
        data_dir
    ))
    if response[1]:
        print(response[1])

    print("Restarting the node...")
    exe('sudo service dse restart')
    exe('sudo service cassandra restart')


def _datastax_option(parser, option, default):
    """Return `option` from the [datastax] section of `parser`, or `default` if unset."""
    # The presence check has to look in the same section the value is read
    # from; has_option() is simply False when the section itself is missing.
    if parser.has_option('datastax', option):
        # RawConfigParser keeps surrounding quote characters verbatim, and
        # the s3cfg template writes these values quoted, so strip them.
        return parser.get('datastax', option).strip('\'"')
    return default


# Read access_key and the optional [datastax] overrides
config = ConfigParser.RawConfigParser()
config.read(configfile)
access_key = config.get('default', 'access_key')
bucket_name = _datastax_option(config, 'bucket_name', 'datastax_s3_storage')
root_data_dir = _datastax_option(config, 'root_data_dir', '/raid0/cassandra')

# Read cluster_name and initial_token
with open('/etc/dse/cassandra/cassandra.yaml') as f:
    # safe_load is sufficient for a plain configuration file and avoids
    # constructing arbitrary Python objects from YAML tags.
    dataMap = yaml.safe_load(f)
# Collapse every run of non-word characters so the name is safe in an S3 path
cluster_name = re.sub(r'\W+', '_', dataMap['cluster_name'])
initial_token = dataMap['initial_token']

s3_restore()
58 changes: 58 additions & 0 deletions cassandralauncher/datastax_s3_store
@@ -0,0 +1,58 @@
#!/usr/bin/env python

import ConfigParser
import os
import re
import shlex
import subprocess
import yaml

# s3cmd configuration file that this script parses further down for the
# access key and the optional bucket/data-directory settings.
configfile = '/home/ubuntu/.s3cfg'

def exe(command, wait=True):
    """Launch `command` as a subprocess, capturing its stdout and stderr.

    The command is converted to a string and tokenised with shlex.split, so
    no shell is involved.

    :param wait: when true, block until the process finishes and return the
        ``(stdout, stderr)`` tuple; otherwise return the running Popen object.
    """
    argv = shlex.split(str(command))
    child = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if not wait:
        return child
    return child.communicate()

def s3_store():
    """Drain this node, sync its data directory up to S3, then stop the node.

    Reads the module-level settings ``root_data_dir``, ``bucket_name``,
    ``access_key``, ``cluster_name`` and ``initial_token``.  Only the stderr
    of the upload step is reported; the other commands' results are ignored.
    """
    print("Draining the node...")
    exe('nodetool -h localhost drain')

    # Create the bucket
    bucket_url = 's3://%s-%s' % (bucket_name, access_key)
    exe('s3cmd mb %s' % bucket_url)

    print("Uploading data...")
    source_dir = os.path.join(root_data_dir, 'data/')
    destination = '%s/%s/%s/' % (bucket_url, cluster_name, initial_token)
    upload_errors = exe('sudo s3cmd sync --delete-removed %s %s' % (source_dir, destination))[1]
    if upload_errors:
        print(upload_errors)

    print("Stopping the node...")
    for service in ('dse', 'cassandra'):
        exe('sudo service %s stop' % service)


def _datastax_option(parser, option, default):
    """Return `option` from the [datastax] section of `parser`, or `default` if unset."""
    # The presence check has to look in the same section the value is read
    # from; has_option() is simply False when the section itself is missing.
    if parser.has_option('datastax', option):
        # RawConfigParser keeps surrounding quote characters verbatim, and
        # the s3cfg template writes these values quoted, so strip them.
        return parser.get('datastax', option).strip('\'"')
    return default


# Read access_key and the optional [datastax] overrides
config = ConfigParser.RawConfigParser()
config.read(configfile)
access_key = config.get('default', 'access_key')
bucket_name = _datastax_option(config, 'bucket_name', 'datastax_s3_storage')
root_data_dir = _datastax_option(config, 'root_data_dir', '/raid0/cassandra')

# Read cluster_name and initial_token
with open('/etc/dse/cassandra/cassandra.yaml') as f:
    # safe_load is sufficient for a plain configuration file and avoids
    # constructing arbitrary Python objects from YAML tags.
    dataMap = yaml.safe_load(f)
# Collapse every run of non-word characters so the name is safe in an S3 path
cluster_name = re.sub(r'\W+', '_', dataMap['cluster_name'])
initial_token = dataMap['initial_token']

s3_store()
39 changes: 39 additions & 0 deletions cassandralauncher/s3cfg
@@ -0,0 +1,39 @@
[default]
access_key = $ACCESS_KEY
acl_public = False
bucket_location = US
cloudfront_host = cloudfront.amazonaws.com
cloudfront_resource = /2008-06-30/distribution
default_mime_type = binary/octet-stream
delete_removed = False
dry_run = False
encoding = UTF-8
encrypt = False
force = False
get_continue = False
gpg_command = /usr/bin/gpg
gpg_decrypt = %(gpg_command)s -d --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_encrypt = %(gpg_command)s -c --verbose --no-use-agent --batch --yes --passphrase-fd %(passphrase_fd)s -o %(output_file)s %(input_file)s
gpg_passphrase = $SECRET_KEY
guess_mime_type = True
host_base = s3.amazonaws.com
host_bucket = %(bucket)s.s3.amazonaws.com
human_readable_sizes = False
list_md5 = False
preserve_attrs = True
progress_meter = True
proxy_host =
proxy_port = 0
recursive = False
recv_chunk = 4096
secret_key = $SECRET_KEY
send_chunk = 4096
simpledb_host = sdb.amazonaws.com
skip_existing = False
urlencoding_mode = normal
use_https = False
verbosity = WARNING

[datastax]
root_data_dir = '/raid0/cassandra'
bucket_name = 'datastax_s3_storage'
19 changes: 19 additions & 0 deletions docs/new_features.md
Expand Up @@ -13,3 +13,22 @@ Now you can simply run the DataStax SSH Client by executing: `datastax_ssh`. Thi
## Modified /etc/hosts

Now you can simply run an `ssh node0` or `ssh node1` command and easily jump from node to node from within your cluster.

## DataStax Parallel SSH Client

Now you can simply run the DataStax Parallel SSH Client by executing: `datastax_pssh`. This will connect to every machine in your cluster and execute your command of choice on all of them in parallel.

## S3 Store and Restore

Now you can use the `datastax_s3_store` and `datastax_s3_restore` commands to simply upload and download your `cassandra/data` directory to and from S3. Think of it as an "experimental EBS functionality for S3."

The files are stored in `s3://datastax_s3_storage-<YOUR_AWS_ACCESS_KEY_ID>/<CLUSTER_NAME>/<NODES_TOKEN>` using the `~/.s3cfg` file.

You may also use the `datastax_pssh` command to store and restore an entire cluster.

To automatically preconfigure `s3cmd`, which `datastax_s3_store` and `datastax_s3_restore` rely on, add or change the following in your `clusterlauncher.conf`:

[S3]
send_s3_credentials = True

This sends a `.s3cfg` file, preconfigured with your `aws_access_key_id` and `aws_secret_access_key` and set to 400 permissions, to the home directory on each node.
2 changes: 1 addition & 1 deletion setup.py
Expand Up @@ -20,7 +20,7 @@
"""

setup(name='CassandraLauncher',
version='1.17-0',
version='1.18-0',
description='Command line utilities for launching Cassandra clusters in EC2',
long_description=long_description,
author='Joaquin Casares',
Expand Down

0 comments on commit 0380747

Please sign in to comment.