In [None]:
import shlex
import subprocess
from pathlib import Path

import boto3
import pandas as pd

# Shared EC2 resource handle used by the cells below to launch, tag and look
# up instances. No explicit region or credentials are passed here.
ec2 = boto3.resource('ec2')

In [None]:
# Sample sheet indexed by Sample_ID. Later cells read the 'Output name' and
# 'index' columns of each row.
sample_sheet = pd.read_csv('REDACTED', index_col='Sample_ID')

In [None]:
# Run every sample listed in the sheet; one instance is launched per entry.
samples_to_run = sample_sheet.index

In [None]:
# S3 location (without the s3:// scheme) that input files are downloaded from
# and that results are uploaded under (in a cellranger_output/ subfolder).
data_prefix = 'REDACTED'

In [None]:
# Launch exactly one instance per sample. The instances are configured to
# terminate (rather than stop) when they shut themselves down.
instance_count = len(samples_to_run)
print(f'Launching {instance_count} new instances')

launch_params = dict(
    ImageId='REDACTED',
    KeyName='REDACTED',
    SecurityGroups=['REDACTED'],
    InstanceType='i3.2xlarge',
    MinCount=instance_count,
    MaxCount=instance_count,
    InstanceInitiatedShutdownBehavior='terminate',
)
new_instances = ec2.create_instances(**launch_params)

In [None]:
# Block until each newly launched instance has reached the running state.
for launched in new_instances:
    launched.wait_until_running()

In [None]:
# Give every new instance a Name tag equal to its sample name. Instances and
# samples are paired positionally, so later cells can find a sample's
# instance by filtering on tag:Name.

for instance, sample_name in zip(new_instances, samples_to_run):
    name_tag = {'Key': 'Name', 'Value': sample_name}
    ec2.create_tags(Resources=[instance.id], Tags=[name_tag])

In [None]:
# Maps the sample sheet's 'index' value to the cellranger reference
# directory on the instance (passed as --transcriptome).
index_to_fn = dict(
    mm10='/home/ubuntu/refdata-cellranger-mm10-1.2.0',
    dummy='/home/ubuntu/refdata-cellranger-dummy',
)

In [None]:
# Configure each sample's instance and start its cellranger run.
#
# For every sample: find the running instance tagged with the sample name,
# render a bash script (format/mount scratch volume -> download inputs ->
# cellranger count -> upload outs -> e-mail -> shutdown), copy it over with
# scp, and start it inside a detached byobu session.
for sample_name in samples_to_run:
    print(sample_name)

    sample_row = sample_sheet.loc[sample_name]
    output_name = sample_row['Output name']
    index_fn = index_to_fn[sample_row['index']]

    # Initial instance object doesn't have public_dns_name, look up a new one.
    # Only running instances are considered: stopped or recently terminated
    # instances from earlier runs can still carry the same Name tag but have
    # no public DNS name to connect to.
    instance_filter = [
        {'Name': 'tag:Name', 'Values': [sample_name]},
        {'Name': 'instance-state-name', 'Values': ['running']},
    ]
    matching_instances = list(ec2.instances.filter(Filters=instance_filter))
    if not matching_instances:
        raise RuntimeError(
            f'No running instance found with tag Name={sample_name!r}'
        )
    # If several running instances share the name, the first one is used.
    instance = matching_instances[0]

    # NOTE(review): the script below exports static AWS keys in plain text;
    # copies end up on local disk and on the instance. An IAM instance
    # profile would avoid shipping secrets - confirm whether that is an option.
    # NOTE(review): sample_name / output_name / index_fn are interpolated
    # without shell quoting, so values containing spaces or shell
    # metacharacters would break the generated script.
    output = f'''\
function mount_data_volume {{
    sudo mkfs -t xfs /dev/nvme0n1
    sudo mount /dev/nvme0n1 /data
    sudo chmod a+rw /data
}}

function download_data {{
    aws s3 cp s3://{data_prefix} /data --recursive --exclude "*" --include "{sample_name}_*"
}}

function run_cellranger {{
    cd /data
    ~/cellranger-2.1.1/cellranger count \\
        --id={output_name} \\
        --transcriptome={index_fn} \\
        --sample {sample_name} \\
        --fastqs=/data \\
        --chemistry=SC3Pv2
}}

function upload_results {{
    aws s3 sync /data/{output_name}/outs s3://{data_prefix}/cellranger_output/{output_name}/outs
}}

function send_email {{
    aws --region us-west-2 ses send-email \\
        --from REDACTED \\
        --to REDACTED \\
        --subject "$1" \\
        --text "$2"
}}

export AWS_ACCESS_KEY_ID=REDACTED
export AWS_SECRET_ACCESS_KEY=REDACTED

mount_data_volume || {{ send_email "{sample_name} error" "{sample_name} mount data failed" ; exit 1; }}

download_data || {{ send_email "{sample_name} error" "{sample_name} download failed" ; exit 1; }}

run_cellranger || {{ send_email "{sample_name} error" "{sample_name} cellranger failed" ; exit 1; }}

upload_results || {{ send_email "{sample_name} error" "{sample_name} upload failed" ; exit 1; }}

send_email "{sample_name} finished" "{sample_name} has finished processing"

sudo shutdown now
'''

    # The rendered script is written locally first, then copied to the instance.
    script_fn = Path('REDACTED')
    script_fn.write_text(output)

    credentials = '-o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -i REDACTED'
    # Commands are run as argv lists (no local shell), so nothing in them is
    # re-interpreted by a shell on this machine. The options must therefore
    # not rely on local shell expansion such as $HOME.
    ssh_options = shlex.split(credentials)
    remote = f'ubuntu@{instance.public_dns_name}'

    for fn in [script_fn,
              ]:
        print('\t', fn)
        subprocess.run(
            ['scp', *ssh_options, str(fn), f'{remote}:~/{fn.name}'],
            check=True,
        )

    # Start a detached byobu session, then type the launch command into it.
    subprocess.run(
        ['ssh', *ssh_options, remote, 'byobu new-session -d -s cellranger'],
        check=True,
    )
    # Run the file that was just uploaded (by name) instead of a hard-coded
    # script name, so the upload and the launch command cannot drift apart.
    subprocess.run(
        ['ssh', *ssh_options, remote, f'byobu send-keys "bash {script_fn.name}" C-m'],
        check=True,
    )

In [None]:
def tags_to_dict(tags):
    """Convert an EC2-style tag list into a plain ``{key: value}`` dict.

    Args:
        tags: Iterable of dicts with 'Key' and 'Value' entries, or None
            (the value this notebook sees for instances without tags).

    Returns:
        Dict mapping each tag key to its value; empty when ``tags`` is None
        or empty.
    """
    return {tag['Key']: tag['Value'] for tag in (tags or ())}

# Write an SSH client config with one Host entry per instance, keyed by the
# instance's Name tag, so each instance can be reached by sample name.
with open('REDACTED', 'w') as config_fh:
    for instance in ec2.instances.all():
        print(instance.id, instance.tags, instance.public_dns_name)

        # instance.tags may be None (untagged instance); treat that as "no tags".
        sample_name = tags_to_dict(instance.tags or []).get('Name')

        # Skip instances that cannot yield a usable entry: those without a
        # Name tag (previously a KeyError) and those without a public DNS
        # name, which would emit a "Hostname" line with no argument.
        if not sample_name or not instance.public_dns_name:
            continue

        config_fh.write(f'''\
Host {sample_name}
    Hostname {instance.public_dns_name}
    User ubuntu
    IdentityFile REDACTED
''')