# Orchestrating Script on Multiple Virtual Machines

Learning how to run a script on a bunch of virtual machines programatically from a central controller machine. Hypothetically I could use this to divide work for a complex task among multiple machines. I'll be using ssh to communicate with the machines

In [1]:
import paramiko
import time
import codecs
import os
import stat
from random import randint
import concurrent.futures
from linode_api4 import (LinodeClient, StackScript, Instance, Tag, Type, Region)
from os import environ
from dotenv import load_dotenv
load_dotenv()
bufsize = int(2**16)

# Parameters
num_workers = 3
nth_primes = [
    randint(1000, 10000)
    for _ in range(100)
]
print('Nth Primes to Find:', nth_primes)

# Linode client
api_key = environ['LINODE_API_KEY']
linode_client = LinodeClient(api_key)

Nth Primes to Find: [7671, 6514, 7987, 3500, 7774, 8073, 2683, 3629, 6836, 7845, 7743, 5256, 3519, 6918, 2263, 1081, 6627, 5236, 1726, 4010, 4797, 5746, 7292, 8227, 4939, 2883, 8883, 9647, 3108, 4433, 7475, 1207, 9855, 7786, 2605, 7673, 8156, 2145, 1783, 5802, 6911, 2285, 1151, 3485, 8007, 5670, 4207, 4830, 7829, 9291, 6392, 7709, 4685, 7116, 4394, 8094, 7612, 4976, 7744, 3037, 4470, 1904, 5878, 4318, 4237, 3313, 2648, 2676, 1567, 1885, 2301, 5207, 3739, 1940, 3422, 6503, 8330, 4386, 6849, 9731, 8975, 6944, 6559, 8662, 7976, 4729, 5345, 9665, 7194, 9606, 9770, 8097, 2836, 4165, 6252, 1365, 7126, 7500, 5783, 7026]


Can we access linode with the right credentials?

In [2]:
linode_client.account.users()[0]

User: anshulkharbanda

To clean up our workspace, we'll delete old instances

In [3]:
tag = next(tag for tag in linode_client.tags() if tag.label == 'worker')
instances = [obj for obj in tag.objects if isinstance(obj, Instance)]
for instance in instances:
    instance.delete()
print(f'{len(instances)} Instance(s) deleted...')

0 Instance(s) deleted...


We're interested in creating a few small worker nodes in the us-east region. Let's query linode with our requirements

In [4]:
worker_script = linode_client.linode.stackscripts(StackScript.label == 'worker', mine_only=True)[0]
image = next(image for image in worker_script.images if 'ubuntu22.04' in image.id)
linode_type = linode_client.linode.types(Type.label.contains('nanode'))[0]
region = next(region for region in linode_client.regions() if 'us-east' in region.id)
print(worker_script, image, linode_type, region, sep='\n')

StackScript: 1078791
Image: linode/ubuntu22.04
Type: g6-nanode-1
Region: us-east


Now we can create a series of workers using these objects

In [5]:
_passwords = [] # Hopefully we won't need this :-\

# Create the workers
workers = []
for i in range(num_workers):
    # Create an instance on linode for workers
    instance, password = linode_client.linode.instance_create(ltype=linode_type,
                                                              region=region,
                                                              image=image,
                                                              stackscript=worker_script,
                                                              label=f'worker-{i}',
                                                              tags=['worker', 'hobby', 'temp'])
    
    # Save to workers array
    workers.append(instance)
    _passwords.append(password)
    
print(*workers, sep='\n')

# We're gonna wait for a bit for these instances to actually get up and running
print('Waiting for instances to setup...')
time.sleep(140)
print('Aight, we prolly good')

Instance: 39786633
Instance: 39786635
Instance: 39786639
Waiting for instances to setup...
Aight, we prolly good


Connect to server and execute some commands in an interactive shell

In [6]:
commands = [
    'cd workerdir',
    'ls -a']

# Run commands in ssh client
ssh_client = paramiko.client.SSHClient()
ssh_client.load_system_host_keys()
ssh_client.set_missing_host_key_policy(paramiko.client.AutoAddPolicy)
ssh_client.connect(workers[0].ipv4[0], username='worker')
shell = ssh_client.invoke_shell()
time.sleep(0.1)
banner_out = shell.recv(bufsize)
banner_out = codecs.decode(banner_out, 'utf-8')
output = banner_out
for command in commands:
    command_enc = codecs.encode(command + '\n', 'utf-8')
    shell.send(command_enc)
    time.sleep(0.1)
    command_out = shell.recv(bufsize)
    command_out = codecs.decode(command_out, 'utf-8')
    output += command_out
ssh_client.close()

# Get output
print(output)

Welcome to Ubuntu 22.04.1 LTS (GNU/Linux 5.15.0-47-generic x86_64)

 * Documentation:  https://help.ubuntu.com
 * Management:     https://landscape.canonical.com
 * Support:        https://ubuntu.com/advantage

  System information as of Tue Oct 25 06:47:28 AM UTC 2022

  System load:           0.83740234375
  Usage of /:            11.8% of 24.04GB
  Memory usage:          19%
  Swap usage:            0%
  Processes:             102
  Users logged in:       0
  IPv4 address for eth0: 97.107.141.93
  IPv6 address for eth0: 2600:3c03::f03c:93ff:fed9:7e63

50 updates can be applied immediately.
30 of these updates are standard security updates.
To see these additional updates run: apt list --upgradable



The programs included with the Ubuntu system are free software;
the exact distribution terms for each program are described in the
individual files in /usr/share/doc/*/copyright.

Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by
applicable law.

To run a command as a

Transfer a script to the worker

In [7]:
ssh_client = paramiko.client.SSHClient()
ssh_client.load_system_host_keys()
ssh_client.set_missing_host_key_policy(paramiko.client.AutoAddPolicy)
ssh_client.connect(workers[0].ipv4[0], username='worker')
sftp = ssh_client.open_sftp()
if 'workerdir' not in sftp.listdir():
    stats = sftp.mkdir('workerdir')
stats = sftp.put('worker-script.py', 'workerdir/worker-script.py')
print(stats)
ssh_client.close()

-rw-rw-r--   1 1000     1000         1755 25 Oct 02:47 ?


Run script on worker

In [8]:
# Random nth prime number to find
n = randint(2100, 2800)

# Run on worker
ssh_client = paramiko.client.SSHClient()
ssh_client.load_system_host_keys()
ssh_client.set_missing_host_key_policy(paramiko.client.AutoAddPolicy)
ssh_client.connect(workers[0].ipv4[0], username='worker')
stdin, stdout, stderr = ssh_client.exec_command(f'python workerdir/worker-script.py {n} > workerdir/result.txt')
stdout.channel.recv_exit_status()
sftp = ssh_client.open_sftp()
with sftp.open('workerdir/result.txt', 'r') as f:
    data = f.read()
    data = codecs.decode(data, 'utf-8')
ssh_client.close()
number = int(data)

# Return number
print(number)

20921


Now do this concurrently

In [11]:
# Partition array
nth_prime_partition = []
for i, worker in enumerate(workers):
    s = i*len(nth_primes)//len(workers)
    e = (i + 1)*len(nth_primes)//len(workers)
    nth_prime_partition.append((worker, nth_primes[s:e]))
print('Partitions:', *nth_prime_partition, sep='\n', end='\n\n')

def generate_prime_number(worker, nth_primes):
    """
    Generate prime numbers for worker
    
    :worker:     ip address of worker
    :nth_primes: list of n'th prime numbers to find
    """
    ssh_client = paramiko.client.SSHClient()
    ssh_client.load_system_host_keys()
    ssh_client.set_missing_host_key_policy(paramiko.client.AutoAddPolicy)
    ssh_client.connect(worker.ipv4[0], username='worker')
    sftp = ssh_client.open_sftp()
    if 'workerdir' not in sftp.listdir():
        stats = sftp.mkdir('workerdir')
    stats = sftp.put('worker-script.py', 'workerdir/worker-script.py')
    sftp.close()
    stdin, stdout, stderr = ssh_client.exec_command('rm -f workerdir/result.txt')
    stdout.channel.recv_exit_status()
    nstring = ' '.join(map(str, nth_primes))
    stdin, stdout, stderr = ssh_client.exec_command(f'python workerdir/worker-script.py {nstring} >> workerdir/result.txt')
    stdout.channel.recv_exit_status()
    sftp = ssh_client.open_sftp()
    with sftp.open('workerdir/result.txt', 'r') as f:
        data = f.read()
        data = codecs.decode(data, 'utf-8')
    ssh_client.close()
    return [ int(num) for num in data.split('\n') if num ]


# Run commands on workers using separate threads
with concurrent.futures.ThreadPoolExecutor(max_workers=len(workers)) as exe:
    futures_with_workers = { 
        exe.submit(generate_prime_number, worker, nth_primes) : (worker, nth_primes) 
        for worker, nth_primes in nth_prime_partition }
    for future in concurrent.futures.as_completed(futures_with_workers):
        worker, nth_primes = futures_with_workers[future]
        try:
            primes = future.result()
            print('Worker', worker.label, 'generated:')
            for nth, prime in zip(nth_primes, primes):
                print(f'\t{nth} = {prime}')
        except Exception as exc:
            print('Worker', worker.label, 'resulted in error:', exc)

Partitions:
(Instance: 39786633, [7786, 2605, 7673, 8156, 2145, 1783, 5802, 6911, 2285, 1151, 3485])
(Instance: 39786635, [8007, 5670, 4207, 4830, 7829, 9291, 6392, 7709, 4685, 7116, 4394])
(Instance: 39786639, [8094, 7612, 4976, 7744, 3037, 4470, 1904, 5878, 4318, 4237, 3313])

Worker worker-0 generated:
	7786 = 79433
	2605 = 23369
	7673 = 78139
	8156 = 83563
	2145 = 18797
	1783 = 15271
	5802 = 57203
	6911 = 69697
	2285 = 20201
	1151 = 9293
	3485 = 32479
Worker worker-1 generated:
	8007 = 81899
	5670 = 55849
	4207 = 40037
	4830 = 46747
	7829 = 79907
	9291 = 96469
	6392 = 63737
	7709 = 78569
	4685 = 45131
	7116 = 71887
	4394 = 42017
Worker worker-2 generated:
	8094 = 82811
	7612 = 77489
	4976 = 48371
	7744 = 78977
	3037 = 27809
	4470 = 42743
	1904 = 16427
	5878 = 58049
	4318 = 41233
	4237 = 40423
	3313 = 30707


Tear everything down

In [12]:
tag = next(tag for tag in linode_client.tags() if tag.label == 'worker')
instances = [obj for obj in tag.objects if isinstance(obj, Instance)]
for instance in instances:
    instance.delete()
print(f'{len(instances)} Instance(s) deleted...')

3 Instance(s) deleted...
