# Disk performance test
- this notebook will be used to find the best configuration for ollama model storage
- you will find a separate toml file (to configure some parameters) and a auto generated md file in the same directory like this notebook
- the goal is to explore the best possible disk configuration to store ollama models 

In [35]:
import os
import subprocess
import toml
import json
import paramiko
import pandas as pd

In [36]:
config_toml = toml.load('performance-test.toml')

In [37]:
# load ssh key
k = paramiko.ed25519key.Ed25519Key(filename='/home/tom/.ssh/test')

# create ssh client
ssh = paramiko.SSHClient()

# connect to the server
ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
ssh.connect(hostname='141.33.165.24', username='root', pkey=k)

In [38]:
def get_pase_data(df, kind, raid, fs, bs, sz):
    print(f'Test with: k-{kind} r-{raid} f-{fs} b-{bs} s-{sz}')
    for model in config_toml['Model']['models']:
        print(f'Start test for {model}.')
        ssh.exec_command(f'echo 3 > /proc/sys/vm/drop_caches')[2].channel.recv_exit_status()
        ssh.exec_command(f'ollama pull {model}')[2].channel.recv_exit_status()
        print('Pulled model: ' + model)
        model_params = model.split(':')
        # open sftp client
        sftp_client = ssh.open_sftp()
        # open remote file
        remote_file = sftp_client.open('/ollama/.ollama/models/manifests/registry.ollama.ai/library/' + model_params[0] + '/' + model_params[1])
        # read remote file
        json_data = json.loads(remote_file.read())

        sftp_client.close()

        # create list of digests
        digests_list = []
        for item in json_data['layers']:
            digests_list.append(item['digest'].replace(':', '-'))

        pase_data = []
        for item in digests_list:
            print('Start test for: ' + item)
            command = 'dd if=/ollama/.ollama/models/blobs/' + item + ' of=/dev/zero'
            ssh_stdin, ssh_stdout, ssh_stderr = ssh.exec_command(command)
            pase_data_tmp = ssh_stderr.readlines()
            pase_data_time = pase_data_tmp[2].split('copied, ')[1].split(' s, ')[0]
            pase_data_bytes = pase_data_tmp[2].split(' bytes')[0]
            pase_data_tmp = [pase_data_time, pase_data_bytes]
            pase_data.append(pase_data_tmp)

        pase_time = 0.0
        pase_bytes = 0
        for item in pase_data:
            pase_time += float(item[0])
            pase_bytes += int(item[1])

        
        new_entry ={ 'model': model, 'time': pase_time, 'bytes': pase_bytes, 'MiB/s': '{:.4f}'.format((pase_bytes / pase_time) / 1024 / 1024),
                    'kind': kind, 'raid': raid, 'fs': fs, 'bs': bs, 'sz': sz}
        df.loc[len(df)] = new_entry
        print(f'Test for {model} completed: {new_entry}')


In [39]:
df = pd.DataFrame(columns=['model', 'time', 'bytes', 'MiB/s', 'kind', 'raid', 'fs', 'bs', 'sz'])
for kind in config_toml['Disk_Configuration']['kind']:
    for raid in config_toml['Disk_Configuration']['raid']:
        for fs in config_toml['Disk_Configuration']['filesystem']:
            for bs in config_toml['Disk_Configuration']['block_size']:
                for sz in config_toml['Disk_Configuration']['stripe_size']:
                    ssh.exec_command('systemctl stop ollama.service')[2].channel.recv_exit_status()
                    ssh.exec_command('umount /ollama/')[2].channel.recv_exit_status()
                    ssh.exec_command('lvremove -f /dev/ollama-m/ollama-m')[2].channel.recv_exit_status()
                    ssh.exec_command('vgremove -f ollama-m')[2].channel.recv_exit_status()
                    ssh.exec_command('pvremove -f /dev/sdd1 /dev/sdc1')[2].channel.recv_exit_status()
                    ssh.exec_command('mdadm --zero-superblock /dev/sdd /dev/sdc')[2].channel.recv_exit_status()
                    if(kind == 'lvm'):
                        ssh.exec_command('parted -s /dev/sdd mklabel mdr && sudo parted -s /dev/sdd mkpart primary 0% 100%')[2].channel.recv_exit_status()
                        ssh.exec_command('parted -s /dev/sdc mklabel mdr && sudo parted -s /dev/sdc mkpart primary 0% 100%')[2].channel.recv_exit_status()
                        ssh.exec_command('pvcreate /dev/sdd1 /dev/sdc1')[2].channel.recv_exit_status()
                        ssh.exec_command('vgcreate ollama-m /dev/sdd1 /dev/sdc1')[2].channel.recv_exit_status()
                    if(kind == 'lvm' and raid == '0'):
                        ssh.exec_command('lvcreate --type raid' + raid + ' -i 2 -I ' + sz + ' -L 894.255G -n ollama-m ollama-m -y')[2].channel.recv_exit_status()
                        if(fs == 'xfs'):
                            ssh.exec_command(f'mkfs.xfs -b size={bs} /dev/ollama-m/ollama-m -f')[2].channel.recv_exit_status()
                        elif(fs == 'ext4'):
                            ssh.exec_command(f'mkfs.ext4 -b {bs} /dev/ollama-m/ollama-m -F')[2].channel.recv_exit_status()
                        ssh.exec_command('mount /dev/ollama-m/ollama-m /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('ln -s /ollama/ /usr/share/')[2].channel.recv_exit_status()
                        ssh.exec_command('chown -R ollama:ollama /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('systemctl start ollama.service')[2].channel.recv_exit_status()
                        get_pase_data(df, kind, raid, fs, bs, sz)
                    elif(kind == 'lvm' and raid == '1'):
                        ssh.exec_command('lvcreate --type raid' + raid + ' -i 2 -L 894.255G -n ollama-m ollama-m -y')[2].channel.recv_exit_status()
                        if(fs == 'xfs'):
                            ssh.exec_command(f'mkfs.xfs -b size={bs} /dev/ollama-m/ollama-m -f')[2].channel.recv_exit_status()
                        elif(fs == 'ext4'):
                            ssh.exec_command(f'mkfs.ext4 -b {bs} /dev/ollama-m/ollama-m -F')[2].channel.recv_exit_status()
                        ssh.exec_command('mount /dev/ollama-m/ollama-m /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('ln -s /ollama/ /usr/share/')[2].channel.recv_exit_status()
                        ssh.exec_command('chown -R ollama:ollama /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('systemctl start ollama.service')[2].channel.recv_exit_status()
                        get_pase_data(df, kind, raid, fs, bs, '-')
                    elif(kind == 'mdadm' and raid == '0'):
                        ssh.exec_command(f'mdadm --create --verbose /dev/md0 --level={raid} --stripe-size={sz} --raid-devices=2 /dev/sdd /dev/sdc')[2].channel.recv_exit_status()
                        if(fs == 'xfs'):
                            ssh.exec_command(f'mkfs.xfs -b size={bs} /dev/ollama-m/ollama-m -f')[2].channel.recv_exit_status()
                        elif(fs == 'ext4'):
                            ssh.exec_command(f'mkfs.ext4 -b {bs} /dev/ollama-m/ollama-m -F')[2].channel.recv_exit_status()
                        ssh.exec_command('mount /dev/ollama-m/ollama-m /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('ln -s /ollama/ /usr/share/')[2].channel.recv_exit_status()
                        ssh.exec_command('chown -R ollama:ollama /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('systemctl start ollama.service')[2].channel.recv_exit_status()
                        get_pase_data(df, kind, raid, fs, bs, sz)
                    elif(kind == 'mdadm' and raid == '1'):
                        ssh.exec_command(f'mdadm --create --verbose /dev/md0 --level={raid} --raid-devices=2 /dev/sdd /dev/sdc')[2].channel.recv_exit_status()
                        if(fs == 'xfs'):
                            ssh.exec_command(f'mkfs.xfs -b size={bs} /dev/ollama-m/ollama-m -f')[2].channel.recv_exit_status()
                        elif(fs == 'ext4'):
                            ssh.exec_command(f'mkfs.ext4 -b {bs} /dev/ollama-m/ollama-m -F')[2].channel.recv_exit_status()
                        ssh.exec_command('mount /dev/ollama-m/ollama-m /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('ln -s /ollama/ /usr/share/')[2].channel.recv_exit_status()
                        ssh.exec_command('chown -R ollama:ollama /ollama/')[2].channel.recv_exit_status()
                        ssh.exec_command('systemctl start ollama.service')[2].channel.recv_exit_status()
                        get_pase_data(df, kind, raid, fs, bs, '-')

print(df)
df.to_csv('output.csv', index=False)

Test with: k-lvm r-0 f-xfs b-4096 s-4K
Start test for llama3.1:70b.
Pulled model: llama3.1:70b
Start test for: sha256-a677b4a4b70c45e702b1d600f7905e367733c53898b8be60e3f29272cf334574
Start test for: sha256-948af2743fc78a328dcb3b0f5a31b3d75f415840fdb699e8b1235978392ecf85
Start test for: sha256-0ba8f0e314b4264dfd19df045cde9d4c394a52474bf92ed6a3de22a4ca31a177
Start test for: sha256-56bb8bd477a519ffa694fc449c2413c6f0e1d3b1c88fa7e3c9d88d3ae49d4dcb
Test for llama3.1:70b completed: {'model': 'llama3.1:70b', 'time': 40.580397950999995, 'bytes': 39969750953, 'MiB/s': '939.3236', 'kind': 'lvm', 'raid': '0', 'fs': 'xfs', 'bs': '4096', 'sz': '4K'}
Start test for qwen2.5:72b-instruct-q8_0.


KeyboardInterrupt: 