# Performance measurement in ExSeqProcessing (ver.1.7.2)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import os
import subprocess
import re
from datetime import datetime, timedelta
from IPython.display import display

# Print floats in fixed-point notation ('{:f}', six decimals) instead of
# scientific notation. The full option key is spelled out: abbreviated keys
# are resolved by pattern matching and stop working as soon as a second
# option name matches the same pattern.
pd.set_option('display.float_format', '{:f}'.format)

## top log

In [None]:
# Load the per-process and the system-memory summary tables.
# In both files the first column is parsed as dates.
proc_stat = pd.read_csv(
    'summary-top-proc-status.csv',
    parse_dates=[0],
)
mem_stat = pd.read_csv(
    'summary-top-mem-usage.csv',
    parse_dates=[0],
)

In [None]:
# Turn 'elapsed_time' into Timedelta values in both tables and add
# 'elapsed_time_m', the same duration expressed in minutes, for plot axes.
one_minute = timedelta(minutes=1)
for stat_table in (proc_stat, mem_stat):
    stat_table['elapsed_time'] = pd.to_timedelta(stat_table['elapsed_time'])
    stat_table['elapsed_time_m'] = stat_table['elapsed_time'] / one_minute

# Drop trailing whitespace so that 'proc' can be compared with plain labels
# such as 'worker' and 'main' in later cells.
proc_stat['proc'] = proc_stat['proc'].str.rstrip()

In [None]:
# Unit label for the memory axis: the first distinct value of 'mem_unit'.
unit_labels = mem_stat['mem_unit'].unique()
mem_unit = unit_labels[0]

### System memory status

In [None]:
# System-wide memory over elapsed minutes: used, buffer/cache and available.
memory_columns = ['mem_used', 'buf_cache', 'avail_mem']
fig = mem_stat.plot(
    x='elapsed_time_m',
    y=memory_columns,
    figsize=(10, 5),
)
fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('Memory usage (%s)' % mem_unit)

### MATLAB processes memory status

In [None]:
# Resident ('res') and virtual ('virt') memory summed over all recorded
# processes at each sample time.
# aggfunc is the string 'sum': passing the builtin `sum` is deprecated in
# recent pandas, which will eventually call the builtin directly.
proc_mem_total = proc_stat.pivot_table(index='elapsed_time_m',values=['res','virt'],aggfunc='sum')
fig = proc_mem_total.plot(figsize=(10,5))
fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('Memory usage (MiB)')

In [None]:
# Descriptive statistics of the per-sample memory totals ('res', 'virt').
# aggfunc='sum' (string) keeps pandas' own sum; the builtin `sum` is deprecated here.
proc_stat.pivot_table(index='elapsed_time',values=['res','virt'],aggfunc='sum').describe()

In [None]:
# Resident memory over time, one line per distinct value of 'proc'.
# aggfunc='sum' (string) replaces the deprecated builtin `sum`.
res_by_proc = proc_stat.pivot_table(index='elapsed_time_m',columns=['proc'],values=['res'],aggfunc='sum')
fig = res_by_proc.plot(figsize=(10,5))
fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('Memory usage (MiB)')

In [None]:
# Optional reference curve for the worker-memory plot. When the flag is on,
# rows up to 2.5 elapsed minutes get 8310.9 and later rows get 1738.9.
expected_memory_usage_line = False
if expected_memory_usage_line:
    proc_stat['expected memory usage'] = np.where(
        proc_stat['elapsed_time_m'] > 2.5,
        1738.9,
        8310.9,
    )

In [None]:
# Resident memory of the rows labelled 'worker', one line per pid.
# aggfunc='sum' (string) replaces the deprecated builtin `sum`.
worker_rows = proc_stat[proc_stat['proc'] == 'worker']
tbl = worker_rows.pivot_table(index='elapsed_time_m',columns=['pid'],values=['res'],aggfunc='sum')
# A pid with no sample at a given time is drawn at 0 instead of leaving a gap.
tbl = tbl.fillna(0)
fig = tbl.plot(figsize=(10,5))

# Overlay the optional reference curve as a dotted line on the same axes.
if expected_memory_usage_line:
    proc_stat.plot(x='elapsed_time_m',y='expected memory usage',style=':',ax=fig)

fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('Memory usage (MiB)')
# Place the two-column legend outside the axes, to the right of the plot.
fig.legend(loc='upper left',bbox_to_anchor=(1.02,0.5,0.5,0.5),ncol=2,borderaxespad=0.1)

### MATLAB processes cpu and memory usage (%)

In [None]:
# CPU usage summed over all recorded processes, drawn raw (semi-transparent)
# together with a centred 12-sample rolling mean (legend entry 'SMA cpu').
# aggfunc='sum' (string) replaces the deprecated builtin `sum`.
proc_usage = proc_stat.pivot_table(index='elapsed_time_m',values=['cpu'],aggfunc='sum')
fig = proc_usage.plot(alpha=0.5,figsize=(10,5))
proc_usage.rolling(window=12,center=True).mean().plot(ax=fig)
fig.set_title('CPU usage of all MATLAB processes')
fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('CPU usage (%)')
fig.legend(['cpu','SMA cpu'])

# Same presentation for the 'mem' column.
proc_usage = proc_stat.pivot_table(index='elapsed_time_m',values=['mem'],aggfunc='sum')
fig = proc_usage.plot(alpha=0.5,figsize=(10,5))
proc_usage.rolling(window=12,center=True).mean().plot(ax=fig)
fig.set_title('Memory usage of all MATLAB processes')
fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('Memory usage (%)')
fig.legend(['mem','SMA mem'])

In [None]:
def plot_proc_usage(proc_name, column, title, ylabel):
    """Plot one usage column for one process type, raw and smoothed.

    Rows of ``proc_stat`` whose 'proc' equals ``proc_name`` are summed per
    elapsed minute; the raw curve is drawn semi-transparent and a centred
    12-sample rolling mean is drawn on top of it.

    Args:
        proc_name: value of the 'proc' column to select ('worker' or 'main').
        column: usage column to plot ('cpu' or 'mem').
        title: title of the plot.
        ylabel: label of the y axis.

    Returns:
        Tuple ``(usage, axes)``: the pivoted table and the matplotlib axes.
    """
    selected = proc_stat[proc_stat['proc'] == proc_name]
    # The string 'sum' is used because passing the builtin `sum` is deprecated.
    usage = selected.pivot_table(index='elapsed_time_m',values=[column],aggfunc='sum')
    axes = usage.plot(y=[column],alpha=0.5,figsize=(10,5))
    usage.rolling(window=12,center=True).mean().plot(ax=axes)
    axes.set_title(title)
    axes.set_xlabel('Elapsed time (min)')
    axes.set_ylabel(ylabel)
    axes.legend([column,'SMA ' + column])
    return usage, axes


# Four plots, in this order: worker CPU, main CPU, worker memory, main memory.
proc_usage_worker, fig = plot_proc_usage('worker', 'cpu', 'CPU usage of MATLAB worker processes', 'CPU usage (%)')
proc_usage_main, fig = plot_proc_usage('main', 'cpu', 'CPU usage of MATLAB main process', 'CPU usage (%)')
proc_usage_worker, fig = plot_proc_usage('worker', 'mem', 'Memory usage of MATLAB worker processes', 'Memory usage (%)')
proc_usage_main, fig = plot_proc_usage('main', 'mem', 'Memory usage of MATLAB main process', 'Memory usage (%)')

In [None]:
# Descriptive statistics of the per-sample 'cpu' and 'mem' totals.
# aggfunc='sum' (string) replaces the deprecated builtin `sum`.
proc_stat.pivot_table(index='elapsed_time',values=['cpu','mem'],aggfunc='sum').describe()

## nfsiostat log

In [None]:
# Load the nfsiostat summary if it exists; the later nfsiostat cells print the
# same warning when `nfs_stat` could not be created here.
try:
    nfs_stat = pd.read_csv('summary-nfsiostat.csv',parse_dates=[0])
    nfs_stat['time'] = pd.to_timedelta(nfs_stat['time'])
    # 'time_m' is the same duration expressed in minutes, for plot axes.
    nfs_stat['time_m'] = nfs_stat['time'] / timedelta(minutes=1)
except Exception:
    # `Exception` rather than a bare except, so that interrupting the kernel
    # is not reported as a missing command.
    print('WARNING: command "nfsiostat" cannot be used.')

### read/write throughput (kb/s)

In [None]:
# Read/write throughput of the '/mp/nas1' entries over elapsed minutes
# (pivot_table's default aggregation, the mean, is used).
try:
    fig = nfs_stat[nfs_stat['fs-path']=='/mp/nas1'].pivot_table(index='time_m',values=['read-kb/s','write-kb/s']).plot(alpha=0.5,figsize=(10,5))
    fig.set_xlabel('Elapsed time (min)')
    fig.set_ylabel('read/write (kb/s)')
except Exception:
    # Reached when `nfs_stat` is missing or lacks the expected columns; a bare
    # except would also have swallowed a kernel interrupt.
    print('WARNING: command "nfsiostat" cannot be used.')

In [None]:
# Descriptive statistics of the '/mp/nas1' read/write throughput; `dsc` is
# None when the nfsiostat data is unavailable.
try:
    # aggfunc='sum' (string) replaces the deprecated builtin `sum`.
    dsc = nfs_stat[nfs_stat['fs-path']=='/mp/nas1'].pivot_table(index='time',values=['read-kb/s','write-kb/s'],aggfunc='sum').describe()
except Exception:
    dsc = None
    print('WARNING: command "nfsiostat" cannot be used.')
dsc

### pages in readpages/writepages

In [None]:
# 'pages-in-readpages' / 'pages-in-writepages' of the '/mp/nas1' entries over
# elapsed minutes (default mean aggregation).
try:
    fig = nfs_stat[nfs_stat['fs-path']=='/mp/nas1'].pivot_table(index='time_m',values=['pages-in-readpages','pages-in-writepages']).plot(alpha=0.5,figsize=(10,5))
    fig.set_xlabel('Elapsed time (min)')
    fig.set_ylabel('pages')
except Exception:
    # `Exception` instead of a bare except: a kernel interrupt must not be
    # reported as a missing command.
    print('WARNING: command "nfsiostat" cannot be used.')

## vmstat log

In [None]:
# Load the vmstat summary table; its first column is parsed as dates.
vm_stat = pd.read_csv(
    'summary-vmstat.csv',
    parse_dates=[0],
)

In [None]:
# Convert 'time' to Timedelta values and add 'time_m', the same duration in
# minutes, for use on plot axes.
vm_elapsed = pd.to_timedelta(vm_stat['time'])
vm_stat['time'] = vm_elapsed
vm_stat['time_m'] = vm_elapsed / timedelta(minutes=1)

### block i/o

In [None]:
# Block I/O ('io-bi', 'io-bo') over elapsed minutes: the raw curves are drawn
# semi-transparent, with a centred 12-sample rolling mean drawn over them.
vm_bio = vm_stat.pivot_table(
    index='time_m',
    values=['io-bi', 'io-bo'],
)
vm_bio_smoothed = vm_bio.rolling(window=12, center=True).mean()

fig = vm_bio.plot(alpha=0.5, figsize=(10, 5))
vm_bio_smoothed.plot(ax=fig)
fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('blocks/s')
fig.legend(['io-bi', 'io-bo', 'SMA io-bi', 'SMA io-bo'])

In [None]:
# Descriptive statistics of the block I/O columns, indexed by 'time'.
vm_bio_by_time = vm_stat.pivot_table(index='time', values=['io-bi', 'io-bo'])
vm_bio_by_time.describe()

### cpu status

In [None]:
# The five vmstat CPU columns over elapsed minutes.
vm_cpu = vm_stat.pivot_table(
    index='time_m',
    values=['cpu-us', 'cpu-sy', 'cpu-id', 'cpu-wa', 'cpu-st'],
)
fig = vm_cpu.plot(alpha=0.5, figsize=(10, 5))
fig.set_xlabel('Elapsed time (min)')
fig.set_ylabel('CPU usage (%)')

In [None]:
# Descriptive statistics of the vmstat CPU columns, indexed by 'time'.
vm_cpu_by_time = vm_stat.pivot_table(
    index='time',
    values=['cpu-us', 'cpu-sy', 'cpu-id', 'cpu-wa', 'cpu-st'],
)
vm_cpu_by_time.describe()

## iostat log

In [None]:
# Load the iostat summary if it exists; the later iostat cells print the same
# warning when `io_stat` could not be created here.
try:
    io_stat = pd.read_csv('summary-iostat.csv',parse_dates=[0])
    io_stat['time'] = pd.to_timedelta(io_stat['time'])
    # 'time_m' is the same duration expressed in minutes, for plot axes.
    io_stat['time_m'] = io_stat['time'] / timedelta(minutes=1)
except Exception:
    # `Exception` rather than a bare except, so that interrupting the kernel
    # is not reported as a missing command.
    print('WARNING: command "iostat" cannot be used.')

In [None]:
# A line that starts with a lower-case device name and contains ' disk'.
comp_disk = re.compile(r'^([a-z][^ ]*) .* disk')
# A line that ends with a path beginning with '/mp/'.
comp_dir = re.compile(r'(/mp/.*)$')


def parse_lsblk_mounts(lines):
    """Map disk names to the '/mp/...' path found on a following line.

    Args:
        lines: iterable of text lines (a saved ``lsblk`` listing).

    Returns:
        dict mapping a disk name to the last '/mp/...' path seen on the lines
        after that disk and before the next disk line.

    Lines starting with 'NAME' are skipped. A '/mp/...' line that appears
    before any disk line is ignored, because there is no disk to attach it to.
    """
    mounts = {}
    current_disk = None
    for raw_line in lines:
        line = raw_line.rstrip()
        if line.startswith('NAME'):
            continue

        disk_match = comp_disk.search(line)
        if disk_match is not None:
            # NOTE(review): a '/mp/...' path on the disk line itself is not
            # recorded (as before) - confirm whole-disk mounts never occur.
            current_disk = disk_match.group(1)
            continue

        dir_match = comp_dir.search(line)
        if dir_match is not None and current_disk is not None:
            mounts[current_disk] = dir_match.group(1)
    return mounts


dev_dir_map = {}
try:
    # The context manager closes the file even if parsing fails.
    with open('lsblk.txt') as lsblk_file:
        dev_dir_map = parse_lsblk_mounts(lsblk_file)
except (OSError, UnicodeError):
    # Missing or unreadable listing: keep the empty map, so device names are
    # used unchanged later on.
    print('WARNING: command "lsblk" cannot be used.')

In [None]:
# Replace device names in `io_stat` by their '/mp/...' path where one is
# known from `dev_dir_map`; devices without an entry keep their own name.
try:
    # Devices whose name contains 'dm-' are left out of the mapping.
    is_dm_device = io_stat['device'].str.contains('dm-')
    dev_list = io_stat[~is_dm_device]['device'].unique()
    mp_list = [dev_dir_map.get(dev, dev) for dev in dev_list]
    dev_mp_map = dict(zip(dev_list,mp_list))

    # Assign the result back: an in-place replace on a column selection does
    # not update the frame once pandas Copy-on-Write is active.
    io_stat['device'] = io_stat['device'].replace(dev_mp_map)
    print(dev_mp_map)
except Exception:
    print('WARNING: command "iostat" cannot be used.')

### device read/write (kB/s)

In [None]:
# One read/write throughput plot per device (or mount path); devices with no
# traffic at all are skipped.
try:
    for d in np.unique(mp_list):
        # aggfunc='sum' (string) replaces the deprecated builtin `sum`.
        dev_rw = io_stat[io_stat['device'] == d].pivot_table(index=['time_m'],values=['rkB/s','wkB/s'],aggfunc='sum')
        dev_rw_sum = dev_rw.sum()
        if dev_rw_sum['rkB/s'] == 0 and dev_rw_sum['wkB/s'] == 0:
            continue

        # Raw curves semi-transparent, centred 12-sample rolling mean on top.
        fig = dev_rw.plot(alpha=0.5,figsize=(10,5))
        dev_rw.rolling(window=12,center=True).mean().plot(ax=fig)
        fig.set_title('device='+d)
        fig.set_xlabel('Elapsed time (min)')
        fig.set_ylabel('kB/s')
        fig.legend(['rkB/s','wkB/s','SMA rkB/s','SMA wkB/s'])
except Exception:
    # Reached when the iostat data or the device list is unavailable.
    print('WARNING: command "iostat" cannot be used.')

In [None]:
# Descriptive statistics of read/write throughput per device; `dsc` is None
# when the iostat data is unavailable.
try:
    # aggfunc='sum' (string) replaces the deprecated builtin `sum`.
    dev_rw = io_stat[io_stat['device'].isin(mp_list)].pivot_table(index=['time'],columns=['device'],values=['rkB/s','wkB/s'],aggfunc='sum')
    dsc = dev_rw.describe()
except Exception:
    dsc = None
    print('WARNING: command "iostat" cannot be used.')

dsc

### device util (%)

In [None]:
# One '%util' plot per device (or mount path); devices whose utilisation sums
# to zero are skipped.
try:
    for d in np.unique(mp_list):
        # aggfunc='sum' (string) replaces the deprecated builtin `sum`.
        dev_util = io_stat[io_stat['device'] == d].pivot_table(index=['time_m'],values=['%util'],aggfunc='sum')
        dev_util_sum = dev_util.sum()
        if dev_util_sum['%util'] == 0:
            continue

        # Raw curve semi-transparent, centred 12-sample rolling mean on top.
        fig = dev_util.plot(alpha=0.5,figsize=(10,5))
        dev_util.rolling(window=12,center=True).mean().plot(ax=fig)
        fig.set_title('device='+d)
        fig.set_xlabel('Elapsed time (min)')
        fig.set_ylabel('util (%)')
        fig.legend(['%util','SMA %util'])
except Exception:
    # Reached when the iostat data or the device list is unavailable.
    print('WARNING: command "iostat" cannot be used.')

In [None]:
# Descriptive statistics of '%util' per device; `dsc` is None when the iostat
# data is unavailable.
try:
    # aggfunc='sum' (string) replaces the deprecated builtin `sum`.
    dev_util = io_stat[io_stat['device'].isin(mp_list)].pivot_table(index=['time'],columns=['device'],values=['%util'],aggfunc='sum')
    dsc = dev_util.describe()
except Exception:
    dsc = None
    print('WARNING: command "iostat" cannot be used.')

dsc

## gpu log

In [None]:
# Load the GPU summary if it exists; the later GPU cells print the same
# warning when `gpu_stat` could not be created here.
try:
    gpu_stat = pd.read_csv('summary-gpu.csv',parse_dates=[0])
    gpu_stat['time'] = pd.to_timedelta(gpu_stat['time'])
    # 'time_m' is the same duration expressed in minutes, for plot axes.
    gpu_stat['time_m'] = gpu_stat['time'] / timedelta(minutes=1)
except Exception:
    # `Exception` rather than a bare except, so that interrupting the kernel
    # is not reported as a missing command.
    print('WARNING: command "nvidia-smi" cannot be used.')

In [None]:
try:
    # First plot: utilisation summed over all rows of each sample time, raw
    # plus a centred 10-sample rolling mean.
    # aggfunc='sum' (string) replaces the deprecated builtin `sum`.
    gpu_util = gpu_stat.pivot_table(index='time_m',values=['utilization.gpu [%]','utilization.memory [%]'],aggfunc='sum')
    fig = gpu_util.plot(alpha=0.5,figsize=(10,5))
    gpu_util.rolling(window=10,center=True).mean().plot(ax=fig)
    fig.set_xlabel('Elapsed time (min)')
    fig.set_ylabel('util (%)')
    fig.legend(['gpu','gpu memory','SMA gpu','SMA gpu memory'])

    # Second plot: one pair of curves per value of the 'index' column;
    # missing samples are shown as 0.
    gpu_util_each = gpu_stat.pivot_table(index=['time_m'],columns=['index'],values=['utilization.gpu [%]','utilization.memory [%]']).fillna(0.0)
    fig = gpu_util_each.plot(alpha=0.5,figsize=(10,5))
    fig.set_xlabel('Elapsed time (min)')
    fig.set_ylabel('util (%)')
except Exception:
    # Reached when the GPU data is unavailable.
    print('WARNING: command "nvidia-smi" cannot be used.')

In [None]:
# Descriptive statistics of the summed GPU utilisation; `dsc` is None when the
# GPU data is unavailable.
try:
    # aggfunc='sum' (string) replaces the deprecated builtin `sum`.
    gpu_util = gpu_stat.pivot_table(index='time',values=['utilization.gpu [%]','utilization.memory [%]'],aggfunc='sum')
    # Keep the table itself in `dsc`, as the other cells do; it is shown by
    # the trailing expression (display() returns None, so storing its result
    # always left `dsc` empty).
    dsc = gpu_util.describe()
except Exception:
    dsc = None
    print('WARNING: command "nvidia-smi" cannot be used.')

dsc

## Summary

In [None]:
# Mean and maximum of the summed process CPU/memory usage, plus GPU
# utilisation when GPU data is available.
# aggfunc='sum' (string) replaces the deprecated builtin `sum` throughout.
ds1 = proc_stat.pivot_table(index='elapsed_time',values=['cpu','mem'],aggfunc='sum').describe()

try:
    ds2 = gpu_stat.pivot_table(index='time',values=['utilization.gpu [%]'],aggfunc='sum').describe()
    ds = pd.concat([ds1, ds2],axis=1)
    print(ds.loc[['mean','max'],['mem','cpu','utilization.gpu [%]']])
except Exception:
    # No usable GPU data: report the process statistics only.
    ds = ds1
    print(ds.loc[['mean','max'],['mem','cpu']])

print(' ')

# Mean and maximum of per-device throughput and utilisation.
try:
    dev_ds = io_stat[io_stat['device'].isin(mp_list)].pivot_table(index=['time'],columns=['device'],values=['rkB/s','wkB/s','%util'],aggfunc='sum').describe()
    print(dev_ds.loc[['mean','max'],['rkB/s','wkB/s']])
    print(dev_ds.loc[['mean','max'],['%util']])
except Exception:
    # Deliberately silent: without iostat data the device part of the summary
    # is simply left out (the iostat cells have already printed a warning).
    pass