# Performance measurement in ExSeqProcessing (ver.1.4)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

import os
from IPython.display import display

# Render floats in fixed-point notation ('{:f}', six decimals) instead of
# scientific notation. The fully-qualified option key is used on purpose:
# pandas resolves abbreviated keys by pattern search, which raises as soon as
# more than one option happens to match.
pd.set_option('display.float_format', '{:f}'.format)

## top log

In [None]:
# Load the two `top` summaries (per-process status, then system memory usage).
# Column 0 of each file is handed to the date parser.
proc_stat, mem_stat = (
    pd.read_csv(csv_name, parse_dates=[0])
    for csv_name in ('summary-top-proc-status.csv', 'summary-top-mem-usage.csv')
)

In [None]:
# Drop trailing whitespace from the process label so equality filters such as
# proc == 'worker' match.
proc_stat['proc'] = proc_stat['proc'].str.rstrip()

# Turn the textual elapsed time of both tables into Timedelta values.
proc_stat['elapsed_time'] = proc_stat['elapsed_time'].pipe(pd.to_timedelta)
mem_stat['elapsed_time'] = mem_stat['elapsed_time'].pipe(pd.to_timedelta)

### System memory status

In [None]:
# System-wide used / buffer-cache / available memory against elapsed time.
mem_stat.plot(
    figsize=(10, 5),
    x='elapsed_time',
    y=['mem_used', 'buf_cache', 'avail_mem'],
)

### MATLAB processes memory status

In [None]:
# Resident ('res') and virtual ('virt') memory summed over all sampled
# processes at each elapsed-time step. The aggregation is named by string:
# handing the builtin `sum` to pandas is deprecated and emits a FutureWarning.
(
    proc_stat
    .pivot_table(index='elapsed_time', values=['res', 'virt'], aggfunc='sum')
    .plot(figsize=(10, 5))
)

In [None]:
# Summary statistics of the per-step memory totals. aggfunc is the string
# 'sum' because passing the builtin `sum` to pandas is deprecated.
(
    proc_stat
    .pivot_table(index='elapsed_time', values=['res', 'virt'], aggfunc='sum')
    .describe()
)

In [None]:
# Resident memory per elapsed-time step, one line per value of 'proc'.
# aggfunc is the string 'sum' because passing the builtin `sum` to pandas is
# deprecated.
(
    proc_stat
    .pivot_table(index='elapsed_time', columns=['proc'], values=['res'], aggfunc='sum')
    .plot(figsize=(10, 5))
)

### MATLAB processes cpu and memory usage (%)

In [None]:
# Two charts (CPU %, then memory %) summed over every sampled process.
# Each shows the per-step total (semi-transparent) with a centred 12-sample
# simple moving average (SMA) drawn on the same axes.
# aggfunc is the string 'sum': passing the builtin `sum` to pandas is
# deprecated and emits a FutureWarning.
# Note: DataFrame.plot returns a matplotlib Axes; it is kept under the
# existing name `fig`.

proc_usage = proc_stat.pivot_table(index='elapsed_time', values=['cpu'], aggfunc='sum')
fig = proc_usage.plot(alpha=0.5, figsize=(10, 5))
proc_usage.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_title('CPU usage of all MATLAB processes')
fig.set_ylabel('CPU usage (%)')
fig.legend(['cpu', 'SMA cpu'])

proc_usage = proc_stat.pivot_table(index='elapsed_time', values=['mem'], aggfunc='sum')
fig = proc_usage.plot(alpha=0.5, figsize=(10, 5))
proc_usage.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_title('Memory usage of all MATLAB processes')
fig.set_ylabel('Memory usage (%)')
fig.legend(['mem', 'SMA mem'])

In [None]:
# Four charts: CPU % for worker and main processes, then memory % for the same
# two groups. Each shows the per-step total (semi-transparent) with a centred
# 12-sample simple moving average (SMA) on the same axes.
# aggfunc is the string 'sum': passing the builtin `sum` to pandas is
# deprecated and emits a FutureWarning.

# Filter the samples by process label once; each subset feeds two charts.
worker_rows = proc_stat[proc_stat['proc'] == 'worker']
main_rows = proc_stat[proc_stat['proc'] == 'main']

proc_usage_worker = worker_rows.pivot_table(index='elapsed_time', values=['cpu'], aggfunc='sum')
fig = proc_usage_worker.plot(y=['cpu'], alpha=0.5, figsize=(10, 5))
proc_usage_worker.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_title('CPU usage of MATLAB worker processes')
fig.set_ylabel('CPU usage (%)')
fig.legend(['cpu', 'SMA cpu'])

proc_usage_main = main_rows.pivot_table(index='elapsed_time', values=['cpu'], aggfunc='sum')
fig = proc_usage_main.plot(y=['cpu'], alpha=0.5, figsize=(10, 5))
proc_usage_main.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_title('CPU usage of MATLAB main process')
fig.set_ylabel('CPU usage (%)')
fig.legend(['cpu', 'SMA cpu'])

proc_usage_worker = worker_rows.pivot_table(index='elapsed_time', values=['mem'], aggfunc='sum')
fig = proc_usage_worker.plot(y=['mem'], alpha=0.5, figsize=(10, 5))
proc_usage_worker.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_title('Memory usage of MATLAB worker processes')
fig.set_ylabel('Memory usage (%)')
fig.legend(['mem', 'SMA mem'])

proc_usage_main = main_rows.pivot_table(index='elapsed_time', values=['mem'], aggfunc='sum')
fig = proc_usage_main.plot(y=['mem'], alpha=0.5, figsize=(10, 5))
proc_usage_main.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_title('Memory usage of MATLAB main process')
fig.set_ylabel('Memory usage (%)')
fig.legend(['mem', 'SMA mem'])

In [None]:
# Summary statistics of the per-step CPU and memory percentages summed over
# all sampled processes. aggfunc is the string 'sum' because passing the
# builtin `sum` to pandas is deprecated.
(
    proc_stat
    .pivot_table(index='elapsed_time', values=['cpu', 'mem'], aggfunc='sum')
    .describe()
)

## nfsiostat log

In [None]:
# Load the nfsiostat summary; column 0 is handed to the date parser.
nfs_stat = pd.read_csv(
    'summary-nfsiostat.csv',
    parse_dates=[0],
)

In [None]:
# Convert the 'time' column to Timedelta values for use as the time axis.
nfs_stat['time'] = nfs_stat['time'].pipe(pd.to_timedelta)

### read/write throughput (kb/s)

In [None]:
# Read/write throughput of the /mp/nas1 mount over time. No aggfunc is given,
# so pivot_table applies its default aggregation (mean) per time step.
(
    nfs_stat
    .loc[nfs_stat['fs-path'] == '/mp/nas1']
    .pivot_table(index='time', values=['read-kb/s', 'write-kb/s'])
    .plot(alpha=0.5, figsize=(10, 5))
)

In [None]:
# Summary statistics of the /mp/nas1 read/write throughput, summed per time
# step. aggfunc is the string 'sum' because passing the builtin `sum` to
# pandas is deprecated.
# NOTE(review): the throughput plot cell uses the pivot_table default (mean)
# while this summary sums -- confirm which aggregation is intended.
(
    nfs_stat
    .loc[nfs_stat['fs-path'] == '/mp/nas1']
    .pivot_table(index='time', values=['read-kb/s', 'write-kb/s'], aggfunc='sum')
    .describe()
)

### pages in readpages/writepages

In [None]:
# Pages handled by readpages/writepages on the /mp/nas1 mount over time
# (pivot_table default aggregation, i.e. mean per time step).
(
    nfs_stat
    .loc[nfs_stat['fs-path'] == '/mp/nas1']
    .pivot_table(index='time', values=['pages-in-readpages', 'pages-in-writepages'])
    .plot(alpha=0.5, figsize=(10, 5))
)

## vmstat log

In [None]:
# Load the vmstat summary; column 0 is handed to the date parser.
vm_stat = pd.read_csv(
    'summary-vmstat.csv',
    parse_dates=[0],
)

In [None]:
# Convert the 'time' column to Timedelta values for use as the time axis.
vm_stat['time'] = vm_stat['time'].pipe(pd.to_timedelta)

### block i/o

In [None]:
# Blocks in/out per time step (pivot_table default aggregation), drawn
# semi-transparent with a centred 12-sample moving average on the same axes.
vm_bio = vm_stat.pivot_table(values=['io-bi', 'io-bo'], index='time')
fig = vm_bio.plot(figsize=(10, 5), alpha=0.5)
(
    vm_bio
    .rolling(window=12, center=True)
    .mean()
    .plot(ax=fig)
)
fig.set_ylabel('blocks/s')
fig.legend(['io-bi', 'io-bo', 'SMA io-bi', 'SMA io-bo'])

In [None]:
# Summary statistics of the per-step block-in / block-out values.
(
    vm_stat
    .pivot_table(values=['io-bi', 'io-bo'], index='time')
    .describe()
)

### cpu status

In [None]:
# The five vmstat CPU columns (us, sy, id, wa, st) per time step, one line each.
fig = (
    vm_stat
    .pivot_table(
        index='time',
        values=['cpu-us', 'cpu-sy', 'cpu-id', 'cpu-wa', 'cpu-st'],
    )
    .plot(alpha=0.5, figsize=(10, 5))
)
fig.set_ylabel('CPU usage (%)')

In [None]:
# Summary statistics of the five vmstat CPU columns.
(
    vm_stat
    .pivot_table(
        index='time',
        values=['cpu-us', 'cpu-sy', 'cpu-id', 'cpu-wa', 'cpu-st'],
    )
    .describe()
)

## iostat log

In [None]:
# Load the iostat summary; column 0 is handed to the date parser.
io_stat = pd.read_csv(
    'summary-iostat.csv',
    parse_dates=[0],
)

In [None]:
# Convert the 'time' column to Timedelta values for use as the time axis.
io_stat['time'] = io_stat['time'].pipe(pd.to_timedelta)

In [None]:
# Host the logs were collected on; only consulted when device.txt is absent.
# `with` guarantees the file is closed even if the read raises.
with open('hostname.txt', 'r') as hostname_file:
    hostname = hostname_file.read().rstrip()

# Device-name pattern used to select iostat rows.
# An explicit device.txt takes priority; otherwise use the per-host table,
# falling back to 'nvme' for any host that is not listed.
if os.path.isfile('device.txt'):
    with open('device.txt', 'r') as device_file:
        dev_name = device_file.read().rstrip()
else:
    dev_name = {
        'bonito': 'nvme0',
        'mackerel': 'nvme1',
        'tuna': 'nvme1',
    }.get(hostname, 'nvme')

### device read/write (kB/s)

In [None]:
# Read/write rate summed over every iostat device whose name matches dev_name
# (str.contains treats dev_name as a regular expression by default).
# aggfunc is the string 'sum': passing the builtin `sum` to pandas is
# deprecated and emits a FutureWarning.
dev_rw = (
    io_stat[io_stat['device'].str.contains(dev_name)]
    .pivot_table(index='time', values=['rkB/s', 'wkB/s'], aggfunc='sum')
)
# Raw series drawn semi-transparent, with a centred 12-sample moving average
# on the same axes.
fig = dev_rw.plot(alpha=0.5, figsize=(10, 5))
dev_rw.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_ylabel('kB/s')
fig.legend(['rkB/s', 'wkB/s', 'SMA rkB/s', 'SMA wkB/s'])

In [None]:
# Summary statistics of the per-step device read/write rates held in dev_rw.
dev_rw.describe()

### device util (%)

In [None]:
# %util summed over every iostat device whose name matches dev_name
# (str.contains treats dev_name as a regular expression by default).
# aggfunc is the string 'sum': passing the builtin `sum` to pandas is
# deprecated and emits a FutureWarning.
dev_util = (
    io_stat[io_stat['device'].str.contains(dev_name)]
    .pivot_table(index='time', values=['%util'], aggfunc='sum')
)
# Raw series drawn semi-transparent, with a centred 12-sample moving average
# on the same axes.
fig = dev_util.plot(alpha=0.5, figsize=(10, 5))
dev_util.rolling(window=12, center=True).mean().plot(ax=fig)
fig.set_ylabel('device [' + dev_name + '] util (%)')
fig.legend(['%util', 'SMA %util'])

In [None]:
# Summary statistics of the per-step device utilisation held in dev_util.
dev_util.describe()

## gpu log

In [None]:
# The GPU log is optional: gpu_stat stays None when summary-gpu.csv is absent.
gpu_stat = (
    pd.read_csv('summary-gpu.csv', parse_dates=[0])
    if os.path.exists('summary-gpu.csv')
    else None
)

In [None]:
# gpu_stat is None when summary-gpu.csv was not found; subscripting None would
# raise TypeError, so the conversion is skipped in that case.
if gpu_stat is not None:
    # Convert the 'time' column to Timedelta values for use as the time axis.
    gpu_stat['time'] = pd.to_timedelta(gpu_stat['time'])

In [None]:
# Skipped entirely when no GPU log was loaded.
if gpu_stat is not None:
    # GPU and GPU-memory utilisation summed over all rows per time step.
    # aggfunc is the string 'sum': passing the builtin `sum` to pandas is
    # deprecated and emits a FutureWarning.
    gpu_util = gpu_stat.pivot_table(
        index='time',
        values=['utilization.gpu [%]', 'utilization.memory [%]'],
        aggfunc='sum',
    )
    fig = gpu_util.plot(alpha=0.5, figsize=(10, 5))
    # Centred 10-sample moving average on the same axes.
    gpu_util.rolling(window=10, center=True).mean().plot(ax=fig)
    fig.set_ylabel('util (%)')
    fig.legend(['gpu', 'gpu memory', 'SMA gpu', 'SMA gpu memory'])

    # Same two metrics broken out by the 'index' column (pivot_table default
    # aggregation); missing combinations are filled with 0.0.
    gpu_util_each = gpu_stat.pivot_table(
        index=['time'],
        columns=['index'],
        values=['utilization.gpu [%]', 'utilization.memory [%]'],
    ).fillna(0.0)
    fig = gpu_util_each.plot(alpha=0.5, figsize=(10, 5))
    fig.set_ylabel('util (%)')

In [None]:
# Skipped entirely when no GPU log was loaded.
if gpu_stat is not None:
    # Summary statistics of the summed GPU / GPU-memory utilisation.
    # aggfunc is the string 'sum': passing the builtin `sum` to pandas is
    # deprecated and emits a FutureWarning.
    gpu_util = gpu_stat.pivot_table(
        index='time',
        values=['utilization.gpu [%]', 'utilization.memory [%]'],
        aggfunc='sum',
    )
    # Explicit display() because this is not the cell's trailing expression.
    display(gpu_util.describe())