In [1]:
%run init.ipynb

In [5]:
import sys
sys.path.append("../")

In [6]:
import dask
import dask.array as da
import dask.bag as db
import dask.dataframe as dd
import json
import math
import numpy as np
import os
import pandas as pd
from dask import compute, delayed
from dask.dataframe import DataFrame
from dask.distributed import Client, LocalCluster, wait, worker_client
from wisio.core.analysis import Analysis
from wisio.core.metrics import filter_asymptote_delayed, filter_delayed, flatten_delayed, merge_delayed, sort_delayed
from wisio.utils.file_utils import ensure_dir
from wisio.utils.json_encoders import NpEncoder
from wisio.utils.logger import create_logger, format_log

In [2]:
client

0,1
Connection method: Cluster object,Cluster type: dask_jobqueue.LSFCluster
Dashboard: http://192.168.66.200:8788/status,

0,1
Dashboard: http://192.168.66.200:8788/status,Workers: 0
Total threads: 0,Total memory: 0 B

0,1
Comm: tcp://192.168.66.200:39671,Workers: 0
Dashboard: http://192.168.66.200:8788/status,Total threads: 0
Started: Just now,Total memory: 0 B


In [7]:
CACHE_DIR = "cached"
METRICS_DIR = "metrics"
INDEX_DIR = "indexed"


def compute_metrics_file_id(ddf: DataFrame, fg_index: str, log_dir: str):
    unique_filenames_d = unique_filenames_delayed(ddf=ddf, log_dir=log_dir,
                                                  dask_key_name=f"unique-filenames-{fg_index}")
    save_filenames_d = save_filenames_delayed(filenames=unique_filenames_d, log_dir=log_dir,
                                              dask_key_name=f"save-filenames-{fg_index}")
    metrics_d = metrics_filenames_delayed(ddf=ddf, filenames=unique_filenames_d,
                                          dask_key_name=f"metrics-{fg_index}")
    return [unique_filenames_d, save_filenames_d, [metrics_d]]


def load_global_min_max(log_dir: str):
    with open(f"{log_dir}/global.json") as file:
        global_min_max = json.load(file)
    return global_min_max


@delayed
def metrics_filenames_delayed(ddf: DataFrame, filenames: list, fg_index='file_id'):
    print('calculating metrics for filenames', len(filenames))
    tasks_d = []
    for filename in filenames:
        target_ddf_d = Analysis.target_ddf_delayed(ddf=ddf, start=filename, stop=filename,
                                                   dask_key_name=f"target-ddf-{fg_index}-{filename}")
        filter_d = filter_delayed(ddf=target_ddf_d, fg_index=fg_index, start=filename, stop=filename,
                                  dask_key_name=f"filter-{fg_index}-{filename}")
        flatten_d = flatten_delayed(filter_d, dask_key_name=f"flatten-{fg_index}-{filename}")
        tasks_d.append(flatten_d)
    print('num of tasks created', len(tasks_d))
    # with worker_client() as client:
    #     print('submitting on', client)
    #     metrics_f = client.compute(tasks_d)
    #     print('gathering tasks', len(metrics_f))
    #     metrics = client.gather(metrics_f)
    metrics = dask.compute(tasks_d)
    print('computed metrics', type(metrics))
    print('computed metrics', len(metrics))
    # print('computed metrics', metrics)
    return metrics


def read_and_index_logs(prefix: str, fg_index: str, log_dir: str, use_cache=True):
    dask_suffix = f"{prefix}-{fg_index}"
    fg_index_dir = f"{log_dir}/{INDEX_DIR}/{fg_index}"
    if use_cache and os.path.exists(f"{fg_index_dir}/_metadata"):
        ddf_d = read_parquet_delayed(log_dir=fg_index_dir, index=[fg_index],
                                     dask_key_name=f"read-parquet-{dask_suffix}")
        persisted_ddf_d = persist_ddf_delayed(ddf=ddf_d, dask_key_name=f"persist-ddf-{dask_suffix}")
        return [ddf_d, persisted_ddf_d]
    ddf_d = read_parquet_delayed(log_dir=log_dir, dask_key_name=f"read-parquet-{dask_suffix}")
    indexed_ddf_d = set_ddf_index_delayed(ddf=ddf_d, fg_index=fg_index,
                                          dask_key_name=f"set-index-{dask_suffix}")
    persisted_ddf_d = persist_ddf_delayed(ddf=indexed_ddf_d, dask_key_name=f"persist-ddf-{dask_suffix}")
    partitioned_ddf_d = repartition_delayed(ddf=persisted_ddf_d,
                                            dask_key_name=f"repartition-ddf-{dask_suffix}")
    save_ddf_d = save_ddf_delayed(ddf=partitioned_ddf_d, log_dir=log_dir, fg_index=fg_index,
                                  dask_key_name=f"save-ddf-{dask_suffix}")
    return [ddf_d, indexed_ddf_d, persisted_ddf_d, save_ddf_d, partitioned_ddf_d]


@delayed
def read_parquet_delayed(log_dir: str, index: list = None):
    if index:
        print("Index specified", index)
        return dd.read_parquet(f"{log_dir}/*.parquet", calculate_divisions=True, index=index)
    print("Index not specified")
    return dd.read_parquet(f"{log_dir}/*.parquet", index=False)


@delayed
def repartition_delayed(ddf: DataFrame, partition_size='128MB'):
    return ddf.repartition(partition_size=partition_size)


@delayed
def persist_ddf_delayed(ddf: DataFrame):
    ddf = ddf.persist()
    wait(ddf)
    return ddf


@delayed
def save_ddf_delayed(ddf: DataFrame, log_dir: str, fg_index: str):
    ddf.to_parquet(f"{log_dir}/{INDEX_DIR}/{fg_index}")


@delayed
def save_filenames_delayed(filenames: list, log_dir: str):
    filenames = list(filenames)
    filenames.sort()
    with open(f"{log_dir}/filenames.json", "w") as file:
        json.dump(filenames, file, cls=NpEncoder)


@delayed
def set_ddf_index_delayed(ddf: DataFrame, fg_index: str):
    return ddf.set_index([fg_index])


@delayed
def unique_filenames_delayed(ddf: DataFrame, log_dir: str):
    if os.path.exists(f"{log_dir}/filenames.json"):
        with open(f"{log_dir}/filenames.json", "r") as file:
            unique_filenames = json.load(file)
    else:
        unique_filenames = ddf.index.unique().compute()
    return unique_filenames


In [4]:
# log_dir = "/p/gpfs1/iopp/recorder_app_logs/cm1/nodes-32/workflow-4/_parquet"
log_dir = "/p/gpfs1/iopp/parquet_app_logs/cm1/nodes-32/workflow-4"

In [14]:
ddf = dd.read_parquet(f"{log_dir}/*.parquet")

ddf

Unnamed: 0_level_0,index,rank,thread_id,cat,tstart,tend,func_id,level,arg_count,args_1,args_2,args_3,args_4,args_5,args_6,args_7,args_8,args_9,args_10
npartitions=160,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
,int32,int32,int32,int32,float32,float32,object,int32,int32,object,object,object,object,object,object,object,object,object,object
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [5]:
ddf.head()

Unnamed: 0,index,rank,thread_id,cat,tstart,tend,func_id,level,arg_count,args_1,args_2,args_3,args_4,args_5,args_6,args_7,args_8,args_9,args_10
0,1,0,322192,0,2.82326,2.823267,__xstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
1,2,0,322192,0,2.823293,2.823305,open,0,2,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,2,,,,,,,,
2,3,0,322192,0,2.823371,2.823373,__fxstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
3,4,0,322192,0,2.82339,2.823399,read,0,3,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,16777216,,,,,,,
4,5,0,322192,0,2.82349,2.823494,close,0,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,,,,,,,,,


In [8]:
ddf[ddf['func_id'] == 'open'].compute().agg({'args_2':'unique','args_3':'unique'})

args_2    [2, 66]
args_3    [, 438]
dtype: object

In [6]:
ddf.index.count().compute()

27463

In [7]:
ddf_000001 = ddf[ddf['args_1'].str.contains("000001") | ddf['args_2'].str.contains("000001")].compute()

ddf_000001

Unnamed: 0,index,rank,thread_id,cat,tstart,tend,func_id,level,arg_count,args_1,args_2,args_3,args_4,args_5,args_6,args_7,args_8,args_9,args_10
59,60,0,322192,0,5.638446,5.638453,__xstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
60,61,0,322192,0,5.638469,5.638579,open,0,2,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,2,,,,,,,,
61,62,0,322192,0,5.638655,5.638657,__fxstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
62,63,0,322192,0,5.773078,5.776894,write,0,3,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,16777216,,,,,,,
63,64,0,322192,0,5.844885,5.846870,write,0,3,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,16777216,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86,87,1240,322192,0,344.429321,344.429321,__fxstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
87,88,1240,322192,0,344.489044,344.489075,close,0,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,,,,,,,,,
88,89,1240,322192,0,344.489075,344.489075,close,0,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,,,,,,,,,
89,90,1240,322192,0,344.489075,344.489075,close,0,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,,,,,,,,,


In [8]:
args1 = list(ddf_000001['args_1'].unique())
args2 = list(ddf_000001['args_2'].unique())
args3 = list(ddf_000001['args_3'].unique())

args1

['1',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_000001_s.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_000001_u.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_000001_v.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_000001_w.dat',
 'cm1out_diag_000001_s.dat',
 'cm1out_diag_000001_w.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000001_s.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000001_w.dat',
 'cm1rst_000001_x.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_s.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_u.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_v.dat',
 '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1r

In [17]:
filename = 'cm1out_000001_s.dat'
ddf[(ddf['args_1'].str.contains(filename)) | (ddf['args_2'].str.contains(filename))].compute()

Unnamed: 0,index,rank,thread_id,cat,tstart,tend,func_id,level,arg_count,args_1,args_2,args_3,args_4,args_5,args_6,args_7,args_8,args_9,args_10
59,60,0,322192,0,5.638446,5.638453,__xstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
60,61,0,322192,0,5.638469,5.638579,open,0,2,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,2,,,,,,,,
61,62,0,322192,0,5.638655,5.638657,__fxstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
62,63,0,322192,0,5.773078,5.776894,write,0,3,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,16777216,,,,,,,
63,64,0,322192,0,5.844885,5.846870,write,0,3,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,16777216,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,7,1240,322192,0,3.437691,3.437695,__xstat,0,3,1,cm1out_000001_s.dat,%p,,,,,,,
7,8,1240,322192,0,3.437723,5.588772,open,0,3,cm1out_000001_s.dat,66,438,,,,,,,
8,9,1240,322192,0,5.628821,5.628825,__fxstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,
9,10,1240,322192,0,5.628830,5.628833,__fxstat,0,3,1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,%p,,,,,,,


In [5]:
log_dir2 = "/p/gpfs1/iopp/recorder_app_logs/cm1/nodes-32/workflow-4/_parquet"

ddf2 = dd.read_parquet(f"{log_dir2}/*.parquet")

In [6]:
ddf2.head(10)

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,file_id,proc_id
0,1,1,0,322192,0,3,2.82326,2.823267,__xstat,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,6.9e-06,28232634,4531734244992817467,5117778030848174740
1,2,1,0,322192,0,3,2.823293,2.823305,open,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,1.19e-05,28232988,4531734244992817467,5117778030848174740
2,3,1,0,322192,0,3,2.823371,2.823373,__fxstat,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,1.9e-06,28233717,4531734244992817467,5117778030848174740
3,4,1,0,322192,0,1,2.82339,2.823399,read,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,16777216,0,1758241.75,9.1e-06,28233943,4531734244992817467,5117778030848174740
4,5,1,0,322192,0,3,2.82349,2.823494,close,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,4.1e-06,28234916,4531734244992817467,5117778030848174740
5,6,1,0,322192,0,3,2.823522,2.823524,__xstat,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,2.1e-06,28235225,4531734247452668476,5117778030848174740
6,7,1,0,322192,0,3,2.823538,2.844243,open,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,0.0207049,28338901,4531734247452668476,5117778030848174740
7,8,1,0,322192,0,3,2.844259,2.84426,__fxstat,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,1.9e-06,28442595,4531734247452668476,5117778030848174740
8,9,1,0,322192,0,3,2.844264,2.844265,__fxstat,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,9e-07,28442640,4531734247452668476,5117778030848174740
9,10,1,0,322192,0,3,2.84429,2.844293,lseek,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.0,2.9e-06,28442919,4531734247452668476,5117778030848174740


In [7]:
ddf2[ddf2['acc_pat'] == 1].count().compute()

index        366
proc         366
rank         366
thread_id    366
cat          366
io_cat       366
tstart       366
tend         366
func_id      366
level        366
hostname     366
app          366
filename     366
size         366
acc_pat      366
bandwidth    366
duration     366
tmid         366
file_id      366
proc_id      366
dtype: int64

In [None]:
ddf2[(ddf2['filename'].str.contains(filename))].compute()

In [17]:
ddf2[ddf2['filename'] == ""].compute()

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id


In [35]:
ddf2['proc_id'].nunique().compute()

1280

In [29]:
ddf2['file_id'].unique().compute()

0      4531734244992817467
1      4531734247452668476
2      4531734246070024789
3      4531734245959907731
4      4531734245058167199
              ...         
770    4531734246187791667
771    4531734246508946226
772    4531734246070374525
773    4531734245825639992
774     901974072163427748
Name: file_id, Length: 775, dtype: int64

In [60]:
ddf2['func_id'].unique().compute()

0      __xstat
1         open
2     __fxstat
3         read
4        close
5        lseek
6    ftruncate
7        write
Name: func_id, dtype: object

In [71]:
ddf2[ddf2['func_id'] == 'lseek']['file_id'].unique().compute()

0     4531734247452668476
1     4531734246070024789
2     4531734245058167199
3     4531734246809951295
4     4531734247914523647
5     4531734246793868961
6     4531734248720417485
7     4531734245717475248
8     4531734247130387716
9     4531734247883313948
10    4531734246784543057
11    4531734245586618195
12    4531734246721693969
13    4531734244455765367
Name: file_id, dtype: int64

In [30]:
ddf2['filename'].unique().compute()

0      /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
1      /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
2      /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
3      /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
4      /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
                             ...                        
770    /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
771    /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
772    /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
773    /p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...
774                                            /dev/null
Name: filename, Length: 775, dtype: object

In [None]:
ddf2[ddf2['file_id'] == 4531734244992817467].compute()

In [36]:
ddf2_file = ddf2[ddf2['file_id'] == 4531734244992817467].compute()
ddf2_file_agg = ddf2_file.groupby(['io_cat']).agg({
    'filename': 'unique',
    'func_id': 'unique',
    'index': 'count',
    'rank': 'unique',
    'tstart': min,
    'tend': max
})

ddf2_file_agg

Unnamed: 0_level_0,filename,func_id,index,rank,tstart,tend
io_cat,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,[/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fi...,[read],1921,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",0.500601,3.102023
3,[/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fi...,"[__xstat, open, __fxstat, close]",7684,"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,...",0.500343,3.102243


In [63]:
import portion as P

ddf2_file_p = P.open(ddf2_file_agg['tstart'].min(), ddf2_file_agg['tend'].max())
0.5003428 in ddf2_file_p 


True

In [12]:
file_acc_ids = [
    4531734247452668476,
    4531734246070024789,
    4531734245058167199,
    4531734246809951295,
    4531734247914523647,
    4531734246793868961,
    4531734248720417485,
    4531734245717475248,
    4531734247130387716,
    4531734247883313948,
    4531734246784543057,
    4531734245586618195,
    4531734246721693969,
    4531734244455765367,
    
]
#ddf2[ddf2['file_id'].isin(file_acc_ids)]
file_acc_stat_ddf = ddf2.compute()
file_acc_stat_ddf = file_acc_stat_ddf.groupby(['file_id','filename','func_id']).agg({'index':['count',min,max],'size':[min,max,'last',sum],'duration':sum,'rank':['nunique',min,max],'tstart':min,'tend':max,'acc_pat':[min,max]})
file_acc_stat_ddf['duration', 'per'] = file_acc_stat_ddf.div(file_acc_stat_ddf.groupby(level=0).sum(), level=0)['duration', 'sum']
file_acc_stat_ddf

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,index,index,size,size,size,size,duration,rank,rank,rank,tstart,tend,acc_pat,acc_pat,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,min,max,min,max,last,sum,sum,nunique,min,max,min,max,min,max,per
file_id,filename,func_id,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
901974072163427748,/dev/null,__xstat,1279,15029,27458,0,0,0,0,0.003473,1279,1,1279,0.496752,3.065348,0,0,1.000000
4531734244435428412,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000157_w.dat,__fxstat,2,6507,6508,0,0,0,0,0.000004,1,0,0,295.725281,295.725281,0,0,0.005872
4531734244435428412,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000157_w.dat,__xstat,1,6505,6505,0,0,0,0,0.000003,1,0,0,295.724640,295.724640,0,0,0.004334
4531734244435428412,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000157_w.dat,close,1,6512,6512,0,0,0,0,0.000028,1,0,0,295.766418,295.766449,0,0,0.039284
4531734244435428412,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000157_w.dat,open,1,6506,6506,0,0,0,0,0.000613,1,0,0,295.724640,295.725250,0,0,0.856843
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4531734248727221027,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000067_s.dat,__fxstat,2,2833,2834,0,0,0,0,0.000003,1,0,0,139.272751,139.272766,0,0,0.001426
4531734248727221027,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000067_s.dat,__xstat,1,2831,2831,0,0,0,0,0.000003,1,0,0,139.270844,139.270844,0,0,0.001525
4531734248727221027,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000067_s.dat,close,1,2840,2840,0,0,0,0,0.000040,1,0,0,139.315491,139.315521,0,0,0.019675
4531734248727221027,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_diag_000067_s.dat,open,1,2832,2832,0,0,0,0,0.001877,1,0,0,139.270859,139.272736,0,0,0.923315


In [13]:
file_acc_stat_ddf.loc[([4531734246721693969],slice(None),slice(None))]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,index,index,size,size,size,size,duration,rank,rank,rank,tstart,tend,acc_pat,acc_pat,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,min,max,min,max,last,sum,sum,nunique,min,max,min,max,min,max,per
file_id,filename,func_id,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
4531734246721693969,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat,__fxstat,2,7559,7560,0,0,0,0,4e-06,1,0,0,344.220123,344.220154,0,0,0.004533
4531734246721693969,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat,__xstat,1,7557,7557,0,0,0,0,4e-06,1,0,0,344.219421,344.219421,0,0,0.004422
4531734246721693969,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat,close,1,7579,7579,0,0,0,0,6.4e-05,1,0,0,344.770325,344.770386,0,0,0.070978
4531734246721693969,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat,ftruncate,1,7574,7574,0,0,0,0,6e-06,1,0,0,344.42923,344.42923,0,0,0.006633
4531734246721693969,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat,lseek,1,7573,7573,0,0,0,0,2e-06,1,0,0,344.42923,344.42923,0,0,0.002101
4531734246721693969,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat,open,1,7558,7558,0,0,0,0,0.00067,1,0,0,344.219452,344.220123,0,0,0.740962
4531734246721693969,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat,write,1,7578,7578,6112,6112,6112,6112,0.000154,1,0,0,344.770142,344.770294,0,0,0.17037


In [14]:
file_acc_file_ddf = file_acc_stat_ddf.loc[([4531734245586618195],slice(None),slice(None))]

file_acc_file_ddf

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,index,index,size,size,size,size,duration,rank,rank,rank,tstart,tend,acc_pat,acc_pat,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,min,max,min,max,last,sum,sum,nunique,min,max,min,max,min,max,per
file_id,filename,func_id,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2
4531734245586618195,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.dat,__fxstat,8,48,14904,0,0,0,0,1.5e-05,1,0,0,5.635656,666.928345,0,0,0.01562
4531734245586618195,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.dat,__xstat,7,46,14902,0,0,0,0,6.5e-05,1,0,0,5.635112,666.928223,0,0,0.069493
4531734245586618195,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.dat,close,7,51,14908,0,0,0,0,4e-05,1,0,0,5.635734,666.928406,0,0,0.042397
4531734245586618195,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.dat,lseek,12,2524,14906,0,120,120,420,2.3e-05,1,0,0,127.622528,666.928406,0,1,0.024758
4531734245586618195,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.dat,open,7,47,14903,0,0,0,0,0.000649,1,0,0,5.635128,666.928284,0,0,0.689406
4531734245586618195,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.dat,write,7,50,14907,20,20,20,140,0.000149,1,0,0,5.635682,666.928406,0,0,0.158325


In [22]:
(file_acc_file_ddf['acc_pat', 'min'] == 0).all() and (file_acc_file_ddf['acc_pat', 'max'] == 0).all()

False

In [31]:
all([(file_acc_file_ddf['acc_pat', i].eq(0)).all() for i in ['min', 'max']])

False

In [47]:
write_size_sum = int(file_acc_file_ddf.loc[(slice(None),slice(None),'write')]['size', 'sum'])
write_size_last = int(file_acc_file_ddf.loc[(slice(None),slice(None),'write')]['size', 'last'])
lseek_size_last = int(file_acc_file_ddf.loc[(slice(None),slice(None),'lseek')]['size', 'last'])

write_size_sum - write_size_last == lseek_size_last

True

In [None]:
file_id_dict = dict(tuple(ddf2[ddf2['file_id'].isin(file_acc_ids)].compute().groupby(['file_id']).agg({'filename':min}).to_records()))

file_id_dict

{4531734244455765367: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000002_x.dat',
 4531734245058167199: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_stats.ctl',
 4531734245586618195: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.dat',
 4531734245717475248: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_w.ctl',
 4531734246070024789: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1_config.txt',
 4531734246721693969: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1rst_000001_x.dat',
 4531734246784543057: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_stats.dat',
 4531734246793868961: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_u.ctl',
 4531734246809951295: '/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_files/les_ConvBoundLayer/cm1out_metadata.ctl',
 4531734

In [40]:
proc_func_stat_ddf = ddf2[ddf2['proc_id'] == 5117778030848174740].groupby(['proc_id', 'io_cat', 'func_id']).agg({
    'index': 'count',
    'duration': 'sum',
    'size': 'sum',
    'file_id': tunique
}).compute()

proc_func_stat_ddf

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,duration,size,file_id
proc_id,io_cat,func_id,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
5117778030848174740,1,read,3,0.001326,50331648,2
5117778030848174740,2,write,2303,0.579398,1230852944,772
5117778030848174740,3,__fxstat,2993,0.005668,0,774
5117778030848174740,3,__xstat,2251,0.016423,0,774
5117778030848174740,3,close,2251,0.069,0,774
5117778030848174740,3,ftruncate,1122,0.163601,0,12
5117778030848174740,3,lseek,1854,0.004011,17674980,14
5117778030848174740,3,open,2251,3.275673,0,774


In [None]:
ddf2[ddf2['func_id'].str.contains('seek') & ddf2['size'] > 0].compute()

In [52]:
log_dir_h = "/p/gpfs1/iopp/recorder_app_logs/hacc/nodes-32/workflow-0/_parquet"
# /p/gpfs1/iopp/parquet_app_logs/cm1/nodes-32/workflow-4
ddf_h = dd.read_parquet(f"{log_dir_h}/*.parquet")

ddf_h

Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,float32,float32,int64,int64,int64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [53]:
ddf_h.head()

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
0,1,1,0,312784,2,0,0.968866,0.968892,MPI_Comm_size,0,localhost,app1,,0,0.0,2.6e-05,9688790,6142509188972423790,5117778030848165332
1,2,1,0,312784,2,0,0.968923,0.968924,MPI_Comm_rank,0,localhost,app1,,0,0.0,1e-06,9689235,6142509188972423790,5117778030848165332
2,3,1,0,312784,4,0,1.127011,1.127014,_ZN15RestartIO_GLEANC1Ev,0,localhost,app1,,0,0.0,3e-06,11270123,6142509188972423790,5117778030848165332
3,4,1,0,312784,2,0,1.127036,4.299232,MPI_Comm_dup,0,localhost,app1,,0,0.0,3.172197,27131343,6142509188972423790,5117778030848165332
4,5,1,0,312784,0,3,4.29836,4.298391,open,1,localhost,app1,/dev/shm/job2154496201-35619-OMPI_COLL_IBM-0-c...,0,0.0,3.1e-05,42983753,3341432783455051907,5117778030848165332


In [7]:
ddf_h['file_id'].nunique().compute()

2562

In [93]:
ddf_h['func_id'].unique().compute()

0                                         MPI_Comm_size
1                                         MPI_Comm_rank
2                              _ZN15RestartIO_GLEANC1Ev
3                                          MPI_Comm_dup
4                                                  open
5                                             ftruncate
6                                                 close
7                                                unlink
8     _ZN15RestartIO_GLEAN23__duplicateCommunicatorE...
9      _ZN15RestartIO_GLEAN24__initalizePartitionInfoEv
10                                       MPI_Comm_split
11           _ZN15RestartIO_GLEAN18__createPartitionsEv
12    _ZN15RestartIO_GLEAN10InitializeEP19ompi_commu...
13        _ZN15RestartIO_GLEAN21SetPOSIX_IO_InterfaceEi
14                                        MPI_Allreduce
15                                           MPI_Gather
16                                               open64
17                                          MPI_

In [55]:
file_acc_ids_h = list(ddf_h[ddf_h['func_id'] == 'lseek64']['file_id'].unique().compute())

file_acc_ids_h[:10]

[5347710266301943549,
 5347710268972954261,
 5347710268382397342,
 5347710264855252483,
 5347710268895530979,
 5347710265310567791,
 5347710265562864681,
 5347710267155037871,
 5347710267821916138,
 5347710267671485346]

In [83]:
file_acc_stat_ddf_h = ddf_h[ddf_h['file_id'].isin(file_acc_ids_h)].compute()
file_acc_stat_ddf_h

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
27,28,1,0,312784,0,3,10.744860,14.600355,open64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir//test-Part00000000...,0,0.000000e+00,3.855495,126726074,5347710266301943549,5117778030848165332
28,29,1,0,312784,0,3,14.600630,14.600637,close,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000000-...,0,0.000000e+00,0.000007,146006333,5347710266301943549,5117778030848165332
30,31,1,0,312784,0,3,14.600682,14.600697,open64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000000-...,0,0.000000e+00,0.000014,146006896,5347710266301943549,5117778030848165332
34,35,1,0,312784,0,3,18.552885,18.552887,lseek64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000000-...,25165824,1.142857e+07,0.000002,185528857,5347710266301943549,5117778030848165332
35,36,1,0,312784,0,2,18.552891,18.677382,write,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000000-...,67108864,5.140983e+02,0.124490,186151356,5347710266301943549,5117778030848165332
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162567,162568,1,1279,312784,0,3,12.942348,12.942352,lseek64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001279-...,494927872,1.151220e+08,0.000004,129423500,5347710265146560442,5117778030931985876
162568,162569,1,1279,312784,0,1,12.942355,13.053628,read,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001279-...,134217728,1.150326e+03,0.111273,129979915,5347710265146560442,5117778030931985876
162570,162571,1,1279,312784,0,3,13.053651,13.053655,lseek64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001279-...,629145600,1.500000e+08,0.000004,130536530,5347710265146560442,5117778030931985876
162571,162572,1,1279,312784,0,1,13.053658,13.096679,read,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001279-...,33554432,7.438228e+02,0.043021,130751686,5347710265146560442,5117778030931985876


In [88]:
import os

file_acc_stat_ddf_h['filename'] = file_acc_stat_ddf_h['filename'].apply(lambda filename: os.path.normpath(filename))
file_acc_stat_ddf_h2 = file_acc_stat_ddf_h.groupby(['file_id','filename','func_id']).agg({'index':['count',min,max],'size':[min,max,'last','mean',sum],'duration':sum,'rank':['nunique',min,max],'tstart':min,'tend':max})
file_acc_stat_ddf_h2['duration', 'per'] = file_acc_stat_ddf_h2.div(file_acc_stat_ddf_h2.groupby(level=0).sum(), level=0)['duration', 'sum']

file_acc_stat_ddf_h2[:40]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,index,index,index,size,size,size,size,size,duration,rank,rank,rank,tstart,tend,duration
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,count,min,max,min,max,last,mean,sum,sum,nunique,min,max,min,max,per
file_id,filename,func_id,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
5347710264681493468,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001027-of-00001280.data,close,4,130492,130578,0,0,0,0.0,0,0.00026,1,1027,1027,9.886889,20.933338,6.1e-05
5347710264681493468,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001027-of-00001280.data,lseek64,20,130498,130574,0,629145600,629145600,270952038.4,5419040768,0.000145,1,1027,1027,15.026616,20.592703,3.4e-05
5347710264681493468,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001027-of-00001280.data,open64,4,130491,130546,0,0,0,0.0,0,3.078018,1,1027,1027,6.808538,20.131613,0.721227
5347710264681493468,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001027-of-00001280.data,read,10,130539,130575,25165824,134217728,33554432,66270003.2,662700032,0.350103,1,1027,1027,19.991077,20.602991,0.082034
5347710264681493468,/p/gpfs1/iopp/temp/hacc_dir/test-Part00001027-of-00001280.data,write,10,130499,130527,25165824,134217728,25165824,66270003.2,662700032,0.839225,1,1027,1027,15.026621,16.490946,0.196643
5347710264689155874,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000350-of-00001280.data,close,4,44530,44616,0,0,0,0.0,0,0.000193,1,350,350,7.594072,24.679337,6.4e-05
5347710264689155874,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000350-of-00001280.data,lseek64,20,44536,44612,0,629145600,629145600,270952038.4,5419040768,9.6e-05,1,350,350,10.667729,23.853323,3.2e-05
5347710264689155874,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000350-of-00001280.data,open64,4,44529,44584,0,0,0,0.0,0,1.931071,1,350,350,4.666424,22.998804,0.639767
5347710264689155874,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000350-of-00001280.data,read,10,44577,44613,25165824,134217728,33554432,66270003.2,662700032,0.858662,1,350,350,22.93013,23.860483,0.284476
5347710264689155874,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000350-of-00001280.data,write,10,44537,44565,25165824,134217728,25165824,66270003.2,662700032,0.228375,1,350,350,10.667738,11.119619,0.075661


In [69]:
file_acc_file_ddf_h = file_acc_stat_ddf_h.loc[(5347710264707616257,slice(None),slice(None))]

write_size_sum_h = int(file_acc_file_ddf_h.loc[(slice(None),slice(None),'write')]['size', 'sum'])
write_size_last_h = int(file_acc_file_ddf_h.loc[(slice(None),slice(None),'write')]['size', 'last'])
read_size_sum_h = int(file_acc_file_ddf_h.loc[(slice(None),slice(None),'read')]['size', 'sum'])
read_size_last_h = int(file_acc_file_ddf_h.loc[(slice(None),slice(None),'read')]['size', 'last'])
lseek_size_last_h = int(file_acc_file_ddf_h.loc[(slice(None),slice(None),'lseek64')]['size', 'last'])

read_size_sum_h - read_size_last_h == lseek_size_last_h

True

In [49]:
ddf_h[ddf_h['func_id'] == 'MPI_Allreduce'].compute()

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
23,24,1,0,312784,2,0,10.648633,10.648634,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000001,106486336,6142509188972423790,5117778030848165332
24,25,1,0,312784,2,0,10.648639,10.648885,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000246,106487621,6142509188972423790,5117778030848165332
209,210,1,1,312784,2,0,10.662471,10.662474,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000003,106624719,6142509188972423790,5117778030848230868
210,211,1,1,312784,2,0,10.662478,10.662730,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000252,106626044,6142509188972423790,5117778030848230868
336,337,1,2,312784,2,0,10.659667,10.659669,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000002,106596678,6142509188972423790,5117778030848296404
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162230,162231,1,1277,312784,2,0,4.657865,4.658106,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000241,46579856,6142509188972423790,5117778030931854804
162356,162357,1,1278,312784,2,0,4.722832,4.722835,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000003,47228331,6142509188972423790,5117778030931920340
162357,162358,1,1278,312784,2,0,4.722840,4.723229,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000389,47230342,6142509188972423790,5117778030931920340
162483,162484,1,1279,312784,2,0,4.700896,4.700898,MPI_Allreduce,0,localhost,app1,,0,0.0,0.000002,47008968,6142509188972423790,5117778030931985876


In [50]:
ddf_h[ddf_h['file_id'] == 5347710266789296543].compute()

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
10498,10499,1,82,312784,0,3,4.720165,13.159923,open64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir//test-Part00000082...,0,0.0,8.439758,89400437,5347710266789296543,5117778030853539284
10499,10500,1,82,312784,0,3,13.497088,13.497097,close,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,0,0.0,8e-06,134970927,5347710266789296543,5117778030853539284
10501,10502,1,82,312784,0,3,13.525151,13.525364,open64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,0,0.0,0.000212,135252576,5347710266789296543,5117778030853539284
10505,10506,1,82,312784,0,3,22.427418,22.42742,lseek64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,25165824,10909090.0,2e-06,224274195,5347710266789296543,5117778030853539284
10506,10507,1,82,312784,0,2,22.427425,22.641762,write,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,67108864,298.5953,0.214337,225345929,5347710266789296543,5117778030853539284
10508,10509,1,82,312784,0,3,22.641792,22.641796,lseek64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,92274688,31428570.0,3e-06,226417940,5347710266789296543,5117778030853539284
10509,10510,1,82,312784,0,2,22.641798,22.653433,write,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,67108864,5501.165,0.011634,226476154,5347710266789296543,5117778030853539284
10511,10512,1,82,312784,0,3,22.65349,22.653496,lseek64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,159383552,25333330.0,6e-06,226534925,5347710266789296543,5117778030853539284
10512,10513,1,82,312784,0,2,22.653498,23.002195,write,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,67108864,183.5398,0.348698,228278464,5347710266789296543,5117778030853539284
10514,10515,1,82,312784,0,3,23.002258,23.002272,lseek64,0,localhost,app1,/p/gpfs1/iopp/temp/hacc_dir/test-Part00000082-...,226492416,16488550.0,1.3e-05,230022648,5347710266789296543,5117778030853539284


In [9]:
log_dir_m = "/p/gpfs1/iopp/recorder_app_logs/montage/nodes-32/_parquet"
# /p/gpfs1/iopp/parquet_app_logs/cm1/nodes-32/workflow-4
ddf_m = dd.read_parquet(f"{log_dir_m}/*.parquet")

ddf_m

Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
npartitions=10,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,float32,float32,int64,int64,int64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [10]:
ddf_m.head()

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
0,1,61856,0,314672,0,3,2.798459,2.798468,__xstat64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0,0,0.0,9e-06,27984635,1571755609790269981,244022999328542004
1,2,61856,0,314672,0,3,2.798558,2.798704,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/run/Kimag...,0,0.0,0.000146,27986308,7672382664121083431,244022999328542004
2,3,61856,0,314672,0,3,2.798825,2.798844,opendir,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0,0,0.0,1.9e-05,27988344,1571755609790269981,244022999328542004
3,4,61856,0,314672,0,3,2.798926,2.798935,__xstat64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0,0,0.0,9e-06,27989303,1571755609790269981,244022999328542004
4,5,61856,0,314672,0,3,2.798986,2.798995,__xstat64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964,0,0.0,9e-06,27989902,5347462497940582382,244022999328542004


In [8]:
ddf_m['file_id'].nunique().compute()

1924

In [9]:
ddf_m['proc_id'].nunique().compute()

255

In [10]:
ddf_m['index'].count().compute()

4997828

In [11]:
ddf_m['func_id'].unique().compute()

0         __xstat64
1           fopen64
2           opendir
3             fread
4            fclose
5             fseek
6            fwrite
7          vfprintf
8          closedir
9            unlink
10           remove
11    MPI_Comm_size
12    MPI_Comm_rank
13             open
14             read
15            close
16           access
17            write
18    MPI_Allreduce
19      MPI_Barrier
Name: func_id, dtype: object

In [12]:
ddf_m[ddf_m['func_id'] == 'fopen64'].head(10)

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id
1,2,61856,0,314672,0,3,2.798558,2.798704,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/run/Kimag...,0,0.0,0.000146,27986308,7672382664121083431,244022999328542004
6,7,61856,0,314672,0,3,2.800691,2.800708,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.7e-05,28006992,8459472472013222008,244022999328542004
9,10,61856,0,314672,0,3,2.800879,2.800894,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.5e-05,28008860,8459472472013222008,244022999328542004
119,120,61856,0,314672,0,3,2.835232,2.835248,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.6e-05,28352399,8459472473575113085,244022999328542004
122,123,61856,0,314672,0,3,2.835402,2.835418,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.6e-05,28354098,8459472473575113085,244022999328542004
207,208,61856,0,314672,0,3,2.869323,2.869339,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.6e-05,28693311,8459472472445179539,244022999328542004
210,211,61856,0,314672,0,3,2.869492,2.869507,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.5e-05,28694995,8459472472445179539,244022999328542004
295,296,61856,0,314672,0,3,2.903233,2.903249,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.6e-05,29032410,8459472469806324207,244022999328542004
298,299,61856,0,314672,0,3,2.903403,2.903419,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.6e-05,29034109,8459472469806324207,244022999328542004
384,385,61856,0,314672,0,3,2.937233,2.937249,fopen64,0,lassen70,mImgtbl,/p/gpfs1/iopp/temp/montage/2854964/0/2mass-atl...,0,0.0,1.6e-05,29372408,8459472470315449988,244022999328542004


In [20]:
ddf_m[ddf_m['filename'].str.contains('ngc3372.fits')].head(10)

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,bandwidth,duration,tmid,file_id,proc_id


In [7]:
import copy
import numpy as np
import pandas as pd
from dask import delayed
from dask.dataframe import DataFrame
from dask.distributed import get_worker


@delayed
def filter_delayed(ddf: DataFrame, fg_index: str, start: int, stop: int):
    empty = {
        'agg_bw': 0.0,
        'agg_dur': 0.0,
        'agg_size': 0,
    }

    if ddf.empty:
        return {
            'start': start,
            'stop': stop,
            'all': empty,
            'read': empty,
            'write': empty,
            'metadata': empty
        }

    def g(x):
        print('----g----')
        print(x)
        get_worker().log_event("g", x)
        d = {}
        d['duration'] = x['duration'].sum()

        return pd.Series(d, index=['duration'])  # , 'size']) #, 'bandwidth', 'index'])

    def f(x):
        print('----f----')
        print(x)
        get_worker().log_event("f", x)
        proc_df = x.groupby(['proc_id']).apply(g)
        proc_df_desc = proc_df.describe()
        # print('duration', proc_df_desc.loc['max']['duration'], x['duration'].sum(), proc_df_desc.loc['count']['index'])

        d = {}
        d['duration'] = proc_df_desc.loc['max']['duration']
        d['size'] = x['size'].sum()

        # High-level filter without uniques to understand focus areas
        # Low-level filter to explore details
        # apply to aggregate (10x faster)

        return pd.Series(d, index=['duration', 'size'])

    agg_values = ddf.groupby(['io_cat']).apply(f)

    del ddf

    io_cats = agg_values.index.unique()

    read_values = empty
    write_values = empty
    metadata_values = empty

    if 1 in io_cats:
        read_agg_dur = agg_values.loc[1]['duration']
        read_agg_size = agg_values.loc[1]['size']
        read_values = {
            'agg_bw': 0 if read_agg_dur == 0 else read_agg_size / read_agg_dur,
            'agg_dur': read_agg_dur,
            'agg_size': read_agg_size,
        }
    if 2 in io_cats:
        write_agg_dur = agg_values.loc[2]['duration']
        write_agg_size = agg_values.loc[2]['size']
        write_values = {
            'agg_bw': 0 if write_agg_dur == 0 else write_agg_size / write_agg_dur,
            'agg_dur': write_agg_dur,
            'agg_size': write_agg_size,
        }
    if 3 in io_cats:
        metadata_values = {
            'agg_dur': agg_values.loc[3]['duration'],
        }

    total_agg_dur = read_values['agg_dur'] + write_values['agg_dur'] + metadata_values['agg_dur']
    total_agg_size = read_values['agg_size'] + write_values['agg_size']

    all_values = {
        'agg_bw': 0 if total_agg_dur == 0 else total_agg_size / total_agg_dur,
        'agg_dur': total_agg_dur,
        'agg_size': total_agg_size,
    }

    filter_result = {
        'start': start,
        'stop': stop,
        'all': all_values,
        'read': read_values,
        'write': write_values,
        'metadata': metadata_values
    }

    # Return results
    return filter_result


@delayed
def merge_delayed(x, y):
    return {
        'start': x['start'],
        'stop': y['stop'],
        'all': {
            'agg_bw': x['all']['agg_bw'] + y['all']['agg_bw'],
            'agg_dur': x['all']['agg_dur'] + y['all']['agg_dur'],
            'agg_size': x['all']['agg_size'] + y['all']['agg_size'],
        },
        'read': {
            'agg_bw': x['read']['agg_bw'] + y['read']['agg_bw'],
            'agg_dur': x['read']['agg_dur'] + y['read']['agg_dur'],
            'agg_size': x['read']['agg_size'] + y['read']['agg_size'],
        },
        'write': {
            'agg_bw': x['write']['agg_bw'] + y['write']['agg_bw'],
            'agg_dur': x['write']['agg_dur'] + y['write']['agg_dur'],
            'agg_size': x['write']['agg_size'] + y['write']['agg_size'],
        },
        'metadata': {
            'agg_dur': x['metadata']['agg_dur'] + y['metadata']['agg_dur'],
        }
    }


@delayed
def flatten_delayed(x):
    return {
        'start': x['start'],
        'stop': x['stop'],
        'all': {
            'agg_bw': x['all']['agg_bw'],
            'agg_dur': x['all']['agg_dur'],
            'agg_size': x['all']['agg_size'],
        },
        'read': {
            'agg_bw': x['read']['agg_bw'],
            'agg_dur': x['read']['agg_dur'],
            'agg_size': x['read']['agg_size'],
        },
        'write': {
            'agg_bw': x['write']['agg_bw'],
            'agg_dur': x['write']['agg_dur'],
            'agg_size': x['write']['agg_size'],
        },
        'metadata': {
            'agg_dur': x['metadata']['agg_dur'],
        }
    }

In [30]:
ddf2_20 = ddf2.loc[:2000].compute()

ddf2_20

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,file_id,proc_id
0,1,1,0,322192,0,3,2.823260,2.823267,__xstat,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.000000e+00,0.000007,28232634,4531734244992817467,5117778030848174740
1,2,1,0,322192,0,3,2.823293,2.823305,open,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.000000e+00,0.000012,28232988,4531734244992817467,5117778030848174740
2,3,1,0,322192,0,3,2.823371,2.823373,__fxstat,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.000000e+00,0.000002,28233717,4531734244992817467,5117778030848174740
3,4,1,0,322192,0,1,2.823390,2.823399,read,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,16777216,0,1.758242e+06,0.000009,28233943,4531734244992817467,5117778030848174740
4,5,1,0,322192,0,3,2.823490,2.823494,close,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.000000e+00,0.000004,28234916,4531734244992817467,5117778030848174740
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1996,1997,1,0,322192,0,3,104.814606,104.814606,lseek,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.000000e+00,0.000002,1048146082,4531734246784543057,5117778030848174740
1997,1998,1,0,322192,0,3,104.814629,104.814629,lseek,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,12784,1,1.219177e+04,0.000001,1048146287,4531734246784543057,5117778030848174740
1998,1999,1,0,322192,0,2,104.814636,104.814651,write,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,272,0,1.980148e+01,0.000013,1048146416,4531734246784543057,5117778030848174740
1999,2000,1,0,322192,0,3,104.814651,104.814659,close,0,localhost,app1,/p/gpfs1/iopp/temp/cm1r20.3.2846827/config_fil...,0,0,0.000000e+00,0.000005,1048146545,4531734246784543057,5117778030848174740


In [31]:
ddf2_20_filtered = filter_delayed(ddf2_20, 'tmid', ddf2_20['tstart'].min(), ddf2_20['tend'].max()).compute()

ddf2_20_filtered

{'start': 2.82326,
 'stop': 104.814896,
 'all': {'agg_bw': 90136908.5360303,
  'agg_dur': 2.247855881578289,
  'agg_size': 202614780.0},
 'read': {'agg_bw': 37960362620.452415,
  'agg_dur': 0.0013259000843390822,
  'agg_size': 50331648.0},
 'write': {'agg_bw': 2105923572.7939007,
  'agg_dur': 0.07231180369853973,
  'agg_size': 152283132.0},
 'metadata': {'agg_dur': 2.17421817779541}}

In [32]:
def g(x):

    d = {}
    d['duration'] = x['duration'].sum()

    return pd.Series(d, index=['duration']) 

ddf2_20.groupby(['proc_id']).apply(g)

Unnamed: 0_level_0,duration
proc_id,Unnamed: 1_level_1
5117778030848174740,2.247856


In [33]:
ddf2_20.groupby(['proc_id', 'io_cat']).agg({'duration':sum})

Unnamed: 0_level_0,Unnamed: 1_level_0,duration
proc_id,io_cat,Unnamed: 2_level_1
5117778030848174740,1,0.001326
5117778030848174740,2,0.072312
5117778030848174740,3,2.174218


In [34]:
ddf2_20_agg = ddf2_20.groupby(['io_cat']).agg({'duration':[max,sum],'size':sum})
ddf2_20_agg

Unnamed: 0_level_0,duration,duration,size
Unnamed: 0_level_1,max,sum,sum
io_cat,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
1,0.001307,0.001326,50331648
2,0.003816,0.072312,152283132
3,1.970968,2.174218,306816


In [30]:
ddf2_20_agg.loc[1]['duration']

1.8900000213761814e-05

In [13]:
@delayed
def filter_delayed2(ddf: DataFrame, fg_index: str, start: int, stop: int):
    empty = {
        'agg_bw': 0.0,
        'agg_dur': 0.0,
        'agg_size': 0,
    }

    if ddf.empty:
        return {
            'start': start,
            'stop': stop,
            'all': empty,
            'read': empty,
            'write': empty,
            'metadata': empty
        }

    agg_values = ddf.groupby(['io_cat']).agg({'duration':max,'size':sum})

    del ddf

    io_cats = agg_values.index.unique()

    read_values = empty
    write_values = empty
    metadata_values = empty

    if 1 in io_cats:
        read_agg_dur = agg_values.loc[1]['duration']
        read_agg_size = agg_values.loc[1]['size']
        read_values = {
            'agg_bw': 0 if read_agg_dur == 0 else read_agg_size / read_agg_dur,
            'agg_dur': read_agg_dur,
            'agg_size': read_agg_size,
        }
    if 2 in io_cats:
        write_agg_dur = agg_values.loc[2]['duration']
        write_agg_size = agg_values.loc[2]['size']
        write_values = {
            'agg_bw': 0 if write_agg_dur == 0 else write_agg_size / write_agg_dur,
            'agg_dur': write_agg_dur,
            'agg_size': write_agg_size,
        }
    if 3 in io_cats:
        metadata_values = {
            'agg_dur': agg_values.loc[3]['duration'],
        }

    total_agg_dur = read_values['agg_dur'] + write_values['agg_dur'] + metadata_values['agg_dur']
    total_agg_size = read_values['agg_size'] + write_values['agg_size']

    all_values = {
        'agg_bw': 0 if total_agg_dur == 0 else total_agg_size / total_agg_dur,
        'agg_dur': total_agg_dur,
        'agg_size': total_agg_size,
    }

    filter_result = {
        'start': start,
        'stop': stop,
        'all': all_values,
        'read': read_values,
        'write': write_values,
        'metadata': metadata_values
    }

    # Return results
    return filter_result

In [15]:
ddf2_20_filtered2 = filter_delayed2(ddf2_20, 'tmid', ddf2_20['tstart'].min(), ddf2_20['tend'].max()).compute()

ddf2_20_filtered2

{'start': 2.82326,
 'stop': 3.376893,
 'all': {'agg_bw': 1615705687.8077831,
  'agg_dur': 0.020843700838668155,
  'agg_size': 33677286.0},
 'read': {'agg_bw': 3423921617465.7046,
  'agg_dur': 9.800000043469481e-06,
  'agg_size': 33554432.0},
 'write': {'agg_bw': 952356642.7493943,
  'agg_dur': 0.0001289999927394092,
  'agg_size': 122854.0},
 'metadata': {'agg_dur': 0.020704900845885277}}

In [20]:
import pandas as pd 

df = pd.DataFrame({'A': 'a a b'.split(),
                   'B': [1,2,3],
                   'C': [4,6,5]})

df

Unnamed: 0,A,B,C
0,a,1,4
1,a,2,6
2,b,3,5


In [24]:
import time

def xxx(x):
    print(x)
    print(time.time())
    return x

df.groupby(['A']).apply(xxx)

   A  B  C
0  a  1  4
1  a  2  6
1671690182.4205222
   A  B  C
2  b  3  5
1671690182.423368


Unnamed: 0,A,B,C
0,a,1,4
1,a,2,6
2,b,3,5


In [11]:
cosmo_ddf = dd.read_parquet("/p/gpfs1/iopp/recorder_app_logs/lbann-cosmoflow/nodes-32/_parquet/*.parquet")
cosmo_ddf

Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,file_id,proc_id
npartitions=2140,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,int32,float32,float32,int64,int64,int64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [12]:
cosmo_ddf.head()

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,file_id,proc_id
0,1,1,0,2099194032,2,0,0.295049,3.254283,MPI_Comm_dup,0,localhost,app1,,0,0,0.0,2.959234,17746659,6142509188972423790,5117778030848157135
1,2,1,0,2099194032,0,3,3.253641,3.253651,open,1,localhost,app1,/dev/shm/job2154496201-35619-OMPI_COLL_IBM-0-c...,0,0,0.0,1e-05,32536458,3341432783455051907,5117778030848157135
2,3,1,0,2099194032,0,3,3.253686,3.25369,open,1,localhost,app1,/dev/shm/job2154496201-35619-OMPI_COLL_IBM-0-c...,0,0,0.0,4e-06,32536878,3341432783455051907,5117778030848157135
3,4,1,0,2099194032,0,3,3.253701,3.253709,ftruncate,1,localhost,app1,/dev/shm/job2154496201-35619-OMPI_COLL_IBM-0-c...,0,0,0.0,8e-06,32537048,3341432783455051907,5117778030848157135
4,5,1,0,2099194032,0,3,3.253717,3.253719,close,1,localhost,app1,/dev/shm/job2154496201-35619-OMPI_COLL_IBM-0-c...,0,0,0.0,2e-06,32537177,3341432783455051907,5117778030848157135


In [13]:
%%time
cosmo_ddf.index.count().compute()

CPU times: user 17.3 s, sys: 786 ms, total: 18.1 s
Wall time: 46 s


2225473791

In [15]:
%%time
cosmo_file_ddf = cosmo_ddf.set_index(['file_id'])
cosmo_file_ddf = cosmo_file_ddf.persist()
cosmo_file_ddf

CPU times: user 22.4 s, sys: 689 ms, total: 23.1 s
Wall time: 24.8 s


Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,proc_id
npartitions=2140,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
-9223019127069887488,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,int32,float32,float32,int64,int64
-9203892175950633984,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9208214218330078208,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9223057279186121728,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [16]:
cosmo_file_ddf.index

Dask Index Structure:
npartitions=2140
-9223019127069887488    int64
-9203892175950633984      ...
                        ...  
 9208214218330078208      ...
 9223057279186121728      ...
Name: file_id, dtype: int64
Dask Name: sort_index, 4280 tasks

In [None]:
%%time
cosmo_file_ddf.index.nunique().compute()

In [15]:
genome_ddf = dd.read_parquet("/p/gpfs1/iopp/recorder_app_logs/genome_pegasus/nodes-32/_parquet/*.parquet")
genome_ddf

Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,file_id,proc_id
npartitions=698,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,int32,float32,float32,int64,int64,int64
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [16]:
genome_ddf.head()

Unnamed: 0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,file_id,proc_id
0,1,53431,0,315008,0,3,0.237784,0.237786,getcwd,0,lassen72,pegasus-mpi-cluster,,0,0,0.0,2e-06,2377848,6142509188972423790,7523492661920059012
1,2,53431,0,315008,0,3,0.250517,0.250523,closedir,0,lassen72,pegasus-mpi-cluster,%p,0,0,0.0,6e-06,2505197,-5561148475055268842,7523492661920059012
2,3,53431,0,315008,0,3,0.250669,0.250673,opendir,0,lassen72,pegasus-mpi-cluster,/g/g92/haridev/.openmpi/components,0,0,0.0,4e-06,2506708,5197033824288711371,7523492661920059012
3,4,53431,0,315008,0,3,0.291718,0.291721,getcwd,0,lassen72,pegasus-mpi-cluster,,0,0,0.0,3e-06,2917192,6142509188972423790,7523492661920059012
4,5,53431,0,315008,0,3,0.295864,0.295868,closedir,0,lassen72,pegasus-mpi-cluster,%p,0,0,0.0,4e-06,2958657,-5561148475055268842,7523492661920059012


In [17]:
%%time
genome_ddf.index.count().compute()

CPU times: user 2.92 s, sys: 200 ms, total: 3.12 s
Wall time: 14.6 s


715248240

In [18]:
%%time
genome_file_ddf = genome_ddf.set_index(['file_id'])
genome_file_ddf

CPU times: user 7.41 s, sys: 473 ms, total: 7.88 s
Wall time: 14.6 s


Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,proc_id
npartitions=698,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
-9223371986035283781,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,int32,float32,float32,int64,int64
-9223310963041105055,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9223308253708107101,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9223369538921024184,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [19]:
%%time
genome_file_ddf.index.count().compute()

CPU times: user 1min 37s, sys: 11.5 s, total: 1min 48s
Wall time: 5min 57s


715248240

In [20]:
%%time
genome_file_ddf.index.count().compute()

CPU times: user 2min 1s, sys: 14.4 s, total: 2min 15s
Wall time: 7min 3s


715248240

In [None]:
%%time
genome_file_ddf = genome_file_ddf.persist()
wait(genome_file_ddf)

In [23]:
%%time
genome_file_ddf.index.count().compute()

CPU times: user 5.93 s, sys: 770 ms, total: 6.7 s
Wall time: 40.1 s


715248240

In [25]:
%%time
genome_ddf['file_id'].nunique().compute()

CPU times: user 1.54 s, sys: 220 ms, total: 1.76 s
Wall time: 6.52 s


21267735

In [26]:
%%time
genome_file_ddf.index.nunique().compute()

CPU times: user 1.36 s, sys: 149 ms, total: 1.51 s
Wall time: 5.16 s


21267735

In [8]:
%%time

cm1_dir = "/p/vast1/iopp/recorder_app_logs/cm1/nodes-32/workflow-4/_parquet"

cm1_tmid_ddf_d = read_and_index_logs('cm1', 'tmid', log_dir=cm1_dir)
cm1_tmid_ddf = compute(*cm1_tmid_ddf_d)[-1]
cm1_tmid_ddf

CPU times: user 675 ms, sys: 444 ms, total: 1.12 s
Wall time: 5.45 s


Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,file_id,proc_id
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
4967534,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,int32,float32,float32,int64,int64
6683055727,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [9]:
%%time
cm1_file_ddf_d = read_and_index_logs('cm1', 'file_id', log_dir=cm1_dir)
cm1_file_ddf = compute(*cm1_file_ddf_d)[-1]
cm1_file_ddf

CPU times: user 135 ms, sys: 12.4 ms, total: 147 ms
Wall time: 2.05 s


Unnamed: 0_level_0,index,proc,rank,thread_id,cat,io_cat,tstart,tend,func_id,level,hostname,app,filename,size,acc_pat,bandwidth,duration,tmid,proc_id
npartitions=1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
901974072163427748,int64,int64,int32,int32,int32,int32,float32,float32,object,int32,object,object,object,int64,int32,float32,float32,int64,int64
4531734248727221027,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [10]:
%%time

cm1_file_agg_ddf = cm1_file_ddf \
    .groupby([cm1_file_ddf.index.name, 'proc_id', 'io_cat']) \
    .agg({'duration': sum, 'size': sum}) \
    .compute() \
    .groupby([cm1_file_ddf.index.name, 'io_cat']) \
    .agg({'duration': max, 'size': sum})

cm1_file_agg_ddf

CPU times: user 65.1 ms, sys: 0 ns, total: 65.1 ms
Wall time: 112 ms


Unnamed: 0_level_0,Unnamed: 1_level_0,duration,size
file_id,io_cat,Unnamed: 2_level_1,Unnamed: 3_level_1
901974072163427748,3,0.000004,0
4531734244435428412,2,0.000067,47144
4531734244435428412,3,0.000648,0
4531734244436508198,2,0.000078,49340
4531734244436508198,3,0.000881,0
...,...,...,...
4531734248720417485,3,0.001617,0
4531734248722472348,2,0.000101,49340
4531734248722472348,3,0.000823,0
4531734248727221027,2,0.000110,49340


In [11]:
cm1_file_agg_ddf.groupby(level=0).sum().sort_values('duration', ascending=False)

Unnamed: 0_level_0,duration,size
file_id,Unnamed: 1_level_1,Unnamed: 2_level_1
4531734245058167199,2.220052,1996330
4531734247156679037,2.198721,44564480
4531734244909082846,0.325141,49340
4531734247515089012,0.246595,44564480
4531734246187791667,0.236232,46923776
...,...,...
4531734247840317221,0.000297,47144
4531734245613589701,0.000292,47144
4531734247359713448,0.000291,47144
4531734245522122172,0.000288,47144


tornado.application - ERROR - Exception in callback functools.partial(<function TCPServer._handle_connection.<locals>.<lambda> at 0x2000360aae18>, <Task finished coro=<BaseTCPListener._handle_stream() done, defined at /usr/WS2/iopp/projects/vani-analysis-tool/virenv-lassen/lib/python3.7/site-packages/distributed/comm/tcp.py:530> exception=ValueError('invalid operation on non-started TCPListener')>)
Traceback (most recent call last):
  File "/usr/WS2/iopp/projects/vani-analysis-tool/virenv-lassen/lib/python3.7/site-packages/tornado/ioloop.py", line 740, in _run_callback
    ret = callback()
  File "/usr/WS2/iopp/projects/vani-analysis-tool/virenv-lassen/lib/python3.7/site-packages/tornado/tcpserver.py", line 391, in <lambda>
    gen.convert_yielded(future), lambda f: f.result()
  File "/usr/WS2/iopp/projects/vani-analysis-tool/virenv-lassen/lib/python3.7/site-packages/distributed/comm/tcp.py", line 536, in _handle_stream
    logger.debug("Incoming connection from %r to %r", address, sel