In [9]:
from backend_manager import BackendManager

In [10]:
import os
import pandas as pd
import glob


class DataCollector:
    """Read experiment tables from a local directory, with an optional remote fallback.

    Tables are semicolon-separated CSV files. When a table is missing locally
    and both a backend directory and a backend manager were supplied, the file
    is first copied from the backend into the local directory and then read.

    Args:
        local_data_dir: Directory that holds (or receives) the CSV tables.
        backend_data_dir: Directory on the backend that holds the same tables.
            Optional; without it no remote access is attempted.
        backend_manager: Object providing ``copy_from_backend(src, dst)`` and
            ``run_cmd(cmd)``. Optional; without it no remote access is attempted.

    Note:
        Lookup methods return the integer ``-1`` (not ``None`` and not an
        exception) when a table cannot be found; callers rely on this sentinel.
    """

    def __init__(self, local_data_dir, backend_data_dir=None, backend_manager=None):
        self.local_data_dir = local_data_dir
        self.backend_manager = backend_manager
        self.backend_data_dir = backend_data_dir

    def _has_backend(self):
        """Return True only when both the backend manager and directory are set."""
        return bool(self.backend_manager) and bool(self.backend_data_dir)

    def read_log(self, log_path):
        """Read a semicolon-separated CSV file.

        Returns:
            A ``pandas.DataFrame``, or ``-1`` if ``log_path`` does not exist.
        """
        if os.path.exists(log_path):
            return pd.read_csv(log_path, sep=';')
        return -1

    def get_table(self, table_name):
        """Return the table ``table_name``, fetching it from the backend if needed.

        Returns:
            A ``pandas.DataFrame``, or ``-1`` if the table is neither available
            locally nor retrievable from the backend.
        """
        table_path = os.path.join(self.local_data_dir, table_name)
        if os.path.exists(table_path):
            return self.read_log(table_path)

        # Only go remote when a manager is actually available; a backend
        # directory without a manager used to raise AttributeError on None.
        if self._has_backend():
            backend_path = os.path.join(self.backend_data_dir, table_name)
            self.backend_manager.copy_from_backend(backend_path, self.local_data_dir)
            if os.path.exists(table_path):
                return self.read_log(table_path)

        return -1

    def get_case_table(self):
        """Return the table stored in ``case_table.csv``."""
        return self.get_table("case_table.csv")

    def get_meta_table(self):
        """Get event table without actual execution times."""
        return self.get_table("event_meta_table.csv")

    def get_runtimes_table(self):
        """Get event table with actual execution times."""
        return self.get_table("run_times.csv")

    def get_runtimes_competing_table(self, run_id):
        """Return the table stored in ``run_times_competing_<run_id>.csv``."""
        return self.get_table("run_times_competing_{}.csv".format(run_id))

    def get_ranks(self):
        """Return the table stored in ``ranks.csv``."""
        return self.get_table("ranks.csv")

    def get_mean_ranks(self):
        """Return the table stored in ``mean_ranks.csv``."""
        return self.get_table("mean_ranks.csv")

    def _delete_files(self, local_pattern, backend_pattern):
        """Delete local files matching ``local_pattern`` and remote ones matching ``backend_pattern``.

        Returns:
            The value returned by ``backend_manager.run_cmd`` when a backend is
            configured, otherwise ``0``.
        """
        for path in glob.glob(os.path.join(self.local_data_dir, local_pattern)):
            # The file may have vanished between glob() and here.
            if os.path.exists(path):
                print("removing ", path)
                os.remove(path)

        # Require the directory as well as the manager: formatting a None
        # directory would issue "rm -rf None/<pattern>" on the backend.
        if self._has_backend():
            # The directory is deliberately left unquoted so that the remote
            # shell expands "~" and the glob pattern.
            cmd = "rm -rf {arg_dir}/{pattern}".format(
                arg_dir=self.backend_data_dir, pattern=backend_pattern
            )
            return self.backend_manager.run_cmd(cmd)
        return 0

    def delete_competing_measurements(self):
        """Remove all ``*_competing_*`` measurement files locally and on the backend.

        Returns:
            The result of the backend command, or ``0`` without a backend.
        """
        return self._delete_files("*_competing_*.csv", "*_competing_*")

    def delete_ranks(self):
        """Remove all ``*ranks.csv`` files locally and on the backend.

        Returns:
            The result of the backend command, or ``0`` without a backend.
        """
        # Previously referenced the non-existent attribute ``self.backend``.
        return self._delete_files("*ranks.csv", "*ranks.csv")




### Local data

In [6]:
# Local-only collector: no backend directory or manager is passed, so a table
# that is missing from this directory is reported as -1 instead of being fetched.
dc_local = DataCollector("sample_data/100_100_10_100_100")

In [7]:
# Reads case_table.csv (semicolon-separated) from the local data directory.
dc_local.get_case_table()

Unnamed: 0,case:concept:name,case:flops,case:num_kernels
0,algorithm0,600000.0,3
1,algorithm2,2400000.0,3
2,algorithm4,4200000.0,3
3,algorithm1,600000.0,3
4,algorithm5,4200000.0,3
5,algorithm3,2400000.0,3


In [8]:
# Reads run_times.csv: the event table that includes actual execution times.
dc_local.get_runtimes_table()

Unnamed: 0,case:concept:name,concept:name,concept:flops,concept:operation,concept:kernel,timestamp:start,timestamp:end
0,algorithm0,gemm_2e+05,200000.0,tmp3 = (C D),"gemm!('N', 'N', 1.0, ml2, ml3, 0.0, ml4)",1657009000.0,1657009000.0
1,algorithm0,gemm_2e+05,200000.0,tmp1 = (A B),"gemm!('N', 'N', 1.0, ml0, ml1, 0.0, ml5)",1657009000.0,1657009000.0
2,algorithm0,gemm_2e+05,200000.0,tmp6 = (tmp1 tmp3),"gemm!('N', 'N', 1.0, ml5, ml4, 0.0, ml6)",1657009000.0,1657009000.0
3,algorithm2,gemm_2e+05,200000.0,tmp1 = (A B),"gemm!('N', 'N', 1.0, ml0, ml1, 0.0, ml4)",1657009000.0,1657009000.0
4,algorithm2,gemm_2e+05,200000.0,tmp4 = (tmp1 C),"gemm!('N', 'N', 1.0, ml4, ml2, 0.0, ml5)",1657009000.0,1657009000.0
5,algorithm2,gemm_2e+06,2000000.0,tmp6 = (tmp4 D),"gemm!('N', 'N', 1.0, ml5, ml3, 0.0, ml6)",1657009000.0,1657009000.0
6,algorithm4,gemm_2e+05,200000.0,tmp2 = (B C),"gemm!('N', 'N', 1.0, ml1, ml2, 0.0, ml4)",1657009000.0,1657009000.0
7,algorithm4,gemm_2e+06,2000000.0,tmp5 = (tmp2 D),"gemm!('N', 'N', 1.0, ml4, ml3, 0.0, ml5)",1657009000.0,1657009000.0
8,algorithm4,gemm_2e+06,2000000.0,tmp6 = (A tmp5),"gemm!('N', 'N', 1.0, ml0, ml5, 0.0, ml6)",1657009000.0,1657009000.0
9,algorithm1,gemm_2e+05,200000.0,tmp1 = (A B),"gemm!('N', 'N', 1.0, ml0, ml1, 0.0, ml4)",1657009000.0,1657009000.0


### Backend data

In [13]:
# Connection to the remote backend used for copying tables and running commands.
# NOTE(review): host name and user name are hard-coded here; consider reading
# them from environment variables or a config cell before sharing the notebook.
bm = BackendManager(server="login18-1.hpc.itc.rwth-aachen.de", uname="as641651")
# presumably opens the remote session — confirm against BackendManager.
bm.connect()

In [17]:
# Local directory that receives tables copied from the backend.
local_dir = "sample_data/cluster/"
# Directory on the backend that holds the tables; "~" is left for the remote
# side to expand.
# NOTE(review): the path spells "analyazer" — verify it matches the remote directory name.
backend_dir = "~/PhD/performance-analyazer/Experiment2/Matrix-Chain-4/variants-linnea/saved/75_75_8_75_75/"

In [19]:
# Collector with a backend fallback: tables missing from local_dir are copied
# from backend_dir through bm before being read.
dc_backend = DataCollector(local_dir,backend_dir,bm)

In [20]:
# If case_table.csv is not yet in local_dir, this first copies it from the
# backend and then reads the local copy.
dc_backend.get_case_table()

scp as641651@login18-1.hpc.itc.rwth-aachen.de:~/PhD/performance-analyazer/Experiment2/Matrix-Chain-4/variants-linnea/saved/75_75_8_75_75/case_table.csv sample_data/cluster/
b''


Unnamed: 0,case:concept:name,case:flops,case:num_kernels
0,algorithm0,270000.0,3
1,algorithm2,1020000.0,3
2,algorithm4,1780000.0,3
3,algorithm1,270000.0,3
4,algorithm5,1780000.0,3
5,algorithm3,1020000.0,3
