In [None]:
from matplotlib import colormaps
import matplotlib.pyplot as plt
from typing import Optional, List, Tuple, Dict
import pandas as pd
import os
import re
import json
import subprocess
import numpy as np
from scipy import stats
from collections import defaultdict, namedtuple
from sklearn.cluster import DBSCAN
import math as ma

#### Global variables

In [None]:
# File name and directory locations used by the path helpers of this notebook
TRACE_CMD_CACHE_FILENAME = "trace_cmd_runtimes"
RESULTS_DIR_PATH = "../results"
ABSOLUTE_RESULTS_DIR_PATH = "/home/cgachod/analysis/results"

WORKING_DIR = "/root/tests"
PERF_SCRIPT_RESULTS_FILEPATH = WORKING_DIR + "/.perf_mem_results.log"

# Physical addresses greater than or equal to this value are attributed to memory node 1
NODE_1_PHYS_ADDR_START = 0x1840000000

# Of the 64 CPU ids, the even ones are listed for node 0 and the odd ones for node 1
DAHU_NODE_0_CPUID = list(range(0, 64, 2))
DAHU_NODE_1_CPUID = list(range(1, 64, 2))
# Node number of each CPU, indexed by CPU id
dahu_cpu_nodes = [int(cpuid in DAHU_NODE_1_CPUID) for cpuid in range(64)]

def in_working_dir(path: str) -> str :
    """Return `path` joined onto WORKING_DIR with os.path.join."""
    return os.path.join(WORKING_DIR, path)

def get_result_dir_path(result_dir_name: str) -> str:
    """Return `result_dir_name` joined onto the relative RESULTS_DIR_PATH."""
    return os.path.join(RESULTS_DIR_PATH, result_dir_name)

def get_absolute_result_dir_path(result_dir_name: str) -> str :
    """Return `result_dir_name` joined onto ABSOLUTE_RESULTS_DIR_PATH."""
    return os.path.join(ABSOLUTE_RESULTS_DIR_PATH, result_dir_name)

#### Visualization params

In [None]:
# Plot colors for the two "nb" states.
# NOTE(review): "nb" is not spelled out in the visible cells — confirm what it abbreviates.
nb_enabled_color = "tab:orange"
nb_disabled_color = "tab:blue"

# Figure sizing parameters (presumably inches, as matplotlib expects — confirm at the call sites)
figure_height = 10 # 10
min_figure_width = 40
figure_width_coeff = 0.3

# Line width for plotted lines
line_width = 1.0

The idea is to show which memory node each core accesses:
- Sort the cores by node
- For each core, show a timeline, or maybe just the percentage of time during which it accesses each node
- Maybe use two histograms per core: one pointing up for local-node accesses and one pointing down for remote-node accesses

In [None]:
# For this analysis we only need the CPU id, the memory addresses and the timestamp.
# We assume the perf data file is available.

# Groups -> 1: pid, 2: tid, 3: cpuid, 4: time, 5: period, 6: event, 7: virtual address
basic_info_regex_str = r"^ *[\w\/\-\.\:]+ +(\d+)\/(\d+) +\[(\d+)\] +(\d+\.\d+): +(\d+) +([\w\/\-\.]+).*?: +([0-9a-f]+)"

# Groups -> 8: memory access type, 9: TLB access type
# The last decoded data_src field is matched lazily and must be followed by
# whitespace. A greedy match would also swallow lowercase hex letters (a-f) at
# the start of the physical address and silently truncate it.
data_src_regex_str = r"[0-9a-f]+ \|OP (?:LOAD|STORE)\|([^\|]+)\|[^\|]+\|(TLB [^\|]+)\|[^\|]+\|[a-zA-Z\/\- ]+?\s+"
# Group -> 10: physical address
phys_addr_regex_str = r"([0-9a-f]+)"

line_regex = re.compile(basic_info_regex_str + r"\s+" + data_src_regex_str + phys_addr_regex_str)

def generate_perf_mem_log(file_path: str) -> None :
    """Dump the samples of a perf data file as text into PERF_SCRIPT_RESULTS_FILEPATH.

    Runs `perf script` on `file_path`, restricted to the `cg.C.x` command, with
    the fields comm, pid, tid, cpu, time, period, event, addr, data_src and
    phys_addr. The output file is overwritten.

    Args:
        file_path: path of the perf data file to read.

    Raises:
        subprocess.CalledProcessError: if `perf script` exits with a non-zero status.
        OSError: if the output file cannot be opened or `perf` cannot be started.
    """
    # An argument list (no shell) keeps spaces or shell metacharacters in
    # file_path from being interpreted.
    command = [
        "perf", "script",
        "-i", file_path,
        "-c", "cg.C.x",
        "-F", "comm,pid,tid,cpu,time,period,event,addr,data_src,phys_addr",
    ]
    print(" ".join(command), ">", PERF_SCRIPT_RESULTS_FILEPATH)
    with open(PERF_SCRIPT_RESULTS_FILEPATH, "w") as output_file :
        # check=True surfaces a failing perf invocation instead of leaving an
        # empty or partial log behind unnoticed.
        subprocess.run(command, stdout=output_file, check=True)
    
# perf script -L -F 'comm,pid,tid,cpu,time,addr,event,ip,phys_addr,data_src,period' --reltime --time 1.0,2.0

In [None]:
# Sanity check of line_regex on one sample line: group 6 is the event name, and
# its characters 4 to 13 are compared with "mem-stores" (how stores are detected later on).
# Raises AttributeError if the regex does not match, since match() then returns None.
resgex = line_regex.match("          cg.C.x 10674/10674 [000]  2789.844649:      26661         cpu/mem-stores/P: ffffe9b88d9bfcd0      1e05080144 |OP STORE|LVL L1 or N/A hit|SNP N/A|TLB N/A|LCK N/A|BLK  N/A                                0")
print(resgex.group(6)[4:14] == "mem-stores")

In [None]:
# generate_perf_mem_log(in_working_dir("perf-mem-sequential.data"))

In [None]:
# Scratch check: the lists held inside a tuple can still be appended to
testval = ([], [])
testval[0].append(9)

print(testval)

In [None]:
def build_memory_accesses_dfs(filepath: str) -> Tuple[pd.DataFrame, pd.DataFrame] :
    """Parse a `perf script` memory log into one DataFrame for loads and one for stores.

    Each line of `filepath` is matched against `line_regex`; lines that do not
    match are printed and skipped. A sample counts as a store when characters
    4 to 13 of its event name are "mem-stores", and as a load otherwise.

    Each frame has the columns time (relative to the earliest sample of that
    frame), cpuid, virt, phys, cache_result, period, time_offset (difference
    with the previous row, computed before the filtering below), cpu_node and
    memory_node (1 when phys >= NODE_1_PHYS_ADDR_START, else 0). Rows whose
    physical address is 0 are removed.

    Note: a later cell of this notebook defines a function with the same name,
    which replaces this one once that cell has run.

    Returns:
        (loads_df, stores_df)
    """
    # In every pair below, index 0 collects loads and index 1 collects stores
    timestamp = ([], [])
    cpuid = ([], [])
    virtual_addr = ([], [])
    physical_addr = ([], [])
    cache_result = ([], [])
    period = ([], [])

    with open(filepath) as f :
        for line in f :
            matched = line_regex.match(line)
            if not matched :
                print("Not matched line : ", line)
                continue
            load_store_idx = int(matched[6][4:14] == "mem-stores")
            timestamp[load_store_idx].append(float(matched[4]))
            cpuid[load_store_idx].append(int(matched[3]))
            cache_result[load_store_idx].append(matched[8])
            virtual_addr[load_store_idx].append(int(matched[7], base=16))
            physical_addr[load_store_idx].append(int(matched[10], base=16))
            period[load_store_idx].append(int(matched[5]))

    dahu_cpu_nodes_map = {cpuid: 1 if cpuid in DAHU_NODE_1_CPUID else 0 for cpuid in range(64)}

    memory_dfs = []
    for i in range(2) :
        df = pd.DataFrame({
            "time": timestamp[i],
            "cpuid": cpuid[i],
            "virt": virtual_addr[i],
            "phys": physical_addr[i],
            "cache_result": cache_result[i],
            "period": period[i]
        })
        df['time_offset'] = df['time'].diff()
        df['cpu_node'] = df['cpuid'].map(dahu_cpu_nodes_map)
        df['memory_node'] = (df['phys'] >= NODE_1_PHYS_ADDR_START).astype(int)
        df['time'] = df['time'] - df['time'].min()
        memory_dfs.append(df.loc[df['phys'] != 0])

    # The frames were built but never handed back to the caller before
    return memory_dfs[0], memory_dfs[1]
        
                
    # # memory_dfs = [
    # #     ),
    # #     pd.DataFrame(
    # #     {"time": timestamp, "cpuid": cpuid,  "virt": virtual_addr, "phys": physical_addr , "cache_result": cache_result, "period": period})
    # # ]

    # # print(NODE_1_PHYS_ADDR_START)
    # memory_df = 

    # memory_loads_df = 0
    # memory_stores_df =     

    # memory_df['cpu_node'] = memory_df['cpuid'].map(dahu_cpu_nodes_map)
    # memory_df['memory_node'] = (memory_df['phys'] >= NODE_1_PHYS_ADDR_START).astype(int)
    # memory_df['time'] = memory_df['time'] - memory_df['time'].min()
    # memory_df['time_offset'] = memory_df['time'].diff()
    # return memory_df.loc[memory_df['phys'] != 0]

In [None]:
def build_memory_accesses_dfs(filepath: str) -> Tuple[pd.DataFrame, pd.DataFrame] :
    """Parse a `perf script` memory log into a loads DataFrame and a stores DataFrame.

    Each line of `filepath` is matched against `line_regex`; lines that do not
    match are printed and skipped. A sample is flagged as a store when
    characters 4 to 13 of its event name are "mem-stores". Samples whose
    physical address is 0 are dropped.

    Columns of both frames: time (relative to the earliest parsed sample),
    cpuid, virt, phys, cache_result, period, is_store, cpu_node, memory_node
    (1 when phys >= NODE_1_PHYS_ADDR_START, else 0) and time_offset (time
    difference with the previous row of the same frame).

    Returns:
        (loads_df, stores_df). The first row of each frame is left out because
        its time_offset is undefined (NaN).
    """
    is_store = []
    timestamp = []
    cpuid = []
    virtual_addr = []
    physical_addr = []
    cache_result = []
    period = []

    with open(filepath) as f :
        for line in f :
            matched = line_regex.match(line)
            if not matched :
                print("Not matched line : ", line)
                continue
            is_store.append(int(matched[6][4:14] == "mem-stores"))
            timestamp.append(float(matched[4]))
            cpuid.append(int(matched[3]))
            cache_result.append(matched[8])
            virtual_addr.append(int(matched[7], base=16))
            physical_addr.append(int(matched[10], base=16))
            period.append(int(matched[5]))

    dahu_cpu_nodes_map = {cpuid: 1 if cpuid in DAHU_NODE_1_CPUID else 0 for cpuid in range(64)}

    accesses_df = pd.DataFrame({
        "time": timestamp,
        "cpuid": cpuid,
        "virt": virtual_addr,
        "phys": physical_addr,
        "cache_result": cache_result,
        "period": period,
        "is_store": is_store
    })
    # Time is made relative to the earliest sample, before any row is filtered out
    accesses_df['time'] = accesses_df['time'] - accesses_df['time'].min()
    accesses_df['cpu_node'] = accesses_df['cpuid'].map(dahu_cpu_nodes_map)
    accesses_df['memory_node'] = (accesses_df['phys'] >= NODE_1_PHYS_ADDR_START).astype(int)
    accesses_df = accesses_df.loc[accesses_df['phys'] != 0]

    # Explicit copies: adding a column to a .loc slice is chained assignment,
    # which triggers SettingWithCopyWarning and is not guaranteed to take effect.
    loads_df = accesses_df.loc[accesses_df["is_store"] == 0].copy()
    loads_df['time_offset'] = loads_df['time'].diff()

    stores_df = accesses_df.loc[accesses_df["is_store"] == 1].copy()
    stores_df['time_offset'] = stores_df['time'].diff()

    return loads_df.iloc[1:], stores_df.iloc[1:]

In [None]:
# Parse the sample log from the working directory into the loads and stores DataFrames.
# 6 664 894 lines
loads_df, stores_df = build_memory_accesses_dfs(in_working_dir("perf_mem_sample.log"))

In [None]:
# Show the parsed loads
loads_df

In [None]:
# stores_df.loc[stores_df['cache_result'] == "LVL L1 or N/A hit"]

In [None]:
# plt.cm.Spectral_r([0,1])

# for colorname in plt.colormaps() :
#     display(plt.cm.get_cmap(colorname))
# colormaps["turbo"]([100, 128, 196, 70])
# plt.cm.get_cmap("OrRd")

# colormaps["OrRd"]


In [None]:
def get_cache_results_stats(input_df: pd.DataFrame) :
    """Summarize `input_df` per distinct 'cache_result' value.

    The returned frame is indexed by cache result and holds the number of rows
    (from value_counts), the mean 'period' and the mean 'time_offset' of those
    rows. It is sorted by 'period_mean', largest first.
    """
    counts = input_df['cache_result'].value_counts()
    is_label = input_df['cache_result']

    stats_df = counts.to_frame()
    # One mean per label, in the same order as the value_counts index
    stats_df['period_mean'] = [
        input_df.loc[is_label == label, 'period'].mean() for label in counts.index
    ]
    stats_df['time_offset_mean'] = [
        input_df.loc[is_label == label, 'time_offset'].mean() for label in counts.index
    ]
    return stats_df.sort_values('period_mean', ascending=False)


def plot_cache_results(input_df: pd.DataFrame) :
    """Show three box plots with one box per distinct 'cache_result' value.

    In order: the 'period' values, the 'time_offset' values, and the
    element-wise ratio period / time_offset. The last two hide outliers and
    show the mean. Boxes follow the order of the value_counts index.
    """
    period_per_cr = {}
    time_offset_per_cr = {}
    for label in input_df['cache_result'].value_counts().index :
        rows = input_df.loc[input_df['cache_result'] == label]
        period_per_cr[label] = rows['period']
        time_offset_per_cr[label] = rows['time_offset']

    def show_boxplot(series_per_label, **boxplot_options) :
        # One figure per call, with x labels tilted so they stay readable
        plt.boxplot(series_per_label.values(), labels=series_per_label.keys(), **boxplot_options)
        plt.xticks(rotation=20, ha='right')
        plt.show()

    # Period values
    show_boxplot(period_per_cr)

    # Time offsets values
    show_boxplot(time_offset_per_cr, showfliers=False, showmeans=True)

    # Period divided by time offset
    ratio = {label: period_per_cr[label] / time_offset_per_cr[label] for label in period_per_cr}
    show_boxplot(ratio, showfliers=False, showmeans=True)
    
plot_cache_results(loads_df)
    
        

In [None]:
# Print the per-cache-result statistics, first for the loads and then for the stores
print("Loads cache results stats")
print(get_cache_results_stats(loads_df))

print("\n\nStores cache results stats")
print(get_cache_results_stats(stores_df))

In [None]:
def scatter_accesses_for_cpu(accesses_df: pd.DataFrame, cpuid = -1, min_phys = None, max_phys = None) :
    """Build the color assigned to each cache result found in `accesses_df`.

    Cache results are ordered as returned by get_cache_results_stats and get
    the colors 0, 25, 50, ... of the "turbo" colormap.

    TODO: the function is unfinished. `cpuid`, `min_phys` and `max_phys` are
    accepted but not used yet, and no scatter plot is drawn.

    Returns:
        dict mapping each cache result value to its RGBA color.
    """
    cache_results_df = get_cache_results_stats(accesses_df)
    color_map = colormaps["turbo"]
    # Use the statistics of the frame passed in, not the notebook-level period_df
    cache_results_colors = {
        cache_res_val: color_map(25 * i)
        for i, cache_res_val in enumerate(cache_results_df.index)
    }
    return cache_results_colors

In [None]:


# Goal: value counts and mean period for each cache result
period_df = get_cache_results_stats(loads_df)
# get_cache_results_stats already returns its result sorted this way; the sort is repeated here
period_df = period_df.sort_values('period_mean', ascending=False)

print(period_df)

# One "turbo" colormap entry per cache result, spaced 25 entries apart in period_df order
color_map = colormaps["turbo"]
cache_results_colors = {cache_res_val: color_map(25 * i) for i, cache_res_val in enumerate(period_df.index)}


In [None]:
def get_clusters_1D(values: np.ndarray, eps: float, min_size: int) -> Tuple[list, list] :
    """Group 1-D values into clusters separated by gaps larger than `eps`.

    The values are sorted; a new cluster starts wherever the difference between
    two consecutive sorted values is greater than `eps`. Clusters with fewer
    than `min_size` values are left out.

    Args:
        values: 1-D array-like of numbers (a pandas Series works too).
        eps: largest gap allowed between neighbours of the same cluster.
        min_size: minimum number of values a cluster needs to be reported.

    Returns:
        (clusters_bounds, clusters_lengths), both ordered by increasing value.
        Each bound is a (lower, upper) pair where `upper` is excluded:
        - `lower` is the smallest value of the cluster, except for the first
          cluster, whose lower bound is 0 (kept for backward compatibility);
        - `upper` is the first value of the next cluster, or the largest value
          plus 1 for the last cluster.
        Both lists are empty when `values` is empty.
    """
    sorted_values = np.sort(np.asarray(values))
    n_values = len(sorted_values)
    if n_values == 0 :
        return [], []

    # Positions where a new cluster begins (the gap to the previous value exceeds eps)
    split_points = np.where(np.diff(sorted_values) > eps)[0] + 1
    starts = np.concatenate(([0], split_points))
    ends = np.concatenate((split_points, [n_values]))

    clusters_bounds = [] # (min, max excluded)
    clusters_lengths = []
    for start, end in zip(starts, ends) :
        cluster_len = end - start
        if cluster_len < min_size :
            continue
        lower = 0 if start == 0 else sorted_values[start]
        # The last cluster has no following value to serve as its excluded maximum
        upper = sorted_values[end] if end < n_values else sorted_values[-1] + 1
        clusters_bounds.append((lower, upper))
        clusters_lengths.append(cluster_len)

    return clusters_bounds, clusters_lengths


# def get_virtual_addrs_clusters(data: dict) -> Tuple[list, list] :
#     return get_clusters_1D(np.concatenate([np.array(cpu['virtual_addrs']) for cpu in data]), 1e7, 10)

In [None]:
# Clusters of load physical addresses: gap threshold 1e8, at least 100000 samples per cluster
get_clusters_1D(loads_df['phys'], 1e8, 100000)

In [None]:
# Keep the (lower, upper) bounds of the cluster at index 3.
# Raises IndexError if fewer than four clusters are found for the loaded data.
bounds = get_clusters_1D(loads_df['phys'], 1e8, 100000)[0][3]
bounds

In [None]:
# NOTE(review): remote_memory_df is not assigned in any cell visible above this one, so this
# fails on a fresh kernel — confirm where it comes from (remote_loads_df is only created in a later cell).
# NOTE(review): x is the index in its original order while y is the sorted 'phys' values —
# confirm this pairing is intended.
plt.plot(remote_memory_df['phys'].index, remote_memory_df['phys'].sort_values())
plt.show()



Plan:
- Plot the exact remote physical addresses; maybe something will show up there
- Otherwise, scatter all the addresses with a y axis that only distinguishes remote vs. local node, and color each point by its type of cache hit

In [None]:
# Loads whose physical address lies strictly inside the selected cluster bounds.
# `bounds` was computed from loads_df['phys'], so the subset is taken from loads_df
# (memory_df is not defined by any cell above and raised a NameError on a fresh kernel).
phys_addr_subset_df = loads_df.loc[(loads_df['phys'] > bounds[0]) & (loads_df['phys'] < bounds[1])]
phys_addr_subset_df

In [None]:
# Scatter time vs physical address for the accesses of one CPU within the selected address range
cpuid = 1
# subset_df = memory_df.loc[(memory_df['cache_result'] in ["LVL L1 or L1 hit", LVL Remote RAM hit])]
# NOTE(review): memory_df is not assigned in any cell visible above — confirm which frame is meant.
# The mask is built from a different frame than the one being indexed.
subset_df = phys_addr_subset_df.loc[memory_df['cpuid'] == cpuid]



# NOTE(review): neither `colors` nor `n` is assigned in any cell visible above — confirm the intended color.
plt.scatter(subset_df["time"], subset_df["phys"], s=6, alpha=0.5, c=colors[n])

subset_df

In [None]:
# Remote loads: the node of the CPU issuing the load differs from the node of the accessed memory
remote_loads_df = loads_df.loc[loads_df['cpu_node'] != loads_df['memory_node']]

In [None]:
# Show the remote loads
remote_loads_df

In [None]:
# Scatter plot of the remote loads: time on x, physical address on y,
# one color per cache result (colors come from cache_results_colors).
cache_results_colors
for result, color in cache_results_colors.items():
    # Rows of the remote loads having this cache result
    subset = remote_loads_df.loc[remote_loads_df['cache_result'] == result]
    plt.scatter(subset['time'], subset['phys'], label=result, color=color, s=6, alpha=0.5)

# Very large figure (120 x 60 inches)
plt.gcf().set_size_inches(120, 60)
plt.grid(axis="y", which="both")
plt.grid(axis="x", which="major")
plt.xlabel('Time')
plt.ylabel('Phys')
plt.title('Scatter Plot of Time vs Phys with Cache Results Colored')
plt.legend(title='Cache Result')
plt.show()