In [16]:
import numpy as np
import pandas as pd
import plotly.express as px

In [17]:
def average_eight_streaks(strides):
    """Return the mean length of the runs of consecutive 8s in ``strides``.

    Args:
        strides: Iterable of stride values, visited once in order.

    Returns:
        The mean run length as a float, or the integer 0 when ``strides``
        contains no 8 at all.
    """
    runs = []
    run_length = 0

    for value in strides:
        if value == 8:
            run_length += 1
            continue
        # Any other value ends the run in progress (if there is one).
        if run_length:
            runs.append(run_length)
        run_length = 0

    # A run that reaches the end of the input is never closed inside the loop.
    if run_length:
        runs.append(run_length)

    if not runs:
        return 0
    return sum(runs) / len(runs)

In [18]:
def get_average_streak_length(strides):
    """Return the mean length of runs of equal consecutive values.

    A "streak" is a maximal run of identical neighbouring elements, so
    ``[1, 1, 2, 2, 2, 3]`` has streaks of length 2, 3 and 1 (mean 2.0).

    Args:
        strides: A positionally indexable sequence (list, tuple, NumPy
            array, ...) or ``None``.

    Returns:
        The mean streak length as a float, or the integer 0 when
        ``strides`` is ``None`` or empty.
    """
    # Test the length explicitly: truth-testing a multi-element NumPy array
    # raises ValueError, so `not strides` only works for plain lists.
    if strides is None:
        return 0
    total = len(strides)
    if total == 0:
        return 0

    # Every element belongs to exactly one streak, so the streak lengths sum
    # to `total`; only the number of streaks has to be counted. A new streak
    # starts wherever an element differs from its predecessor.
    streak_count = 1
    for i in range(1, total):
        if strides[i] != strides[i - 1]:
            streak_count += 1

    return total / streak_count

In [19]:
def calculate_strides(group):
    """Return the row-to-row differences of the 'vpn' column of ``group``.

    The first row has no predecessor, so its difference is NaN and is
    dropped (as is any other NaN difference). The returned Series keeps the
    index labels of the rows it was computed for.
    """
    vpn_deltas = group['vpn'].diff()
    return vpn_deltas.dropna()

def average_streak_length(strides):
    """Return the mean length of runs of equal consecutive values in a Series.

    Elements are compared by position, so the Series' index labels are
    irrelevant.

    Args:
        strides: pandas Series of stride values.

    Returns:
        The mean streak length as a float, or the integer 0 when the Series
        is empty.
    """
    if strides.empty:
        return 0  # Return 0 if no streaks

    values = strides.to_numpy()

    # A new streak starts wherever an element differs from its predecessor.
    # Comparing the two shifted views does this in one vectorized pass
    # instead of one `.iloc` lookup pair per element.
    boundaries = int(np.count_nonzero(values[1:] != values[:-1]))

    # The streak lengths always sum to len(values), so the mean is simply
    # (number of elements) / (number of streaks).
    return len(values) / (boundaries + 1)

def process_dataframe(df):
    """Return the average stride-streak length for each 'eip' in ``df``.

    Strides are the differences between consecutive 'vpn' values *within*
    one 'eip' group, taken in row order. An 'eip' that occurs only once has
    no stride and is therefore absent from the result.

    Args:
        df: DataFrame with 'eip' and 'vpn' columns. The second groupby
            aligns on index labels, so the index should be unique (as with
            the default RangeIndex).

    Returns:
        Series indexed by 'eip' holding the mean streak length of that
        group's strides.
    """
    # Grouped diff keeps the original row index and yields NaN for the first
    # row of every group. It is used instead of `groupby(...).apply(...)`
    # because the latter's return shape varies: it can come back as a wide
    # DataFrame when all groups share the same index, e.g. a single 'eip'.
    strides = df.groupby('eip')['vpn'].diff().dropna()

    # Regroup the surviving strides by the 'eip' of the row they belong to.
    return strides.groupby(df['eip']).apply(average_streak_length)

In [20]:
def total_strides_per_eip(df):
    """Return, for each 'eip' in ``df``, its row count minus one.

    N rows of one 'eip' give N - 1 consecutive differences (strides), so an
    'eip' seen once maps to 0. The result is a Series indexed by 'eip'.
    """
    return df.groupby('eip').size() - 1

In [21]:
def analyse_workload(name, top_n=11):
    """Print a stride analysis report for one workload trace.

    Reads ``<name>_full.csv`` (relative to the working directory), which
    must contain 'eip' and 'vpn' columns, and prints:

    * the number of distinct 'eip' values,
    * the ``top_n`` most frequent strides between consecutive 'vpn' rows,
    * the total number of strides,
    * the average streak length over the whole trace,
    * a per-'eip' table of stride count and average streak length, sorted by
      average streak length in descending order.

    Args:
        name: Workload name; used as the CSV file-name prefix.
        top_n: How many of the most frequent strides to list (default 11).

    Returns:
        None. All results are written to stdout.
    """
    print("---------------------------------------------")
    print(f"Analyzing {name} workload...")
    print("---------------------------------------------")

    df = pd.read_csv(name+'_full.csv')
    num_unique_eip = df['eip'].nunique()

    print(f"The number of unique 'eip' values is: {num_unique_eip}")

    # Global strides: difference between each row's 'vpn' and the previous
    # row's, ignoring which 'eip' issued the access.
    my_list = df['vpn'].to_list()
    my_strides = [nxt - cur for cur, nxt in zip(my_list, my_list[1:])]

    strides = pd.Series(my_strides)

    # Stride frequency analysis (value_counts sorts most frequent first)
    stride_counts = strides.value_counts()
    print("Stride Frequency:")
    for stride, count in stride_counts.head(top_n).items():
        print(f"Stride: {stride}, Count: {count}")

    # Total stride count
    print("Total Strides:", len(strides))

    # Streaks of any repeated stride value. To measure streaks of the single
    # stride value 8 instead, call average_eight_streaks(my_strides).
    average_streak = get_average_streak_length(my_strides)
    print(f"The average length of any streak is: {average_streak}")

    # Process the DataFrame and get average streak lengths by 'eip'
    average_streaks_by_eip = process_dataframe(df)
    total_strides_by_eip = total_strides_per_eip(df)

    total_strides_by_eip_df = pd.DataFrame({'eip': total_strides_by_eip.index, 'total_strides': total_strides_by_eip.values})
    average_streaks_by_eip_df = pd.DataFrame({'eip': average_streaks_by_eip.index, 'average_streak': average_streaks_by_eip.values})

    # Inner join: only 'eip' values present in both tables are reported.
    merged = pd.merge(total_strides_by_eip_df, average_streaks_by_eip_df, on='eip')

    sorted_df = merged.sort_values('average_streak', ascending=False)
    print("Average Streaks by 'eip':")
    print(sorted_df)
    print("")

In [22]:
# Workloads to analyse, in report order; analyse_workload reads
# "<name>_full.csv" for each of them.
WORKLOADS = ("bc", "bfs", "cc", "dlrm", "gc", "gen", "pr", "rnd", "sssp", "tc", "xs")

for workload in WORKLOADS:
    analyse_workload(workload)

---------------------------------------------
Analyzing bc workload...
---------------------------------------------
The number of unique 'eip' values is: 49
Stride Frequency:
Stride: 34, Count: 11461
Stride: 35, Count: 10896
Stride: 11, Count: 10781
Stride: 33, Count: 9641
Stride: 10, Count: 8702
Stride: 36, Count: 8645
Stride: 12, Count: 8522
Stride: 14, Count: 8388
Stride: 13, Count: 7759
Stride: 105, Count: 7754
Stride: 106, Count: 7667
Total Strides: 3662309
The average length of any streak is: 1.0013564473492815
Average Streaks by 'eip':
                eip  total_strides  average_streak
18  140461382775704            272      272.000000
23  140461383287890            261      261.000000
1    93907474509834           2848      189.866667
6    93907474510098            125      125.000000
8    93907474510130            113      113.000000
4    93907474509917            140       70.000000
2    93907474509887            747       57.461538
3    93907474509908            810       4