In [1]:
import pandas as pd
import os
from functools import reduce

In [3]:
idle_directory = "../data/combined data files/idle/"
attack_directory = "../data/combined data files/spectre/"

idle_files = [file for file in os.listdir(idle_directory) if file.endswith('.csv')]
attack_files = [file for file in os.listdir(attack_directory) if file.endswith('.csv')]


In [23]:
DEFAULT = ['Branch Instructions', 'Branch Misses', 'Cache References', 'Cache Misses']
BATCH1 = ['L1 Data Cache Loads', 'L1 Data Cache Load Misses',
          'Executed Branch Instructions', 'Executed Conditional Branches']
BATCH2 = ['L2 All Demand Data Reads', 'L2 Demand Data Read Hits',
          'Retired Branch Instructions', 'Retired Conditional Branches']
BATCH3 = ['Offcore Demand Data Reads LLC Miss to DRAM',
          'Offcore Demand Data Reads LLC Hit Any Response',
          'Offcore All Data Reads LLC Any Response',
          'Offcore All Data Reads LLC Miss to DRAM']
BATCH4 = ['Retired Branch Mispredictions', 'Retired Near-Taken Branch Mispredictions',
          'Executed Branch Mispredictions', 'Executed Conditional Branch Mispredictions']

fileName = idle_directory + idle_files[0]
df = pd.read_csv(fileName)

def checkedBatch(batch, droppedBatches):
    default_zero = (batch[DEFAULT] == 0).all(axis=0).sum()
    batch1_zero = (batch[BATCH1] == 0).all(axis=0).sum()
    batch2_zero = (batch[BATCH2] == 0).all(axis=0).sum()
    batch3_zero = (batch[BATCH3] == 0).all(axis=0).sum()
    batch4_zero = (batch[BATCH4] == 0).all(axis=0).sum()

    if default_zero == 4:
        droppedBatches[0] += 1
        return False
    elif batch1_zero  == 4:
        droppedBatches[1] += 1
        return False
    elif batch2_zero  == 4:
        droppedBatches[2] += 1
        return False
    elif batch3_zero == 4:
        droppedBatches[3] += 1
        return False
    elif batch4_zero == 4:
        droppedBatches[4] += 1
        return False
    return True


# Initialize droppedBatches counter
droppedBatches = [0, 0, 0, 0, 0]

# Iterate through DataFrame in chunks of 5 rows
for i in range(100):
    batch = df[i * 5:(i + 1) * 5]
    checkedBatch(batch, droppedBatches)

# Print results
print(droppedBatches)


[25, 0, 19, 0, 22]


In [27]:
import sys
import pandas as pd

# Function to generate a progress bar
def progress_bar(current, total, bar_length=50):
    progress = current / total
    bar = '=' * int(progress * bar_length) + '-' * (bar_length - int(progress * bar_length))
    sys.stdout.write(f'\r[{bar}] {progress * 100:.2f}%')
    sys.stdout.flush()

# Function to combine and save rows in batches of specified size
def combine_in_batches(files, isIdle, output_file, batch_size):
    droppedBatches = [0, 0, 0, 0, 0]  # Keeps track of dropped batches for each category
    totalBatches = 0  # Keeps track of the total batches

    batch_data = []  # To store valid batches

    total_files = len(files)  # Total number of files to process
    print("Processing Files:")
    
    for idx, file in enumerate(files):
        # Show progress bar
        progress_bar(idx + 1, total_files)

        if isIdle:
            fileName = idle_directory + file
        else:
            fileName = attack_directory + file

        df = pd.read_csv(fileName)  # Read the CSV file
        for i in range(0, len(df), batch_size):
            batch = df.iloc[i:i + batch_size]  # Get a batch of rows
            if len(batch) == batch_size:  # Only include full batches
                totalBatches += 1  # Increment total batches
                if checkedBatch(batch, droppedBatches):  # Check if the batch is valid
                    batch_data.append(batch)

    # Concatenate all valid full batches
    combined_df = pd.concat(batch_data, ignore_index=True)
    combined_df.to_csv(output_file, index=False)  # Save to output file

    # Analysis: Print total and dropped batch details
    print("\nAnalysis of Batches:")
    print(f"Total Batches Processed: {totalBatches}")
    print(f"Dropped Batches by Category:")
    print(f"  DEFAULT: {droppedBatches[0]}")
    print(f"  BATCH1: {droppedBatches[1]}")
    print(f"  BATCH2: {droppedBatches[2]}")
    print(f"  BATCH3: {droppedBatches[3]}")
    print(f"  BATCH4: {droppedBatches[4]}")
    print(f"Valid Batches Included: {totalBatches - sum(droppedBatches)}")

    return combined_df


In [28]:
batch_size = 5

# Combine idle files with a batch size of 5 and save
idle_combined_df = combine_in_batches(idle_files, True, "../data/combined data files/combined_idle_batches.csv", batch_size)
print("Idle files combined into '../data/combined data files/combined_idle_batches.csv'")

# Combine attack files with a batch size of 5 and save
attack_combined_df = combine_in_batches(attack_files, False,"../data/combined data files/combined_attack_batches.csv", batch_size)
print("Attack files combined into '../data/combined data files/combined_attack_batches.csv'")

Processing Files:
Analysis of Batches:
Total Batches Processed: 48534
Dropped Batches by Category:
  DEFAULT: 15809
  BATCH1: 134
  BATCH2: 10463
  BATCH3: 0
  BATCH4: 7174
Valid Batches Included: 14954
Idle files combined into '../data/combined data files/combined_idle_batches.csv'
Processing Files:
Analysis of Batches:
Total Batches Processed: 52031
Dropped Batches by Category:
  DEFAULT: 12678
  BATCH1: 1011
  BATCH2: 14110
  BATCH3: 678
  BATCH4: 5409
Valid Batches Included: 18145
Attack files combined into '../data/combined data files/combined_attack_batches.csv'


In [29]:
# Print the sizes of the DataFrames
print(f"Size of idle_combined_df: {idle_combined_df.shape}")
print(f"Size of attack_combined_df: {attack_combined_df.shape}")


Size of idle_combined_df: (74770, 21)
Size of attack_combined_df: (90725, 21)


In [30]:
import pandas as pd
import numpy as np
import csv
import sys

# Function to generate a progress bar
def progress_bar(current, total, bar_length=50):
    progress = current / total
    bar = '=' * int(progress * bar_length) + '-' * (bar_length - int(progress * bar_length))
    sys.stdout.write(f'\r[{bar}] {progress * 100:.2f}%')
    sys.stdout.flush()

# Function to write shuffled batches directly to CSV
def write_shuffled_batches_to_csv(idle_df, attack_df, batch_size, output_file):
    # Calculate the maximum number of batches that can be drawn from each DataFrame
    max_batches_idle = len(idle_df) // batch_size
    max_batches_attack = len(attack_df) // batch_size
    total_batches = min(max_batches_idle, max_batches_attack) * 2  # Equal distribution
    
    # Initialize batch counters
    idle_start, attack_start = 0, 0
    batches = []
    
    print("\nCreating batches...")
    for i in range(total_batches // 2):
        # Create one batch from idle data
        idle_batch = idle_df.iloc[idle_start:idle_start + batch_size].copy()
        idle_batch['Label'] = 0  # Label for idle
        batches.append(idle_batch)
        idle_start += batch_size
        
        # Create one batch from attack data
        attack_batch = attack_df.iloc[attack_start:attack_start + batch_size].copy()
        attack_batch['Label'] = 1  # Label for attack
        batches.append(attack_batch)
        attack_start += batch_size
        
        # Update progress bar for batch creation
        progress_bar(i + 1, total_batches // 2)
    
    # Shuffle the batches
    print("\n\nShuffling batches...")
    np.random.shuffle(batches)
    
    # Write the shuffled batches to CSV
    print("\nWriting batches to CSV...")
    with open(output_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        
        # Write the header row
        writer.writerow(list(idle_df.columns) + ['Label'])
        
        for i, batch in enumerate(batches):
            writer.writerows(batch.values)
            
            # Update progress bar for writing batches
            progress_bar(i + 1, len(batches))
    
    print("\nShuffled data successfully written to CSV.")

# Specify batch size and output file
batch_size = 5
output_file = "../data/combined data files/shuffled_master_data.csv"

# Write shuffled batches directly to CSV
write_shuffled_batches_to_csv(idle_combined_df, attack_combined_df, batch_size, output_file)



Creating batches...

Shuffling batches...

Writing batches to CSV...
[====----------------------------------------------] 8.10%

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)





IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



Shuffled data successfully written to CSV.


In [40]:
idle_directory_bm = "../data/combined data files/idle/benchmark data runs/"
attack_directory_bm = "../data/combined data files/spectre/benchmark data runs/"
batch_size = 5

In [44]:
idle = "1. idle/"
idle_idle_files = [file for file in os.listdir(idle_directory_bm+idle) if file.endswith('.csv')]
attack_idle_files = [file for file in os.listdir(attack_directory_bm+idle) if file.endswith('.csv')]

idle_directory = idle_directory_bm+idle
attack_directory = attack_directory_bm+idle
print("starting for idle mode:")
# Combine idle files with a batch size of 5 and save
idle_combined_df = combine_in_batches(idle_idle_files, True, "../data/combined data files/bm/combined_idle_batches.csv", batch_size)
print("Idle files combined into '../data/combined data files/bm/combined_idle_batches.csv'")

# Combine attack files with a batch size of 5 and save
attack_combined_df = combine_in_batches(attack_idle_files, False,"../data/combined data files/bm/combined_attack_batches.csv", batch_size)
print("Attack files combined into '../data/combined data files/bm/combined_attack_batches.csv'")

output_file = "../data/combined data files/bm/idle_shuffled_master_data.csv"

# Write shuffled batches directly to CSV
write_shuffled_batches_to_csv(idle_combined_df, attack_combined_df, batch_size, output_file)

starting for idle mode:
Processing Files:
Analysis of Batches:
Total Batches Processed: 3000
Dropped Batches by Category:
  DEFAULT: 973
  BATCH1: 0
  BATCH2: 575
  BATCH3: 0
  BATCH4: 392
Valid Batches Included: 1060
Idle files combined into '../data/combined data files/bm/combined_idle_batches.csv'
Processing Files:
Analysis of Batches:
Total Batches Processed: 2842
Dropped Batches by Category:
  DEFAULT: 414
  BATCH1: 70
  BATCH2: 555
  BATCH3: 156
  BATCH4: 293
Valid Batches Included: 1354
Attack files combined into '../data/combined data files/bm/combined_attack_batches.csv'

Creating batches...

Shuffling batches...

Writing batches to CSV...
Shuffled data successfully written to CSV.


In [46]:
mode = "2. cpu/"
idle_mode_files = [file for file in os.listdir(idle_directory_bm+mode) if file.endswith('.csv')]
attack_mode_files = [file for file in os.listdir(attack_directory_bm+mode) if file.endswith('.csv')]

idle_directory = idle_directory_bm+mode
attack_directory = attack_directory_bm+mode
print("starting for cpu mode:")
# Combine idle files with a batch size of 5 and save
idle_combined_df = combine_in_batches(idle_mode_files, True, "../data/combined data files/bm/combined_cpu_idle_batches.csv", batch_size)
print("Idle files combined into '../data/combined data files/bm/combined_cpu_idle_batches.csv'")

# Combine attack files with a batch size of 5 and save
attack_combined_df = combine_in_batches(attack_mode_files, False,"../data/combined data files/bm/combined_cpu_attack_batches.csv", batch_size)
print("Attack files combined into '../data/combined data files/bm/combined_cpu_attack_batches.csv'")

output_file = "../data/combined data files/bm/cpu_shuffled_master_data.csv"

# Write shuffled batches directly to CSV
write_shuffled_batches_to_csv(idle_combined_df, attack_combined_df, batch_size, output_file)

starting for cpu mode:
Processing Files:
Analysis of Batches:
Total Batches Processed: 3005
Dropped Batches by Category:
  DEFAULT: 0
  BATCH1: 0
  BATCH2: 0
  BATCH3: 0
  BATCH4: 0
Valid Batches Included: 3005
Idle files combined into '../data/combined data files/bm/combined_cpu_idle_batches.csv'
Processing Files:
Analysis of Batches:
Total Batches Processed: 3205
Dropped Batches by Category:
  DEFAULT: 1032
  BATCH1: 1113
  BATCH2: 0
  BATCH3: 1
  BATCH4: 0
Valid Batches Included: 1059
Attack files combined into '../data/combined data files/bm/combined_cpu_attack_batches.csv'

Creating batches...

Shuffling batches...

Writing batches to CSV...

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [47]:
mode = "3. memory/"
idle_mode_files = [file for file in os.listdir(idle_directory_bm+mode) if file.endswith('.csv')]
attack_mode_files = [file for file in os.listdir(attack_directory_bm+mode) if file.endswith('.csv')]

idle_directory = idle_directory_bm+mode
attack_directory = attack_directory_bm+mode
print("starting for memory mode:")
# Combine idle files with a batch size of 5 and save
idle_combined_df = combine_in_batches(idle_mode_files, True, "../data/combined data files/bm/combined_memory_idle_batches.csv", batch_size)
print("Idle files combined into '../data/combined data files/bm/combined_memory_idle_batches.csv'")

# Combine attack files with a batch size of 5 and save
attack_combined_df = combine_in_batches(attack_mode_files, False,"../data/combined data files/bm/combined_memory_attack_batches.csv", batch_size)
print("Attack files combined into '../data/combined data files/bm/combined_memory_attack_batches.csv'")

output_file = "../data/combined data files/bm/memory_shuffled_master_data.csv"

# Write shuffled batches directly to CSV
write_shuffled_batches_to_csv(idle_combined_df, attack_combined_df, batch_size, output_file)

starting for memory mode:
Processing Files:
Analysis of Batches:
Total Batches Processed: 2530
Dropped Batches by Category:
  DEFAULT: 1
  BATCH1: 1
  BATCH2: 0
  BATCH3: 0
  BATCH4: 0
Valid Batches Included: 2528
Idle files combined into '../data/combined data files/bm/combined_memory_idle_batches.csv'
Processing Files:
Analysis of Batches:
Total Batches Processed: 2309
Dropped Batches by Category:
  DEFAULT: 223
  BATCH1: 198
  BATCH2: 11
  BATCH3: 137
  BATCH4: 0
Valid Batches Included: 1740
Attack files combined into '../data/combined data files/bm/combined_memory_attack_batches.csv'

Creating batches...

Shuffling batches...

Writing batches to CSV...

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [48]:
mode = "4. cpu +memory/"
idle_mode_files = [file for file in os.listdir(idle_directory_bm+mode) if file.endswith('.csv')]
attack_mode_files = [file for file in os.listdir(attack_directory_bm+mode) if file.endswith('.csv')]

idle_directory = idle_directory_bm+mode
attack_directory = attack_directory_bm+mode
print("starting for combined mode:")
# Combine idle files with a batch size of 5 and save
idle_combined_df = combine_in_batches(idle_mode_files, True, "../data/combined data files/bm/combined_ccombined_idle_batches.csv", batch_size)
print("Idle files combined into '../data/combined data files/bm/combined_combined_idle_batches.csv'")

# Combine attack files with a batch size of 5 and save
attack_combined_df = combine_in_batches(attack_mode_files, False,"../data/combined data files/bm/combined_combined_attack_batches.csv", batch_size)
print("Attack files combined into '../data/combined data files/bm/combined_combined_attack_batches.csv'")

output_file = "../data/combined data files/bm/combined_shuffled_master_data.csv"

# Write shuffled batches directly to CSV
write_shuffled_batches_to_csv(idle_combined_df, attack_combined_df, batch_size, output_file)

FileNotFoundError: [WinError 3] The system cannot find the path specified: '../data/combined data files/idle/benchmark data runs/4. cpu+memory/'