### EDA(Exploratory Data Analysis) of Power Trace files
The goal of this notebook is to go through all power trace values and get the average exponent value. This will be used to increase all power values so the CNN would struggle less and unlikely run into vanishing gradient problems.

### Essential imports
Rather than using Pandas, I decided to use a simple loop over all files with a line-read function.

Will use seaborn for visualization.

In [1]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from collections import defaultdict
import os
import re

## Helper Functions

In [2]:
''' 
Function that extracts all exponent values from a SINGLE power trace file
Input:
    1) filename: string; name of power trace file
Returns:
    1) avg_exponent: float; average exponent of all power trace values
'''
def extract_exponents_from_file(file_path):
    exponents = []
    with open(file_path, 'r') as f:
        next(f)
        for line in f:
            parts = line.strip().split()
            if len(parts) < 2:
                continue  # Skip lines with fewer than 2 columns
            value_str = parts[1]
            if 'e' not in value_str:
                print(f"Skipping non-scientific value '{value_str}' in {file_path}")
                continue
            try:
                exponent = int(value_str.split('e')[1])
                exponents.append(exponent)
            except ValueError:
                print(f"Invalid exponent format in '{value_str}' from {file_path}")
                continue
    return exponents

### Data Analysis

In [3]:
# Loop through all power trace files, create dictionary of avg exponent values
cwd = os.getcwd()
print(cwd)
root_path = os.path.join(cwd, 'src')
root_src = os.path.join(root_path, 'trace_files')
print(root_src)
exp_results = defaultdict(list)
min_val = None
max_val = None

# exp_results: key = each dir, items = list of (sum(exponent), len(exponent)) tuples per file
for root, subfolders, files in os.walk(root_src):
    if files:
        print(f"Handling: {files}...")
        for file in files:
            if file.endswith(".txt"):
                file_path = os.path.join(root, file)
                temp = extract_exponents_from_file(file_path)
                if min_val is None or min_val > min(temp):
                    min_val = min(temp)
                if max_val is None or max_val < max(temp):
                    max_val = max(temp)
                exp_results[os.path.basename(root)].append((sum(temp), len(temp)))
                

c:\Users\Calvin\Desktop\cnn_multi_pixel
c:\Users\Calvin\Desktop\cnn_multi_pixel\src\trace_files
Handling: ['.DS_Store']...
Handling: ['lin_s0_0.txt', 'lin_s100_0.390625.txt', 'lin_s101_0.394531.txt', 'lin_s102_0.398438.txt', 'lin_s103_0.402344.txt', 'lin_s104_0.40625.txt', 'lin_s105_0.410156.txt', 'lin_s106_0.414062.txt', 'lin_s107_0.417969.txt', 'lin_s108_0.421875.txt', 'lin_s109_0.425781.txt', 'lin_s10_0.0390625.txt', 'lin_s110_0.429688.txt', 'lin_s111_0.433594.txt', 'lin_s112_0.4375.txt', 'lin_s113_0.441406.txt', 'lin_s114_0.445312.txt', 'lin_s115_0.449219.txt', 'lin_s116_0.453125.txt', 'lin_s117_0.457031.txt', 'lin_s118_0.460938.txt', 'lin_s119_0.464844.txt', 'lin_s11_0.0429688.txt', 'lin_s120_0.46875.txt', 'lin_s121_0.472656.txt', 'lin_s122_0.476562.txt', 'lin_s123_0.480469.txt', 'lin_s124_0.484375.txt', 'lin_s125_0.488281.txt', 'lin_s126_0.492188.txt', 'lin_s127_0.496094.txt', 'lin_s128_0.5.txt', 'lin_s129_0.503906.txt', 'lin_s12_0.046875.txt', 'lin_s130_0.507812.txt', 'lin_s131_

In [4]:
# key = dir, val = list of its avg exponent values
tot_exp_val = 0
tot_exp_len = 0
avg_results = {}
for k in exp_results.keys():
    dir_exp_val = 0
    dir_exp_len = 0
    for vals in exp_results[k]:
        dir_exp_val += vals[0]
        dir_exp_len += vals[1]
    avg_results[k] = np.float64(dir_exp_val / dir_exp_len)
    tot_exp_val += dir_exp_val
    tot_exp_len += dir_exp_len

for k, v in avg_results.items():
    print(f"Folder \"{k}\": {v}")
print(f"FINAL VALUE: {int(tot_exp_val/tot_exp_len)}")
print(f"SMALLEST VALUE: {min_val}")
print(f"LARGEST VALUE: {max_val}")

Folder "analog_1_tt_x": -3.0610274082484663
Folder "analog_5px_tt_pm": -3.0102740113382107
Folder "analog_5px_tt_px": -3.0102739541751897
Folder "analog_5px_tt_xm": -3.0102739541751897
Folder "analog_5px_tt_xx": -3.0102739541751897
Folder "analog_5_tt_x": -2.9776076407050938
Folder "digital_5px_fs_p": -6.115951530316835
FINAL VALUE: -3
SMALLEST VALUE: -10
LARGEST VALUE: 0


In [None]:
def compute_avg(lst):
    return [x[0] / x[1] if x[1] != 0 else np.nan for x in lst]

# Create avg dict
avg_dict = {}

for k in exp_results.keys():
    avg_dict[k] = compute_avg(exp_results[k])

max_len = max(len(v) for k, v in avg_dict.items())

for k in avg_dict.keys():
    avg_dict[k].extend([np.nan] * (max_len - len(avg_dict[k])))

for k, v in avg_dict.items():
    # Create a DataFrame for each file's data
    df = pd.DataFrame({
        'Index': list(range(max_len)),
        f"Folder_{k}": v
    })

    # Melt the DataFrame for easier plotting with seaborn
    df_melted = df.melt(id_vars=["Index"], value_vars=[f"Folder_{k}"],
                        var_name="Folder", value_name="Ratio")

    # Create the barplot
    plt.figure(figsize=(16, 12))
    sns.barplot(x="Index", y="Ratio", hue="Folder", data=df_melted)
    plt.xticks(ticks=range(0, max_len, 10), rotation=90)
    plt.title(f'Comparison of Average Exponent Value for {k}')
    plt.xlabel('File Index')
    plt.ylabel('Average Exponent')
    plt.xticks(rotation=90)
    plt.legend(title='File')
    plt.show()

ValueError: Could not interpret value `Folder` for `hue`. An entry with this name does not appear in `data`.

<Figure size 1600x1200 with 0 Axes>