# SG Test Data Processing Tool - v2

### Import libraries

In [2]:
# region
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from scipy.signal import butter, filtfilt
from plotly_resampler import register_plotly_resampler, unregister_plotly_resampler, FigureWidgetResampler
from concurrent.futures import ThreadPoolExecutor, as_completed
# endregion

### Sample Data

In [3]:
# region

# Data generation
sampling_rate = 200  # 200 Hz
total_duration = 300  # Total duration in seconds
num_steps = 5  # Number of steps in the static loading test
base_noise_level = 0.1  # Base noise level
num_channels = 200  # Number of channels

# Generate time data
# region
time = np.arange(0, total_duration, 1/sampling_rate)
base_strain = np.linspace(0, 1, num=num_steps).repeat(len(time) // num_steps)

# Create a list to store channel data
channel_data = []

for channel in range(num_channels):
    noise_level = base_noise_level * (1 + 0.5 * channel)
    amplitude = 1 + 0.2 * channel
    channel_noise = np.random.normal(0, noise_level, size=base_strain.shape)
    channel_strain = amplitude * base_strain + channel_noise
    phase_shift = np.pi / 4 * channel
    channel_strain = channel_strain * np.cos(time + phase_shift)
    channel_data.append(channel_strain)
# endregion

# Create the DataFrame by concatenating all channels at once
# region
column_names = [f'Strain_Channel_{channel + 1}' for channel in range(num_channels)]
df = pd.DataFrame(data=np.column_stack(channel_data), columns=column_names)
df['Time'] = time  # Add the 'Time' column separately
df.set_index('Time', inplace=True)
# endregion

# endregion

### Select the CSV file of original test data

In [4]:
# Selecting the files through a dialog box
# region

# import sys
# from PyQt5.QtWidgets import QApplication, QFileDialog

# # Initialize the application
# app = QApplication(sys.argv)

# # File selection for raw test data of strain gauge rosettes
# file_path_measured, _ = QFileDialog().getOpenFileName(None, 'Open test data files', '', 'All Files (*);;CSV Files (*.csv)')

# # Check if a file was selected for test data
# if file_path_measured:
#     print("Selected test data:", file_path_measured)
# else:
#     print("No test data file selected. The program will quit...")
#     exit()

# endregion

### Parse the test data

In [5]:
# region

# if file_path_measured:

#     if "_raw_format" in file_path_measured:
#         data = pd.read_csv(file_path_measured)
#         data.drop([0,2,3,4,5,6], inplace=True)
#         data.columns=data.iloc[0]
#         new_columns = data.columns.tolist()
#         new_columns[1] = 'Time'
#         data.columns = new_columns
#         data.drop([1], inplace=True)
#         data.drop(data.columns[0], axis=1, inplace=True)
#         time = data['Time']
#         df = data.iloc[:, 1:].filter(regex='SG')
#         df.reset_index(drop=True, inplace=True)
#         time.reset_index(drop=True, inplace=True)
#         df
#         print("Selected test data from directory:   ", file_path_measured)
#     else:
#         print("""The input file is not read from raw format. Check whether it is in the correct directory or has the correct file name convention, with '_raw_data' suffix.
#               The program will proceed by assuming that the data is already in a clean format. """)
#         data = pd.read_csv(file_path_measured)
#         time = data['Time']
#         df = data.iloc[:, 1:].filter(regex='SG')
#         df.reset_index(drop=True, inplace=True)
#         time.reset_index(drop=True, inplace=True)
#         df

# endregion

In [6]:
df

Unnamed: 0_level_0,Strain_Channel_1,Strain_Channel_2,Strain_Channel_3,Strain_Channel_4,Strain_Channel_5,Strain_Channel_6,Strain_Channel_7,Strain_Channel_8,Strain_Channel_9,Strain_Channel_10,...,Strain_Channel_191,Strain_Channel_192,Strain_Channel_193,Strain_Channel_194,Strain_Channel_195,Strain_Channel_196,Strain_Channel_197,Strain_Channel_198,Strain_Channel_199,Strain_Channel_200
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.000,0.104005,0.033334,1.846075e-17,-0.122811,-0.185084,0.141734,3.207504e-17,0.010827,1.113707,0.111663,...,-4.953261e-15,9.143811,-5.576283,-4.887810,-6.267424e-14,4.952274,-11.387928,-0.722740,1.079706e-15,2.245582
0.005,-0.199024,-0.048024,5.048534e-04,0.700520,-0.046069,0.324836,3.886191e-04,-0.675278,-0.202467,0.501491,...,-6.338762e-02,-2.592994,8.801251,-2.281808,8.675595e-02,0.637429,3.734557,0.967423,6.870323e-03,-8.941644
0.010,0.050185,-0.129146,1.995560e-03,-0.064266,-0.459089,0.356757,-3.924300e-03,0.518102,-0.426674,0.330315,...,-1.418330e-01,4.497034,11.653415,0.387244,-1.476988e-01,-0.128383,-10.998659,-3.301525,4.163267e-02,6.062447
0.015,0.032452,-0.059556,-4.594357e-03,-0.205837,-0.638873,0.324845,-2.269135e-03,-0.258569,-0.276518,0.429373,...,7.776810e-02,2.832694,-5.575106,8.716086,2.004623e-01,1.626782,1.620064,1.574785,8.812603e-02,-4.967906
0.020,0.055381,-0.124272,-1.764467e-03,-0.158979,0.073995,0.049151,-2.304480e-03,0.053543,0.332184,-0.068499,...,-1.062914e-01,-7.393439,-14.970218,-4.998850,-7.474652e-02,9.450207,-23.487273,-4.095410,-5.712452e-02,-5.243723
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299.975,-0.046722,0.953258,1.527144e+00,0.961825,0.066474,-1.186143,-2.243346e+00,-1.617420,-0.122305,1.322660,...,-3.697365e+01,-42.286158,-2.184351,21.472299,3.939478e+01,38.926101,1.557034,-36.058656,-4.023415e+01,-18.017363
299.980,-0.043454,0.952153,1.444062e+00,1.018931,0.073243,-1.526171,-1.657031e+00,-1.817149,-0.125581,1.556218,...,-3.886316e+01,-34.527792,-1.654403,17.969184,4.684822e+01,33.346726,1.286143,-18.294034,-3.692138e+01,-22.777640
299.985,-0.035077,0.799428,1.493957e+00,1.336357,0.069153,-1.387234,-2.362937e+00,-1.853292,-0.131207,2.079613,...,-4.511960e+01,-21.663535,-1.300357,35.398766,4.587846e+01,32.008003,1.211196,-23.289536,-3.482011e+01,-25.026290
299.990,-0.026976,0.861115,1.675017e+00,1.258414,0.055916,-1.628540,-2.556566e+00,-1.901555,-0.107593,1.827518,...,-2.393740e+01,-36.717056,-0.856485,31.051802,2.999597e+01,28.632969,1.200770,-25.883714,-3.131051e+01,-15.175025


### Plot & Modify Data

In [7]:
# region

# Convert columns to more memory-efficient types
# region
def optimize_data_types(df):
    for col in df.columns:
        if df[col].dtype == 'float64':
            df[col] = df[col].astype('float32')
        elif df[col].dtype == 'int64':
            max_val = df[col].max()
            if max_val < 2**7:
                df[col] = df[col].astype('int8')
            elif max_val < 2**15:
                df[col] = df[col].astype('int16')
            elif max_val < 2**31:
                df[col] = df[col].astype('int32')
    return df

df = optimize_data_types(df)
# endregion

# Copy the original DataFrame for plotting
# region
original_df = df.copy()
downsampled_df_copy = None
# endregion

# Function definitions of initial setup and tab widgets
# region
def find_divisors(num):
    return [i for i in range(1, num + 1) if num % i == 0]

def apply_butterworth_filter(data, cutoff, order, sampling_rate):
    nyquist = 0.5 * sampling_rate
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    filtered_data = filtfilt(b, a, data)
    return filtered_data

def downsample_data(df, original_rate, new_rate):
    if new_rate >= original_rate:
        return df
    step_size = int(original_rate / new_rate)
    downsampled_df = df.iloc[::step_size, :].copy()
    downsampled_df.index = np.linspace(df.index[0], df.index[-1], len(downsampled_df))
    return downsampled_df

def apply_time_offset(df, offset):
    # Find the index closest to the specified offset time
    closest_time_point = np.abs(df.index - offset).argmin()
    offset_df = df.iloc[closest_time_point:].copy()
    offset_df.index = offset_df.index - offset_df.index[0]
    return offset_df

def export_data(b):
    global downsampled_df_copy
    new_rate = sampling_rate_dropdown.value
    apply_filter = apply_filter_checkbox.value
    apply_offset = apply_time_offset_checkbox.value
    offset_time = time_offset_input.value

    # Check if any processing is applied
    is_processing_applied = (new_rate != sampling_rate) or apply_filter or (apply_offset and offset_time != 0)

    if downsampled_df_copy is not None and is_processing_applied:
        processed_df = downsampled_df_copy.copy()

        # Apply filter if needed
        if apply_filter:
            filter_cutoff = filter_cutoff_input.value
            filter_order = filter_order_input.value
            for col in processed_df.columns:
                processed_df[col] = apply_butterworth_filter(processed_df[col], filter_cutoff, filter_order, new_rate)

        # Apply time offset if needed
        if apply_offset and offset_time != 0:
            processed_df = apply_time_offset(processed_df, offset_time)
        
        # Export to CSV
        filename = 'exported_data.csv'
        processed_df.to_csv(filename)
        print(f'Data exported as {filename}')
    else:
        print('No modified data to export.')

def find_closest_not_exceeding(target_rate, list_of_numbers):
    closest = None
    for number in list_of_numbers:
        if number <= target_rate and (closest is None or abs(target_rate - number) < abs(target_rate - closest)):
            closest = number
    return closest

def create_initial_plot(df):
    fig = go.Figure()
    for col in df.columns:
        fig.add_trace(go.Scattergl(x=df.index, y=df[col], mode='lines', name=f'{col}', visible=False))
        fig.add_trace(go.Scattergl(x=df.index, y=df[col], mode='lines', name=f'{col} Processed '))
    fig.update_layout(title='Data Plot', xaxis_title='Time (s)', yaxis_title='Values')
    return fig

def toggle_original_data(b):
    for i in range(len(original_df.columns)):
        plot_widget.data[i*2].visible = not plot_widget.data[i*2].visible
# endregion

# Interactive widgets setup
# region
# -----------------------------------------------------------
# Rendering Tab with Chunk Input
# region
use_batch_update_checkbox = widgets.Checkbox(value=True, description='Use batch_update', style={'description_width': 'initial'})
chunk_input = widgets.IntText(value=1, description='Number of Chunks:', style={'description_width': 'initial'})
progress_bar = widgets.IntProgress(value=0, min=0, max=len(df.columns), description='Processing:', bar_style='info', orientation='horizontal')
rendering_tab = widgets.VBox([use_batch_update_checkbox, chunk_input, progress_bar])
# endregion

# Downsampling Tab
# region

divisors = find_divisors(sampling_rate)

#Calculate Maximum Initial Downsampling Rate
# region 
max_points_limit = 4000000  # Maximum data points to display initially
total_points_in_data = df.size  # Maximum number of data points in dataframe
knockdown_factor_for_intial_sampling_rate = total_points_in_data / max_points_limit  # Sampling rate of the data should be divided by at least this much
sampling_rate_w_knockdown = sampling_rate / knockdown_factor_for_intial_sampling_rate
    
def find_closest_not_exceeding(target_rate, list_of_numbers):
    closest = None
    for number in list_of_numbers:
        if number <= target_rate and (closest is None or abs(target_rate - number) < abs(target_rate - closest)):
            closest = number
    return closest

new_rate = find_closest_not_exceeding(sampling_rate_w_knockdown, divisors)
# endregion

sampling_rate_dropdown = widgets.Dropdown(options=divisors, value=new_rate, description='New Rate (Hz):', style={'description_width': 'initial'})
toggle_original_data_button = widgets.Button(description='Toggle Original Data', button_style='info', tooltip='Click to show/hide original data')
downsampling_tab = widgets.VBox([sampling_rate_dropdown, toggle_original_data_button])
# endregion

# Butterworth Filter Tab
# region
filter_cutoff_input = widgets.FloatText(value=3, description='Cut-off Frequency (Hz):', style={'description_width': 'initial'})
filter_order_input = widgets.IntText(value=2, description='Filter Order:', style={'description_width': 'initial'})
apply_filter_checkbox = widgets.Checkbox(value=False, description='Apply Butterworth Filter')
butterworth_tab = widgets.VBox([filter_cutoff_input, filter_order_input, apply_filter_checkbox])
# endregion

# Time Offset Tab
# region
time_offset_input = widgets.FloatText(value=0, description='Time Offset (s):', style={'description_width': 'initial'})
apply_time_offset_checkbox = widgets.Checkbox(value=False, description='Apply Time Offset')
time_offset_tab = widgets.VBox([time_offset_input, apply_time_offset_checkbox])
# endregion

# Time Range Tab
# region
use_time_range_checkbox = widgets.Checkbox(value=False, description='Use Time Range', style={'description_width': 'initial'})
lower_time_input = widgets.FloatText(value=0, description='Lower Time (s):', style={'description_width': 'initial'})
upper_time_input = widgets.FloatText(value=0, description='Upper Time (s):', style={'description_width': 'initial'})
time_range_tab = widgets.VBox([use_time_range_checkbox, lower_time_input, upper_time_input])
# endregion

# Export Tab
# region
export_button = widgets.Button(description='Export as CSV', button_style='success', tooltip='Click to export data')
export_tab = widgets.VBox([export_button])
# endregion

# Tab widget
# region
tab = widgets.Tab(children=[rendering_tab, downsampling_tab, butterworth_tab, time_offset_tab, time_range_tab, export_tab])
tab.set_title(0, 'Rendering')
tab.set_title(1, 'Downsampling')
tab.set_title(2, 'Butterworth Filter')
tab.set_title(3, 'Time Offset')
tab.set_title(4, 'Time Range')
tab.set_title(5, 'Export Data')
# endregion
# -----------------------------------------------------------
# endregion

# Plotting setup (initial)
# region
downsampled_df_copy = downsample_data(original_df, sampling_rate, new_rate)
initial_plot = create_initial_plot(downsampled_df_copy)
plot_widget = FigureWidgetResampler(go.FigureWidget(initial_plot))
# endregion

# Observers and event handling
# region
def update_plot(change):
    global downsampled_df_copy
    new_rate = sampling_rate_dropdown.value
    apply_filter = apply_filter_checkbox.value
    filter_cutoff = filter_cutoff_input.value
    filter_order = filter_order_input.value
    apply_offset = apply_time_offset_checkbox.value
    offset_time = time_offset_input.value

    # Downsample data
    if downsampled_df_copy is None or change['owner'] == sampling_rate_dropdown:
        downsampled_df_copy = downsample_data(original_df, sampling_rate, new_rate)

    # Copy the downsampled data for processing
    processed_df = downsampled_df_copy.copy()

    # Apply Butterworth filter if needed
    if apply_filter:
        for col in processed_df.columns:
            processed_df[col] = apply_butterworth_filter(processed_df[col], filter_cutoff, filter_order, new_rate)

    # Apply time offset if needed
    if apply_offset:
        processed_df = apply_time_offset(processed_df, offset_time)

    use_time_range = use_time_range_checkbox.value
    lower_time = lower_time_input.value
    upper_time = upper_time_input.value

    # Filter data based on time range only if the checkbox is active
    if use_time_range:
        processed_df = processed_df[(processed_df.index >= lower_time) & (processed_df.index <= upper_time)]

    use_batch = use_batch_update_checkbox.value
    num_chunks = chunk_input.value
    progress_bar.max = len(original_df.columns)  # Adjust the max value of the progress bar

    if use_batch:
        # Calculate the number of columns per chunk
        columns_per_chunk = max(1, len(original_df.columns) // num_chunks)
        for chunk_start in range(0, len(original_df.columns), columns_per_chunk):
            with plot_widget.batch_update():
                chunk_end = min(chunk_start + columns_per_chunk, len(original_df.columns))
                for i in range(chunk_start, chunk_end):
                    col = original_df.columns[i]
                    plot_widget.data[i * 2 + 1].x = processed_df.index
                    plot_widget.data[i * 2 + 1].y = processed_df[col]
            progress_bar.value = chunk_end
    else:
        # Reset progress bar
        progress_bar.value = 0
        progress_bar.bar_style = 'info'

        # Update without batch_update using parallel processing
        def update_column(i, col):
            plot_widget.data[i * 2 + 1].x = processed_df.index
            plot_widget.data[i * 2 + 1].y = processed_df[col]
            return i

        with ThreadPoolExecutor() as executor:
            futures = [executor.submit(update_column, i, col) for i, col in enumerate(original_df.columns)]
            for future in as_completed(futures):
                progress_bar.value = future.result() + 1

        progress_bar.bar_style = 'success'

def on_time_range_change(change):
    if use_time_range_checkbox.value:
        update_plot(change)

sampling_rate_dropdown.observe(update_plot, names='value')
toggle_original_data_button.on_click(toggle_original_data)
apply_filter_checkbox.observe(update_plot, names='value')
filter_cutoff_input.observe(update_plot, names='value')
filter_order_input.observe(update_plot, names='value')
apply_time_offset_checkbox.observe(update_plot, names='value')
time_offset_input.observe(update_plot, names='value')
use_time_range_checkbox.observe(update_plot, names='value')
lower_time_input.observe(on_time_range_change, names='value')
upper_time_input.observe(on_time_range_change, names='value')
export_button.on_click(export_data)
# endregion

# Display the widgets and the plot
display(tab, plot_widget)

# endregion

Tab(children=(VBox(children=(Checkbox(value=True, description='Use batch_update', style=CheckboxStyle(descript…

FigureWidgetResampler({
    'data': [{'mode': 'lines',
              'name': ('<b style="color:sandybrown">[R' ... 'style="color:#fc9944">~0.3</i>'),
              'type': 'scattergl',
              'uid': 'bb6a74f8-1cf8-498c-9c3e-7f9b3321b5c5',
              'visible': False,
              'x': array([0.00000000e+00, 1.60008001e-01, 3.40017001e-01, ..., 2.99394970e+02,
                          2.99834992e+02, 2.99995000e+02]),
              'y': array([ 0.10400486, -0.08349123,  0.13740641, ..., -0.63013744, -0.23589559,
                          -0.04345419], dtype=float32)},
             {'mode': 'lines',
              'name': ('<b style="color:sandybrown">[R' ... 'style="color:#fc9944">~0.3</i>'),
              'type': 'scattergl',
              'uid': '8c585e72-b47f-4fe3-af84-25ff21adaca0',
              'x': array([0.00000000e+00, 1.60008001e-01, 3.40017001e-01, ..., 2.99394970e+02,
                          2.99834992e+02, 2.99995000e+02]),
              'y': array([ 0.10400486

In [8]:
def find_closest_not_exceeding(target, numbers):
    closest = None
    for number in numbers:
        if number <= target and (closest is None or abs(target - number) < abs(target - closest)):
            closest = number
    return closest

closest_number = find_closest_not_exceeding(sampling_rate_w_knockdown, divisors)
print("Closest number not exceeding:", closest_number)

Closest number not exceeding: 50


In [9]:
total_memory_usage =downsampled_df_copy.memory_usage(deep=True).sum()
total_memory_usage_gb = total_memory_usage / (2**20)
print(f"Total memory usage: {total_memory_usage_gb} megabytes")

Total memory usage: 11.55853271484375 megabytes
