# SG Test Data Processing Tool - v1

### Import libraries

In [1]:
# region
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display
from scipy.signal import butter, filtfilt
from plotly_resampler import register_plotly_resampler, unregister_plotly_resampler
from concurrent.futures import ThreadPoolExecutor, as_completed
# endregion

### Sample Data

In [2]:
# region

# Data generation
sampling_rate = 200  # 200 Hz
total_duration = 600  # Total duration in seconds
num_steps = 5  # Number of steps in the static loading test
base_noise_level = 0.1  # Base noise level
num_channels = 200  # Number of channels

# Generate time data
# region
time = np.arange(0, total_duration, 1/sampling_rate)
base_strain = np.linspace(0, 1, num=num_steps).repeat(len(time) // num_steps)

# Create a list to store channel data
channel_data = []

for channel in range(num_channels):
    noise_level = base_noise_level * (1 + 0.5 * channel)
    amplitude = 1 + 0.2 * channel
    channel_noise = np.random.normal(0, noise_level, size=base_strain.shape)
    channel_strain = amplitude * base_strain + channel_noise
    phase_shift = np.pi / 4 * channel
    channel_strain = channel_strain * np.cos(time + phase_shift)
    channel_data.append(channel_strain)
# endregion

# Create the DataFrame by concatenating all channels at once
# region
column_names = [f'Strain_Channel_{channel + 1}' for channel in range(num_channels)]
df = pd.DataFrame(data=np.column_stack(channel_data), columns=column_names)
df['Time'] = time  # Add the 'Time' column separately
df.set_index('Time', inplace=True)
# endregion

# endregion

### Select the CSV file of original test data

In [3]:
# Selecting the files through a dialog box
# region

# import sys
# from PyQt5.QtWidgets import QApplication, QFileDialog

# # Initialize the application
# app = QApplication(sys.argv)

# # File selection for raw test data of strain gauge rosettes
# file_path_measured, _ = QFileDialog().getOpenFileName(None, 'Open test data files', '', 'All Files (*);;CSV Files (*.csv)')

# # Check if a file was selected for test data
# if file_path_measured:
#     print("Selected test data:", file_path_measured)
# else:
#     print("No test data file selected. The program will quit...")
#     exit()

# endregion

### Parse the test data

In [4]:
# region

# if file_path_measured:

#     if "_raw_format" in file_path_measured:
#         data = pd.read_csv(file_path_measured)
#         data.drop([0,2,3,4,5,6], inplace=True)
#         data.columns=data.iloc[0]
#         new_columns = data.columns.tolist()
#         new_columns[1] = 'Time'
#         data.columns = new_columns
#         data.drop([1], inplace=True)
#         data.drop(data.columns[0], axis=1, inplace=True)
#         time = data['Time']
#         df = data.iloc[:, 1:].filter(regex='SG')
#         df.reset_index(drop=True, inplace=True)
#         time.reset_index(drop=True, inplace=True)
#         df
#         print("Selected test data from directory:   ", file_path_measured)
#     else:
#         print("""The input file is not read from raw format. Check whether it is in the correct directory or has the correct file name convention, with '_raw_data' suffix.
#               The program will proceed by assuming that the data is already in a clean format. """)
#         data = pd.read_csv(file_path_measured)
#         time = data['Time']
#         df = data.iloc[:, 1:].filter(regex='SG')
#         df.reset_index(drop=True, inplace=True)
#         time.reset_index(drop=True, inplace=True)
#         df

# endregion

In [5]:
df

Unnamed: 0_level_0,Strain_Channel_1,Strain_Channel_2,Strain_Channel_3,Strain_Channel_4,Strain_Channel_5,Strain_Channel_6,Strain_Channel_7,Strain_Channel_8,Strain_Channel_9,Strain_Channel_10,...,Strain_Channel_191,Strain_Channel_192,Strain_Channel_193,Strain_Channel_194,Strain_Channel_195,Strain_Channel_196,Strain_Channel_197,Strain_Channel_198,Strain_Channel_199,Strain_Channel_200
Time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0.000,-0.018453,-0.003483,-2.187328e-17,-0.159876,-0.211988,-0.624628,5.208542e-17,-0.405806,-0.056268,0.206807,...,2.287263e-15,-3.308980,-1.326907,-7.757556,-1.028380e-13,8.223982,1.607655,-2.042254,-4.162261e-14,1.049710
0.005,0.055532,-0.098258,1.311880e-03,-0.061239,0.066042,-0.043779,4.852027e-04,0.207068,0.242045,0.519475,...,5.799145e-02,1.375211,-4.426797,11.208332,7.466925e-02,10.578209,-12.048953,-0.197257,-8.113665e-02,-7.088249
0.010,-0.127762,-0.079647,-1.543800e-03,0.019857,-0.153546,0.032621,-4.411374e-03,0.448470,-0.356339,-0.109140,...,-7.622914e-02,1.930029,-5.798119,-6.027907,1.063095e-01,-8.782852,-9.563365,12.601360,-1.097420e-01,-0.689857
0.015,-0.043735,0.120972,1.696595e-03,0.104774,0.658233,-0.259340,-2.405563e-03,0.408003,-0.835246,-0.338889,...,1.702509e-01,-0.460545,5.972489,-0.549617,-2.536897e-01,11.284541,0.534655,7.574140,-1.775242e-02,7.821180
0.020,-0.155348,0.045081,4.040845e-03,0.096117,0.823661,-0.364091,1.409797e-02,0.090117,0.033744,0.013802,...,-1.622569e-01,-10.298363,-18.314783,14.374743,1.936326e-01,-2.252157,-8.590213,1.468282,-3.494637e-01,-4.475205
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599.975,-1.103617,-0.946008,-9.252047e-02,1.022340,1.900170,1.640506,1.203329e-01,-1.564513,-2.488991,-2.203524,...,3.446967e+00,-18.692761,-43.699139,-16.847171,-2.794553e+00,29.325767,39.326841,18.235651,2.234941e+00,-13.938471
599.980,-0.912815,-0.799722,-1.044987e-01,0.771237,1.934105,2.129537,1.259590e-01,-1.500297,-2.891533,-2.497231,...,2.762078e+00,-20.813900,-36.949993,-31.375591,-1.564564e+00,21.373175,44.048876,31.278963,2.609258e+00,-20.429783
599.985,-1.031395,-0.779672,-9.551814e-02,1.401955,1.815562,1.350160,1.255984e-01,-2.005897,-1.950560,-1.939835,...,3.413608e+00,-38.754713,-62.733758,-33.244516,-1.702566e+00,22.206096,34.130498,26.909689,2.114889e+00,-37.561920
599.990,-1.054884,-0.981885,-7.352593e-02,1.232851,1.556610,1.527633,1.248126e-01,-1.581476,-3.025109,-1.959528,...,1.885342e+00,-23.843238,-26.418829,-35.173289,-2.492999e+00,26.959376,34.740560,14.122787,2.603400e+00,-33.346983


### Plot & Modify Data

In [6]:
# region

register_plotly_resampler(mode="auto", default_n_shown_samples=500)

# Convert columns to more memory-efficient types
def optimize_data_types(df):
    for col in df.columns:
        if df[col].dtype == 'float64':
            df[col] = df[col].astype('float32')
        elif df[col].dtype == 'int64':
            max_val = df[col].max()
            if max_val < 2**7:
                df[col] = df[col].astype('int8')
            elif max_val < 2**15:
                df[col] = df[col].astype('int16')
            elif max_val < 2**31:
                df[col] = df[col].astype('int32')
    return df

df = optimize_data_types(df)

# Copy the original DataFrame for plotting
original_df = df.copy()
downsampled_df_copy = None

# Function definitions of initial setup and tab widgets
# region
def find_divisors(num):
    return [i for i in range(1, num + 1) if num % i == 0]

def apply_butterworth_filter(data, cutoff, order, sampling_rate):
    nyquist = 0.5 * sampling_rate
    normal_cutoff = cutoff / nyquist
    b, a = butter(order, normal_cutoff, btype='low', analog=False)
    filtered_data = filtfilt(b, a, data)
    return filtered_data

def downsample_data(df, original_rate, new_rate):
    if new_rate >= original_rate:
        return df
    step_size = int(original_rate / new_rate)
    downsampled_df = df.iloc[::step_size, :].copy()
    downsampled_df.index = np.linspace(df.index[0], df.index[-1], len(downsampled_df))
    return downsampled_df

def apply_time_offset(df, offset):
    # Find the index closest to the specified offset time
    closest_time_point = np.abs(df.index - offset).argmin()
    offset_df = df.iloc[closest_time_point:].copy()
    offset_df.index = offset_df.index - offset_df.index[0]
    return offset_df

def export_data(b):
    global downsampled_df_copy
    new_rate = sampling_rate_dropdown.value
    apply_filter = apply_filter_checkbox.value
    apply_offset = apply_time_offset_checkbox.value
    offset_time = time_offset_input.value

    # Check if any processing is applied
    is_processing_applied = (new_rate != sampling_rate) or apply_filter or (apply_offset and offset_time != 0)

    if downsampled_df_copy is not None and is_processing_applied:
        processed_df = downsampled_df_copy.copy()

        # Apply filter if needed
        if apply_filter:
            filter_cutoff = filter_cutoff_input.value
            filter_order = filter_order_input.value
            for col in processed_df.columns:
                processed_df[col] = apply_butterworth_filter(processed_df[col], filter_cutoff, filter_order, new_rate)

        # Apply time offset if needed
        if apply_offset and offset_time != 0:
            processed_df = apply_time_offset(processed_df, offset_time)
        
        # Export to CSV
        filename = 'exported_data.csv'
        processed_df.to_csv(filename)
        print(f'Data exported as {filename}')
    else:
        print('No modified data to export.')

def create_initial_plot(df):
    fig = go.Figure()
    for col in df.columns:
        fig.add_trace(go.Scattergl(x=df.index, y=df[col], mode='lines', name=f'{col}', visible=False))
        fig.add_trace(go.Scattergl(x=df.index, y=df[col], mode='lines', name=f'{col} Processed '))
    fig.update_layout(title='Data Plot', xaxis_title='Time (s)', yaxis_title='Values')
    return fig

def toggle_original_data(b):
    for i in range(len(original_df.columns)):
        plot_widget.data[i*2].visible = not plot_widget.data[i*2].visible
# endregion

# Interactive widgets setup
# region
# -----------------------------------------------------------
# Rendering Tab with Chunk Input
# region
use_batch_update_checkbox = widgets.Checkbox(value=True, description='Use batch_update', style={'description_width': 'initial'})
chunk_input = widgets.IntText(value=1, description='Number of Chunks:', style={'description_width': 'initial'})
progress_bar = widgets.IntProgress(value=0, min=0, max=len(df.columns), description='Processing:', bar_style='info', orientation='horizontal')
rendering_tab = widgets.VBox([use_batch_update_checkbox, chunk_input, progress_bar])
# endregion

# Downsampling Tab
# region
divisors = find_divisors(sampling_rate)
sampling_rate_dropdown = widgets.Dropdown(options=divisors, value=sampling_rate, description='New Rate (Hz):', style={'description_width': 'initial'})
toggle_original_data_button = widgets.Button(description='Toggle Original Data', button_style='info', tooltip='Click to show/hide original data')
downsampling_tab = widgets.VBox([sampling_rate_dropdown, toggle_original_data_button])
# endregion

# Butterworth Filter Tab
# region
filter_cutoff_input = widgets.FloatText(value=3, description='Cut-off Frequency (Hz):', style={'description_width': 'initial'})
filter_order_input = widgets.IntText(value=2, description='Filter Order:', style={'description_width': 'initial'})
apply_filter_checkbox = widgets.Checkbox(value=False, description='Apply Butterworth Filter')
butterworth_tab = widgets.VBox([filter_cutoff_input, filter_order_input, apply_filter_checkbox])
# endregion

# Time Offset Tab
# region
time_offset_input = widgets.FloatText(value=0, description='Time Offset (s):', style={'description_width': 'initial'})
apply_time_offset_checkbox = widgets.Checkbox(value=False, description='Apply Time Offset')
time_offset_tab = widgets.VBox([time_offset_input, apply_time_offset_checkbox])
# endregion

# Time Range Tab
# region
use_time_range_checkbox = widgets.Checkbox(value=False, description='Use Time Range', style={'description_width': 'initial'})
lower_time_input = widgets.FloatText(value=0, description='Lower Time (s):', style={'description_width': 'initial'})
upper_time_input = widgets.FloatText(value=0, description='Upper Time (s):', style={'description_width': 'initial'})
time_range_tab = widgets.VBox([use_time_range_checkbox, lower_time_input, upper_time_input])
# endregion

# Export Tab
# region
export_button = widgets.Button(description='Export as CSV', button_style='success', tooltip='Click to export data')
export_tab = widgets.VBox([export_button])
# endregion

# Tab widget
# region
tab = widgets.Tab(children=[rendering_tab, downsampling_tab, butterworth_tab, time_offset_tab, time_range_tab, export_tab])
tab.set_title(0, 'Rendering')
tab.set_title(1, 'Downsampling')
tab.set_title(2, 'Butterworth Filter')
tab.set_title(3, 'Time Offset')
tab.set_title(4, 'Time Range')
tab.set_title(5, 'Export Data')
# endregion
# -----------------------------------------------------------
# endregion

# Plotting setup (initial)
# region
initial_plot = create_initial_plot(original_df)
plot_widget = go.FigureWidget(initial_plot)
# endregion

# Observers and event handling
# region
def update_plot(change):
    global downsampled_df_copy
    new_rate = sampling_rate_dropdown.value
    apply_filter = apply_filter_checkbox.value
    filter_cutoff = filter_cutoff_input.value
    filter_order = filter_order_input.value
    apply_offset = apply_time_offset_checkbox.value
    offset_time = time_offset_input.value

    # Downsample data
    if downsampled_df_copy is None or change['owner'] == sampling_rate_dropdown:
        downsampled_df_copy = downsample_data(original_df, sampling_rate, new_rate)

    # Copy the downsampled data for processing
    processed_df = downsampled_df_copy.copy()

    # Apply Butterworth filter if needed
    if apply_filter:
        for col in processed_df.columns:
            processed_df[col] = apply_butterworth_filter(processed_df[col], filter_cutoff, filter_order, new_rate)

    # Apply time offset if needed
    if apply_offset:
        processed_df = apply_time_offset(processed_df, offset_time)

    use_time_range = use_time_range_checkbox.value
    lower_time = lower_time_input.value
    upper_time = upper_time_input.value

    # Filter data based on time range only if the checkbox is active
    if use_time_range:
        processed_df = processed_df[(processed_df.index >= lower_time) & (processed_df.index <= upper_time)]

    use_batch = use_batch_update_checkbox.value
    num_chunks = chunk_input.value
    progress_bar.max = len(original_df.columns)  # Adjust the max value of the progress bar

    if use_batch:
        # Calculate the number of columns per chunk
        columns_per_chunk = max(1, len(original_df.columns) // num_chunks)
        for chunk_start in range(0, len(original_df.columns), columns_per_chunk):
            with plot_widget.batch_update():
                chunk_end = min(chunk_start + columns_per_chunk, len(original_df.columns))
                for i in range(chunk_start, chunk_end):
                    col = original_df.columns[i]
                    plot_widget.data[i * 2 + 1].x = processed_df.index
                    plot_widget.data[i * 2 + 1].y = processed_df[col]
            progress_bar.value = chunk_end
    else:
        # Reset progress bar
        progress_bar.value = 0
        progress_bar.bar_style = 'info'

        # Update without batch_update using parallel processing
        def update_column(i, col):
            plot_widget.data[i * 2 + 1].x = processed_df.index
            plot_widget.data[i * 2 + 1].y = processed_df[col]
            return i

        with ThreadPoolExecutor() as executor:
            futures = [executor.submit(update_column, i, col) for i, col in enumerate(original_df.columns)]
            for future in as_completed(futures):
                progress_bar.value = future.result() + 1

        progress_bar.bar_style = 'success'

def on_time_range_change(change):
    if use_time_range_checkbox.value:
        update_plot(change)

sampling_rate_dropdown.observe(update_plot, names='value')
toggle_original_data_button.on_click(toggle_original_data)
apply_filter_checkbox.observe(update_plot, names='value')
filter_cutoff_input.observe(update_plot, names='value')
filter_order_input.observe(update_plot, names='value')
apply_time_offset_checkbox.observe(update_plot, names='value')
time_offset_input.observe(update_plot, names='value')
use_time_range_checkbox.observe(update_plot, names='value')
lower_time_input.observe(on_time_range_change, names='value')
upper_time_input.observe(on_time_range_change, names='value')
export_button.on_click(export_data)
# endregion

# Display the widgets and the plot
display(tab, plot_widget)

# endregion

Tab(children=(VBox(children=(Checkbox(value=True, description='Use batch_update', style=CheckboxStyle(descript…

FigureWidgetResampler({
    'data': [{'mode': 'lines',
              'name': ('<b style="color:sandybrown">[R' ... 'i style="color:#fc9944">~1</i>'),
              'type': 'scattergl',
              'uid': 'ce92bd34-e3e4-467b-816d-2e4da79febb0',
              'visible': False,
              'x': array([0.00000e+00, 8.00000e-02, 1.32500e+00, ..., 5.97565e+02, 5.99915e+02,
                          5.99995e+02]),
              'y': array([-0.01845321,  0.38824087, -0.06307334, ...,  0.6270917 , -1.2316966 ,
                          -0.8689617 ], dtype=float32)},
             {'mode': 'lines',
              'name': ('<b style="color:sandybrown">[R' ... 'i style="color:#fc9944">~1</i>'),
              'type': 'scattergl',
              'uid': '7235e4f7-d18f-4e0b-a9f1-6d9e58adb4c4',
              'x': array([0.00000e+00, 8.00000e-02, 1.32500e+00, ..., 5.97565e+02, 5.99915e+02,
                          5.99995e+02]),
              'y': array([-0.01845321,  0.38824087, -0.06307334, ...,  0.

In [7]:
memory_usage = downsampled_df_copy['Strain_Channel_1'].memory_usage(deep=True)
memory_usage

TypeError: 'NoneType' object is not subscriptable

In [None]:
total_memory_usage = df.memory_usage(deep=True).sum()
total_memory_usage_gb = total_memory_usage / (2**20)
print(f"Total memory usage: {total_memory_usage_gb} megabytes")

In [None]:
import sys
# Get the size of the object
size_in_bytes = sys.getsizeof(initial_plot)

# Convert bytes to megabytes and gigabytes
size_in_megabytes = size_in_bytes / (2**20)
size_in_gigabytes = size_in_bytes / (2**30)

print(f" {size_in_gigabytes} GB")

In [None]:
downsampled_df_copy['Strain_Channel_1']