In [13]:
import sqlite3
import pandas as pd

db_path = 'D:\\code\\uom_explore\\database\\voc_lab.db'

conn = sqlite3.connect(db_path)

# Define the specific conditions
experiment_batch = 'exp_efficiency_test_async_1'
experiment_id = '02_17_16s1c0r0'

# Write the query to select specific columns based on conditions
query = f"""
SELECT channel_id, heater_setting, timestamp, sensor_value
FROM ExperimentData
WHERE experiment_batch = ?
AND experiment_id = ?
"""

# Execute the query and load the data into a DataFrame
df = pd.read_sql_query(query, conn, params=(experiment_batch, experiment_id))

# Close the database connection
conn.close()

grouped = df.groupby('heater_setting', as_index=False, group_keys=False)

def normalize_timstamp(group):
    group['timestamp'] = group['timestamp'] - group['timestamp'].iloc[0]
    return group

df_ts = grouped.apply(normalize_timstamp).reset_index(drop=True)
grouped_ts = df_ts.groupby('heater_setting')

print (grouped_ts.head(5))

     channel_id  heater_setting  timestamp  sensor_value
0             0             140          0        7052.0
1             0             150          0        6948.0
2             0             152          0        6453.0
3             0             155          0        5875.0
4             0             157          0        5369.0
..          ...             ...        ...           ...
200           0             240        181        5970.0
201           0             242        181        6105.0
202           0             245        181        6105.0
203           0             247        181        6237.0
204           0             250        180        6479.0

[205 rows x 4 columns]






In [10]:
# use plotly to explore the data interactively
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
from scipy.signal import butter, filtfilt


# Filter requirements.
order = 6
fs = 30.0  # sample rate, Hz
cutoff = 3.66  # desired cutoff frequency of the filter, Hz
window_size = 30
group_channel = 'sensor_value'

# apply low pass filter to each group
def apply_filter(group, order, fs, cutoff, group_channel, window_size):
    # Get the filter coefficients
    b, a = butter(order, cutoff / (fs / 2), btype='low', analog=False)
    group['ADC_filtered'] = filtfilt(b, a, group[group_channel])
    group['ADC_filtered'] = group['ADC_filtered'].rolling(window_size, center=True).median()
    return group


# Apply the filter to each group
df_normalized = grouped_ts.apply(apply_filter, order=order, fs=fs, cutoff=cutoff, group_channel=group_channel, window_size=window_size).reset_index(drop=True)
input_voltage = 3.3
RL_2 = 10000 # 10kOhm

# convert to resistance where R_laod is R2 = 10kOhm input_voltage = 3.3V and 1_bit = 0.125mV (3.3V/2^14)
df_normalized['Vo'] = df_normalized['ADC_filtered']*0.000125
df_normalized['Resistance'] = (RL_2/df_normalized['Vo'])*((input_voltage/df_normalized['Vo'])-1)


# Define the figure without subplots as we are plotting only one channel
fig = go.Figure()

# The channel to plot
channel = 'Resistance' 
# 'ADC_filtered'

# Plot the channel
for setting in df_normalized['heater_setting'].unique():
    # Filter the DataFrame for each setting
    df_filtered = df_normalized[df_normalized['heater_setting'] == setting]
    fig.add_trace(
        go.Scatter(
            x=df_filtered['timestamp'],
            y=df_filtered[channel],
            mode='lines',
            name=f'Setting {setting}',
            # Optionally, you can uncomment and adjust the line color settings if needed
            # line=dict(color=colors[setting % len(colors)])  # Loop through colors cyclically
        )
    )

# Set titles and axes labels
fig.update_xaxes(title_text='Timestamp')
fig.update_yaxes(title_text=channel)
fig.update_layout(height=500, width=800, title_text=f'{channel} vs. Timestamp', showlegend=True)

# Display figure
fig.show()





In [14]:
# show the number of unique heater_settings
print (df_normalized['heater_setting'].unique())

[140 150 152 155 157 160 162 165 167 170 172 175 177 180 182 185 187 190
 192 195 197 200 202 205 210 212 215 217 220 222 225 227 230 232 235 237
 240 242 245 247 250]


# Transform each experiment into a single feature 

|feature 1|feature 2|feature 3...| ground truth (class)
- each feature is the ratio of a temperature
- each row is a dataset
- each file contain 41 settings 

In [21]:
# from the database retrieve the all experiment_id, heater_setting, sensor_value, channel_id

db_path = 'D:\\code\\uom_explore\\database\\voc_lab.db'

conn = sqlite3.connect(db_path)

# Write the query to select specific columns based on conditions
query = f"""
SELECT experiment_id, heater_setting, timestamp, sensor_value, channel_id
FROM ExperimentData
"""

# Execute the query and load the data into a DataFrame
df = pd.read_sql_query(query, conn)
df.head()

Unnamed: 0,experiment_id,heater_setting,timestamp,sensor_value,channel_id
0,02_17_16s1c0r0,140,62654,7052.0,0
1,02_17_16s1c0r0,150,62656,6948.0,0
2,02_17_16s1c0r0,152,62657,6453.0,0
3,02_17_16s1c0r0,155,62658,5875.0,0
4,02_17_16s1c0r0,157,62659,5369.0,0


In [49]:

# group df by experiment_id and heater_setting
grouped = df.groupby(['experiment_id', 'heater_setting'], as_index=False, group_keys=False)

# normalise
def normalize_timstamp(group):
    group['timestamp'] = group['timestamp'] - group['timestamp'].iloc[0]
    return group

df_ts = grouped.apply(normalize_timstamp).reset_index(drop=True)
grouped_ts = df_ts.groupby(['experiment_id','heater_setting'], as_index=False, group_keys=False)

# Filter requirements.
order = 6
fs = 30.0  # sample rate, Hz
cutoff = 3.66  # desired cutoff frequency of the filter, Hz
window_size = 30
target_channel = 'sensor_value'

# apply low pass filter to each group
def apply_filter(group, order, fs, cutoff, target_channel, window_size):
  # Get the filter coefficients
  b, a = butter(order, cutoff / (fs / 2), btype='low', analog=False)
  group['filtered'] = filtfilt(b, a, group[group_channel])
  group['filtered'] = group[target_channel].rolling(window_size, center=False).median()
  return group


# within each experiment_id group, apply the filter to each heater_setting group
# df_filtered = grouped_ts.apply(apply_filter, order=order, fs=fs, cutoff=cutoff, target_channel=group_channel, window_size=window_size).reset_index(drop=True)

filtered_data = []
for name, group in grouped_ts:
  filtered_group = group.copy()  # Avoid modifying original data
  filtered_group['filtered'] = group['sensor_value'].rolling(window_size, center=False).mean()
  filtered_data.append(filtered_group)

df_filtered = pd.concat(filtered_data, ignore_index=True)


# convert to resistance where R_laod is R2 = 10kOhm input_voltage = 3.3V and 1_bit = 0.125mV (3.3V/2^14)
input_voltage = 3.3
RL_2 = 10000 # 10kOhm

# convert to resistance where R_laod is R2 = 10kOhm input_voltage = 3.3V and 1_bit = 0.125mV (3.3V/2^14)
df_normalized['Vo'] = df_filtered['sensor_value']*0.000125
df_normalized['Resistance'] = (RL_2/df_normalized['Vo'])*((input_voltage/df_normalized['Vo'])-1)






## Retrieve a specific group

In [54]:
import plotly.express as px

experiment_id_value = "02_17_16s1c0r0"	# Example value for experiment_id
heater_setting_value = 140  # Example value for heater setting

# Use a tuple to get the specific group
group = grouped_ts.get_group((experiment_id_value, heater_setting_value))

# Now you can plot this group
fig = px.line(group, x='timestamp', y='sensor_value', title=f'Experiment {experiment_id_value} - Heater Setting {heater_setting_value}')
fig.show()

## Retrieve with index

In [80]:
# plot one specific group with plotly
import plotly.express as px

# Group by 'experiment_id' and 'heater_setting'
grouped_ts = df_ts.groupby(['experiment_id', 'heater_setting'], as_index=False, group_keys=False)

# Convert the groups to a list
groups = list(grouped_ts.groups.keys())

# Use the index to get the specific group
index = 0  # Replace with the desired index
experiment_id_value, heater_setting_value = groups[index]

# Fetch the specific group
group = grouped_ts.get_group((experiment_id_value, heater_setting_value))

# Now you can plot this group
fig = px.line(group, x='timestamp', y='sensor_value', title=f'Experiment {experiment_id_value} - Heater Setting {heater_setting_value}')
fig.show()


In [63]:
print (group['sensor_value'])

1        6948.0
42       6335.0
83       6486.0
124      6587.0
165      6402.0
          ...  
32309    6177.0
32350    6151.0
32391    6209.0
32432    6144.0
32473    6116.0
Name: sensor_value, Length: 793, dtype: float64


In [None]:
# Filter requirements.
order = 6
fs = 30.0  # sample rate, Hz
cutoff = 3.66  # desired cutoff frequency of the filter, Hz
window_size = 3
group_channel = 'sensor_value'
grouping_channel = ['experiment_id', 'heater_setting']

def apply_filter(group, order, fs, cutoff, target_channel, window_size):
    if target_channel not in group:
        raise KeyError(f"Column '{target_channel}' not found in the DataFrame")
    
    # print(f"Applying filter to group with target_channel: {target_channel}")
    
    # Get the filter coefficients
    b, a = butter(order, cutoff / (fs / 2), btype='low', analog=False)
    
    # # Apply the filter
    group['filtered'] = filtfilt(b, a, group[target_channel])
    
    # # Apply the rolling median
    group['filtered'] = group[target_channel].rolling(window_size, center=True).median()

     # Handle any remaining NaN values by filling them
    group['filtered'] = group['filtered'].fillna(method='bfill').fillna(method='ffill')
    
    return group

try:
    df_filtered = df.groupby(grouping_channel).apply(apply_filter, order=order, fs=fs, cutoff=cutoff, target_channel=group_channel, window_size=window_size).reset_index(drop=True)
    print(df_filtered.head())
except KeyError as e:
    print(e)

In [82]:
filtered_grouped = df_filtered.groupby(grouping_channel, as_index=False, group_keys=False)
groups_filtered = list(filtered_grouped.groups.keys())
# Use the index to get the specific group
index = 0  # Replace with the desired index
experiment_id_value, heater_setting_value = groups_filtered[index]

# Fetch the specific group
f_group = filtered_grouped.get_group((experiment_id_value, heater_setting_value))

# Now you can plot this group
fig = px.line(f_group, x='timestamp', y='filtered', title=f'Experiment {experiment_id_value} - Heater Setting {heater_setting_value}')
fig.show()

In [89]:
# use the filtered column get the resistance
# convert to resistance where R_laod is R2 = 10kOhm input_voltage = 3.3V and 1_bit = 0.125mV (3.3V/2^14)

input_voltage = 3.3
RL_2 = 10000 # 10kOhm
adc_bit = 0.000125 # voltage of 1 bit in ADS1115
channel_to_convert = 'filtered'

# convert to resistance where R_laod is R2 = 10kOhm input_voltage = 3.3V and 1_bit = 0.125mV (3.3V/2^14)
df_normalized['Vo'] = df_filtered['sensor_value']*0.000125
df_normalized['resistance'] = (RL_2/df_normalized['Vo'])*((input_voltage/df_normalized['Vo'])-1)

def Vo2Resistance(group, target_channel, input_voltage, RL_2, bit_V, ):
    group['Vo'] = group[target_channel]*bit_V
    group['resistance'] = (RL_2/group['Vo'])*((input_voltage/group['Vo'])-1)
    return group

df_res = df_filtered.groupby(grouping_channel).apply(Vo2Resistance, target_channel=channel_to_convert, input_voltage=input_voltage, RL_2=RL_2, bit_V=adc_bit)
df_res = df_res.reset_index(drop=True)






In [86]:
df_res.head()

Unnamed: 0,experiment_id,heater_setting,timestamp,sensor_value,channel_id,filtered,Vo,Resistance
0,02_17_16s1c0r0,140,62654,7052.0,0,6522.0,0.81525,37385.318448
1,02_17_16s1c0r0,140,62698,6522.0,0,6522.0,0.81525,37385.318448
2,02_17_16s1c0r0,140,62749,6486.0,0,6522.0,0.81525,37385.318448
3,02_17_16s1c0r0,140,62792,6587.0,0,6577.0,0.822125,36660.947429
4,02_17_16s1c0r0,140,62835,6577.0,0,6582.0,0.82275,36596.036759


In [100]:
def ratioCalculation(group):
    # take the first 50 samples of the data as median to be the baseline
    group['baseline'] = group['resistance'].head(50).median()
    group['max_reaction_R'] = group['resistance'].min()
    group['responsivity'] = (group['baseline']/group['max_reaction_R'])-1
    return group

df_full_feature = df_res.groupby(grouping_channel).apply(ratioCalculation)
df_full_feature = df_full_feature.reset_index(drop=True)
df_full_feature.head()





Unnamed: 0,experiment_id,heater_setting,timestamp,sensor_value,channel_id,filtered,Vo,resistance,baseline,max_reaction_R,responsivity
0,02_17_16s1c0r0,140,62654,7052.0,0,6522.0,0.81525,37385.318448,35841.649438,27784.393298,0.289992
1,02_17_16s1c0r0,140,62698,6522.0,0,6522.0,0.81525,37385.318448,35841.649438,27784.393298,0.289992
2,02_17_16s1c0r0,140,62749,6486.0,0,6522.0,0.81525,37385.318448,35841.649438,27784.393298,0.289992
3,02_17_16s1c0r0,140,62792,6587.0,0,6577.0,0.822125,36660.947429,35841.649438,27784.393298,0.289992
4,02_17_16s1c0r0,140,62835,6577.0,0,6582.0,0.82275,36596.036759,35841.649438,27784.393298,0.289992


In [101]:
# keep unique responsivity values after grouping
df_responsivity = df_full_feature.groupby(grouping_channel, as_index=False).first()
df_responsivity.head()

Unnamed: 0,experiment_id,heater_setting,timestamp,sensor_value,channel_id,filtered,Vo,resistance,baseline,max_reaction_R,responsivity
0,02_17_16s1c0r0,140,62654,7052.0,0,6522.0,0.81525,37385.318448,35841.649438,27784.393298,0.289992
1,02_17_16s1c0r0,150,62656,6948.0,0,6486.0,0.81075,37869.938611,35810.154804,28302.237005,0.265276
2,02_17_16s1c0r0,152,62657,6453.0,0,6282.0,0.78525,40782.977212,39297.162032,30024.031709,0.308857
3,02_17_16s1c0r0,155,62658,5875.0,0,5796.0,0.7245,49066.465486,47518.12647,36095.007941,0.316474
4,02_17_16s1c0r0,157,62659,5369.0,0,5325.0,0.665625,59459.10203,57241.607017,43975.069252,0.301683


In [102]:
# plot the responsivity against the heater setting within each experiment_id
fig = px.line(df_responsivity, x='heater_setting', y='responsivity', color='experiment_id', title='Responsivity vs. Heater Setting')
fig.show()

In [103]:
# transpose the responsivity values to have heater setting as columns and experiment_id as index
df_responsivity_pivot = df_responsivity.pivot(index='experiment_id', columns='heater_setting', values='responsivity')
df_responsivity_pivot.head()

heater_setting,140,150,152,155,157,160,162,165,167,170,...,227,230,232,235,237,240,242,245,247,250
experiment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
02_17_16s1c0r0,0.289992,0.265276,0.308857,0.316474,0.301683,0.354073,0.308133,0.308787,0.253672,0.249509,...,0.353442,0.351504,0.319964,0.304643,0.30134,0.315273,0.300079,0.30616,0.297775,0.302454
02_17_51s1c1r0,0.190347,0.201155,0.261942,0.259362,0.239436,0.21484,0.243046,0.197277,0.206084,0.218502,...,0.22661,0.214454,0.215712,0.209748,0.206435,0.221081,0.231202,0.197481,0.207455,0.201596
02_18_26s1c2r0,0.099843,0.100285,0.123185,0.163675,0.353439,0.310796,0.265924,0.260067,0.109472,0.078298,...,0.180142,0.131489,0.084056,0.108258,0.112591,0.125032,0.115088,0.111438,0.109358,0.081325
02_19_01s1c3r0,0.290731,0.266106,0.35553,0.396649,0.635214,0.552152,0.323083,0.364723,0.30788,0.288747,...,0.312643,0.302109,0.309628,0.306289,0.30722,0.311936,0.309058,0.292207,0.280615,0.278412
02_19_36s1c4r0,6.039159,6.132384,6.641051,5.897604,5.541157,5.579751,5.564905,5.541405,5.909158,5.894632,...,7.112697,7.065215,6.754524,6.712956,6.57087,6.562759,6.455663,6.417492,6.324318,6.198248


In [104]:
# append channel_id according to experiment_id to the last column of the responsivity pivot table
df_responsivity_pivot['channel_id'] = df_responsivity_pivot.index.map(lambda x: df_responsivity[df_responsivity['experiment_id'] == x]['channel_id'].values[0])
df_responsivity_pivot.head(10)

heater_setting,140,150,152,155,157,160,162,165,167,170,...,230,232,235,237,240,242,245,247,250,channel_id
experiment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
02_17_16s1c0r0,0.289992,0.265276,0.308857,0.316474,0.301683,0.354073,0.308133,0.308787,0.253672,0.249509,...,0.351504,0.319964,0.304643,0.30134,0.315273,0.300079,0.30616,0.297775,0.302454,0
02_17_51s1c1r0,0.190347,0.201155,0.261942,0.259362,0.239436,0.21484,0.243046,0.197277,0.206084,0.218502,...,0.214454,0.215712,0.209748,0.206435,0.221081,0.231202,0.197481,0.207455,0.201596,1
02_18_26s1c2r0,0.099843,0.100285,0.123185,0.163675,0.353439,0.310796,0.265924,0.260067,0.109472,0.078298,...,0.131489,0.084056,0.108258,0.112591,0.125032,0.115088,0.111438,0.109358,0.081325,2
02_19_01s1c3r0,0.290731,0.266106,0.35553,0.396649,0.635214,0.552152,0.323083,0.364723,0.30788,0.288747,...,0.302109,0.309628,0.306289,0.30722,0.311936,0.309058,0.292207,0.280615,0.278412,3
02_19_36s1c4r0,6.039159,6.132384,6.641051,5.897604,5.541157,5.579751,5.564905,5.541405,5.909158,5.894632,...,7.065215,6.754524,6.712956,6.57087,6.562759,6.455663,6.417492,6.324318,6.198248,4
02_20_11s1c0r1,0.180546,0.16752,0.335601,0.469539,0.44098,0.371804,0.293083,0.195628,0.190061,0.17178,...,0.196156,0.176294,0.181031,0.18244,0.191652,0.167608,0.1668,0.172777,0.168669,0
02_20_47s1c1r1,0.172724,0.183373,0.290345,0.268579,0.246678,0.408043,0.347959,0.257394,0.24567,0.206467,...,0.214077,0.213215,0.207059,0.207254,0.180134,0.176348,0.181485,0.183667,0.169821,1
02_21_22s1c2r1,0.075445,0.093645,0.216286,0.235086,0.210154,0.363281,0.302005,0.212478,0.181599,0.12748,...,0.121652,0.110778,0.09481,0.103905,0.107537,0.106934,0.121252,0.082124,0.080748,2
02_21_57s1c3r1,0.217517,0.250882,0.309497,0.416828,0.347345,0.32725,0.31784,0.277075,0.272408,0.22599,...,0.24254,0.250049,0.24995,0.251612,0.26233,0.253037,0.237478,0.236406,0.220911,3
02_22_32s1c4r1,5.381885,5.568157,5.964051,5.982613,7.865407,7.211391,5.463492,5.54672,5.584767,5.735617,...,6.233153,6.065726,6.061627,5.943027,5.738385,5.609565,5.612581,5.636566,5.610314,4


In [105]:
# save df_responsivity_pivot to a csv file
df_responsivity_pivot.to_csv('feature_matrix.csv')