In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks

**Separate out experiments from one file that contains multiple experiments**


In [19]:
# read data

exp_no = 104
last_file_no = 152
file_name = str(exp_no)+"_SHT_SMD.txt"

file_path = "../data/01_raw/" + file_name
# print(file_path)
df = pd.read_csv(file_path, sep=',' , usecols=['timestamp','SHT40_temp','SHT40_Humidity','A1_Sensor', 'A1_Resistance'])
timestamp_data = df['timestamp'].values 
sht40_temp_data = df['SHT40_temp'].values
sht40_humidity_data = df['SHT40_Humidity'].values 
a1_sensor_data = df['A1_Sensor'].values 
a1_r_data = df['A1_Resistance'].values

In [None]:
# Find peaks and their properties
peaks, properties = find_peaks(a1_sensor_data, width=50, height=1)

# Get the peak heights
peak_heights = properties['peak_heights']

# Initialize lists to hold the smaller and larger peaks
smaller_peaks = []
larger_peaks = []

# Iterate over the peaks
for i in range(len(peaks) - 1):
    # If the next peak is smaller, label the current peak as a larger peak
    if peak_heights[i] > peak_heights[i + 1]:
        larger_peaks.append(peaks[i])
        smaller_peaks.append(peaks[i + 1])

# Convert lists to numpy arrays for indexing
smaller_peaks = np.array(smaller_peaks)
larger_peaks = np.array(larger_peaks)


In [29]:
# Iterate over the smaller_peaks array
# create a df to store the data
# timestamp,SHT40_temp,SHT40_Humidity,A1_Sensor,A1_Resistance
df_stacked = pd.DataFrame(columns=['exp_no', 'timestamp', 'SHT40_temp', 'SHT40_Humidity', 'A1_Sensor', 'A1_Resistance'])

for i in range(len(smaller_peaks) - 1):
    # Get the data of between the current and next smaller peak
    data_c = sht40_temp_data[smaller_peaks[i]:smaller_peaks[i + 1]]
    data_rh = sht40_humidity_data[smaller_peaks[i]:smaller_peaks[i + 1]]
    data_v = a1_sensor_data[smaller_peaks[i]:smaller_peaks[i + 1]]
    data_r = a1_r_data[smaller_peaks[i]:smaller_peaks[i + 1]]
    
    timestamps = timestamp_data[smaller_peaks[i]:smaller_peaks[i + 1]]
    relative_time = timestamps - timestamps[0]
    # create a new header called experiment number with each iteration being 1 experiment and add it to the data
    exp_no = pd.Series(i, index=range(len(data_r)))
    # create a new df with the data
    df_labelled = pd.DataFrame({'exp_no': exp_no, 'timestamp': relative_time, 'SHT40_temp': data_c, 'SHT40_Humidity': data_rh, 'A1_Sensor': data_v, 'A1_Resistance': data_r})
    # concat each df_labelled to the df_stacked
    df_stacked = pd.concat([df_stacked, df_labelled])



  df_stacked = pd.concat([df_stacked, df_labelled])


In [3]:
# Setup
exp_no = 104
file_name = f"{exp_no}_SHT_SMD.txt"
file_path = f"../data/01_raw/{file_name}"

# Read data
df = pd.read_csv(file_path, sep=',', usecols=['timestamp', 'SHT40_temp', 'SHT40_Humidity', 'A1_Sensor', 'A1_Resistance'])

# Find peaks
peaks, properties = find_peaks(df['A1_Sensor'], width=50, height=1)
peak_heights = properties['peak_heights']

# Determine smaller and larger peaks
smaller_peaks, larger_peaks = [], []
for i in range(len(peaks) - 1):
    if peak_heights[i] > peak_heights[i + 1]:
        larger_peaks.append(peaks[i])
        smaller_peaks.append(peaks[i + 1])

# Process data
df_stacked_list = []
for i in range(len(smaller_peaks) - 1):
    df_subset = df.iloc[smaller_peaks[i]:smaller_peaks[i + 1]].copy()
    df_subset['exp_no'] = i
    df_subset['timestamp'] -= df_subset['timestamp'].iloc[0]
    df_stacked_list.append(df_subset)

df_stacked = pd.concat(df_stacked_list, ignore_index=True)


In [5]:
df_stacked.head()
# write df_stacked to csv
df_stacked.to_csv(f"../data/02_intermediate/{exp_no}_stacked.csv", index=False)
