In [43]:
import csv

# Function to parse the time from the timestamp string
def parse_time(timestamp):
    """Return the first whitespace-separated token of ``timestamp``.

    Everything after the first run of whitespace is discarded; leading
    whitespace is ignored.

    Raises:
        IndexError: if ``timestamp`` is empty or contains only whitespace.
    """
    # maxsplit=1 is enough: only the leading token is ever used.
    leading_token = timestamp.split(None, 1)[0]
    return leading_token

# Function to parse the heart rate from the record
def parse_heart_rate(record):
    """Extract the integer that follows a ``'Heart Rate'`` marker in ``record``.

    Returns:
        The heart rate as an ``int``, or ``None`` when ``record`` does not
        contain the text ``'Heart Rate'``.

    Raises:
        IndexError: if nothing but whitespace follows the marker.
        ValueError: if the token after the marker is not an integer.
    """
    marker_position = record.find('Heart Rate')
    if marker_position == -1:
        return None

    # The marker is located without its separator, but the skip below assumes
    # it is followed by exactly ': ' -- NOTE(review): confirm against the
    # logger's actual output format.
    value_start = marker_position + len('Heart Rate: ')
    remainder = record[value_start:]
    first_token = remainder.split()[0]
    return int(first_token)

# Function to parse the conductivity value from the record
def parse_conductivity(record):
    """Return the last whitespace-separated token of ``record`` as an ``int``.

    Raises:
        IndexError: if ``record`` is empty or contains only whitespace.
        ValueError: if the last token is not an integer literal.
    """
    tokens = record.split()
    trailing_token = tokens[-1]
    return int(trailing_token)

# Function to generate data points with the average conductivity reading for each second (heart-rate records are skipped, not averaged)
def generate_data_points1(data):
    """Average the conductivity readings of the first test, one point per second.

    Only records whose second lies inside one of the two module-level windows
    (``interval_1_start``..``interval_1_end`` or
    ``interval_2_start``..``interval_2_end``, both ends inclusive) are used.
    The window test is a plain string comparison, so it assumes zero-padded
    ``HH:MM:SS`` values.  Records containing ``'Heart Rate'`` are skipped;
    every other record contributes its trailing integer as a reading.

    Args:
        data: iterable of ``(timestamp, record)`` string pairs.  Consecutive
            records with the same second are grouped, so the input is
            expected to be in chronological order.

    Returns:
        A list of ``(second, avg_conductivity, label)`` tuples, one for each
        second that had at least one conductivity reading.
        ``avg_conductivity`` is rounded to one decimal and ``label`` is
        ``label_for_interval1(second)``.
    """
    data_points = []
    current_second = None
    conductivities = []

    def emit(second, readings):
        # A second that only carried heart-rate lines has nothing to average;
        # dividing by len(readings) would raise ZeroDivisionError.
        if second is None or not readings:
            return
        average = round(sum(readings) / len(readings), 1)
        # Tag the point with the second that was actually averaged, not with
        # the timestamp of whichever record happened to be read last.
        data_points.append((second, average, label_for_interval1(second)))

    for timestamp, value in data:
        second = parse_time(timestamp)
        in_first_window = interval_1_start <= second <= interval_1_end
        in_second_window = interval_2_start <= second <= interval_2_end
        if not (in_first_window or in_second_window):
            continue

        if second != current_second:
            # A new second begins: close the previous one first.
            emit(current_second, conductivities)
            current_second = second
            conductivities = []

        # Heart-rate lines are recognised only so they can be skipped.
        if parse_heart_rate(value) is None:
            conductivities.append(parse_conductivity(value))

    # Close the last second of data.
    emit(current_second, conductivities)

    return data_points

def generate_data_points2(data):
    """Average the conductivity readings of the second test, one point per second.

    Only records whose second lies inside one of the two module-level windows
    (``interval_1_start_2``..``interval_1_end_2`` or
    ``interval_2_start_2``..``interval_2_end_2``, both ends inclusive) are
    used.  The window test is a plain string comparison, so it assumes
    zero-padded ``HH:MM:SS`` values.  Records containing ``'Heart Rate'`` are
    skipped; every other record contributes its trailing integer as a reading.

    Args:
        data: iterable of ``(timestamp, record)`` string pairs.  Consecutive
            records with the same second are grouped, so the input is
            expected to be in chronological order.

    Returns:
        A list of ``(second, avg_conductivity, label)`` tuples, one for each
        second that had at least one conductivity reading.
        ``avg_conductivity`` is rounded to one decimal and ``label`` is
        ``label_for_interval2(second)``.
    """
    data_points = []
    current_second = None
    conductivities = []

    def emit(second, readings):
        # A second that only carried heart-rate lines has nothing to average;
        # dividing by len(readings) would raise ZeroDivisionError.
        if second is None or not readings:
            return
        average = round(sum(readings) / len(readings), 1)
        # Tag the point with the second that was actually averaged, not with
        # the timestamp of whichever record happened to be read last.
        data_points.append((second, average, label_for_interval2(second)))

    for timestamp, value in data:
        second = parse_time(timestamp)
        in_first_window = interval_1_start_2 <= second <= interval_1_end_2
        in_second_window = interval_2_start_2 <= second <= interval_2_end_2
        if not (in_first_window or in_second_window):
            continue

        if second != current_second:
            # A new second begins: close the previous one first.
            emit(current_second, conductivities)
            current_second = second
            conductivities = []

        # Heart-rate lines are recognised only so they can be skipped.
        if parse_heart_rate(value) is None:
            conductivities.append(parse_conductivity(value))

    # Close the last second of data.
    emit(current_second, conductivities)

    return data_points


def generate_data_points3(data):
    """Average the conductivity readings of the third test, one point per second.

    Only records whose second lies inside one of the two module-level windows
    (``interval_1_start_3``..``interval_1_end_3`` or
    ``interval_2_start_3``..``interval_2_end_3``, both ends inclusive) are
    used.  The window test is a plain string comparison, so it assumes
    zero-padded ``HH:MM:SS`` values.  Records containing ``'Heart Rate'`` are
    skipped; every other record contributes its trailing integer as a reading.

    Args:
        data: iterable of ``(timestamp, record)`` string pairs.  Consecutive
            records with the same second are grouped, so the input is
            expected to be in chronological order.

    Returns:
        A list of ``(second, avg_conductivity, label)`` tuples, one for each
        second that had at least one conductivity reading.
        ``avg_conductivity`` is rounded to one decimal and ``label`` is
        ``label_for_interval3(second)``.
    """
    data_points = []
    current_second = None
    conductivities = []

    def emit(second, readings):
        # A second that only carried heart-rate lines has nothing to average;
        # dividing by len(readings) would raise ZeroDivisionError.
        if second is None or not readings:
            return
        average = round(sum(readings) / len(readings), 1)
        # Tag the point with the second that was actually averaged, not with
        # the timestamp of whichever record happened to be read last.
        data_points.append((second, average, label_for_interval3(second)))

    for timestamp, value in data:
        second = parse_time(timestamp)
        in_first_window = interval_1_start_3 <= second <= interval_1_end_3
        in_second_window = interval_2_start_3 <= second <= interval_2_end_3
        if not (in_first_window or in_second_window):
            continue

        if second != current_second:
            # A new second begins: close the previous one first.
            emit(current_second, conductivities)
            current_second = second
            conductivities = []

        # Heart-rate lines are recognised only so they can be skipped.
        if parse_heart_rate(value) is None:
            conductivities.append(parse_conductivity(value))

    # Close the last second of data.
    emit(current_second, conductivities)

    return data_points

# Function to determine the label based on the timestamp
def label_for_interval1(timestamp):
    """Map a time string of the first test to its class label.

    Returns ``1`` for "16:27:50".."16:37:50", ``0`` for
    "16:39:15".."16:49:15" (both ends inclusive) and ``None`` otherwise.
    The comparison is lexicographic on the strings themselves.
    """
    labelled_windows = (
        ("16:27:50", "16:37:50", 1),
        ("16:39:15", "16:49:15", 0),
    )
    for window_start, window_end, window_label in labelled_windows:
        if window_start <= timestamp <= window_end:
            return window_label
    return None
    
def label_for_interval2(timestamp):
    """Map a time string of the second test to its class label.

    Returns ``1`` for "15:26:00".."15:38:00", ``0`` for
    "15:39:00".."15:49:00" (both ends inclusive) and ``None`` otherwise.
    The comparison is lexicographic on the strings themselves.
    """
    labelled_windows = (
        ("15:26:00", "15:38:00", 1),
        ("15:39:00", "15:49:00", 0),
    )
    for window_start, window_end, window_label in labelled_windows:
        if window_start <= timestamp <= window_end:
            return window_label
    return None
    
def label_for_interval3(timestamp):
    """Map a time string of the third test to its class label.

    Returns ``1`` for "16:18:00".."16:20:00", ``0`` for
    "16:20:15".."16:22:00" (both ends inclusive) and ``None`` otherwise.
    The comparison is lexicographic on the strings themselves.
    """
    labelled_windows = (
        ("16:18:00", "16:20:00", 1),
        ("16:20:15", "16:22:00", 0),
    )
    for window_start, window_end, window_label in labelled_windows:
        if window_start <= timestamp <= window_end:
            return window_label
    return None


# Read the three CSV recordings
def _read_records(path):
    """Load one recording as a list of ``(timestamp, raw_record)`` pairs.

    Only the first column of each row is used: the whole cell is kept as the
    raw record and its first whitespace-separated token (via ``parse_time``)
    becomes the timestamp.

    Args:
        path: location of the CSV file to read.

    Returns:
        A list of ``(timestamp, record)`` string tuples in file order.
    """
    records = []
    with open(path, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields an empty list for a blank line, and a
            # whitespace-only cell has no first token; either would raise
            # IndexError below, so such rows are skipped.
            if not row or not row[0].strip():
                continue
            records.append((parse_time(row[0]), row[0]))
    return records


data1 = _read_records('arduinoToCsv/firstTestPani/outGSR.csv')
data2 = _read_records('arduinoToCsv/secondTestPani/outGSR-pani240321.csv')
data3 = _read_records('arduinoToCsv/thirdTestPani/skin_conductivity_data11apr.csv')


# Time windows of the first test; the generate_data_points* functions read
# these module-level names and treat both ends as inclusive.
interval_1_start, interval_1_end = "16:27:50", "16:37:50"
interval_2_start, interval_2_end = "16:39:15", "16:49:15"

# Time windows of the second test
interval_1_start_2, interval_1_end_2 = "15:26:00", "15:38:00"
interval_2_start_2, interval_2_end_2 = "15:39:00", "15:49:00"

# Time windows of the third test
interval_1_start_3, interval_1_end_3 = "16:18:00", "16:20:00"
interval_2_start_3, interval_2_end_3 = "16:20:15", "16:22:00"

# Build the labelled per-second conductivity averages for each recording
data_points1 = generate_data_points1(data1)
data_points2 = generate_data_points2(data2)
data_points3 = generate_data_points3(data3)


# data_points1, data_points2 and data_points3 now hold the labelled per-second data points (nothing is printed here)


In [44]:
# Normalisation statistics: mean and standard deviation of the per-second
# conductivity averages, per recording and pooled over all three.
import numpy as np

# Each data point is a 3-tuple; the conductivity average sits in the middle.
values_1 = [point[1] for point in data_points1]
values_2 = [point[1] for point in data_points2]
values_3 = [point[1] for point in data_points3]
v = values_1 + values_2 + values_3

mean_1, std_1 = np.mean(values_1), np.std(values_1)
mean_2, std_2 = np.mean(values_2), np.std(values_2)
mean_3, std_3 = np.mean(values_3), np.std(values_3)
mean, std = np.mean(v), np.std(v)

print(mean_1, std_1)
print(mean_2, std_2)
print(mean_3, std_3)
print(mean, std)




297.96439267886853 25.768641960557293
108.7465204236006 56.464952841477505
209.95110132158587 60.7263047178912
199.7728098873137 101.63457396620731


In [45]:
# Create a new dataset by concatenating values within each 5-second interval
length_of_data_points = 5


def _build_windows(points, mean, std, size):
    """Cut per-second data points into consecutive, non-overlapping windows.

    Args:
        points: sequence of ``(timestamp, value, label)`` tuples.
        mean: value subtracted from every reading before scaling.
        std: value every centred reading is divided by.
        size: number of points per window.

    Returns:
        ``(windows, labels)`` of equal length.  Each window is a list of
        ``size`` normalised values rounded to three decimals; its label is
        the label stored with the window's last point.  A trailing group of
        fewer than ``size`` points is dropped.
    """
    windows = []
    window_labels = []
    # The range stops before a trailing partial window, and a label is
    # appended together with its window, so the two lists always have the
    # same length (even when len(points) is a multiple of size).
    for start in range(0, len(points) - size + 1, size):
        chunk = points[start:start + size]
        windows.append(
            [round((value - mean) / std, 3) for _second, value, _label in chunk]
        )
        window_labels.append(chunk[-1][2])
    return windows, window_labels


# Each recording is normalised with its own mean and standard deviation.
dataset_1, labels_1 = _build_windows(data_points1, mean_1, std_1, length_of_data_points)
dataset_2, labels_2 = _build_windows(data_points2, mean_2, std_2, length_of_data_points)
dataset_3, labels_3 = _build_windows(data_points3, mean_3, std_3, length_of_data_points)

print(len(labels_2), len(dataset_2))


264 264


In [46]:
# Combine the windows and labels of the first and third tests; the second
# test (dataset_2 / labels_2) is not part of this combined set.
data = [*dataset_1, *dataset_3]
labels = [*labels_1, *labels_3]
print(len(labels))

504


In [50]:
from sklearn.model_selection import train_test_split

# 80/20 split of the combined data set with a fixed seed.
(
    train_data,
    test_data,
    train_labels,
    test_labels,
) = train_test_split(data, labels, test_size=0.2, random_state=42)

# Same split applied to the second test on its own.  Note that the "_3"
# suffix on these names refers to data taken from dataset_2 / labels_2.
(
    train_data_3,
    test_data_3,
    train_labels_3,
    test_labels_3,
) = train_test_split(dataset_2, labels_2, test_size=0.2, random_state=42)

Now with a decision tree

In [51]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

max_depth = 2 # Set the maximum depth as desired

# Build the classifier and fit it to the training data in one step
# (fit() returns the fitted estimator itself).
tree_clf = DecisionTreeClassifier(
    max_depth=max_depth,
    random_state=42,
).fit(train_data, train_labels)

# Score the fitted tree on the held-back part of the combined data set.
predictions = tree_clf.predict(test_data)
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy:", accuracy)


Accuracy: 0.9702970297029703


In [52]:
# Apply the already-fitted tree to train_data_3, i.e. the training part of
# the split made from dataset_2; the tree itself was not fitted on it.
predictions = tree_clf.predict(train_data_3)

# Fraction of those windows whose predicted label matches the stored one.
accuracy = accuracy_score(y_true=train_labels_3, y_pred=predictions)
print("Accuracy:", accuracy)

Accuracy: 0.957345971563981


In [33]:
import pickle

# Persist the fitted decision tree in the current working directory.
# Only unpickle this file from a trusted source: loading a pickle can
# execute arbitrary code.
filename = 'decision_tree_model.pkl'
with open(filename, 'wb') as file:
    file.write(pickle.dumps(tree_clf))