In [17]:
import csv

# Function to parse the time from the timestamp string
def parse_time(timestamp):
    return timestamp.split()[0]

# Function to parse the heart rate from the record
def parse_heart_rate(record):
    heart_rate_index = record.find('Heart Rate')
    if heart_rate_index != -1:
        return int(record[heart_rate_index + len('Heart Rate: '):].split()[0])
    else:
        return None

# Function to parse the conductivity value from the record
def parse_conductivity(record):
    return int(record.split()[-1])

# Function to generate data points with average heart rate and conductivity readings for each second
def generate_data_points1(data):
    data_points = []
    current_second = None
    conductivities = []

    for timestamp, value in data:
        second = parse_time(timestamp)
        if interval_1_start <= second <= interval_1_end or interval_2_start <= second <= interval_2_end :  # Check if the timestamp is within the specified intervals
            if second != current_second:
                if current_second is not None:
            
                    avg_conductivity = round(sum(conductivities) / len(conductivities), 1)
                    data_points.append((timestamp, avg_conductivity, label_for_interval1(current_second)))
                current_second = second
                conductivities = []

            heart_rate = parse_heart_rate(value)
            if heart_rate is not None:
                last_heart_rate = heart_rate
            else:
                conductivity = parse_conductivity(value)
                conductivities.append(conductivity)

    # Process the last second of data
    if current_second is not None:
        avg_conductivity = round(sum(conductivities) / len(conductivities), 1)
        data_points.append((timestamp, avg_conductivity, label_for_interval1(current_second)))

    return data_points

def generate_data_points2(data):
    data_points = []
    current_second = None
    conductivities = []

    for timestamp, value in data:
        second = parse_time(timestamp)
        if interval_1_start_2 <= second <= interval_1_end_2 or interval_2_start_2 <= second <= interval_2_end_2 :  # Check if the timestamp is within the specified intervals
            if second != current_second:
                if current_second is not None:
            
                    avg_conductivity = round(sum(conductivities) / len(conductivities), 1)
                    data_points.append((timestamp, avg_conductivity, label_for_interval2(current_second)))
                current_second = second
                conductivities = []

            heart_rate = parse_heart_rate(value)
            if heart_rate is not None:
                last_heart_rate = heart_rate
            else:
                conductivity = parse_conductivity(value)
                conductivities.append(conductivity)

    # Process the last second of data
    if current_second is not None:
        avg_conductivity = round(sum(conductivities) / len(conductivities), 1)
        data_points.append((timestamp, avg_conductivity, label_for_interval2(current_second)))

    return data_points

# Function to determine the label based on the timestamp
def label_for_interval1(timestamp):
    interval_1_start = "16:27:50"
    interval_1_end = "16:37:50"
    interval_2_start = "16:39:15"
    interval_2_end = "16:49:15"

    if interval_1_start <= timestamp <= interval_1_end:
        return 1
    elif interval_2_start <= timestamp <= interval_2_end:
        return 0
    else:
        return None
    
def label_for_interval2(timestamp):
    interval_1_start = "15:26:00"
    interval_1_end = "15:38:00"
    interval_2_start = "15:39:00"
    interval_2_end = "15:49:00"

    if interval_1_start <= timestamp <= interval_1_end:
        return 1
    elif interval_2_start <= timestamp <= interval_2_end:
        return 0
    else:
        return None

# Read the CSV file
data1 = []
data2 = []
with open('arduinoToCsv/firstTestPani/outGSR.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        timestamp = parse_time(row[0])
        value = row[0]
        
        data1.append((timestamp, value))
with open('arduinoToCsv/secondTestPani/outGSR-pani240321.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        timestamp = parse_time(row[0])
        value = row[0]
        
        data2.append((timestamp, value))


# Define the intervals
interval_1_start = "16:27:50"
interval_1_end = "16:37:50"
interval_2_start = "16:39:15"
interval_2_end = "16:49:15"


#define interavls for second test
interval_1_start_2 = "15:26:00"
interval_1_end_2 = "15:38:00"
interval_2_start_2 = "15:39:00"
interval_2_end_2 = "15:49:00"


# Generate data points with average heart rate and conductivity readings for each second
data_points1 = generate_data_points1(data1)
data_points2 = generate_data_points2(data2)


# Print the generated data points with labels


In [21]:
data_points2

[('15:26:01', 217.2, 1),
 ('15:26:02', 205.0, 1),
 ('15:26:03', 193.8, 1),
 ('15:26:04', 188.9, 1),
 ('15:26:05', 188.7, 1),
 ('15:26:06', 193.0, 1),
 ('15:26:07', 198.2, 1),
 ('15:26:08', 201.0, 1),
 ('15:26:09', 203.6, 1),
 ('15:26:10', 204.9, 1),
 ('15:26:11', 205.7, 1),
 ('15:26:12', 206.3, 1),
 ('15:26:13', 207.1, 1),
 ('15:26:14', 208.1, 1),
 ('15:26:15', 208.9, 1),
 ('15:26:16', 209.6, 1),
 ('15:26:17', 210.2, 1),
 ('15:26:18', 211.2, 1),
 ('15:26:19', 211.1, 1),
 ('15:26:20', 210.9, 1),
 ('15:26:21', 211.0, 1),
 ('15:26:22', 208.5, 1),
 ('15:26:23', 204.8, 1),
 ('15:26:24', 204.5, 1),
 ('15:26:25', 206.1, 1),
 ('15:26:26', 207.7, 1),
 ('15:26:27', 208.1, 1),
 ('15:26:28', 208.1, 1),
 ('15:26:29', 209.0, 1),
 ('15:26:30', 209.5, 1),
 ('15:26:31', 210.0, 1),
 ('15:26:32', 205.8, 1),
 ('15:26:33', 197.7, 1),
 ('15:26:34', 195.5, 1),
 ('15:26:35', 197.1, 1),
 ('15:26:36', 199.5, 1),
 ('15:26:37', 201.4, 1),
 ('15:26:38', 203.7, 1),
 ('15:26:39', 203.9, 1),
 ('15:26:40', 204.0, 1),


In [26]:
#normalise data

#normalise
import numpy as np

values_1 = [value for (timestamp,value,label) in data_points1]
mean_1 = np.mean(values_1)
std_1 = np.std(values_1)
values_2 = [value for (timestamp,value,label) in data_points2]
mean_2 = np.mean(values_2)
std_2 = np.std(values_2)

print(mean_1, std_2)

297.96439267886853 56.464952841477505


In [40]:
# Create a new dataset by concatenating values within each 5-second interval

dataset_1 = []

labels_1= []
for iter, data in enumerate(data_points1):
    (timestamp, value, label) = data
    value = round((value - mean_1)/ std_1, 3)
    
    if iter % 5 == 0:
        new_data_point = [value]
        if iter > 0:
            labels_1.append(label_for_interval1(timestamp))
    else:
        new_data_point.append(value)
        if len(new_data_point) == 5:
            dataset_1.append(new_data_point)
    
dataset_2 = []

labels_2 = []
for iter, data in enumerate(data_points2):
    (timestamp, value, label) = data
    value = round((value - mean_2)/ std_2, 3)
    
    if iter % 5 == 0:
        new_data_point = [value]
        if iter > 0:
            labels_2.append(label_for_interval2(timestamp))
    else:
        new_data_point.append(value)
        if len(new_data_point) == 5:
            dataset_2.append(new_data_point)
    
  
print(len(labels_2), len(dataset_2))


264 264


In [41]:
labels_2

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [42]:
#combine data from both tests

data = dataset_1 + dataset_2
labels = labels_1 + labels_2
print(labels)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 

LogREg

In [69]:
from sklearn.model_selection import train_test_split
train_data, test_data, train_labels, test_labels = train_test_split(data, labels, test_size=0.2, random_state=42)

In [70]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Instantiate Logistic Regression model
log_reg = LogisticRegression()

# Fit the model to the training data
log_reg.fit(train_data, train_labels)

# Predict on the test data
predictions = log_reg.predict(test_data)

# Evaluate the accuracy of the model
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy:", accuracy)


Accuracy: 0.9207920792079208


NOw with decision tree

In [71]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Instantiate Decision Tree classifier
tree_clf = DecisionTreeClassifier(random_state=42)

# Fit the model to the training data
tree_clf.fit(train_data, train_labels)

# Predict on the test data
predictions = tree_clf.predict(test_data)

# Evaluate the accuracy of the model
accuracy = accuracy_score(test_labels, predictions)
print("Accuracy:", accuracy)


Accuracy: 0.9504950495049505


now try  by only training on dataset 1 and testing on set 2

In [72]:
train_data, test_data, train_labels, test_labels = train_test_split(dataset_1, labels_1, test_size=0.2, random_state=42)
train_data2, test_data2, train_labels2, test_labels2 = train_test_split(dataset_2, labels_2, test_size=0.2, random_state=42)

In [68]:
log_reg = LogisticRegression()

# Fit the model to the training data
log_reg.fit(train_data, train_labels)
predictions = log_reg.predict(test_data2)

# Evaluate the accuracy of the model
accuracy = accuracy_score(test_labels2, predictions)
print("Accuracy:", accuracy)

Accuracy: 0.7358490566037735


In [73]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Instantiate Decision Tree classifier
tree_clf = DecisionTreeClassifier(random_state=42)

# Fit the model to the training data
tree_clf.fit(train_data, train_labels)

# Predict on the test data
predictions = tree_clf.predict(train_data2)

# Evaluate the accuracy of the model
accuracy = accuracy_score(train_labels2, predictions)
print("Accuracy:", accuracy)


Accuracy: 0.7677725118483413
