In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tqdm import tqdm
import pickle


In [2]:
# Load the CSV dataset into a DataFrame
df = pd.read_csv('dataset/aruba-bysecs-full.csv')

# Convert the 'datetime' column to a pandas datetime object
df['datetime'] = pd.to_datetime(df['datetime'])

# Extract the time part and replace the 'datetime' column
df['datetime'] = df['datetime'].dt.time

# Rename the 'datetime' column to 'time'
df = df.rename(columns={'datetime': 'time'})
df.drop(columns=['label_index'], inplace=True)

# Save the DataFrame to a new CSV file
df.to_csv('new_dataset.csv', index=False)

In [24]:
data = pd.read_csv('new_dataset.csv')

# Convert time column to datetime format with appropriate format strings
data['time'] = pd.to_datetime(data['time'], format='%H:%M:%S.%f', errors='coerce')
data['time'] = data['time'].combine_first(pd.to_datetime(data['time'], format='%H:%M:%S', errors='coerce'))


label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])

# Create a MinMaxScaler object
scaler = MinMaxScaler()
# Create 'time_numerical' feature as seconds since start
data['time'] = (data['time'] - data['time'].min()).dt.total_seconds()
data['time'] = scaler.fit_transform(data[['time']])

# Save the MinMaxScaler object to a file
scaler_filename = 'scaler.pkl'
with open(scaler_filename, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

sensor_columns = data.columns[1:-1]  # Exclude 'time' and 'label'
for column in sensor_columns:
    data[column] = data[column].apply(lambda x: 0 if x == 'OFF' else 1)

data.to_csv('pp.csv', index=False)

# data.head

In [22]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score


# Splitting data into features (X) and labels (y)
X = data.iloc[:, :-1]  # Excluding 'time', 'label', and 'label_encoded'
# print(X)
y = data['label']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Create and train an SVM model
print("Decision Tree Training")
# Create and train a Decision Tree classifier
decision_tree_model = DecisionTreeClassifier()  # You can adjust parameters as needed
decision_tree_model.fit(X_train, y_train)


Decision Tree Training


In [23]:
from sklearn.metrics import classification_report

# Save the model to a file using pickle
model_filename = 'decision_tree_model.pkl'
with open(model_filename, 'wb') as file:
    pickle.dump(decision_tree_model, file)
print(f"Model saved to {model_filename}")

# Load the model from the file
with open(model_filename, 'rb') as file:
    loaded_model = pickle.load(file)

# Make predictions
y_pred = loaded_model.predict(X_test)

# Print the classification report
class_names = label_encoder.classes_
report = classification_report(y_test, y_pred, target_names=class_names)
print("Classification Report:")
print(report)

Model saved to decision_tree_model.pkl
Classification Report:
                  precision    recall  f1-score   support

   Bed_to_Toilet       0.57      0.45      0.50       245
          Eating       0.83      0.66      0.73      2639
      Enter_Home       0.66      0.50      0.57       398
    Housekeeping       0.76      0.64      0.70      2035
      Leave_Home       0.59      0.36      0.44       320
Meal_Preparation       0.88      0.95      0.91     44732
           Relax       0.94      0.91      0.92     30358
       Respirate       0.98      0.82      0.90        73
        Sleeping       0.81      0.74      0.77      5996
     Wash_Dishes       0.89      0.68      0.77      2136
            Work       0.93      0.82      0.87      2878

        accuracy                           0.89     91810
       macro avg       0.80      0.68      0.74     91810
    weighted avg       0.89      0.89      0.89     91810

