In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import pickle
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import MinMaxScaler
import joblib
from sklearn.metrics import classification_report



In [2]:
# Load the CSV dataset into a DataFrame
df = pd.read_csv('dataset/aruba-bysecs-full.csv')

# Convert the 'datetime' column to a pandas datetime object
df['datetime'] = pd.to_datetime(df['datetime'])

# Extract the time part and replace the 'datetime' column
df['datetime'] = df['datetime'].dt.time

# Rename the 'datetime' column to 'time'
df = df.rename(columns={'datetime': 'time'})
df.drop(columns=['label_index'], inplace=True)

# Save the DataFrame to a new CSV file
df.to_csv('new_dataset.csv', index=False)

In [3]:
data = pd.read_csv('new_dataset.csv')

# Convert time column to datetime format with appropriate format strings
data['time'] = pd.to_datetime(data['time'], format='%H:%M:%S.%f', errors='coerce')
data['time'] = data['time'].combine_first(pd.to_datetime(data['time'], format='%H:%M:%S', errors='coerce'))

# Calculate the average time difference between consecutive timestamps
time_diff = (data['time'].diff() / np.timedelta64(1, 's')).mean()

# Fill missing 'time' values by adding the average time difference
data['time'] = data['time'].fillna(method='ffill') + pd.to_timedelta(time_diff, unit='s')

label_encoder = LabelEncoder()
data['label'] = label_encoder.fit_transform(data['label'])
joblib.dump(label_encoder, 'label_encoder.mod')

# Convert 'time' column to floating point seconds since start
data['time'] = (data['time'] - data['time'].min()).dt.total_seconds()

sensor_columns = data.columns[1:-1]  # Exclude 'time' and 'label'
for column in sensor_columns:
    data[column] = data[column].apply(lambda x: 0 if x == 'OFF' else 1)

# data.to_csv('pp.csv', index=False)

data.head

<bound method NDFrame.head of             time  D001  D002  D004  M001  M002  M003  M004  M005  M006  ...  \
0         230.11     0     0     0     0     0     1     0     0     0  ...   
1         237.30     0     0     0     0     0     0     0     0     0  ...   
2        9153.25     0     0     0     0     0     1     0     0     0  ...   
3        9158.80     0     0     0     0     0     0     0     0     0  ...   
4       13341.72     0     0     0     0     0     1     0     0     0  ...   
...          ...   ...   ...   ...   ...   ...   ...   ...   ...   ...  ...   
459042  62541.60     1     0     0     0     0     0     0     0     0  ...   
459043  62545.89     1     0     0     0     0     0     0     0     0  ...   
459044  62549.25     1     0     0     0     0     0     0     0     0  ...   
459045  62567.81     1     0     0     0     0     0     0     0     0  ...   
459046  62569.80     1     0     0     0     0     0     0     0     0  ...   

        M023  M024  M

In [4]:
# Splitting data into features (X) and labels (y)
X = data.iloc[:, :-1]  # Excluding 'label'
# print(X)
y = data['label']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Create and train a Random Forest classifier
random_forest_model = RandomForestClassifier(n_estimators=100)  # You can adjust parameters as needed

model = random_forest_model.fit(X_train, y_train)


In [5]:

joblib.dump(model, 'random_forest_model.mod') 

loaded_model = joblib.load('random_forest_model.mod')

# Make predictions
y_pred = loaded_model.predict(X_test)

# Print the classification report
class_names = label_encoder.classes_
report = classification_report(y_test, y_pred, target_names=class_names)
print("Classification Report:")
print(report)

Classification Report:
                  precision    recall  f1-score   support

   Bed_to_Toilet       0.87      0.78      0.82       245
          Eating       0.91      0.89      0.90      2639
      Enter_Home       0.70      0.61      0.65       398
    Housekeeping       0.89      0.88      0.89      2035
      Leave_Home       0.61      0.59      0.60       320
Meal_Preparation       0.97      0.98      0.98     44732
           Relax       0.98      0.98      0.98     30358
       Respirate       1.00      1.00      1.00        73
        Sleeping       0.98      0.98      0.98      5996
     Wash_Dishes       0.96      0.94      0.95      2136
            Work       0.98      0.98      0.98      2878

        accuracy                           0.97     91810
       macro avg       0.90      0.87      0.88     91810
    weighted avg       0.97      0.97      0.97     91810

