In [4]:
import joblib
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping

%store -r export_df
df = export_df

%store -r export_df_long
df_long = export_df_long

# ----------------------------
# Prepare Data
# ----------------------------
x = df.drop(columns=['activity'])
y = df['activity']

x_long = df_long.drop(columns=['activity'])
y_long = df_long['activity']

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_long_encoded = label_encoder.fit_transform(y_long)

# Split data
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=42, shuffle=False
)

x_long_test = x_long[-100:]
y_long_test = y_long[-100:]
x_long_train = x_long[:-100]
y_long_train = y_long[:-100]

# Scale features
scaler_tabular = StandardScaler()
x_train_scaled = scaler_tabular.fit_transform(x_train)
x_test_scaled = scaler_tabular.transform(x_test)

scaler_long = StandardScaler()
x_long_train_scaled = scaler_long.fit_transform(x_long_train)
x_long_test_scaled = scaler_long.transform(x_long_test)

# Encode labels numerically
y_train_encoded = label_encoder.transform(y_train)
y_test_encoded = label_encoder.transform(y_test)
y_long_train_encoded = label_encoder.transform(y_long_train)
y_long_test_encoded = label_encoder.transform(y_long_test)

# ----------------------------
# Random Forest
# ----------------------------
forest_model = RandomForestClassifier(
    n_estimators=500,
    random_state=42,
    class_weight='balanced'  # helps with imbalanced classes
)
forest_model.fit(x_train_scaled, y_train_encoded)

# ----------------------------
# XGBoost
# ----------------------------
xgb_model = xgb.XGBClassifier(
    objective="multi:softprob",
    num_class=len(label_encoder.classes_),
    n_estimators=500,
    max_depth=8,
    learning_rate=0.15,
    gamma=1,
    reg_alpha=0.1,
    reg_lambda=1,
    subsample=0.8,
    colsample_bytree=0.8,
    eval_metric='mlogloss',
    use_label_encoder=False,
    random_state=42
)
xgb_model.fit(
    x_train_scaled,
    y_train_encoded,
    eval_set=[(x_test_scaled, y_test_encoded)],
    verbose=True
)


# ----------------------------
# Neural Network
# ----------------------------
neural_model = Sequential([
    Input(shape=(x_long_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dense(len(label_encoder.classes_), activation='softmax')
])

neural_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

neural_model.fit(
    x_long_train_scaled,
    y_long_train_encoded,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1
)


[0]	validation_0-mlogloss:0.65681
[1]	validation_0-mlogloss:0.63233
[2]	validation_0-mlogloss:0.62382
[3]	validation_0-mlogloss:0.60939
[4]	validation_0-mlogloss:0.59661
[5]	validation_0-mlogloss:0.58901
[6]	validation_0-mlogloss:0.58138
[7]	validation_0-mlogloss:0.57582
[8]	validation_0-mlogloss:0.57375
[9]	validation_0-mlogloss:0.57317
[10]	validation_0-mlogloss:0.57023
[11]	validation_0-mlogloss:0.56882
[12]	validation_0-mlogloss:0.57380
[13]	validation_0-mlogloss:0.57277
[14]	validation_0-mlogloss:0.57298
[15]	validation_0-mlogloss:0.57104
[16]	validation_0-mlogloss:0.57376
[17]	validation_0-mlogloss:0.57326
[18]	validation_0-mlogloss:0.57449
[19]	validation_0-mlogloss:0.57369
[20]	validation_0-mlogloss:0.57253
[21]	validation_0-mlogloss:0.57432
[22]	validation_0-mlogloss:0.57319
[23]	validation_0-mlogloss:0.57353
[24]	validation_0-mlogloss:0.57652
[25]	validation_0-mlogloss:0.57736
[26]	validation_0-mlogloss:0.57814
[27]	validation_0-mlogloss:0.57804
[28]	validation_0-mlogloss:0.5

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


[52]	validation_0-mlogloss:0.58303
[53]	validation_0-mlogloss:0.58342
[54]	validation_0-mlogloss:0.58456
[55]	validation_0-mlogloss:0.58463
[56]	validation_0-mlogloss:0.58462
[57]	validation_0-mlogloss:0.58461
[58]	validation_0-mlogloss:0.58462
[59]	validation_0-mlogloss:0.58463
[60]	validation_0-mlogloss:0.58462
[61]	validation_0-mlogloss:0.58708
[62]	validation_0-mlogloss:0.58712
[63]	validation_0-mlogloss:0.58713
[64]	validation_0-mlogloss:0.58722
[65]	validation_0-mlogloss:0.58726
[66]	validation_0-mlogloss:0.58706
[67]	validation_0-mlogloss:0.58619
[68]	validation_0-mlogloss:0.58621
[69]	validation_0-mlogloss:0.58619
[70]	validation_0-mlogloss:0.58648
[71]	validation_0-mlogloss:0.58576
[72]	validation_0-mlogloss:0.58575
[73]	validation_0-mlogloss:0.58592
[74]	validation_0-mlogloss:0.58592
[75]	validation_0-mlogloss:0.58558
[76]	validation_0-mlogloss:0.58492
[77]	validation_0-mlogloss:0.58490
[78]	validation_0-mlogloss:0.58487
[79]	validation_0-mlogloss:0.58489
[80]	validation_0-ml

<keras.src.callbacks.history.History at 0x1d2c68625d0>

In [6]:

# ----------------------------
# Save models and preprocessing objects
# ----------------------------
joblib.dump(forest_model, 'forest_model.pkl')
joblib.dump(xgb_model, 'xgb_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')
joblib.dump(scaler_tabular, 'scaler_tabular.pkl')
joblib.dump(scaler_long, 'scaler_long.pkl')
neural_model.save('neural_model.keras')