In [8]:
# --- IMPORTS ---
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

from tensorflow.keras import regularizers
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD

# --- LOAD DATA ---
# Path of the CSV read into `df`. It can be overridden through the
# POSTURE_DATASET_CSV environment variable so the notebook is not tied to a
# single machine; when the variable is unset the original local path is used.
url = os.environ.get(
    "POSTURE_DATASET_CSV",
    r"C:\Users\acts5\OneDrive\Test Data Files\cleaned_posture_dataset.csv",
)
df = pd.read_csv(url)


In [7]:
# --- LOGISTIC REGRESSION ---

# Target is the `bin_label` column; every other column of `df` is a feature.
y = df['bin_label']
X = df.drop(columns=['bin_label'])

# Hold out 20% of the rows for testing. The split is stratified on `y` and
# seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# Build the class-weighted logistic regression and fit it in one step
# (`fit` returns the estimator itself).
model = LogisticRegression(
    solver='liblinear',
    max_iter=1000,
    class_weight='balanced',
).fit(X_train, y_train)

# Predictions for the held-out rows
y_pred = model.predict(X_test)

# Report: confusion matrix first, then the per-class metrics
print("Confusion Matrix:", confusion_matrix(y_test, y_pred), sep="\n")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Confusion Matrix:
[[5745 2673]
 [ 828 2576]]

Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.68      0.77      8418
           1       0.49      0.76      0.60      3404

    accuracy                           0.70     11822
   macro avg       0.68      0.72      0.68     11822
weighted avg       0.76      0.70      0.72     11822



In [9]:
# --- RANDOM FOREST CLASSIFIER ----

# This cell builds its own features, labels and train/test split before any
# model is fitted, so it does not rely on variables left behind by another cell.

# --- Split features and labels ---
X = df.drop(columns=['bin_label'])
y = df['bin_label']

# --- Split into train/test sets ---
# Stratified, seeded 80/20 split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# --- Train Random Forest with class weights ---
# The forest is fitted exactly once; the seed keeps the result repeatable.
model = RandomForestClassifier(
    n_estimators=200,          # number of trees
    class_weight='balanced',   # handle class imbalance
    max_depth=None,            # let trees grow fully
    random_state=42,
    n_jobs=-1                  # use all CPU cores
)
model.fit(X_train, y_train)

# --- Evaluate performance ---
y_pred = model.predict(X_test)

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Confusion Matrix:
[[8326   92]
 [  85 3319]]

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99      8418
           1       0.97      0.98      0.97      3404

    accuracy                           0.99     11822
   macro avg       0.98      0.98      0.98     11822
weighted avg       0.99      0.99      0.99     11822



In [10]:
# Rank the features by the fitted model's importance scores and print the 15
# highest as "<column>: <score>" with four decimals.
importances = model.feature_importances_
sorted_features = list(zip(X.columns, importances))
# In-place stable sort on the score, highest first.
sorted_features.sort(key=lambda pair: pair[1], reverse=True)

top_features = sorted_features[:15]
for name, imp in top_features:
    print(f"{name}: {imp:.4f}")


sensor_8: 0.0370
sensor_0: 0.0364
sensor_16: 0.0319
sensor_56: 0.0288
sensor_48: 0.0287
sensor_40: 0.0281
sensor_1: 0.0259
sensor_32: 0.0255
total_force_N: 0.0233
sensor_20: 0.0231
sensor_9: 0.0225
sensor_25: 0.0211
sensor_24: 0.0205
sensor_52: 0.0198
sensor_28: 0.0198
