In [6]:
import os
import joblib
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [7]:
# All output paths are resolved against the working directory at the moment
# this cell runs.
BASE_DIR = os.getcwd()

# The model and the scaler are stored side by side in <BASE_DIR>/models.
_MODELS_DIR = os.path.join(BASE_DIR, 'models')
MODEL_SAVE_PATH = os.path.join(_MODELS_DIR, 'fall_detection_rf.pkl')
SCALER_SAVE_PATH = os.path.join(_MODELS_DIR, 'scaler.pkl')

In [8]:
def load_and_prep_data(train_path, test_path):
    """Read the train and test CSVs and split each into features and target.

    Rows containing any missing value are discarded and the remaining rows
    are renumbered from 0. The target is the ``label`` column; every other
    column is treated as a feature.

    Args:
        train_path: Path to the training CSV (passed to ``pd.read_csv``).
        test_path: Path to the test CSV (passed to ``pd.read_csv``).

    Returns:
        Tuple ``(X_train, y_train, X_test, y_test)``.

    Raises:
        KeyError: If either file has no ``label`` column.
    """
    print("üìÇ Loading data...")

    # Load both files up front so a bad path fails before any cleaning starts.
    raw_frames = [pd.read_csv(csv_path) for csv_path in (train_path, test_path)]

    def _without_missing(frame):
        # Drop incomplete rows, then rebuild a contiguous 0..n-1 index.
        cleaned = frame.dropna().reset_index(drop=True)
        # Sanity check: nothing missing may survive the drop.
        assert not cleaned.isna().any().any()
        return cleaned

    def _features_and_target(frame):
        return frame.drop(columns=['label']), frame['label']

    train_frame, test_frame = (_without_missing(frame) for frame in raw_frames)

    X_train, y_train = _features_and_target(train_frame)
    X_test, y_test = _features_and_target(test_frame)

    return X_train, y_train, X_test, y_test


In [9]:
def train_process(X_train, y_train, n_iter=20, cv=5, scoring=None, random_state=42):
    """Scale the training features and tune a random forest by randomized search.

    A ``StandardScaler`` is fitted on ``X_train``, then ``RandomizedSearchCV``
    samples hyper-parameter combinations for a ``RandomForestClassifier`` and
    keeps the best one. The default arguments reproduce the original fixed
    configuration (20 candidates, 5 folds, seed 42, estimator's default score).

    Args:
        X_train: Training features.
        y_train: Training labels.
        n_iter: Number of parameter combinations sampled by the search.
        cv: Cross-validation setting forwarded to ``RandomizedSearchCV``.
        scoring: Metric used to rank candidates. ``None`` falls back to the
            classifier's own ``score`` (accuracy). When the positive class is
            rare, accuracy is dominated by the majority class, so a metric
            such as ``'f1'`` or ``'recall'`` may be a better selection
            criterion.
        random_state: Seed shared by the forest and the search so runs are
            repeatable.

    Returns:
        Tuple ``(best_estimator, scaler)``: the best forest found by the
        search and the fitted scaler, which must also be applied to any data
        passed to the model later.
    """
    print("‚öôÔ∏è Scaling data...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    print("üå≤ Training Random Forest...")
    # Candidate values the search samples from (72 combinations in total).
    param_grid = {
        'n_estimators': [200, 400, 600],
        'max_depth': [10, 30, None],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2],
        'bootstrap': [True, False]
    }

    rf = RandomForestClassifier(random_state=random_state)

    rf_random = RandomizedSearchCV(
        rf,
        param_distributions=param_grid,
        n_iter=n_iter,
        cv=cv,
        scoring=scoring,
        verbose=2,
        n_jobs=-1,  # use every available core
        random_state=random_state
    )

    rf_random.fit(X_train_scaled, y_train)

    print(" Best params:", rf_random.best_params_)

    return rf_random.best_estimator_, scaler


In [10]:
def evaluate_model(model, scaler, X_test, y_test):
    """Print accuracy, confusion matrix and classification report on test data.

    The test features are transformed with the already-fitted ``scaler``
    before prediction. Nothing is returned; all results go to stdout.

    Args:
        model: Fitted estimator exposing ``predict``.
        scaler: Fitted transformer exposing ``transform``.
        X_test: Test features.
        y_test: True test labels.
    """
    print("\nüìä Evaluating model...")

    predictions = model.predict(scaler.transform(X_test))

    accuracy = accuracy_score(y_test, predictions)
    print(f"Accuracy: {accuracy:.2%}")

    print("\nConfusion Matrix:")
    matrix = confusion_matrix(y_test, predictions)
    print(matrix)

    print("\nClassification Report:")
    report = classification_report(y_test, predictions)
    print(report)


In [11]:
def save_artifacts(model, scaler, model_path=None, scaler_path=None):
    """Persist the trained model and its scaler with joblib.

    Args:
        model: Fitted estimator to serialize.
        scaler: Fitted scaler to serialize.
        model_path: Destination for the model; defaults to ``MODEL_SAVE_PATH``.
        scaler_path: Destination for the scaler; defaults to
            ``SCALER_SAVE_PATH``.
    """
    if model_path is None:
        model_path = MODEL_SAVE_PATH
    if scaler_path is None:
        scaler_path = SCALER_SAVE_PATH

    # Prepare the parent directory of *each* artifact: the two paths need not
    # share a directory. A bare filename has an empty dirname, and
    # os.makedirs('') raises, so that case is skipped.
    for target in (model_path, scaler_path):
        parent_dir = os.path.dirname(target)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    joblib.dump(model, model_path)
    joblib.dump(scaler, scaler_path)

    print("üíæ Model & Scaler saved.")


In [12]:
# Dataset locations. The defaults are the original absolute Windows paths;
# set FALL_TRAIN_CSV / FALL_TEST_CSV in the environment to run this notebook
# on a machine where the data lives elsewhere.
TRAIN_PATH = os.environ.get(
    "FALL_TRAIN_CSV",
    r"E:\Data_science\extracted_feature_for_datazip\processed_test_data\Train.csv",
)
TEST_PATH = os.environ.get(
    "FALL_TEST_CSV",
    r"E:\Data_science\extracted_feature_for_datazip\processed_test_data\Test.csv",
)

# End-to-end run: load -> scale + tune -> evaluate on the test set -> persist.
X_train, y_train, X_test, y_test = load_and_prep_data(TRAIN_PATH, TEST_PATH)
model, scaler = train_process(X_train, y_train)
evaluate_model(model, scaler, X_test, y_test)
save_artifacts(model, scaler)



üìÇ Loading data...
‚öôÔ∏è Scaling data...
üå≤ Training Random Forest...
Fitting 5 folds for each of 20 candidates, totalling 100 fits
 Best params: {'n_estimators': 400, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_depth': 30, 'bootstrap': False}

üìä Evaluating model...
Accuracy: 99.26%

Confusion Matrix:
[[1442    5]
 [   6   39]]

Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1447
           1       0.89      0.87      0.88        45

    accuracy                           0.99      1492
   macro avg       0.94      0.93      0.94      1492
weighted avg       0.99      0.99      0.99      1492

üíæ Model & Scaler saved.
