In [2]:
!pip install imbalanced-learn
!pip install librosa





[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip





In [3]:
import librosa  
import os
import numpy as np

DATADIR = r'C:\Users\Abedi\OneDrive - Student Ambassadors\archive (7)\combined_datasets'
CATEGORIES = ['fall', 'not fall']  

def create_training_data():
    training_data = []
    for category in CATEGORIES:
        path = os.path.join(DATADIR, category)
        class_num = CATEGORIES.index(category)
        
        for audio_file in os.listdir(path):
            audio_path = os.path.join(path, audio_file)
            
            audio_array, sr = librosa.load(audio_path, sr=22050, duration=30)  # 
            
            
            time_features = extract_time_domain_features(audio_array)
            freq_features = extract_frequency_domain_features(audio_array)
            impact_features = extract_impact_features(audio_array, sr)
            
            combined_features = time_features + freq_features + impact_features
            training_data.append([combined_features, class_num])
    
    return training_data


In [4]:

def extract_impact_features(audio_array, sr):
    onset_env = librosa.onset.onset_strength(y=audio_array, sr=sr)
    return [np.max(onset_env), np.mean(onset_env)]


In [5]:
DATADIR = r'c:\Users\Abedi\OneDrive - Student Ambassadors\archive (7)\combined_datasets' 
  
CATEGORIES = ['fall', 'not fall']  

In [6]:
import librosa
import numpy as np
import os
from scipy.stats import skew, kurtosis
from scipy.signal import find_peaks

def extract_time_domain_features(audio_array):
    features = [
        np.mean(audio_array),          
        np.median(audio_array),         
        np.var(audio_array),            
        np.std(audio_array),            
        skew(audio_array),              
        kurtosis(audio_array),          
        librosa.feature.zero_crossing_rate(audio_array).mean(),  
        len(librosa.onset.onset_detect(y=audio_array))  
    ]
    print(f"Time features extracted: {len(features)}")  
    return features

def extract_frequency_domain_features(audio_array, sr):
    features = [
        np.mean(np.abs(librosa.stft(audio_array))),     
        librosa.feature.spectral_centroid(y=audio_array, sr=sr).mean(),  
        librosa.feature.spectral_bandwidth(y=audio_array, sr=sr).mean(),  
        librosa.feature.spectral_rolloff(y=audio_array, sr=sr).mean(),    
        librosa.feature.rms(y=audio_array).mean()       
    ]
    print(f"Frequency features extracted: {len(features)}")  
    return features



def extract_impact_features(audio_array, sr=22050):
    onset_env = librosa.onset.onset_strength(y=audio_array, sr=sr)
    return [
        np.max(onset_env), 
        np.mean(onset_env) 
    ]

def create_training_data():
    training_data = []
    for category in CATEGORIES:
        path = os.path.join(DATADIR, category)
        
        
        if not os.path.exists(path):
            print(f"⚠️ Missing directory: {path}")
            continue
            
        class_num = CATEGORIES.index(category)
        audio_files = [f for f in os.listdir(path) if f.endswith('.wav')]
        
        
        if not audio_files:
            print(f"⚠️ No WAV files in {path}")
            continue
            
        for audio_file in audio_files:
            audio_path = os.path.join(path, audio_file)
            try:
                audio_array, sr = librosa.load(audio_path, sr=22050, duration=30)
                features = extract_time_domain_features(audio_array) + extract_frequency_domain_features(audio_array, sr)
                
                
                if len(features) != 13:  
                    print(f"⚠️ Invalid features for {audio_file}: {len(features)}")
                    continue
                    
                training_data.append([features, class_num])
                
            except Exception as e:
                print(f"❌ Error processing {audio_file}: {str(e)}")
                
    return training_data



DATADIR = r'C:\Users\Abedi\OneDrive - Student Ambassadors\archive (7)\combined_datasets'
CATEGORIES = ['fall', 'not fall']

training_data = create_training_data()


Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted:

In [7]:
from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import StandardScaler
import os
import librosa
import numpy as np
from scipy.stats import skew, kurtosis

DATADIR = r'C:\Users\Abedi\OneDrive - Student Ambassadors\archive (7)\combined_datasets'
CATEGORIES = ['fall', 'not fall']


training_data = create_training_data()
assert len(training_data) > 0, "No data loaded! Check paths and files."

X = [features for features, _ in training_data]
y = [label for _, label in training_data]

max_length = max(len(f) for f in X)
X_padded = np.array([np.pad(f, (0, max_length - len(f))) for f in X])

X_train, X_test, y_train, y_test = train_test_split(
    X_padded, y, 
    test_size=0.2, 
    random_state=42,
    stratify=y
)



scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42)
X_train_res, y_train_res = smote.fit_resample(X_train_scaled, y_train)


Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted: 5
Time features extracted: 8
Frequency features extracted:

In [8]:
def extract_sequential_features(audio_array, sr=22050, n_mfcc=13):
    mfccs = librosa.feature.mfcc(y=audio_array, sr=sr, n_mfcc=n_mfcc)
    return np.mean(mfccs.T, axis=0)


In [9]:
!pip install xgboost
!pip install scikit-learn






[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import recall_score, precision_score, f1_score

classifiers = {
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'XGBoost': XGBClassifier(),
    'SVC': SVC(probability=True),
    'GaussianNB': GaussianNB(),
    'Random Forest': RandomForestClassifier()
}

def evaluate_fall_detector(clf, X_test, y_test):
    y_pred = clf.predict(X_test)
    return {
        'recall': recall_score(y_test, y_pred, pos_label=1),
        'precision': precision_score(y_test, y_pred, pos_label=1),
        'f1': f1_score(y_test, y_pred, pos_label=1)
    }


for name, clf in classifiers.items():
    clf.fit(X_train_scaled, y_train) 
    metrics = evaluate_fall_detector(clf, X_test_scaled, y_test)
    print(f"\n{name} Performance:")
    print(f"Recall: {metrics['recall']:.2f}")
    print(f"Precision: {metrics['precision']:.2f}")
    print(f"F1 Score: {metrics['f1']:.2f}")



Logistic Regression Performance:
Recall: 0.43
Precision: 0.38
F1 Score: 0.40

XGBoost Performance:
Recall: 0.57
Precision: 0.50
F1 Score: 0.53

SVC Performance:
Recall: 0.29
Precision: 0.50
F1 Score: 0.36

GaussianNB Performance:
Recall: 1.00
Precision: 0.54
F1 Score: 0.70

Random Forest Performance:
Recall: 0.86
Precision: 0.86
F1 Score: 0.86


In [11]:
from sklearn.metrics import recall_score, precision_score
from sklearn.linear_model import LogisticRegression  

clf = LogisticRegression().fit(X_train_scaled, y_train)

y_pred = clf.predict(X_test_scaled)  

print(f"Fall Recall: {recall_score(y_test, y_pred, pos_label=1):.2f}")
print(f"Fall Precision: {precision_score(y_test, y_pred, pos_label=1):.2f}")


Fall Recall: 0.43
Fall Precision: 0.38


In [12]:
from sklearn.metrics import recall_score, precision_score

print(f"Fall Recall: {recall_score(y_test, y_pred, pos_label=1)}")
print(f"Fall Precision: {precision_score(y_test, y_pred, pos_label=1)}")


Fall Recall: 0.42857142857142855
Fall Precision: 0.375


In [13]:
!pip install ipywidgets
!jupyter nbextension enable --py widgetsnbextension





[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip
usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]
               [--paths] [--json] [--debug]
               [subcommand]

Jupyter: Interactive Computing

positional arguments:
  subcommand     the subcommand to launch

options:
  -h, --help     show this help message and exit
  --version      show the versions of core jupyter packages and exit
  --config-dir   show Jupyter config dir
  --data-dir     show Jupyter data dir
  --runtime-dir  show Jupyter runtime dir
  --paths        show all Jupyter paths. Add --json for machine-readable
                 format.
  --json         output paths as machine-readable json
  --debug        output debug information about paths

Available subcommands: console dejavu events execute kernel kernelspec lab
labextension labhub migrate nbconvert notebook run server troubleshoot trust

Jupyter command `ju

In [14]:
!pip install matplotlib




[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
assert 'X_train_scaled' in globals(), "Run data preprocessing first!"
print(f"X_train_scaled shape: {X_train_scaled.shape}")

feature_names = [
    
    'mean_amp', 'median_amp', 'variance', 'std_dev',
    'skewness', 'kurtosis', 'zero_cross_rate', 'onset_count',
    
    
    'stft_mean', 'spectral_centroid', 'spectral_bandwidth',
    'spectral_rolloff', 'rms_energy'
]

assert len(feature_names) == X_train_scaled.shape[1], (
    f"Feature mismatch! Data: {X_train_scaled.shape[1]} vs Names: {len(feature_names)}"
)


X_train_scaled shape: (124, 13)


In [16]:
pip install --matplotlib

Note: you may need to restart the kernel to use updated packages.



Usage:   
  c:\Users\Abedi\Human-Analytics-Research-Codes\tf_env\Scripts\python.exe -m pip install [options] <requirement specifier> [package-index-options] ...
  c:\Users\Abedi\Human-Analytics-Research-Codes\tf_env\Scripts\python.exe -m pip install [options] -r <requirements file> [package-index-options] ...
  c:\Users\Abedi\Human-Analytics-Research-Codes\tf_env\Scripts\python.exe -m pip install [options] [-e] <vcs project url> ...
  c:\Users\Abedi\Human-Analytics-Research-Codes\tf_env\Scripts\python.exe -m pip install [options] [-e] <local project path> ...
  c:\Users\Abedi\Human-Analytics-Research-Codes\tf_env\Scripts\python.exe -m pip install [options] <archive url/path> ...

no such option: --matplotlib


In [None]:
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import numpy as np

xgb_model = XGBClassifier(scale_pos_weight=3.5, eval_metric='logloss')
xgb_model.fit(X_train_scaled, y_train)

result = permutation_importance(
    xgb_model, 
    X_test_scaled, 
    y_test,
    n_repeats=10,
    random_state=42,
    n_jobs=-1
)

plt.figure(figsize=(12, 8), dpi=100)
sorted_idx = result.importances_mean.argsort()
pos = np.arange(len(sorted_idx)) + .5

plt.barh(pos, result.importances_mean[sorted_idx], align='center')
plt.yticks(pos, np.array(feature_names)[sorted_idx])
plt.xlabel("Permutation Importance Score")
plt.title("Feature Importance Ranking")
plt.grid(axis='x', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.savefig('feature_importance.png')
plt.close()

In [None]:
!pip uninstall numpy shap -y
!pip install numpy==1.26.4  
!pip install shap==0.44.0


Found existing installation: numpy 2.1.3
Uninstalling numpy-2.1.3:
  Successfully uninstalled numpy-2.1.3
Found existing installation: shap 0.46.0
Uninstalling shap-0.46.0:
  Successfully uninstalled shap-0.46.0
Collecting numpy==1.26.4
  Using cached numpy-1.26.4-cp310-cp310-win_amd64.whl (15.8 MB)
Installing collected packages: numpy
Successfully installed numpy-1.26.4



[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting shap==0.44.0
  Using cached shap-0.44.0-cp310-cp310-win_amd64.whl (447 kB)
Collecting slicer==0.0.7
  Using cached slicer-0.0.7-py3-none-any.whl (14 kB)
Installing collected packages: slicer, shap
  Attempting uninstall: slicer
    Found existing installation: slicer 0.0.8
    Uninstalling slicer-0.0.8:
      Successfully uninstalled slicer-0.0.8
Successfully installed shap-0.44.0 slicer-0.0.7



[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [20]:
import joblib

best_model = classifiers['XGBoost']

joblib.dump(best_model, 'fall_detection_model.pkl')

loaded_model = joblib.load('fall_detection_model.pkl')


In [None]:

from sklearn.model_selection import train_test_split
import numpy as np

X_train, X_test, y_train, y_test = train_test_split(
    X, 
    y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

assert 'y_train' in locals(), 
y_train = np.array(y_train)  

print("\nClass Distribution Analysis:")
print(f"Total samples: {len(y_train)}")
print(f"Fall samples (0): {np.sum(y_train == 0)}")
print(f"Non-fall samples (1): {np.sum(y_train == 1)}")
print(f"Class ratio: {np.sum(y_train == 0)/len(y_train):.2f} : {np.sum(y_train == 1)/len(y_train):.2f}")

class_weights = {
    0: len(y_train)/(2*np.sum(y_train == 0)),  # Fall class weight
    1: len(y_train)/(2*np.sum(y_train == 1))   # Non-fall weight
}

best_model = classifiers['XGBoost'].fit(
    X_train_scaled,
    y_train,
    sample_weight=np.array([class_weights[label] for label in y_train])
)



Class Distribution Analysis:
Total samples: 124
Fall samples (0): 97
Non-fall samples (1): 27
Class ratio: 0.78 : 0.22


In [25]:
import numpy as np
from sklearn.model_selection import TimeSeriesSplit

X_arr = np.array(X_padded)

tscv = TimeSeriesSplit(n_splits=5)
for train_index, test_index in tscv.split(X_arr):
    X_train_ts, X_test_ts = X_arr[train_index], X_arr[test_index]
    y_train_ts, y_test_ts = np.array(y)[train_index], np.array(y)[test_index]


In [26]:
print("Class Distribution:")
print(f"Total samples: {len(y_train)}")
print(f"Fall samples (Class 0): {sum(y_train == 0)} ({np.mean(y_train == 0)*100:.1f}%)")
print(f"Non-fall samples (Class 1): {sum(y_train == 1)} ({np.mean(y_train == 1)*100:.1f}%)")


Class Distribution:
Total samples: 124
Fall samples (Class 0): 97 (78.2%)
Non-fall samples (Class 1): 27 (21.8%)


In [31]:
from sklearn.utils.class_weight import compute_class_weight

classes = np.unique(y_train)
class_weights = compute_class_weight('balanced', classes=classes, y=y_train)
class_weight_dict = dict(zip(classes, class_weights))

best_model =  XGBClassifier(
    scale_pos_weight=3.5,
    max_depth=5,
    learning_rate=0.1,
    subsample=0.8,
    eval_metric='logloss'
).fit(X_train_scaled, y_train)


In [32]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(sampling_strategy=0.5, random_state=42)  
X_res, y_res = smote.fit_resample(X_train_scaled, y_train)



In [33]:
class FocalXGBoost(XGBClassifier):
    def _objective(self, preds, dtrain):
        labels = dtrain.get_label()
        grad = (preds - labels) * ((1 - preds) ** 2)  
        hess = (preds * (1 - preds)) * ((1 - preds) ** 2)
        return grad, hess

focal_model = FocalXGBoost().fit(X_train_scaled, y_train)


In [34]:
class_weights = len(y_train) / (2 * np.bincount(y_train))
sample_weights = np.array([class_weights[label] for label in y_train])

model = LogisticRegression(class_weight='balanced').fit(X_train_scaled, y_train)


In [35]:
from sklearn.model_selection import StratifiedKFold

skf = StratifiedKFold(n_splits=5)
for train_idx, val_idx in skf.split(X_train_scaled, y_train):
    X_fold, X_val = X_train_scaled[train_idx], X_train_scaled[val_idx]
    y_fold, y_val = y_train[train_idx], y_train[val_idx]


In [36]:
from sklearn.metrics import balanced_accuracy_score, classification_report

y_pred = model.predict(X_test_scaled)
print("Balanced Accuracy:", balanced_accuracy_score(y_test, y_pred)) 
print(classification_report(y_test, y_pred, target_names=['Fall', 'Non-fall']))


Balanced Accuracy: 0.7619047619047619
              precision    recall  f1-score   support

        Fall       0.94      0.67      0.78        24
    Non-fall       0.43      0.86      0.57         7

    accuracy                           0.71        31
   macro avg       0.68      0.76      0.68        31
weighted avg       0.83      0.71      0.73        31



In [37]:
fall_weight = len(y_train)/(2 * np.sum(y_train == 0))  
non_fall_weight = len(y_train)/(2 * np.sum(y_train == 1))  

model = XGBClassifier(
    scale_pos_weight=non_fall_weight/fall_weight,  
    eval_metric='logloss'
).fit(X_train_scaled, y_train)


In [38]:
!pip install xgboost scikit-learn

import numpy as np
from sklearn.metrics import precision_recall_curve
from xgboost import XGBClassifier

def optimize_threshold(model, X_test, y_test, target_precision=0.7):
    y_probs = model.predict_proba(X_test)[:, 1]
    precisions, recalls, thresholds = precision_recall_curve(y_test, y_probs)
    
    
    valid_thresholds = thresholds[:len(precisions)-1]
    
    
    meets_precision = np.where(precisions[:-1] >= target_precision)[0]
    
    if meets_precision.size > 0:
        optimal_idx = meets_precision[0]
        optimal_threshold = valid_thresholds[optimal_idx]
        print(f"Optimal threshold: {optimal_threshold:.2f} (Index: {optimal_idx})")
        return (y_probs >= optimal_threshold).astype(int)
    else:
        print("No threshold meets precision target. Using default 0.5")
        return (y_probs >= 0.5).astype(int)

model = XGBClassifier().fit(X_train_scaled, y_train)  
y_pred_optimized = optimize_threshold(model, X_test_scaled, y_test)

print("\nOptimized Classification Report:")
print(classification_report(y_test, y_pred_optimized))


No threshold meets precision target. Using default 0.5

Optimized Classification Report:
              precision    recall  f1-score   support

           0       0.87      0.83      0.85        24
           1       0.50      0.57      0.53         7

    accuracy                           0.77        31
   macro avg       0.68      0.70      0.69        31
weighted avg       0.79      0.77      0.78        31




[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [39]:
import numpy as np

def add_impact_features(X, accel_indices=[0, 1, 2, 3]):

    try:
        if X.ndim != 2:
            raise ValueError("Input X must be 2-dimensional")
            
        n_samples, n_features = X.shape
        
  
        accel_deriv = np.diff(X[:, accel_indices], axis=0)
        accel_deriv = np.vstack([accel_deriv[0,:], accel_deriv])  # Pad first row
        
        
        if 7 >= n_features:
            raise IndexError("Column index 7 exceeds feature dimensions")
        resultant_force = np.sqrt(X[:, 3]**2 + X[:, 7]**2)[:, np.newaxis]
        
        energy = np.sum(X[:, accel_indices]**2, axis=1)[:, np.newaxis]
        
        return np.hstack([X, accel_deriv, resultant_force, energy])
        
    except Exception as e:
        print(f"Feature engineering failed: {str(e)}")
        return X  

X_enhanced = add_impact_features(X_train_scaled)
print("Original shape:", X_train_scaled.shape)
print("Enhanced shape:", X_enhanced.shape)


Original shape: (124, 13)
Enhanced shape: (124, 19)


In [None]:
pip install --upgrade scikit-learn xgboost


Collecting scikit-learn
  Using cached scikit_learn-1.6.1-cp310-cp310-win_amd64.whl (11.1 MB)
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.5.2
    Uninstalling scikit-learn-1.5.2:
      Successfully uninstalled scikit-learn-1.5.2
Successfully installed scikit-learn-1.6.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
pip install scikit-learn




[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip





In [51]:
pip install --upgrade xgboost scikit-learn


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 25.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [53]:
from xgboost import XGBClassifier
from sklearn.base import BaseEstimator, ClassifierMixin

class SklearnXGBClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self.model = XGBClassifier(**kwargs)
    
    def fit(self, X, y, **kwargs):
        self.model.fit(X, y, **kwargs)
        return self
    
    def predict(self, X):
        return self.model.predict(X)
    
    def predict_proba(self, X):
        return self.model.predict_proba(X)
    
    def __sklearn_tags__(self):
        
        return {
            'binary_only': False,
            'requires_fit': True
        }

model = SklearnXGBClassifier(
    scale_pos_weight=3.5,
    random_state=42,
    eval_metric='logloss'
)
model.fit(X_train_scaled, y_train);


In [84]:
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import numpy as np

xgb_model = XGBClassifier(scale_pos_weight=3.5, eval_metric='logloss')
xgb_model.fit(X_train_scaled, y_train)

result = permutation_importance(
    xgb_model, 
    X_test_scaled, 
    y_test,
    n_repeats=10,
    random_state=42,
    n_jobs=-1
)

plt.figure(figsize=(12, 8), dpi=100)
sorted_idx = result.importances_mean.argsort()
pos = np.arange(len(sorted_idx)) + .5

plt.barh(pos, result.importances_mean[sorted_idx], align='center')
plt.yticks(pos, np.array(feature_names)[sorted_idx])
plt.xlabel("Permutation Importance Score")
plt.title("Feature Importance Ranking")
plt.grid(axis='x', linestyle='--', alpha=0.7)

plt.tight_layout()
plt.savefig('feature_importance.png')
plt.close()

In [63]:
from sklearn.base import BaseEstimator, ClassifierMixin
import xgboost as xgb

class SklearnXGBClassifier(BaseEstimator, ClassifierMixin):
    def __init__(self, **kwargs):
        self.kwargs = kwargs
        self.model = xgb.XGBClassifier(**kwargs)

    def fit(self, X, y, **kwargs):
        self.model.fit(X, y, **kwargs)
        return self

    def predict(self, X):
        return self.model.predict(X)

    def predict_proba(self, X):
        return self.model.predict_proba(X)


In [None]:
best_model = classifiers['XGBoost'].fit(X_train_scaled, y_train)

y_probs = best_model.predict_proba(X_test_scaled)[:, 1]


print("Shape of y_probs:", y_probs.shape)
fall_threshold = 0.3  
non_fall_threshold = 0.7
if len(y_probs.shape) == 1:
    y_pred_custom = np.where(
        y_probs >= non_fall_threshold, 1,  
        np.where(1 - y_probs >= fall_threshold, 0, -1) 
    )
else:
    
    y_pred_custom = np.where(
        y_probs[:, 1] >= non_fall_threshold, 1,  
        np.where(y_probs[:, 0] >= fall_threshold, 0, -1)  
    )

print("Custom predictions shape:", y_pred_custom.shape)
print("Unique values in custom predictions:", np.unique(y_pred_custom))

from sklearn.metrics import classification_report
print("\nClassification Report for Custom Predictions:")
print(classification_report(y_test, y_pred_custom))


Shape of y_probs: (31,)
Custom predictions shape: (31,)
Unique values in custom predictions: [0 1]

Classification Report for Custom Predictions:
              precision    recall  f1-score   support

           0       0.81      0.88      0.84        24
           1       0.40      0.29      0.33         7

    accuracy                           0.74        31
   macro avg       0.60      0.58      0.59        31
weighted avg       0.72      0.74      0.73        31



Probability array shape: (31,)


In [81]:
from tensorflow.keras import backend as K

def custom_cost_function(y_true, y_pred):
    y_true = K.cast(y_true, 'float32')
    threshold = 0.5  

    steepness = 100.0  
    approx_pred = K.sigmoid(steepness * (y_pred - threshold))
    

    fp_approx = approx_pred * (1 - y_true)
    fn_approx = (1 - approx_pred) * y_true

    false_positives = K.sum(fp_approx) * 3.0
    false_negatives = K.sum(fn_approx) * 1.0

    loss = (false_positives + false_negatives) / K.cast(K.shape(y_true)[0], 'float32')
    return loss


In [82]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

lstm_model = Sequential([
    LSTM(64, input_shape=(None, X_train_scaled.shape[1]), return_sequences=True),
    Dropout(0.3),
    LSTM(32),
    Dense(1, activation='sigmoid')  
])


lstm_model.compile(
    optimizer='adam',
    loss=custom_cost_function,
    metrics=['accuracy']
)


X_train_seq = X_train_seq.astype('float32')

history = lstm_model.fit(
    X_train_seq,
    y_train[10:],  
    validation_split=0.2,
    epochs=50,
    batch_size=32,
    class_weight={0: 1.0, 1: 3.5},
    verbose=1
)


Epoch 1/50

Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
