In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import AdaBoostClassifier  # Import AdaBoostClassifier
from sklearn.metrics import classification_report
import joblib



In [2]:
# Data loading
def load_data(file_path):
    """Load a CSV file and return a pandas DataFrame."""
    return pd.read_csv(file_path)

# Data Preprocessing
def preprocess_data(df):
    """Preprocess the DataFrame by selecting relevant features and handling missing values."""
    # Select relevant feature columns
    feature_cols = [
        'acc_X', 'acc_Y', 'acc_Z',
        'mag_X', 'mag_Y', 'mag_Z',
        'gyro_X', 'gyro_Y', 'gyro_Z'
    ]
    selected_data = df[feature_cols]

    # Handle missing values (fill with zeros)
    selected_data = selected_data.fillna(0)

    # Extract the target label column
    labels = df['activity']

    return selected_data, labels


In [3]:
# Data Normalization
def normalize_data(data):
    """Normalize the data using MinMaxScaler."""
    scaler = MinMaxScaler()
    normalized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
    return normalized_data

# Feature extraction
def extract_features(window):
    """Extract statistical features from a window of data."""
    features = {}
    for col in window.columns:
        if len(window[col].unique()) > 1:  # Avoid constant columns
            features[f'{col}_mean'] = window[col].mean()
            features[f'{col}_std'] = window[col].std()
            features[f'{col}_min'] = window[col].min()
            features[f'{col}_max'] = window[col].max()
        else:
            # For constant columns
            features[f'{col}_mean'] = window[col].mean()
            features[f'{col}_std'] = 0
            features[f'{col}_min'] = window[col].min()
            features[f'{col}_max'] = window[col].max()
    return features

In [4]:
def create_feature_dataset(data, labels, window_size=50, step_size=25):
    """Create a dataset of features and corresponding labels from sliding windows."""
    X, y = [], []
    for start in range(0, len(data) - window_size, step_size):
        end = start + window_size
        window = data.iloc[start:end]
        label_window = labels.iloc[start:end]

        # Extract features from the window
        features = extract_features(window)
        X.append(features)

        # Assign the most frequent label in the window as the target label
        label = label_window.mode().iloc[0]  # Most frequent label
        y.append(label)

    # Convert to DataFrame and Series
    X = pd.DataFrame(X)
    y = pd.Series(y)

    return X, y

In [5]:
# Model training with AdaBoost
def train_model(X, y):
    """Train an AdaBoost model."""
    # Encode categorical labels into numerical format
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    # Initialize and train the AdaBoost model
    clf = AdaBoostClassifier(n_estimators=100, random_state=42)  # Use AdaBoostClassifier
    clf.fit(X_train, y_train)

    # Evaluate the model
    y_pred = clf.predict(X_test)
    print("\nClassification Report:\n")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

    return clf, label_encoder

In [6]:
# Balancing Data using SMOTE
def balance_data(X, y):
    """Balance the dataset using SMOTE."""
    smote = SMOTE(random_state=42)
    X_balanced, y_balanced = smote.fit_resample(X, y)
    return X_balanced, y_balanced

In [7]:
# Save the model
def save_model(model, label_encoder, output_path):
    """Save the trained model and label encoder to files."""
    joblib.dump(model, output_path)
    joblib.dump(label_encoder, output_path.replace('.pkl', '_label_encoder.pkl'))
    print(f"Model saved as '{output_path}'")
    print(f"Label encoder saved as '{output_path.replace('.pkl', '_label_encoder.pkl')}'")


In [8]:
# Main workflow
def main(file_path):
    # Load and preprocess data
    print("Loading data...")
    df = load_data(file_path)
    print("Data loaded successfully.")

    # Display unique activity labels
    unique_activities = df['activity'].unique()
    print("Unique activity labels:", unique_activities)

    data, labels = preprocess_data(df)
    print("Data preprocessed successfully.")

    # Normalize data
    print("Normalizing data...")
    data_normalized = normalize_data(data)
    print("Data normalized successfully.")

    # Balance data
    print("Balancing data using SMOTE...")
    X_balanced, y_balanced = balance_data(data_normalized, labels)
    print("Data balanced successfully.")

    # Extract features and labels
    print("Extracting features...")
    X, y = create_feature_dataset(X_balanced, y_balanced)
    print("Feature extraction completed.")

    # Train the model
    print("Training the model...")
    model_adaboost, label_encoder = train_model(X, y)

    # Save the model
    save_model(model_adaboost, label_encoder, 'movement_detection_adaboost_model.pkl')

    return model_adaboost, label_encoder



In [9]:
# Run the main workflow
main('/content/dataset_tot1.csv')

Loading data...
Data loaded successfully.
Unique activity labels: ['downstairs' 'running' 'standing' 'upstairs' 'walking']
Data preprocessed successfully.
Normalizing data...
Data normalized successfully.
Balancing data using SMOTE...
Data balanced successfully.
Extracting features...
Feature extraction completed.
Training the model...

Classification Report:

              precision    recall  f1-score   support

  downstairs       0.93      0.85      0.89      3785
     running       0.95      0.89      0.92      3614
    standing       0.95      0.94      0.94      3722
    upstairs       0.92      0.82      0.87      3677
     walking       0.71      0.90      0.79      3780

    accuracy                           0.88     18578
   macro avg       0.89      0.88      0.88     18578
weighted avg       0.89      0.88      0.88     18578

Model saved as 'movement_detection_adaboost_model.pkl'
Label encoder saved as 'movement_detection_adaboost_model_label_encoder.pkl'


(AdaBoostClassifier(n_estimators=100, random_state=42), LabelEncoder())