In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
from sklearn.svm import SVC
import mlflow
import numpy as np

In [4]:
df = pd.read_csv('data/HAR.csv')

In [5]:
def filter_columns(data):
    # Exclude the label column and use all other columns as inputs
    X = data.drop(columns=['Activity', 'subject']).values  # Drops the 'type' column
    return X

# Assuming df is your DataFrame
X = filter_columns(df)
Y = df['Activity'].values  # Extract the label column

# Check the number of rows
print(f"Number of rows: {len(X)}")


Number of rows: 10299


In [6]:
print(np.unique(Y, return_counts=True))

(array(['LAYING', 'SITTING', 'STANDING', 'WALKING', 'WALKING_DOWNSTAIRS',
       'WALKING_UPSTAIRS'], dtype=object), array([1944, 1777, 1906, 1722, 1406, 1544], dtype=int64))


In [7]:
#Split the data and keep 20% back for testing later
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.20)
print("Train length", len(X_train))
print("Test length", len(X_test))

Train length 8239
Test length 2060


In [11]:
models = [
    (
        "Random Forest",
        RandomForestClassifier(n_estimators=100, random_state=42),
        (X_train, Y_train),
        (X_test, Y_test)
    ),
    (
        "Logistic Regression",
        LogisticRegression(),
        (X_train, Y_train),
        (X_test, Y_test)
    ),
    (
        "SVM",
        SVC(kernel='rbf', C=1.0, gamma='scale', random_state=42),
        (X_train, Y_train),
        (X_test, Y_test)
    )
]


In [12]:
reports = []

for model_name, model, train_set, test_set in models:
    X_train = train_set[0]
    Y_train = train_set[1]
    X_test = test_set[0]
    Y_test = test_set[1]

    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    report = classification_report(Y_test, y_pred, output_dict=True)
    reports.append(report)
    print(reports)

[{'LAYING': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 395}, 'SITTING': {'precision': 0.9787234042553191, 'recall': 0.9633507853403142, 'f1-score': 0.970976253298153, 'support': 382}, 'STANDING': {'precision': 0.9624664879356568, 'recall': 0.9782016348773842, 'f1-score': 0.9702702702702701, 'support': 367}, 'WALKING': {'precision': 0.987460815047022, 'recall': 0.987460815047022, 'f1-score': 0.987460815047022, 'support': 319}, 'WALKING_DOWNSTAIRS': {'precision': 0.9821428571428571, 'recall': 0.9615384615384616, 'f1-score': 0.9717314487632509, 'support': 286}, 'WALKING_UPSTAIRS': {'precision': 0.9684542586750788, 'recall': 0.9871382636655949, 'f1-score': 0.9777070063694268, 'support': 311}, 'accuracy': 0.9800970873786408, 'macro avg': {'precision': 0.979874637175989, 'recall': 0.9796149934114627, 'f1-score': 0.979690965624687, 'support': 2060}, 'weighted avg': {'precision': 0.9801843073246236, 'recall': 0.9800970873786408, 'f1-score': 0.9800894132409054, 'support': 206

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


[{'LAYING': {'precision': 1.0, 'recall': 1.0, 'f1-score': 1.0, 'support': 395}, 'SITTING': {'precision': 0.9787234042553191, 'recall': 0.9633507853403142, 'f1-score': 0.970976253298153, 'support': 382}, 'STANDING': {'precision': 0.9624664879356568, 'recall': 0.9782016348773842, 'f1-score': 0.9702702702702701, 'support': 367}, 'WALKING': {'precision': 0.987460815047022, 'recall': 0.987460815047022, 'f1-score': 0.987460815047022, 'support': 319}, 'WALKING_DOWNSTAIRS': {'precision': 0.9821428571428571, 'recall': 0.9615384615384616, 'f1-score': 0.9717314487632509, 'support': 286}, 'WALKING_UPSTAIRS': {'precision': 0.9684542586750788, 'recall': 0.9871382636655949, 'f1-score': 0.9777070063694268, 'support': 311}, 'accuracy': 0.9800970873786408, 'macro avg': {'precision': 0.979874637175989, 'recall': 0.9796149934114627, 'f1-score': 0.979690965624687, 'support': 2060}, 'weighted avg': {'precision': 0.9801843073246236, 'recall': 0.9800970873786408, 'f1-score': 0.9800894132409054, 'support': 206