In [1]:
import pandas as pd
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Load the dataset
data = pd.read_csv('../data/final_data.csv')
data.head()

Unnamed: 0,activity_id,heart_rate,IMU_hand_temp,IMU_hand_acc_x_sc16,IMU_hand_acc_y_sc16,IMU_hand_acc_z_sc16,IMU_hand_gyro_x,IMU_hand_gyro_y,IMU_hand_gyro_z,IMU_hand_mag_x,...,IMU_ankle_temp,IMU_ankle_acc_x_sc16,IMU_ankle_acc_y_sc16,IMU_ankle_acc_z_sc16,IMU_ankle_gyro_x,IMU_ankle_gyro_y,IMU_ankle_gyro_z,IMU_ankle_mag_x,IMU_ankle_mag_y,IMU_ankle_mag_z
0,0,-0.277411,-1.324439,1.148824,0.680922,0.501724,-0.004677,-0.002266,-0.00498,-0.513054,...,-2.711549,0.050575,-0.218933,0.682796,-0.006258,0.013808,-0.002437,-1.608949,-1.764234,-3.837308
1,0,-0.277411,-1.324439,1.16111,0.592976,0.541436,-0.129991,-0.01492,-0.003975,-0.48571,...,-2.711549,0.044306,-0.223684,0.653335,0.009692,0.058704,-0.000324,-1.597148,-1.739155,-3.837105
2,0,-0.277411,-1.324439,1.16094,0.516025,0.561338,-0.179927,-0.029838,0.00251,-0.507881,...,-2.711549,0.044108,-0.228549,0.643587,-0.040245,-0.024899,-0.005733,-1.56712,-1.714462,-3.849604
3,0,-0.277411,-1.324439,1.149257,0.516206,0.580514,-0.145904,-0.02164,0.010356,-0.471262,...,-2.711549,0.03911,-0.218872,0.6826,-0.037691,0.027704,0.010081,-1.590226,-1.775264,-3.812459
4,0,-0.277411,-1.324439,1.162568,0.532876,0.629362,-0.053622,-0.060732,0.004856,-0.503634,...,-2.711549,0.056278,-0.22387,0.682953,-0.007639,-0.019348,-0.006452,-1.559669,-1.776173,-3.812702


## Logistic Regression

In [3]:
features = data.iloc[:, 1:]  # All columns except the target 'activityID'
target = data['activity_id']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42, stratify=target)

In [4]:
# Initialize and train the logistic regression model
log_reg = LogisticRegression(max_iter=1000, solver='lbfgs')
log_reg.fit(X_train, y_train)

# Make predictions
y_pred = log_reg.predict(X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Accuracy Score:")
print(accuracy_score(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.96      0.97     38505
           1       0.90      0.89      0.89     37038
           2       0.84      0.83      0.83     37986
           3       0.79      0.88      0.83     47752
           4       0.75      0.74      0.75     19640
           5       0.93      0.93      0.93     32920
           6       0.75      0.76      0.76     37621
           7       0.62      0.55      0.58     23443
           8       0.60      0.50      0.54     20989
           9       0.79      0.82      0.80     35071
          10       0.88      0.91      0.89     47738
          11       0.78      0.66      0.72      9872

    accuracy                           0.82    388575
   macro avg       0.80      0.79      0.79    388575
weighted avg       0.82      0.82      0.82    388575

Accuracy Score:
0.8213420832529112


In [20]:
# Perform Grid Search with CV for logisic regression

# Define hyperparameter grid
param_grid = {
    'C': [0.1, 1, 10],                    # Regularization strength
    'penalty': [None, 'l2'],            # Regularization type
    'solver': ['lbfgs'],                  # Solver supporting l2 and none penalties
    'max_iter': [1000],                   # Maximum iterations
    'class_weight': [None, 'balanced']    # Adjust weights for imbalanced classes
}

# Initialize Logistic Regression model
log_reg = LogisticRegression()

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train, y_train)

# Display the best parameters and corresponding accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Evaluate on test data
y_pred = grid_search.best_estimator_.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

# Detailed evaluation report
print("Classification Report:\n", classification_report(y_test, y_pred))

Fitting 5 folds for each of 12 candidates, totalling 60 fits




Best Parameters: {'C': 10, 'class_weight': None, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}
Best Cross-Validation Accuracy: 0.8422911672080374
Test Accuracy: 0.8419920629387783
Classification Report:
                    precision    recall  f1-score   support

   Nordic walking       0.78      0.78      0.78     37621
 ascending stairs       0.62      0.57      0.59     23443
          cycling       0.94      0.94      0.94     32920
descending stairs       0.62      0.51      0.56     20989
          ironing       0.90      0.93      0.91     47738
            lying       0.98      0.96      0.97     38505
     rope jumping       0.79      0.69      0.73      8594
          running       0.79      0.80      0.80     19640
          sitting       0.90      0.91      0.91     37038
         standing       0.87      0.88      0.87     37986
  vacuum cleaning       0.83      0.83      0.83     35071
          walking       0.81      0.90      0.85     47752

         accuracy  