In [1]:
import pandas as pd
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

In [2]:
# Load the dataset
data = pd.read_csv('../data/final_data.csv')
data.head()

Unnamed: 0,activityID,heart_rate,hand temperature (°C),hand acceleration X ±16g,hand acceleration Y ±16g,hand acceleration Z ±16g,hand gyroscope X,hand gyroscope Y,hand gyroscope Z,hand magnetometer X,...,ankle acceleration X ±16g,ankle acceleration Y ±16g,ankle acceleration Z ±16g,ankle gyroscope X,ankle gyroscope Y,ankle gyroscope Z,ankle magnetometer X,ankle magnetometer Y,ankle magnetometer Z,PeopleId
0,transient activities,104.0,30.0,2.37223,8.60074,3.51048,-0.092217,0.056812,-0.015845,14.6806,...,9.65918,-1.65569,-0.099797,0.0083,0.00925,-0.01758,-61.1888,-38.9599,-58.1438,1
1,transient activities,104.0,30.0,2.18837,8.5656,3.66179,-0.024413,0.047759,0.006474,14.8991,...,9.6937,-1.57902,-0.215687,-0.006577,-0.004638,0.000368,-59.8479,-38.8919,-58.5253,1
2,transient activities,104.0,30.0,2.37357,8.60107,3.54898,-0.057976,0.032574,-0.006988,14.242,...,9.58944,-1.73276,0.092914,0.003014,0.000148,0.022495,-60.7361,-39.4138,-58.3999,1
3,transient activities,104.0,30.0,2.07473,8.52853,3.66021,-0.002352,0.03281,-0.003747,14.8908,...,9.58814,-1.7704,0.054545,0.003175,-0.020301,0.011275,-60.4091,-38.7635,-58.3956,1
4,transient activities,104.0,30.0,2.22936,8.83122,3.7,0.012269,0.018305,-0.053325,15.5612,...,9.69771,-1.65625,-0.060809,0.012698,-0.014303,-0.002823,-61.5199,-39.3879,-58.2694,1


## Logistic Regression

In [None]:
features = data.iloc[:, 1:]  # All columns except the target 'activityID'
target = data['activity_id']

# Standardize the features
scaler = StandardScaler()
data_scaled = scaler.fit_transform(features)

In [18]:

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data_scaled, target, test_size=0.2, random_state=42, stratify=target)

In [19]:
# Initialize and train the logistic regression model
log_reg = LogisticRegression(max_iter=1000, solver='lbfgs')
log_reg.fit(X_train, y_train)

# Make predictions
y_pred = log_reg.predict(X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred))

print("Accuracy Score:")
print(accuracy_score(y_test, y_pred))

Classification Report:
                   precision    recall  f1-score   support

   Nordic walking       0.78      0.78      0.78     37621
 ascending stairs       0.62      0.57      0.59     23443
          cycling       0.94      0.94      0.94     32920
descending stairs       0.62      0.51      0.56     20989
          ironing       0.90      0.93      0.91     47738
            lying       0.98      0.96      0.97     38505
     rope jumping       0.79      0.69      0.73      8594
          running       0.79      0.80      0.80     19640
          sitting       0.90      0.91      0.91     37038
         standing       0.87      0.88      0.87     37986
  vacuum cleaning       0.83      0.83      0.83     35071
          walking       0.81      0.90      0.85     47752

         accuracy                           0.84    387297
        macro avg       0.82      0.81      0.81    387297
     weighted avg       0.84      0.84      0.84    387297

Accuracy Score:
0.842033374903

In [20]:
# Perform Grid Search with CV for logisic regression

# Define hyperparameter grid
param_grid = {
    'C': [0.1, 1, 10],                    # Regularization strength
    'penalty': [None, 'l2'],            # Regularization type
    'solver': ['lbfgs'],                  # Solver supporting l2 and none penalties
    'max_iter': [1000],                   # Maximum iterations
    'class_weight': [None, 'balanced']    # Adjust weights for imbalanced classes
}

# Initialize Logistic Regression model
log_reg = LogisticRegression()

# Perform grid search with cross-validation
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=5, scoring='accuracy', verbose=1)
grid_search.fit(X_train, y_train)

# Display the best parameters and corresponding accuracy
print("Best Parameters:", grid_search.best_params_)
print("Best Cross-Validation Accuracy:", grid_search.best_score_)

# Evaluate on test data
y_pred = grid_search.best_estimator_.predict(X_test)
print("Test Accuracy:", accuracy_score(y_test, y_pred))

# Detailed evaluation report
print("Classification Report:\n", classification_report(y_test, y_pred))

Fitting 5 folds for each of 12 candidates, totalling 60 fits




Best Parameters: {'C': 10, 'class_weight': None, 'max_iter': 1000, 'penalty': 'l2', 'solver': 'lbfgs'}
Best Cross-Validation Accuracy: 0.8422911672080374
Test Accuracy: 0.8419920629387783
Classification Report:
                    precision    recall  f1-score   support

   Nordic walking       0.78      0.78      0.78     37621
 ascending stairs       0.62      0.57      0.59     23443
          cycling       0.94      0.94      0.94     32920
descending stairs       0.62      0.51      0.56     20989
          ironing       0.90      0.93      0.91     47738
            lying       0.98      0.96      0.97     38505
     rope jumping       0.79      0.69      0.73      8594
          running       0.79      0.80      0.80     19640
          sitting       0.90      0.91      0.91     37038
         standing       0.87      0.88      0.87     37986
  vacuum cleaning       0.83      0.83      0.83     35071
          walking       0.81      0.90      0.85     47752

         accuracy  