In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
from pathlib import Path
import pandas as pd
data_directory = Path("/content/drive/MyDrive/Colab Notebooks/data")
file='/content/drive/MyDrive/Colab Notebooks/merged_dataset.csv'
try:
  merged_df = pd.read_csv(file)
except:
  merged_df = pd.DataFrame()
  for activity_folder in data_directory.iterdir():
      if activity_folder.is_dir():
          activity_name = activity_folder.name

          for file in activity_folder.iterdir():
              if file.is_file() and file.suffix == '.csv':
                  df = pd.read_csv(file)
                  df['activity'] = activity_name
                  merged_df = pd.concat([merged_df, df], ignore_index=True)
  output_path = "/content/drive/MyDrive/Colab Notebooks/merged_dataset.csv"
  merged_df.to_csv(output_path, index=False)

print(merged_df.head())
print(merged_df.shape)


   accelerometer_X  accelerometer_Y  accelerometer_Z activity
0         1.996764        -6.062118        -6.210559   stairs
1         3.438074        -7.139509         0.292093   stairs
2        12.650962         1.618480        10.716447   stairs
3         7.402872        -7.771579        -4.127603   stairs
4         2.920926        -8.571242        -7.881712   stairs
(193860, 4)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

df = merged_df.copy()
class_distribution = df['activity'].value_counts()
print(df['activity'].value_counts())
class_distribution.describe()


running    102240
walking     55500
idle        31170
stairs       4950
Name: activity, dtype: int64


count         4.000000
mean      48465.000000
std       41367.918729
min        4950.000000
25%       24615.000000
50%       43335.000000
75%       67185.000000
max      102240.000000
Name: activity, dtype: float64

In [63]:
from sklearn.utils import resample

min_samples = 2*df['activity'].value_counts().min()

running = df[df['activity'] == 'running']
walking = df[df['activity'] == 'walking']
idle = df[df['activity'] == 'idle']
stairs = df[df['activity'] == 'stairs']

running_downsampled = resample(running, replace=False, n_samples=min_samples, random_state=42)
walking_downsampled = resample(walking, replace=False, n_samples=min_samples, random_state=42)
idle_downsampled = resample(idle, replace=False, n_samples=min_samples, random_state=42)

df_downsampled = pd.concat([running_downsampled, walking_downsampled, idle_downsampled, stairs])
df_downsampled = df_downsampled.sample(frac=1, random_state=42).reset_index(drop=True)
df_downsampled['activity'].value_counts()

9900


idle       9900
running    9900
walking    9900
stairs     4950
Name: activity, dtype: int64

In [64]:
class_weights = dict(df_downsampled['activity'].value_counts().min() / df_downsampled['activity'].value_counts())
class_weights

{'idle': 0.5, 'running': 0.5, 'walking': 0.5, 'stairs': 1.0}

In [65]:
X_train, X_test, y_train, y_test = train_test_split(df_downsampled.drop('activity', axis=1), df_downsampled['activity'], test_size=0.2, random_state=42)
print(X_train.shape, len(y_train), X_test.shape, len(y_test))

(27720, 3) 27720 (6930, 3) 6930


In [69]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from scipy.stats import reciprocal, uniform

param_dist = {
    'C': reciprocal(0.1, 100),
    'gamma': reciprocal(0.01, 10),
}


svm_model = SVC(kernel='rbf', class_weight=class_weights)

random_search = RandomizedSearchCV(svm_model, param_distributions=param_dist, n_iter=10, cv=5, scoring='f1_weighted', n_jobs=-1)
random_search.fit(X_train, y_train)
print("Best Parameters: ", random_search.best_params_)
print("Best Score: ", random_search.best_score_)

y_pred = random_search.predict(X_test)
print(classification_report(y_test, y_pred))


Best Parameters:  {'C': 29.756541808209167, 'gamma': 7.338340647549535}
Best Score:  0.9616588792437242
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      1941
     running       0.93      0.99      0.96      1966
      stairs       1.00      0.99      0.99       981
     walking       0.99      0.94      0.96      2042

    accuracy                           0.98      6930
   macro avg       0.98      0.98      0.98      6930
weighted avg       0.98      0.98      0.98      6930



In [72]:
from sklearn.ensemble import RandomForestClassifier


rf_model = RandomForestClassifier(class_weight=class_weights)
rf_model.fit(X_train, y_train)

y_pred_rf = rf_model.predict(X_test)
print(classification_report(y_test, y_pred_rf))

              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      1941
     running       0.99      0.96      0.97      1966
      stairs       0.99      0.99      0.99       981
     walking       0.96      0.99      0.98      2042

    accuracy                           0.98      6930
   macro avg       0.98      0.98      0.98      6930
weighted avg       0.98      0.98      0.98      6930

