In [None]:
!unzip /content/sample_data/homework.zip

In [3]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

def load_data_from_directory(directory):
    data_frames = []
    for activity in ['idle', 'running', 'stairs', 'walking']:
        activity_path = os.path.join(directory, activity)
        for file in os.listdir(activity_path):
            if file.endswith('.csv'):
                file_path = os.path.join(activity_path, file)
                df = pd.read_csv(file_path)
                df['Activity'] = activity
                data_frames.append(df)
    return pd.concat(data_frames, ignore_index=True)

path = '/content/data'
data = load_data_from_directory(path)

data['mean_x'] = data['accelerometer_X'].rolling(window=50).mean()
data['mean_y'] = data['accelerometer_Y'].rolling(window=50).mean()
data['mean_z'] = data['accelerometer_Z'].rolling(window=50).mean()

feature_sets = {
    "Base features": ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z'],
    "Base + Rolling Means": ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z', 'mean_x', 'mean_y', 'mean_z']
}

results = {}

for feature_name, feature_columns in feature_sets.items():
    print(f"\n=== {feature_name} ===")
    features = data[feature_columns].dropna()
    target = data.loc[features.index, 'Activity']

    X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    svm = SVC(kernel='linear')
    svm.fit(X_train, y_train)
    y_pred_svm = svm.predict(X_test)
    print("\nSVM Classification Report:")
    print(classification_report(y_test, y_pred_svm))

    rf = RandomForestClassifier(n_estimators=100)
    rf.fit(X_train, y_train)
    y_pred_rf = rf.predict(X_test)
    print("\nRandom Forest Classification Report:")
    print(classification_report(y_test, y_pred_rf))

    results[feature_name] = {
        "SVM": classification_report(y_test, y_pred_svm, output_dict=True),
        "Random Forest": classification_report(y_test, y_pred_rf, output_dict=True)
    }

print("\n=== Порівняння моделей ===")
for feature_name, result in results.items():
    print(f"\n=== {feature_name} ===")
    print(f"SVM Accuracy: {result['SVM']['accuracy']:.2f}")
    print(f"Random Forest Accuracy: {result['Random Forest']['accuracy']:.2f}")



=== Base features ===

SVM Classification Report:


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

        idle       0.81      0.78      0.80      6203
     running       0.86      0.85      0.85     20485
      stairs       0.00      0.00      0.00      1022
     walking       0.77      0.86      0.81     11062

    accuracy                           0.82     38772
   macro avg       0.61      0.62      0.62     38772
weighted avg       0.80      0.82      0.81     38772


Random Forest Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      6203
     running       1.00      1.00      1.00     20485
      stairs       1.00      0.99      0.99      1022
     walking       1.00      1.00      1.00     11062

    accuracy                           1.00     38772
   macro avg       1.00      1.00      1.00     38772
weighted avg       1.00      1.00      1.00     38772



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))



=== Base + Rolling Means ===

SVM Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      6191
     running       1.00      1.00      1.00     20426
      stairs       0.81      0.64      0.72       981
     walking       0.97      0.99      0.98     11165

    accuracy                           0.99     38763
   macro avg       0.95      0.91      0.92     38763
weighted avg       0.99      0.99      0.99     38763


Random Forest Classification Report:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00      6191
     running       1.00      1.00      1.00     20426
      stairs       0.88      0.74      0.80       981
     walking       0.98      0.99      0.98     11165

    accuracy                           0.99     38763
   macro avg       0.96      0.93      0.95     38763
weighted avg       0.99      0.99      0.99     38763


=== Порівняння моделей ===

=== B

In [4]:
print(data.columns)

Index(['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z', 'Activity',
       'mean_x', 'mean_y', 'mean_z'],
      dtype='object')
