<a href="https://colab.research.google.com/github/aizatulin69/goit-ds-hw-08/blob/main/Hw8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score, balanced_accuracy_score,
                             matthews_corrcoef)
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import zipfile
import os
import numpy as np

In [19]:
print(os.path.exists("data.zip"))
print(zipfile.is_zipfile("data.zip"))

def compute_features(df):
    features = pd.DataFrame()
    for col in df.select_dtypes(include=np.number).columns:
        s = df[col]
        features[col + "_mean"] = [s.mean()]
        features[col + "_std"] = [s.std()]
        features[col + "_var"] = [s.var()]
        features[col + "_median"] = [s.median()]
        features[col + "_range"] = [s.max() - s.min()]
        features[col + "_max"] = [s.max()]
        features[col + "_min"] = [s.min()]
        features[col + "_rms"] = [np.sqrt(np.mean(s**2))]
        features[col + "_abs_mean_dev"] = [np.mean(np.abs(s - s.mean()))]
        features[col + "_energy"] = [(s**2).sum()]
        ps = np.abs(s)/np.sum(np.abs(s)) if np.sum(np.abs(s)) != 0 else np.array([1])
        features[col + "_entropy"] = [-np.sum(ps * np.log2(ps + 1e-9))]
        features[col + "_skew"] = [s.skew()]
        features[col + "_kurtosis"] = [s.kurtosis()]
        features[col + "_iqr"] = [s.quantile(0.75) - s.quantile(0.25)]
    return features

def df_from_zip_folder(folder_name, zip_path="data.zip"):
    dfs = []
    with zipfile.ZipFile(zip_path) as z:
        for name in z.namelist():
            if name.startswith(folder_name + "/") and name.endswith(".csv"):
                with z.open(name) as f:
                    df = pd.read_csv(f)
                    features = compute_features(df)
                    dfs.append(features)
    return pd.concat(dfs, ignore_index=True)

df_idle = df_from_zip_folder("idle")
df_running = df_from_zip_folder("running")
df_stairs = df_from_zip_folder("stairs")
df_walking = df_from_zip_folder("walking")



True
True


In [57]:
df_idle['label'] = 'idle'
df_running['label'] = 'running'
df_stairs['label'] = 'stairs'
df_walking['label'] = 'walking'
df_all = pd.concat([df_idle, df_running, df_stairs, df_walking], ignore_index=True)
X = df_all.drop(columns=['label'])
y = df_all['label']
mean_cols = [c for c in X.columns if c.endswith('_mean')]
scaler = StandardScaler()
X[mean_cols] = scaler.fit_transform(X[mean_cols])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [58]:
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale')
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)

In [59]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)

In [79]:
print("SVM Accuracy:          ", accuracy_score(y_test, y_pred_svm))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))
print("\nSVM F1:          ", f1_score(y_test, y_pred_svm, average='weighted'))
print("Random Forest F1:", f1_score(y_test, y_pred_rf, average='weighted'))
print("\nSVM precision:          ", precision_score(y_test, y_pred_svm, average='weighted'))
print("Random Forest precision:", precision_score(y_test, y_pred_rf, average='weighted'))
print("\nSVM balanced:          ", balanced_accuracy_score(y_test, y_pred_svm))
print("Random Forest balanced:", balanced_accuracy_score(y_test, y_pred_rf))
print("\nSVM mcc          ", matthews_corrcoef(y_test, y_pred_svm))
print("random forest mcc", matthews_corrcoef(y_test, y_pred_rf))
print("\nSVM recall          ", recall_score(y_test, y_pred_rf, average='weighted'))
print("Random forest recall", recall_score(y_test, y_pred_rf, average='weighted'))


SVM Accuracy:           0.9574632637277649
Random Forest Accuracy: 0.9992266047950503

SVM F1:           0.9456789113859556
Random Forest F1: 0.9992211774602787

SVM precision:           0.9353760040156219
Random Forest precision: 0.9992286894182442

SVM balanced:           0.7400808433066498
Random Forest balanced: 0.9924242424242424

SVM mcc           0.9314553664112425
random forest mcc 0.998739731246133

SVM recall           0.9992266047950503
Random forest recall 0.9992266047950503


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
