## Importando dados

In [1]:
import os
import pandas as pd

# Load every CSV found under <base_path>/<category>/<split>/ into `dataframes`,
# keyed by a generated name: adlNN / fallNN for Train files and
# testeadlNN / testefallNN for Test files (NN is a 1-based, zero-padded index).
base_path = 'DatasetDesafio/'

categories = ['ADL', 'Fall']
dataframes = {}

# Next sequence number to hand out for each (category, split) group.
adl_counter = 1
fall_counter = 1
testeadl_counter = 1
testefall_counter = 1

for category in categories:
    category_path = os.path.join(base_path, category)

    for split in ['Train', 'Test']:
        split_path = os.path.join(category_path, split)

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # file -> name assignment (and the insertion order of `dataframes`)
        # identical across runs and platforms.
        for file_name in sorted(os.listdir(split_path)):
            if not file_name.endswith('.csv'):
                continue

            file_path = os.path.join(split_path, file_name)
            # NOTE(review): read_csv treats the first row as a header, which is
            # then overwritten below. Confirm the files really carry a header
            # row; otherwise pass header=None so the first sample is not lost.
            df = pd.read_csv(file_path)
            # Assigning five labels requires exactly five columns per file.
            df.columns = ['X', 'Y', 'Z', 'W', 'V']

            if category == 'ADL' and split == 'Train':
                name = f'adl{adl_counter:02d}'
                adl_counter += 1
            elif category == 'Fall' and split == 'Train':
                name = f'fall{fall_counter:02d}'
                fall_counter += 1
            elif category == 'ADL' and split == 'Test':
                name = f'testeadl{testeadl_counter:02d}'
                testeadl_counter += 1
            elif category == 'Fall' and split == 'Test':
                name = f'testefall{testefall_counter:02d}'
                testefall_counter += 1

            dataframes[name] = df

## Extraindo features

In [2]:
import numpy as np

def extract_statistical_features(df, axis_labels=('X', 'Y', 'Z', 'W', 'V')):
    """Compute per-axis summary statistics for one recording.

    Parameters
    ----------
    df : pandas.DataFrame (or any mapping of label -> 1-D numeric data that
        supports ``** 2``)
        The recording; must contain every label listed in ``axis_labels``.
    axis_labels : iterable of str, optional
        Columns to summarise. Defaults to the five columns X, Y, Z, W, V.

    Returns
    -------
    dict
        Maps ``'<axis>_<stat>'`` to a scalar, with the stats emitted in this
        order for each axis: mean, median, std, var, min, max, range, energy,
        sum, mav, skewness, kurtosis, rms.

    Raises
    ------
    KeyError
        If a label in ``axis_labels`` is missing from ``df``.

    Notes
    -----
    ``np.std`` / ``np.var`` are called with NumPy's default ``ddof=0``, while
    skewness and kurtosis come from pandas' ``Series.skew`` /
    ``Series.kurtosis``.
    """
    features = {}

    for axis in axis_labels:
        signal = df[axis]
        # skew()/kurtosis() are pandas methods, so wrap once for both calls.
        as_series = pd.Series(signal)

        # Values reused by more than one feature are computed a single time.
        lowest = np.min(signal)
        highest = np.max(signal)
        squared = signal**2

        features[f'{axis}_mean'] = np.mean(signal)            # Mean
        features[f'{axis}_median'] = np.median(signal)        # Median
        features[f'{axis}_std'] = np.std(signal)              # Standard deviation
        features[f'{axis}_var'] = np.var(signal)              # Variance
        features[f'{axis}_min'] = lowest                      # Minimum
        features[f'{axis}_max'] = highest                     # Maximum
        features[f'{axis}_range'] = highest - lowest          # Range (max - min)
        features[f'{axis}_energy'] = np.sum(squared)          # Energy (sum of squares)
        features[f'{axis}_sum'] = np.sum(signal)              # Total sum of the samples
        features[f'{axis}_mav'] = np.mean(np.abs(signal))     # Mean absolute value (MAV)
        features[f'{axis}_skewness'] = as_series.skew()       # Skewness
        features[f'{axis}_kurtosis'] = as_series.kurtosis()   # Kurtosis
        features[f'{axis}_rms'] = np.sqrt(np.mean(squared))   # Root mean square (RMS)

    return features
    
# Feature dictionary of every loaded recording, keyed by the recording's name.
all_features = {}

for name, df in dataframes.items():
    all_features[name] = features = extract_statistical_features(df)


# Rows and labels for the training set and for the held-out test set.
features_list, labels = [], []
test_features_list, test_labels = [], []

for name, features in all_features.items():

    # Recordings named testeadl* / testefall* belong to the test split;
    # every other recording goes to the training split.
    if name.startswith(('testeadl', 'testefall')):
        target_rows, target_labels = test_features_list, test_labels
        adl_prefix, fall_prefix = 'testeadl', 'testefall'
    else:
        target_rows, target_labels = features_list, labels
        adl_prefix, fall_prefix = 'adl', 'fall'

    target_rows.append(features)

    # The class label is derived from the name prefix.
    if name.startswith(adl_prefix):
        target_labels.append('adl')
    elif name.startswith(fall_prefix):
        target_labels.append('fall')

# One row per recording, one column per '<axis>_<stat>' feature.
X_train = pd.DataFrame(features_list)
Y_train = pd.Series(labels, name='Label')

X_test = pd.DataFrame(test_features_list)
Y_test = pd.Series(test_labels, name='Label')


## Classificação

In [3]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report

# Train six classifiers on the extracted features and print, for each one,
# accuracy and a classification report on both the training and test sets.

# Fixed seed so the tree/boosting ensembles give the same metrics on every
# Restart & Run All.
RANDOM_STATE = 42

# NOTE(review): the features differ widely in scale by construction (e.g.
# '<axis>_energy' is a sum of squares, '<axis>_skewness' is dimensionless).
# LogisticRegression, SVC and KNeighborsClassifier are scale-sensitive, so
# consider wrapping them in a Pipeline with StandardScaler.
logistic_clf = LogisticRegression(max_iter=1000)
svm_clf = SVC()
rf_clf = RandomForestClassifier(random_state=RANDOM_STATE)
gb_clf = GradientBoostingClassifier(random_state=RANDOM_STATE)
knn_clf = KNeighborsClassifier()
ada_clf = AdaBoostClassifier(random_state=RANDOM_STATE)

for clf in (logistic_clf, svm_clf, rf_clf, gb_clf, knn_clf, ada_clf):
    clf.fit(X_train, Y_train)

y_train_pred_logistic = logistic_clf.predict(X_train)
y_test_pred_logistic = logistic_clf.predict(X_test)

y_train_pred_svm = svm_clf.predict(X_train)
y_test_pred_svm = svm_clf.predict(X_test)

y_train_pred_rf = rf_clf.predict(X_train)
y_test_pred_rf = rf_clf.predict(X_test)

y_train_pred_gb = gb_clf.predict(X_train)
y_test_pred_gb = gb_clf.predict(X_test)

y_train_pred_knn = knn_clf.predict(X_train)
y_test_pred_knn = knn_clf.predict(X_test)

y_train_pred_ada = ada_clf.predict(X_train)
y_test_pred_ada = ada_clf.predict(X_test)


def print_evaluation(title, y_train_pred, y_test_pred):
    """Print accuracy and classification report for one model.

    Parameters
    ----------
    title : str
        Heading printed before the metrics (printed verbatim).
    y_train_pred, y_test_pred : array-like
        Predictions for ``X_train`` and ``X_test``; they are compared against
        the notebook-level ``Y_train`` and ``Y_test`` respectively.
    """
    print(title)
    print("Treinamento")
    print("Accuracy:", accuracy_score(Y_train, y_train_pred))
    print("Classification Report:\n", classification_report(Y_train, y_train_pred))
    print("Teste")
    print("Accuracy:", accuracy_score(Y_test, y_test_pred))
    print("Classification Report:\n", classification_report(Y_test, y_test_pred))


# Every heading after the first starts with a newline to separate the sections.
for title, y_train_pred, y_test_pred in [
    ("Logistic Regression", y_train_pred_logistic, y_test_pred_logistic),
    ("\nSVM", y_train_pred_svm, y_test_pred_svm),
    ("\nRandom Forest", y_train_pred_rf, y_test_pred_rf),
    ("\nGradient Boosting", y_train_pred_gb, y_test_pred_gb),
    ("\nK-Nearest Neighbors", y_train_pred_knn, y_test_pred_knn),
    ("\nAdaBoost", y_train_pred_ada, y_test_pred_ada),
]:
    print_evaluation(title, y_train_pred, y_test_pred)

Logistic Regression
Treinamento
Accuracy: 0.8620689655172413
Classification Report:
               precision    recall  f1-score   support

         adl       0.96      0.79      0.87        34
        fall       0.77      0.96      0.85        24

    accuracy                           0.86        58
   macro avg       0.87      0.88      0.86        58
weighted avg       0.88      0.86      0.86        58

Teste
Accuracy: 0.8333333333333334
Classification Report:
               precision    recall  f1-score   support

         adl       0.83      0.83      0.83         6
        fall       0.83      0.83      0.83         6

    accuracy                           0.83        12
   macro avg       0.83      0.83      0.83        12
weighted avg       0.83      0.83      0.83        12


SVM
Treinamento
Accuracy: 0.8448275862068966
Classification Report:
               precision    recall  f1-score   support

         adl       0.96      0.76      0.85        34
        fall       0.74