# AIoT Project

In [1]:
import os

# basic data engineering
import pandas as pd
import numpy as np
import scipy

# plotting
import matplotlib.pyplot as plt
import seaborn as sns

# db
import pymongo

# configs & other
import yaml
from tqdm.notebook import tqdm_notebook
from datetime import datetime
from time import time

from psynlig import pca_explained_variance_bar

# utils processing
from utils import sliding_window_pd
from utils import apply_filter
from utils import filter_instances
from utils import flatten_instances_df
from utils import df_rebase
from utils import rename_df_column_values
from utils import clean_numeric_columns

# utils visualization
from utils_visual import plot_instance_time_domain
from utils_visual import plot_instance_3d
from utils_visual import plot_np_instance
from utils_visual import plot_heatmap
from utils_visual import plot_scatter_pca

%load_ext autoreload
%autoreload 2

Start time of execution

In [2]:
# Wall-clock reference taken at the start of the run; presumably compared with
# a later time() call to report the total execution time (no such call is
# visible in this part of the notebook).
time_start = time()

## Load configuration

In [3]:
# config.yml is resolved against the current working directory, i.e. the
# directory the notebook kernel was started from.
config_path = os.path.join(os.getcwd(), "config.yml")

# safe_load only builds plain YAML types (dict, list, str, numbers, ...) and
# never instantiates arbitrary Python objects, which is all a settings file
# needs; yaml.load(..., Loader=yaml.FullLoader) accepts more than that.
# The explicit encoding keeps parsing independent of the platform default.
with open(config_path, encoding="utf-8") as file:
    config = yaml.safe_load(file)

In [4]:
# Create the MongoDB client from the "client" entry of the loaded config
# (presumably a connection URI; confirm against config.yml).
client = pymongo.MongoClient(config["client"])

In [5]:
# Select the database and the collection named by the "db" and "col" entries
# of the loaded config.
db = client[config["db"]]
coll = db[config["col"]]

In [6]:
# Collect the distinct values of the "label" field across the collection; in
# the recorded output below these are the names of the uploaded CSV files.
found_keys = coll.distinct("label")
print("Existing DB keys:", found_keys)

Existing DB keys: ['clockwise_circle_01.csv', 'clockwise_circle_02.csv', 'clockwise_circle_03.csv', 'clockwise_circle_04.csv', 'clockwise_circle_05.csv', 'clockwise_circle_06.csv', 'clockwise_circle_07.csv', 'clockwise_circle_08.csv', 'clockwise_circle_09.csv', 'clockwise_circle_10.csv', 'counter_clockwise_circle_01.csv', 'counter_clockwise_circle_02.csv', 'counter_clockwise_circle_03.csv', 'counter_clockwise_circle_04.csv', 'counter_clockwise_circle_05.csv', 'counter_clockwise_circle_06.csv', 'counter_clockwise_circle_07.csv', 'counter_clockwise_circle_08.csv', 'counter_clockwise_circle_09.csv', 'counter_clockwise_circle_10.csv', 'down_01.csv', 'down_02.csv', 'down_03.csv', 'down_04.csv', 'down_05.csv', 'down_06.csv', 'down_07.csv', 'down_08.csv', 'down_09.csv', 'down_10.csv', 'left_01.csv', 'left_02.csv', 'left_03.csv', 'left_04.csv', 'left_05.csv', 'left_06.csv', 'left_07.csv', 'left_08.csv', 'left_09.csv', 'left_10.csv', 'right_01.csv', 'right_02.csv', 'right_03.csv', 'right_04.csv

## Apply filter

In [7]:
# Build one DataFrame per stored document from its "data" field and clean it.
# The cursor is consumed directly, so no intermediate list of raw frames is kept.
df_list = [clean_numeric_columns(pd.DataFrame(doc["data"])) for doc in coll.find()]

# Fail early with a clear message instead of an IndexError on df_list[0].
if not df_list:
    raise ValueError("No documents found in the collection; nothing to filter.")

print(f"Total DataFrames: {len(df_list)}")
# Preview only the first rows; printing a whole recording floods the output.
print(df_list[0].head())

# NOTE(review): the recorded run of this cell stopped with
# "ValueError: could not convert string to float: '1.067.805'"
# (DataFrame 60, row 225, column gyr_y). That value contains two decimal points
# and has to be repaired in the stored data or handled by clean_numeric_columns
# before this cell can complete.
# 0.08 is the critical frequency for a sampling rate of 100 Hz.
filtered_dataframes_list = filter_instances(df_list, 4, 0.08, "lowpass")




The value '1.067.805' was found in DataFrame 60, Row 225, Column 4 (gyr_y)


ValueError: could not convert string to float: '1.067.805'

## Transform the list of DataFrames to NumPy array

Transform the list of DataFrames into a NumPy array that contains the windows: (instances, x, y)

## Flatten the 2D window instances

Flatten the X NumPy array that contains the 2D window instances

## Train/Test split

In [17]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the samples for testing; the fixed seed makes the shuffled
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    shuffle=True,
    random_state=42,
)

## Scaling

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler

## Transform to 2D again

## Dimensionality Reduction with PCA using the 1D (flattened) data

In [None]:
# add transformers
from sklearn.decomposition import PCA

### PCA with 2 Components

In [None]:
# Fit the PCA model of this section (presumably the 2-component one).
# NOTE(review): `pca2d` is not instantiated in the cells visible here, and
# `data` is presumably meant to be the scaled, flattened training matrix.
# Confirm both before running this cell.
pca2d.fit(data)

In [None]:
# Bar chart of the variance explained by each principal component. Uses the
# PCA object fitted in the previous cell; `pca_object` was a placeholder name
# that no visible cell defines.
pca_explained_variance_bar(pca2d, alpha=0.8)

In [None]:
# NOTE(review): called without arguments. plot_scatter_pca comes from
# utils_visual and its signature is not visible here; presumably it needs the
# PCA-transformed samples and their labels. Confirm and pass them.
plot_scatter_pca()

### PCA with 3 Components

### PCA with X% of the variance of the dataset, for training the statistical AI Models

## Classifier - Statistical Learning

### Apply simple classifier

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

### Evaluate simple classifier

In [None]:
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

In [None]:
from sklearn.metrics import classification_report

### Apply optimization with Grid Search and Cross-validation

In [None]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

### Evaluate optimized classifier

## Classifier - Neural Network

In [8]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Dense, Dropout, Flatten

In [None]:
# Shape of a single training instance; presumably used later as the input
# shape of the first network layer.
input_data_shape = X_train_2d[0].shape
print("Type of the input shape object:", type(input_data_shape))
# Display the stored shape as the cell output.
input_data_shape

In [None]:
# The number of distinct labels determines the number of output units.
y_np_array = np.array(y)
unique_labels = np.unique(y_np_array)
n_outputs = unique_labels.size
print("Number of outputs (classes) the model to predict:", n_outputs)

### Create the Neural Network (NN) Architecture and instantiate the model

In [None]:
# Start from an empty Sequential container; layers are appended in order.
model = Sequential()

# Placeholder: the bare string below is a no-op expression marking where the
# hidden layers still have to be added (presumably built from the Conv1D,
# MaxPooling1D, Dropout and Flatten layers imported above).
"""
BUILD YOUR MODEL ARCHITECTURE HERE
"""

# Output layer: one softmax unit per class (n_outputs).
model.add(Dense(n_outputs, activation="softmax"))

Plot the architecture of the TensorFlow model

Plot the summary of the TensorFlow model

### Build the NN model

In [None]:
# Track accuracy during training and evaluation, as needed for the
# loss/accuracy learning curves at the end of the notebook; "ADD METRIC" was a
# placeholder and is not a valid Keras metric name.
# The sparse categorical cross-entropy loss expects integer class labels.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
from utils import encode_labels

### Train the NN model

### Evaluate the model on the test data

### Plot and interpret the learning curves: Loss and Accuracy based on the training and validation sets