# Features EDA

In [None]:
import pandas as pd
import seaborn as sb

from sklearn.decomposition import PCA
import matplotlib.pylab as plt

## Correlations

### Time domain

In [None]:
# Load the time-domain feature table and pick the statistical feature columns.
features = pd.read_csv('../../datasets/features_data/td_features_no_filter.csv')
td_columns = [
    'mean', 'std', 'skew', 'kurt', 'rms',
    'pp', 'crest', 'margin', 'impulse', 'shape',
]

# Keep only the rows whose `axis` is 'ax' and whose `fault` is
# 'horizontal-misalignment', restricted to the feature columns.
is_ax = features['axis'] == 'ax'
is_horizontal_misalignment = features['fault'] == 'horizontal-misalignment'
rows = features.loc[is_ax & is_horizontal_misalignment, td_columns]

# Pairwise correlation between the selected features (shown as cell output).
rows.corr()

In [None]:
# Annotated heatmap of the feature-to-feature correlation matrix.
corr_matrix = rows.corr()
sb.heatmap(data=corr_matrix, annot=True)

In [None]:
# Horizontal bar chart of the per-feature variance of the selected rows.
# The axis labels are set on the returned Axes instead of through the
# `xlabel`/`ylabel` plot arguments: for `kind='barh'` some older pandas
# releases applied those arguments to the wrong axis, whereas setting them
# explicitly is correct regardless of the installed version.
ax = rows.var().plot(kind='barh')
ax.set_xlabel('Variance')  # bar length
ax.set_ylabel('Feature')   # one bar per feature
plt.show()

In [None]:
# TODO: pairplot with color for fault
# Scatter of `kurt` against `margin`, one colour per fault class.
fig, ax = plt.subplots(figsize=(6, 6))

# Fixed colour per known fault label; anything else falls back to brown below.
colors = dict([
    ('horizontal-misalignment', 'tab:blue'),
    ('imbalance', 'tab:orange'),
    ('vertical-misalignment', 'tab:purple'),
    ('normal', 'tab:green'),
])

# Draw every fault group onto the same Axes so the classes can be compared.
for key, group in features.groupby('fault'):
    group.plot.scatter(
        x='kurt',
        y='margin',
        ax=ax,
        label=key,
        color=colors.get(key, 'tab:brown'),
    )
plt.show()

In [None]:
# 3D scatter of `kurt` x `margin` x `rpm`, one colour per fault class.
ax = plt.figure().add_subplot(projection='3d')

# Fixed colour per known fault label; anything else falls back to brown below.
colors = {
    'horizontal-misalignment': 'tab:blue',
    'imbalance': 'tab:orange',
    'vertical-misalignment': 'tab:purple',
    'normal': 'tab:green',
}

for key, group in features.groupby('fault'):
    ax.scatter(
        group['kurt'],
        group['margin'],
        group['rpm'],
        label=key,
        color=colors.get(key, 'tab:brown'),
    )

# Label all three axes on the 3D Axes itself; the z-axis was previously
# left unlabelled.
ax.set_xlabel('Kurtosis')
ax.set_ylabel('Margin')
ax.set_zlabel('RPM')
# The per-group `label=` values are only shown once a legend is created.
ax.legend()
plt.show()

#### PCA on time domain features

In [None]:
# Project the time-domain feature columns of every row onto two principal
# components and plot the result.
# NOTE(review): the columns are passed to PCA without any rescaling, so
# features with larger variance will dominate the components — confirm this
# is intended.
matrix = features[td_columns].to_numpy()

pca = PCA(n_components=2)
result = pca.fit_transform(matrix)

# Share of variance captured by each component, then the component loadings.
print(pca.explained_variance_ratio_)
# print(pca.singular_values_)
print(pca.components_)

# First component on x, second on y; small markers keep dense areas readable.
plt.scatter(result[:, 0], result[:, 1], s=1)
plt.show()

In [None]:
# Load the WPD feature table and collect its feature columns, i.e. every
# column that is not one of the listed metadata fields.
wp_features = pd.read_csv('../../datasets/features_data/wpd_features_no_filter.csv')
metadata_columns = {'fault', 'severity', 'seq', 'rpm', 'axis', 'feature'}
columns = [name for name in wp_features.columns if name not in metadata_columns]

# NOTE(review): the plot below still reads the time-domain `features` frame
# ('kurt' vs 'margin') and the `colors` mapping defined in an earlier cell;
# `wp_features` and `columns` are not used here. This looks like an
# unfinished copy of the earlier scatter cell — confirm what should be drawn.
fig, ax = plt.subplots(figsize=(6, 6))

for key, group in features.groupby('fault'):
    group.plot.scatter(
        x='kurt',
        y='margin',
        ax=ax,
        label=key,
        color=colors.get(key, 'tab:brown'),
    )
plt.show()