## Isolation Forest: valores normais

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest

# Load the displacement readings and keep only the rows of a single device.
# The explicit .copy() makes the filtered frame independent of the frame
# returned by read_csv, so the column assignments further down write to this
# frame directly and do not trigger pandas' SettingWithCopyWarning.
sensores_df = pd.read_csv('Base de deslocamento normal.csv', parse_dates=['timestamp'])
sensores_df = sensores_df.loc[sensores_df['device_id'] == 1].copy()

# Quick sanity check of the loaded data
print(sensores_df.head())

# Feature matrix: only the columns the model analyses
X = sensores_df[['x', 'y']]

# Fit the model with a contamination rate of 0.005 (the expected share of
# outliers, which sets the decision threshold) and store the predicted label:
# -1 marks an anomaly, 1 marks a normal reading.
# NOTE(review): the "X does not have valid feature names" warning recorded in
# this cell's output is presumably the spurious one some scikit-learn releases
# emit during fit when contamination is not 'auto' - confirm the installed
# version; upgrading should remove it.
iso_forest = IsolationForest(contamination=0.005, random_state=42)
sensores_df['anomaly'] = iso_forest.fit_predict(X)

# Anomaly score from decision_function: the LOWER the value, the MORE anomalous
# the reading; negative scores correspond to the rows labelled -1 above.
sensores_df['anomaly_score'] = iso_forest.decision_function(X)

            timestamp  device_id     x     y
0 2024-01-01 00:00:00          1  1.80  0.00
1 2024-01-01 00:01:00          1  0.08  1.68
2 2024-01-01 00:02:00          1  0.60  0.00
3 2024-01-01 00:03:00          1  0.00  0.00
4 2024-01-01 00:04:00          1  0.90  0.00



X does not have valid feature names, but IsolationForest was fitted with feature names



                timestamp  device_id     x     y  anomaly  anomaly_score
53    2024-01-01 00:53:00          1  1.99  0.16       -1      -0.004080
177   2024-01-01 02:57:00          1  1.81  1.96       -1      -0.007705
864   2024-01-01 14:24:00          1  1.91  1.93       -1      -0.023327
1021  2024-01-01 17:01:00          1  1.99  1.30       -1      -0.000897
1218  2024-01-01 20:18:00          1  1.83  1.92       -1      -0.009008
...                   ...        ...   ...   ...      ...            ...
41454 2024-01-29 18:54:00          1  1.78  2.00       -1      -0.016370
41637 2024-01-29 21:57:00          1  1.91  1.91       -1      -0.016771
42044 2024-01-30 04:44:00          1  1.90  1.93       -1      -0.021475
42633 2024-01-30 14:33:00          1  2.00  1.74       -1      -0.012288
42841 2024-01-30 18:01:00          1  2.00  1.81       -1      -0.016798

[216 rows x 6 columns]


In [None]:
import plotly.express as px
# Plotly colours a numeric column on a continuous scale; casting the labels to
# strings makes it use one discrete colour per label instead.
# Note: this overwrites the 'anomaly' column of sensores_df with strings.
sensores_df['anomaly'] = sensores_df['anomaly'].astype(str)

# One time-series scatter per displacement axis, coloured by anomaly label.
for axis_column in ("x", "y"):
    fig = px.scatter(
        sensores_df,
        x="timestamp",
        y=axis_column,
        color="anomaly",
        hover_data=['anomaly'],
    )
    fig.show()

## Isolation Forest: Simulação de movimento

In [None]:
import pandas as pd
from sklearn.ensemble import IsolationForest

# Load the displacement readings that include the simulated slide and keep
# only the rows of a single device.
# The explicit .copy() makes the filtered frame independent of the frame
# returned by read_csv, so the column assignments further down write to this
# frame directly and do not trigger pandas' SettingWithCopyWarning.
sensores_d_df = pd.read_csv('Base de deslocamento com deslizamento.csv', parse_dates=['timestamp'])
sensores_d_df = sensores_d_df.loc[sensores_d_df['device_id'] == 1].copy()

# Quick sanity check of the loaded data
print(sensores_d_df.head())

# Feature matrix: only the columns the model analyses
X = sensores_d_df[['x', 'y']]

# Fit the model with a contamination rate of 0.005 (the expected share of
# outliers, which sets the decision threshold) and store the predicted label:
# -1 marks an anomaly, 1 marks a normal reading.
# NOTE(review): the "X does not have valid feature names" warning recorded in
# this cell's output is presumably the spurious one some scikit-learn releases
# emit during fit when contamination is not 'auto' - confirm the installed
# version; upgrading should remove it.
iso_forest = IsolationForest(contamination=0.005, random_state=42)
sensores_d_df['anomaly'] = iso_forest.fit_predict(X)

# Anomaly score from decision_function: the LOWER the value, the MORE anomalous
# the reading; negative scores correspond to the rows labelled -1 above.
sensores_d_df['anomaly_score'] = iso_forest.decision_function(X)

            timestamp  device_id     x     y
0 2024-01-01 00:00:00          1  0.94  1.51
1 2024-01-01 00:01:00          1  1.10  1.47
2 2024-01-01 00:02:00          1  0.96  1.45
3 2024-01-01 00:03:00          1  0.99  1.53
4 2024-01-01 00:04:00          1  1.05  1.43



X does not have valid feature names, but IsolationForest was fitted with feature names



                timestamp  device_id         x         y  anomaly  \
14026 2024-01-10 17:46:00          1   1446.75   1446.75       -1   
14027 2024-01-10 17:47:00          1   1591.22   1591.22       -1   
14028 2024-01-10 17:48:00          1   1735.70   1735.70       -1   
14029 2024-01-10 17:49:00          1   1880.17   1880.17       -1   
14030 2024-01-10 17:50:00          1   2024.65   2024.65       -1   
...                   ...        ...       ...       ...      ...   
14237 2024-01-10 21:17:00          1  31930.96  31930.96       -1   
14238 2024-01-10 21:18:00          1  32075.43  32075.43       -1   
14239 2024-01-10 21:19:00          1  32219.91  32219.91       -1   
14240 2024-01-10 21:20:00          1  32364.38  32364.38       -1   
14241 2024-01-10 21:21:00          1  32508.86  32508.86       -1   

       anomaly_score  
14026      -0.001381  
14027      -0.001381  
14028      -0.001381  
14029      -0.001381  
14030      -0.004164  
...              ...  
14237     

In [None]:
import plotly.express as px
# Plotly colours a numeric column on a continuous scale; casting the labels to
# strings makes it use one discrete colour per label instead.
# Note: this overwrites the 'anomaly' column of sensores_d_df with strings.
sensores_d_df['anomaly'] = sensores_d_df['anomaly'].astype(str)

# One time-series scatter per displacement axis, coloured by anomaly label.
for axis_column in ("x", "y"):
    fig = px.scatter(
        sensores_d_df,
        x="timestamp",
        y=axis_column,
        color="anomaly",
        hover_data=['anomaly'],
    )
    fig.show()