In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima.model import ARIMA
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt

# --- Data preparation --------------------------------------------------------
# Read the CSV, parse its 'timestamp' column into datetimes, and order the rows
# chronologically so the positional slicing below follows time order.
dataset_path = 'wsn_loc_dataset.csv'  # Replace with your actual file path
data = (
    pd.read_csv(dataset_path)
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .sort_values(by='timestamp')
)

# Univariate series to model: the raw 'feature1' values, in sorted row order.
time_series = data['feature1'].values

# Chronological hold-out: the first 80% of the observations are used for
# fitting, the remainder is kept back for evaluation.
train_size = int(len(time_series) * 0.8)
train = time_series[:train_size]
test = time_series[train_size:]

# --- ARIMA modelling ---------------------------------------------------------
# ARIMA with order (p=5, d=1, q=0), fitted on the training segment only.
# The order is an example value (p, d, q), not a tuned one.
model = ARIMA(train, order=(5, 1, 0))
model_fit = model.fit()

# Multi-step out-of-sample forecast spanning the whole test horizon: one
# predicted value per held-out observation.
forecast = model_fit.forecast(steps=len(test))

# Overlay the held-out observations and the forecast on the current axes.
for _series, _label in ((test, 'Actual'), (forecast, 'Forecast')):
    plt.plot(_series, label=_label)
plt.title('ARIMA Forecast')
plt.xlabel('Time')
plt.ylabel('Feature Value')
plt.legend()
plt.show()

# --- Residual-based anomaly detection ----------------------------------------
# Residual = actual test value minus forecasted value.  The forecast is coerced
# to a plain ndarray so the subtraction and reshape work regardless of the
# container type returned by the model.  scikit-learn estimators expect a 2-D
# (n_samples, n_features) array, hence the single-column reshape.
residuals = test - np.asarray(forecast)
residuals = residuals.reshape(-1, 1)

# Isolation Forest flags the most easily isolated residuals as outliers.
#   contamination: expected share of outliers; adjust to the data at hand.
#   random_state:  pinned because the trees are built from random splits;
#                  without a fixed seed the set of flagged points can change
#                  from one run to the next.
iso_forest = IsolationForest(contamination=0.05, random_state=42)

# Fit on the residuals and label the same points in one call
# (1 for inliers, -1 for outliers).
anomalies = iso_forest.fit_predict(residuals)

# Plot each residual against its position in the test segment, coloured by
# its inlier/outlier label.  The residual column is flattened so x and y are
# both explicit 1-D sequences of the same length.
plt.scatter(np.arange(len(residuals)), residuals.ravel(), c=anomalies, cmap='coolwarm')
plt.xlabel('Time')
plt.ylabel('Residuals')
plt.title('Anomalies Detected by Isolation Forest')
plt.colorbar(label='Anomaly (1: Normal, -1: Outlier)')
plt.show()

# --- Write results back to the dataset ---------------------------------------
# Rows in the training segment (the first `train_size` positions of the sorted
# frame) get NaN in both new columns; rows from position `train_size` onward
# receive the forecast value and the Isolation Forest label respectively.
_train_padding = np.full(train_size, np.nan)
data['forecast'] = np.concatenate([_train_padding, forecast])
data['anomaly'] = np.concatenate([_train_padding, anomalies])

# Keep only the rows labelled as outliers (-1) and show them.
_is_outlier = data['anomaly'].eq(-1)
anomaly_data = data.loc[_is_outlier]
print("Anomalies Detected:", anomaly_data, sep="\n")