# Models

In [None]:
import pandas as pd
import glob

# Collect every benchmark CSV in the directory. glob returns paths in
# arbitrary (filesystem) order, so sort them to make the row order of the
# combined frame reproducible across machines and runs.
files = sorted(glob.glob('./A1Benchmark/*.csv'))

# Read each file once (a header row is assumed; pass header=None otherwise)
# and concatenate in a single call. Concatenating inside the loop re-copies
# the accumulated frame on every iteration, which is quadratic in total rows.
frames = [pd.read_csv(file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no
# files matched. ignore_index=True yields a fresh 0..n-1 index.
all_data = pd.concat(frames, axis=0, ignore_index=True) if frames else pd.DataFrame()

# Display the resulting DataFrame
print(all_data)

In [None]:
# Persist the combined benchmark frame as a comma-separated UTF-8 file
# (without the index column), then load it back from disk into `df`.
all_data.to_csv(
    'A1Bench_all.csv',
    sep=',',
    index=False,
    encoding='utf-8',
)
df = pd.read_csv('./A1Bench_all.csv')

In [None]:
# Show the frame reloaded from './A1Bench_all.csv' as the cell's output.
df

# SVM Model

### Supervised Learning with an SVM Classifier

In [None]:
# changing 
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

df = pd.read_csv('./file2.csv')
# Alternative input previously used here: './A3Benchmark/A3Benchmark-TS1.csv'

# Columns that get standardised; the other features are passed through as-is.
scaled_columns = ['value', 'trend', 'noise', 'seasonality1', 'seasonality2', 'seasonality3']

# Define input features and target
X = df[['timestamps', 'value', 'changepoint', 'trend', 'noise', 'seasonality1', 'seasonality2', 'seasonality3']]
y = df['anomaly']

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# mean/variance of the test rows leak into the training features.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=40)

# Work on explicit copies so the column assignments below neither touch `df`
# nor trigger pandas' SettingWithCopyWarning.
X_train = X_train.copy()
X_test = X_test.copy()

# Preprocessing: learn the scaling statistics from the training rows only,
# then apply the same transformation to the held-out rows.
scaler = StandardScaler()
X_train[scaled_columns] = scaler.fit_transform(X_train[scaled_columns])
X_test[scaled_columns] = scaler.transform(X_test[scaled_columns])

# Train the classifier (kernel alternatives tried: 'rbf', 'sigmoid').
clf = SVC(kernel='sigmoid', C=1.0)
clf.fit(X_train, y_train)

# Predict on the held-out rows
y_pred = clf.predict(X_test)

# Evaluate the model. zero_division=np.nan reports NaN instead of 0 for a
# metric whose denominator is zero (e.g. a class that is never predicted).
accuracy = accuracy_score(y_test, y_pred)
classification_report_str = classification_report(y_test, y_pred, zero_division=np.nan)

print(f"Accuracy: {accuracy}")
print("\nClassification Report:")
print(classification_report_str)

# One-Class SVM Model

In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load a single benchmark series.
df = pd.read_csv("./A1Benchmark/real_1.csv")

# Drop the "timestamp" column as it's not useful for the SVM model
df = df.drop("timestamp", axis=1)

# Separate the label from the features. Leaving "is_anomaly" inside the frame
# handed to fit()/predict() would feed the ground truth to the model as an
# input feature and make the evaluation meaningless.
features = df.drop(columns="is_anomaly")
labels = df["is_anomaly"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=42
)

# Train one-class SVM model (unsupervised: the labels are not used for fitting).
# nu may need tuning for other datasets.
model = OneClassSVM(nu=0.006)
model.fit(X_train)

# OneClassSVM.predict returns +1 for inliers and -1 for outliers;
# convert to 0 for normal, 1 for anomaly to match the "is_anomaly" encoding.
predictions = (model.predict(X_test) == -1).astype(int)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, predictions))

# Create a confusion matrix. labels=[0, 1] keeps it 2x2 even when one class
# is absent from both the truth and the predictions, so it always matches the
# two tick labels below.
cm = confusion_matrix(y_test, predictions, labels=[0, 1])

# Plot the confusion matrix using seaborn
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Normal', 'Anomaly'], yticklabels=['Normal', 'Anomaly'])
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold
from sklearn.svm import OneClassSVM
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, f1_score
import seaborn as sns
import matplotlib.pyplot as plt
import warnings

# NOTE: the previous blanket warning filters were removed on purpose. The
# UserWarning filter hid scikit-learn's "scoring failed" warnings (every grid
# score was NaN because no labels were passed), and TypeError is an exception
# class, not a Warning category, so that filter could never match anything.

# Load the combined benchmark dataset (all A1Benchmark CSVs concatenated).
undersampled_df = pd.read_csv('./A1Bench_all.csv')

# Separate features and target variable
X_train, X_test, y_train, y_test = train_test_split(
    undersampled_df.drop('is_anomaly', axis=1),
    undersampled_df['is_anomaly'],
    test_size=0.2,
    random_state=42
)

# Filter out only the normal instances (class 0) for training
X_train_normal = X_train[y_train == 0]

# Define the parameter grid for GridSearchCV
param_grid = {'nu': [0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.2, 0.3]}


def anomaly_f1(estimator, X, y):
    """Score a fitted outlier detector by F1 on the anomaly class.

    OneClassSVM.predict returns +1 (inlier) / -1 (outlier), which never equals
    the 0/1 labels, so the built-in 'accuracy' scorer cannot be used directly.
    The predictions are mapped to 0 = normal, 1 = anomaly before scoring.
    """
    predicted = (estimator.predict(X) == -1).astype(int)
    return f1_score(y, predicted, zero_division=0)


# Build the CV splits by hand: each validation fold keeps its share of
# anomalies (stratified), while the matching training part is restricted to
# normal rows, mirroring how the final model is trained below.
labels_train = y_train.to_numpy()
folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
normal_only_splits = [
    (train_idx[labels_train[train_idx] == 0], valid_idx)
    for train_idx, valid_idx in folds.split(X_train, labels_train)
]

# Create the One-Class SVM model
model = OneClassSVM()

# Create GridSearchCV. Labels are passed to fit() so the scorer receives them;
# OneClassSVM itself ignores y. refit=False because the final model is fitted
# explicitly on the normal-only rows right after the search.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring=anomaly_f1,
    cv=normal_only_splits,
    refit=False,
)
grid_search.fit(X_train, y_train)

# Get the best parameter from the grid search
best_nu = grid_search.best_params_['nu']

# Create and train the One-Class SVM model with the best parameter
final_model = OneClassSVM(nu=best_nu)
final_model.fit(X_train_normal)

# Make predictions on the test set and convert them to
# 0 for normal instances and 1 for anomalies.
y_pred = (final_model.predict(X_test) == -1).astype(int)

# Print classification report and confusion matrix. labels=[0, 1] keeps the
# matrix 2x2 so it always matches display_labels.
print(classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
disp_cm = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp_cm.plot()
