In [1]:
#code for generating dataset

import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import classification_report

def generate_normal_data(num_samples=46729):
    data = {
        'cpu_usage': np.random.uniform(0, 80, num_samples),  
        'network_in': np.random.uniform(500, 2000, num_samples), 
        'network_out': np.random.uniform(500, 2000, num_samples), 
        'disk_read': np.random.uniform(300, 1500, num_samples),  
        'disk_write': np.random.uniform(300, 1500, num_samples)  
    }
    df = pd.DataFrame(data)
    df['label'] = 0  
    return df

def inject_anomalies(df, num_anomalies=27832):
    anomalies = {
        'cpu_usage': np.random.uniform(90, 100, num_anomalies),  
        'network_in': np.random.uniform(5000, 10000, num_anomalies),  
        'network_out': np.random.uniform(5000, 10000, num_anomalies),  
        'disk_read': np.random.uniform(4000, 8000, num_anomalies),  
        'disk_write': np.random.uniform(4000, 8000, num_anomalies)  
    }
    df_anomalies = pd.DataFrame(anomalies)
    df_anomalies['label'] = 1  
    return pd.concat([df, df_anomalies], ignore_index=True)


df_normal = generate_normal_data()
df = inject_anomalies(df_normal)
df = df.sample(frac=1, random_state=42).reset_index(drop=True)
df.to_csv('test.csv', index=False)
print("done")

In [11]:
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import joblib

In [12]:
df = pd.read_csv('test.csv')

X = df[['cpu_usage', 'network_in', 'network_out', 'disk_read', 'disk_write']]
y = df['label']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [13]:
svm_model = OneClassSVM(gamma='auto', nu=0.05) 
svm_model.fit(X_scaled[y == 0]) 

y_pred = svm_model.predict(X_scaled)
y_pred = np.where(y_pred == 1, 0, 1)

In [82]:
print("Confusion Matrix:")
print(confusion_matrix(y, y_pred))

print("Classification Report:")
print(classification_report(y, y_pred, target_names=['Normal', 'Anomaly']))

Confusion Matrix (Resampled):
[[ 629  325]
 [   2 2003]]

Classification Report (Resampled):
              precision    recall  f1-score   support

           0       1.00      0.66      0.79       954
           1       0.86      1.00      0.92      2005

    accuracy                           0.89      2959
   macro avg       0.93      0.83      0.86      2959
weighted avg       0.90      0.89      0.88      2959

Class distribution after resampling:
1    2005
0     954
Name: count, dtype: int64


In [68]:
joblib.dump(svm_model, 'test.pkl')
joblib.dump(scaler, 'scaler.pkl')

['scaler.joblib']

In [14]:
#testing model on new data
import pandas as pd
import numpy as np
from sklearn.svm import OneClassSVM
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
import joblib

test_df = pd.read_csv('test1.csv')

X_test = test_df[['cpu_usage', 'network_in', 'network_out', 'disk_read', 'disk_write']]
y_true = test_df['label']


svm_model = joblib.load('svm_model.pkl')
scaler = joblib.load('scaler.pkl')

X_test_scaled = scaler.transform(X_test)

y_pred = svm_model.predict(X_test_scaled)
y_pred = np.where(y_pred == 1, 0, 1)  

print("Confusion Matrix:")
print(confusion_matrix(y_true, y_pred))

print("Classification Report:")
print(classification_report(y_true, y_pred, target_names=['Normal', 'Anomaly'], zero_division=0))



Confusion Matrix:
[[1914   86]
 [   0  438]]
Classification Report:
              precision    recall  f1-score   support

      Normal       1.00      0.96      0.98      2000
     Anomaly       0.84      1.00      0.91       438

    accuracy                           0.96      2438
   macro avg       0.92      0.98      0.94      2438
weighted avg       0.97      0.96      0.97      2438

