# 1. Importing The Libraries

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# 2. Data Loading

In [4]:
# Loading the anomalies dataset to a DataFrame
anomalies_data = pd.read_csv("Data/Anomalies_Data.csv")
anomalies_data

Unnamed: 0,real ExhGasInletTempA,real ExhGasOutletTemp,real LOInletPress,real LOOutletTemp,real RPM,Anomaly
0,407.5,361.0,3.580,75.35,19455.0,0
1,418.0,355.0,3.530,76.90,22935.0,0
2,402.0,347.0,3.630,76.60,17400.0,0
3,393.0,351.0,3.615,75.00,16515.0,0
4,419.0,362.0,3.530,76.05,22635.0,0
...,...,...,...,...,...,...
790,392.0,348.0,3.630,75.00,16590.0,0
791,403.5,359.0,3.575,75.15,18915.0,0
792,414.5,355.5,3.540,76.60,22350.0,0
793,395.0,346.0,3.630,76.00,16920.0,0


In [5]:
# Number of Rows and Columns in this data
anomalies_data.shape

(795, 6)

In [6]:
# Getting the statistical measures of the data
anomalies_data.describe()

Unnamed: 0,real ExhGasInletTempA,real ExhGasOutletTemp,real LOInletPress,real LOOutletTemp,real RPM,Anomaly
count,795.0,795.0,795.0,795.0,795.0,795.0
mean,404.038365,353.132075,3.588119,75.738239,19434.0,0.012579
std,11.224528,5.541137,0.04455,0.891029,2604.376531,0.111517
min,379.5,343.0,3.455,73.8,15015.0,0.0
25%,393.5,348.0,3.54,75.0,16845.0,0.0
50%,404.5,353.0,3.59,75.7,19020.0,0.0
75%,414.0,358.5,3.63,76.4,22140.0,0.0
max,434.0,368.0,3.67,78.85,25680.0,1.0


In [7]:
anomalies_data['Anomaly'].value_counts()

Anomaly
0    785
1     10
Name: count, dtype: int64

### 0 => Normal Data
### 1 => Abnormal Data = Anomaly

In [8]:
anomalies_data.groupby('Anomaly').mean()

Unnamed: 0_level_0,real ExhGasInletTempA,real ExhGasOutletTemp,real LOInletPress,real LOOutletTemp,real RPM
Anomaly,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,403.828662,353.033121,3.588599,75.721656,19421.598726
1,420.5,360.9,3.5505,77.04,20407.5


In [9]:
# Separating The Data and Labels
X = anomalies_data.drop(columns='Anomaly', axis=1)
Y = anomalies_data['Anomaly']

In [10]:
X

Unnamed: 0,real ExhGasInletTempA,real ExhGasOutletTemp,real LOInletPress,real LOOutletTemp,real RPM
0,407.5,361.0,3.580,75.35,19455.0
1,418.0,355.0,3.530,76.90,22935.0
2,402.0,347.0,3.630,76.60,17400.0
3,393.0,351.0,3.615,75.00,16515.0
4,419.0,362.0,3.530,76.05,22635.0
...,...,...,...,...,...
790,392.0,348.0,3.630,75.00,16590.0
791,403.5,359.0,3.575,75.15,18915.0
792,414.5,355.5,3.540,76.60,22350.0
793,395.0,346.0,3.630,76.00,16920.0


In [11]:
Y

0      0
1      0
2      0
3      0
4      0
      ..
790    0
791    0
792    0
793    0
794    0
Name: Anomaly, Length: 795, dtype: int64

# 3. Split The Data

In [12]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, stratify=Y, random_state=2)

In [13]:
print("Data: ", X.shape)
print("X Train:", X_train.shape)
print("X Test:", X_test.shape)

Data:  (795, 5)
X Train: (636, 5)
X Test: (159, 5)


# 4. Training Model

In [14]:
classifier = svm.SVC(kernel='linear')

In [15]:
# Training The Classifier
classifier.fit(X_train, Y_train)

# 5. Evaluating The Model

In [16]:
# Accuracy score on the training Data
X_train_Prediction = classifier.predict(X_train)
training_data_accuracy = accuracy_score(X_train_Prediction, Y_train)

In [17]:
print("Accuracy Score of the training Data:", training_data_accuracy)

Accuracy Score of the training Data: 0.9937106918238994


# 6. Making Predicition System

In [18]:
# Input Test Data
# real ExhGasInletTempA	real ExhGasOutletTemp	real LOInletPress	real LOOutletTemp	real RPM

#input_Test_Data = (7, 3.5, 3.595, 5.25, 50.0) 
input_Test_Data = (328.0, 318.5, 3.260, 72.50, 11580.0) 

# Changing the input Data to numpy array
input_Test_Data_As_Numpy_Array = np.asarray(input_Test_Data)

# Reshape the array as we are predicting for one Instance
input_Test_Data_Reshaped = input_Test_Data_As_Numpy_Array.reshape(1, -1)

prediction = classifier.predict(input_Test_Data_Reshaped)
print(prediction)

if (prediction[0] == 0):
    print("Normal Data")
else :
    print("Anomaly Detected")

[0]
Normal Data




# 7. Saving The Model

In [19]:
import pickle

In [20]:
filename = 'Data/trained_model.sav'
pickle.dump(classifier, open(filename, 'wb'))

In [21]:
# Loading the saved Model
loaded_model = pickle.load(open('Data/trained_model.sav', 'rb'))

In [22]:
input_Test_Data = (7, 3.5, 3.595, 5.25, 50.0) 
#input_Test_Data = (328.0, 318.5, 3.260, 72.50, 11580.0) 

# Changing the input Data to numpy array
input_Test_Data_As_Numpy_Array = np.asarray(input_Test_Data)

# Reshape the array as we are predicting for one Instance
input_Test_Data_Reshaped = input_Test_Data_As_Numpy_Array.reshape(1, -1)

prediction = loaded_model.predict(input_Test_Data_Reshaped)
print(prediction)

if (prediction[0] == 0):
    print("Normal Data")
else :
    print("Anomaly Detected")

[0]
Normal Data


