In [None]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
from joblib import dump

In [None]:
# Notebook banner: a one-line description of what this notebook does.
print("ML algorithm that detect anomally in electrical system in python")

In [None]:
# Load the CSV into a DataFrame, then print a summary of it
# (the CSV path is resolved relative to the kernel's working directory).
power_grid_df = pd.read_csv(filepath_or_buffer='sp_03_data.csv')
power_grid_df.info()

In [None]:
# Preview only the first rows instead of dumping the whole frame;
# the frame's structure is already summarised by the .info() call after loading.
power_grid_df.head()

In [None]:
# Build the feature matrix X: every column of the dataset except the
# identifier/timestamp/location columns listed below and the target
# column ("discoName").
X = power_grid_df.drop(
    columns=[
        "id",
        "file_id",
        "FeederIdentifier",
        "CreationTimeStamp",
        "createdAt",
        "discoName",
        "substation",
        "feeder",
    ]
)
X

In [None]:
# Target vector: the "discoName" column of the dataset.
# NOTE(review): the notebook banner talks about anomaly detection, while the
# label used here is discoName — confirm this is the intended target.
y = power_grid_df.loc[:, "discoName"]
y

In [None]:
# Split the data into training and test sets: 30% of the rows are held out
# for testing, and the fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Define the model: standardise the features, then classify with an SVC.
# probability=True is needed because the ROC cell calls model.predict_proba.
# (Earlier experiments with RandomForestClassifier and LinearSVC(C=0.025)
# behind the same StandardScaler step were dropped in favour of this SVC.)
model = make_pipeline(
    StandardScaler(),
    SVC(gamma="scale", probability=True, random_state=42),
)

In [None]:
# Fit the whole pipeline (scaler + classifier) on the training split only.
# fit() returns the pipeline itself, which the cell displays.
model.fit(X=X_train, y=y_train)

In [None]:
# Save the fitted pipeline to disk with joblib so it can be reloaded later
# without retraining.
dump(value=model, filename='trained-model-svc.joblib')

In [None]:
# Predict a class label for every row of the held-out test set.
y_pred = model.predict(X=X_test)
y_pred

In [None]:
# Accuracy: share of test rows whose predicted label equals the true label.
accuracy = metrics.accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Accuracy: {accuracy}')

In [None]:
# Recall, averaged over the classes with each class weighted by its support.
recall = metrics.recall_score(
    y_true=y_test,
    y_pred=y_pred,
    average="weighted",
    zero_division=1,
)
print(f'Recall Score: {recall}')

In [None]:
# Precision, averaged over the classes with each class weighted by its support.
# NOTE(review): zero_division=1 scores a class that is never predicted as
# precision 1.0, which raises the weighted average — consider zero_division=0.
precision = metrics.precision_score(
    y_true=y_test,
    y_pred=y_pred,
    average="weighted",
    zero_division=1,
)
print(f'Precision Score: {precision}')

In [None]:
# Evaluate the model classification report
# classification_report returns a single multi-line string. Print it so the
# line breaks render as a table; leaving `report` as the cell's last
# expression would show the string's repr with literal "\n" escapes.
report = metrics.classification_report(y_test, y_pred, zero_division=1)
print(report)

In [None]:
# Plot the confusion matrix of true vs. predicted labels on the test set.
# Passing model.classes_ both as `labels` and as `display_labels` keeps the
# matrix rows/columns and the axis tick labels in the same order.
cm = metrics.confusion_matrix(
    y_test,
    y_pred,
    labels=model.classes_,
)
disp = metrics.ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=model.classes_,
)
disp.plot()
plt.show()

In [None]:
# Determine the ROC curve
#
# roc_curve only accepts a binary target and needs an explicit `pos_label`
# when the labels are not 0/1 or -1/1, so the two-class and the multi-class
# cases are handled separately below.

# Probability matrix with one column per entry of model.classes_.
class_probabilities = model.predict_proba(X_test)
class_labels = model.classes_

if len(class_labels) == 2:
    # Binary target: score the second class and name it as the positive label
    y_pred_proba = class_probabilities[:, 1]

    # Calculate false and true positive rates
    false_positive_rate, true_positive_rate, _ = metrics.roc_curve(
        y_test, y_pred_proba, pos_label=class_labels[1]
    )

    # Calculate model AUC score
    auc = metrics.roc_auc_score(y_test, y_pred_proba)

    # plot the ROC curve
    plt.plot(false_positive_rate, true_positive_rate, label="AUC="+str(auc))
    legend_title = None
else:
    # Multi-class target: summarise with the support-weighted one-vs-rest AUC ...
    auc = metrics.roc_auc_score(
        y_test,
        class_probabilities,
        multi_class="ovr",
        average="weighted",
        labels=class_labels,
    )

    # ... and draw one one-vs-rest curve per class
    for column, class_label in enumerate(class_labels):
        is_class = (y_test == class_label).astype(int)  # 1 for this class, 0 for the rest
        false_positive_rate, true_positive_rate, _ = metrics.roc_curve(
            is_class, class_probabilities[:, column]
        )
        plt.plot(false_positive_rate, true_positive_rate, label=str(class_label) + " vs rest")
    legend_title = "weighted OvR AUC=" + str(auc)

plt.title('ROC Curve')
plt.ylabel('True Positive Rate')
plt.xlabel('false Positive Rate')
plt.legend(loc=4, title=legend_title)
# Render the figure explicitly so the cell does not echo the Legend object's repr
plt.show()