In [6]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score

# Load the normal data
df_normal = pd.read_csv('normall.csv')

# Extract feature columns
X_normal = df_normal[['bearingA_x', 'bearingA_y', 'bearingB_x', 'bearingB_y']].values

# Standardize the data (important for k-means)
scaler = StandardScaler()
X_normal_scaled = scaler.fit_transform(X_normal)

# Determine the optimal number of clusters using the silhouette score
def find_optimal_k(data, k_range):
    best_score = -1
    best_k = None

    for k in k_range:
        kmeans = KMeans(n_clusters=k, random_state=42)
        labels = kmeans.fit_predict(data)
        score = silhouette_score(data, labels)
        if score > best_score:
            best_score = score
            best_k = k
    return best_k

# Define the range of k values to try
k_range = range(2, 10)

# Find the optimal number of clusters
optimal_k = find_optimal_k(X_normal_scaled, k_range)
print(f"Optimal number of clusters: {optimal_k}")

# Train the k-means model with the optimal number of clusters
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
kmeans.fit(X_normal_scaled)

# Calculate the distance threshold for anomalies
# Here, we use the maximum distance to any centroid from the training data
distances = kmeans.transform(X_normal_scaled)
min_distances = distances.min(axis=1)
threshold = min_distances.max()
print(f"Distance threshold for anomalies: {threshold}")

# Function to detect anomalies
def is_anomaly(sample, kmeans_model, scaler, threshold):
    sample_scaled = scaler.transform([sample])
    distances = kmeans_model.transform(sample_scaled)
    min_distance = distances.min(axis=1)[0]
    return min_distance > threshold

# Example test samples (assumed normal and anomalous samples)
new_sample_normal = [-0.469915051151626, 0.275878711735454, -0.0275614966845328, -0.0887079422926568]  # Example normal data
new_sample_anomaly = [0.189630843,0.012816744,-0.022304104,0.020324712] # Example anomalous data

# Check if the samples are anomalies
print("Is the normal sample an anomaly?", is_anomaly(new_sample_normal, kmeans, scaler, threshold))
print("Is the anomalous sample an anomaly?", is_anomaly(new_sample_anomaly, kmeans, scaler, threshold))


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Optimal number of clusters: 2
Distance threshold for anomalies: 4.759725249255874
Is the normal sample an anomaly? True
Is the anomalous sample an anomaly? False


  super()._check_params_vs_input(X, default_n_init=10)


In [3]:
#kmeans
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score, accuracy_score

df_normal = pd.read_csv('normall.csv')
print(df_normal.shape)
X_normal = df_normal[['bearingA_x', 'bearingA_y', 'bearingB_x', 'bearingB_y']].values

scaler = StandardScaler()
X_normal_scaled = scaler.fit_transform(X_normal)

def find_optimal_k(data, k_range):
    best_score = -1
    best_k = None

    for k in k_range:
        kmeans = KMeans(n_clusters=k, random_state=42)
        labels = kmeans.fit_predict(data)
        score = silhouette_score(data, labels)
        if score > best_score:
            best_score = score
            best_k = k
    return best_k

k_range = range(2, 10)

optimal_k = find_optimal_k(X_normal_scaled, k_range)
print(f"Optimal number of clusters: {optimal_k}")

kmeans = KMeans(n_clusters=optimal_k, random_state=42)
kmeans.fit(X_normal_scaled)


distances = kmeans.transform(X_normal_scaled)
min_distances = distances.min(axis=1)
threshold = min_distances.max()
print(f"Distance threshold for anomalies: {threshold}")

def is_anomaly(sample, kmeans_model, scaler, threshold):
    sample_scaled = scaler.transform([sample])
    distances = kmeans_model.transform(sample_scaled)
    min_distance = distances.min(axis=1)[0]
    return min_distance > threshold

df_anomaly = pd.read_csv('anomlaydetection.csv')
print(df_anomaly.shape)
X_anomaly = df_anomaly[['bearingA_x', 'bearingA_y', 'bearingB_x', 'bearingB_y']].values
y_true = df_anomaly['Anomaly'].values

X_anomaly_scaled = scaler.transform(X_anomaly)

y_pred = [is_anomaly(sample, kmeans, scaler, threshold) for sample in X_anomaly]

y_pred = np.array(y_pred, dtype=int)

y_true = np.array(y_true, dtype=int)

accuracy = accuracy_score(y_true, y_pred)

print(f"Accuracy of the model on the anomaly detection dataset: {accuracy:.2f}")


(4999, 4)


  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)
  super()._check_params_vs_input(X, default_n_init=10)


Optimal number of clusters: 2
Distance threshold for anomalies: 4.759725249255874
(595, 5)


  super()._check_params_vs_input(X, default_n_init=10)


Accuracy of the model on the anomaly detection dataset: 1.00


In [6]:
import pandas
data=pandas.read_csv("normall.csv")
print(data.head)

<bound method NDFrame.head of      bearingA_x  bearingA_y  bearingB_x  bearingB_y
0     -0.138363    0.028935   -0.019773   -0.002564
1     -0.101087    0.012587   -0.005409    0.015794
2     -0.105067   -0.003972    0.027830    0.026102
3     -0.181645    0.080939   -0.012655    0.050237
4     -0.153244    0.031137   -0.042393    0.046297
..          ...         ...         ...         ...
695   -0.025956    0.018290   -0.002704   -0.002209
696   -0.017061   -0.001114    0.011258   -0.053202
697    0.008415    0.011302    0.010797   -0.060995
698    0.021866    0.014057   -0.018469   -0.014107
699    0.041340   -0.001673    0.009491   -0.017855

[700 rows x 4 columns]>


In [10]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest

# Load the normal data
df_normal = pd.read_csv('normall.csv')

# Extract feature columns
X_normal = df_normal[['bearingA_x', 'bearingA_y', 'bearingB_x', 'bearingB_y']].values

# Standardize the data (important for Isolation Forest)
scaler = StandardScaler()
X_normal_scaled = scaler.fit_transform(X_normal)

# Define the contamination rate (adjust as needed)
contamination_rate = 0.02  # Experiment with different values

# Train the Isolation Forest model with the contamination rate
isolation_forest = IsolationForest(contamination=contamination_rate, random_state=42)
isolation_forest.fit(X_normal_scaled)

# Function to detect anomalies using Isolation Forest
def is_anomaly_if(sample, isolation_forest_model, scaler):
    sample_scaled = scaler.transform([sample])
    prediction = isolation_forest_model.predict(sample_scaled)
    return prediction == -1  # -1 indicates an anomaly according to Isolation Forest

# Example test samples (assumed normal and anomalous samples)
new_sample_normal = [ -0.138363, 0.028935 ,  -0.019773 ,  -0.002564]  # Example normal data
new_sample_anomaly = [-0.323513804, 0.2427902, -0.564276067,-0.74642926] 

# Check if the samples are anomalies using Isolation Forest
print("Is the normal sample an anomaly?", is_anomaly_if(new_sample_normal, isolation_forest, scaler))
print("Is the anomalous sample an anomaly?", is_anomaly_if(new_sample_anomaly, isolation_forest, scaler))


Is the normal sample an anomaly? [False]
Is the anomalous sample an anomaly? [ True]


In [None]:
#isolationforest
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score

# Load the normal data
df_normal = pd.read_csv('Dataset/vibration_normal_5.csv')

# Extract feature columns
X_normal = df_normal[['bearingA_x', 'bearingA_y', 'bearingB_x', 'bearingB_y']].values

# Standardize the data (important for Isolation Forest)
scaler = StandardScaler()
X_normal_scaled = scaler.fit_transform(X_normal)

# Define the contamination rate (adjust as needed)
contamination_rate = 0.02  # Experiment with different values

# Train the Isolation Forest model with the contamination rate
isolation_forest = IsolationForest(contamination=contamination_rate, random_state=42)
isolation_forest.fit(X_normal_scaled)

# Function to detect anomalies using Isolation Forest
def is_anomaly_if(sample, isolation_forest_model, scaler):
    sample_scaled = scaler.transform([sample])
    prediction = isolation_forest_model.predict(sample_scaled)
    return prediction == -1  # -1 indicates an anomaly according to Isolation Forest

# Load the anomaly detection data
df_anomaly = pd.read_csv('Dataset/anomlaydetection.csv')

# Extract features and true labels
X_anomaly = df_anomaly[['bearingA_x', 'bearingA_y', 'bearingB_x', 'bearingB_y']].values
y_true = df_anomaly['Anomaly'].values

# Standardize the anomaly detection data
X_anomaly_scaled = scaler.transform(X_anomaly)

# Predict anomalies
y_pred = [is_anomaly_if(sample, isolation_forest, scaler) for sample in X_anomaly]

# Convert boolean predictions to integers (0 for normal, 1 for anomaly)
y_pred = np.array(y_pred, dtype=int)

# Convert true labels to integers (0 for normal, 1 for anomaly)
y_true = np.array(y_true, dtype=int)

# Calculate accuracy
accuracy = accuracy_score(y_true, y_pred)

print(f"Accuracy of the model on the anomaly detection dataset: {accuracy:.2f}")


: 

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score

# Calculate precision
precision = precision_score(y_true, y_pred)
# Calculate recall
recall = recall_score(y_true, y_pred)
# Calculate F1-score
f1 = f1_score(y_true, y_pred)

print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1-score: {f1:.2f}")
