<a href="https://colab.research.google.com/github/innovate-data/PDM/blob/main/Equipment_Monitoring%F0%9F%96%B2%EF%B8%8F%7CANN_%F0%9F%A7%A0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Run this cell first: it fetches the Kaggle data source used by the notebook.
# The cell may be deleted once the data is available.
# This environment is not Kaggle's Python environment, so libraries that the
# notebook relies on may be missing here.
import kagglehub

dnkumars_industrial_equipment_monitoring_dataset_path = kagglehub.dataset_download(
    "dnkumars/industrial-equipment-monitoring-dataset"
)

print("Data source import complete.")


<div align="center" style="background-color: #091057; padding: 20px; border-radius: 10px;">
  <h1 style="color: #A0D683;">Loading Libraries</h1>
</div>

In [None]:
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Input
from sklearn.ensemble import IsolationForest
from IPython.display import IFrame
from plotly.offline import plot

<div align="center" style="background-color: #77CDFF; padding: 20px; border-radius: 10px;">
  <h1 style="color: #D9EAFD;">Data Loading</h1>
</div>

In [None]:
# Locate the CSV instead of assuming Kaggle's /kaggle/input mount point, so the
# notebook also runs where the dataset was fetched through kagglehub.
from pathlib import Path

CSV_NAME = "equipment_anomaly_data.csv"
KAGGLE_INPUT_DIR = Path("/kaggle/input/industrial-equipment-monitoring-dataset")

# The kagglehub cell is optional (its own header says it may be deleted), so
# the download directory is looked up defensively rather than referenced directly.
_download_dir = globals().get("dnkumars_industrial_equipment_monitoring_dataset_path")

_candidates = []
if _download_dir:
    # Search recursively: the layout inside the download directory is not
    # visible from this notebook.
    _candidates.extend(sorted(Path(_download_dir).rglob(CSV_NAME)))
_candidates.append(KAGGLE_INPUT_DIR / CSV_NAME)

# The first existing file wins. If none exists, fall through to the Kaggle path
# so that read_csv raises the usual FileNotFoundError.
csv_path = next((path for path in _candidates if path.is_file()), _candidates[-1])
data = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to sanity-check the load.
data.head(n=5)

In [None]:
# Number of missing values in each column.
data.isna().sum()

<div align="center" style="background-color: #72BF78; padding: 20px; border-radius: 10px;">
  <h1 style="color: #433878;">Exploratory Data Analysis</h1>

</div>

## Map

In [None]:
# [latitude, longitude] for every city the map knows how to place.
city_coordinates = {
    "New York": [40.7128, -74.0060],
    "Houston": [29.7604, -95.3698],
    "Chicago": [41.8781, -87.6298],
    "San Francisco": [37.7749, -122.4194],
    "Atlanta": [33.7490, -84.3880],
}

# Keep a copy of the location name under "city" and attach its coordinates.
# The dict is indexed directly, so a location missing from city_coordinates
# raises KeyError instead of silently producing NaN coordinates.
data["city"] = data["location"]
data["latitude"] = data["city"].map(lambda name: city_coordinates[name][0])
data["longitude"] = data["city"].map(lambda name: city_coordinates[name][1])

m = folium.Map(location=[37.0902, -95.7129], zoom_start=4)
marker_cluster = MarkerCluster().add_to(m)

# Walk the four needed columns in lockstep rather than using iterrows(), which
# builds a full Series for every row only to read four values from it.
marker_columns = zip(data["city"], data["equipment"], data["latitude"], data["longitude"])
for city, equipment, lat, lon in marker_columns:
    folium.Marker(
        location=[lat, lon],
        popup=f"City: {city}, Equipment: {equipment}",
    ).add_to(marker_cluster)

# Last expression of the cell, so the notebook renders the map.
m

## Univariate Analysis

In [None]:
# Histogram of the temperature column, written to a standalone HTML file and
# embedded in the notebook through an IFrame.
filename = "hist1.html"
fig1 = px.histogram(
    data,
    x="temperature",
    title="Temperature Distribution",
)
plot(fig1, filename=filename, auto_open=False)
display(IFrame(filename, width=800, height=600))

In [None]:
# Box plot of humidity, one box per equipment value, saved to HTML and shown
# in an IFrame.
filename = "box.html"
fig2 = px.box(
    data,
    y="humidity",
    color="equipment",
    title="Humidity Distribution",
)
plot(fig2, filename=filename, auto_open=False)
display(IFrame(filename, width=800, height=600))

## Bivariate Analysis

In [None]:
# Temperature against pressure, coloured by the "faulty" column; saved to HTML
# and shown in an IFrame.
filename = "scatter1.html"
fig3 = px.scatter(
    data,
    x="temperature",
    y="pressure",
    color="faulty",
    title="Temperature vs Pressure",
)
plot(fig3, filename=filename, auto_open=False)
display(IFrame(filename, width=800, height=600))

In [None]:
# Show the current column labels before choosing which ones to correlate.
data.columns

In [None]:
# Correlation matrix of the remaining columns once the location/equipment
# names and the map helper columns are removed, drawn as an annotated heatmap.
excluded_from_corr = ["city", "latitude", "longitude", "equipment", "location"]
corr_matrix = data.drop(columns=excluded_from_corr).corr()

filename = "corr.html"
fig4 = px.imshow(
    corr_matrix,
    text_auto=True,
    title="Correlation Heatmap",
)
plot(fig4, filename=filename, auto_open=False)
display(IFrame(filename, width=800, height=800))

<div align="center" style="background-color: #FFE3E3; padding: 20px; border-radius: 10px;">
  <h1 style="color: #091057;">Data Preprocessing</h1>

</div>

In [None]:
# Replace the categorical columns with integer codes.
# Each column gets its own encoder: refitting one shared LabelEncoder keeps
# only the classes_ of the last column, so earlier columns could no longer be
# decoded with inverse_transform.
label_encoders = {}
for column in ("equipment", "location"):
    label_encoder = LabelEncoder()
    data[column] = label_encoder.fit_transform(data[column])
    label_encoders[column] = label_encoder
# After the loop, `label_encoder` is the encoder fitted on "location".

In [None]:
# First five rows after label encoding.
data.head(n=5)

In [None]:
# Standardise the four sensor columns in place with StandardScaler.
# NOTE(review): the scaler is fitted on the whole dataset, before the
# train/test split further down, so test-row statistics influence the
# training features. Consider fitting on the training rows only.
numerical_cols = ["temperature", "pressure", "vibration", "humidity"]
scaler = StandardScaler()
scaled_values = scaler.fit_transform(data[numerical_cols])
data[numerical_cols] = scaled_values

In [None]:
# First five rows after scaling.
data.head(n=5)

<div align="center" style="background-color: #FFD7C4; padding: 20px; border-radius: 10px;">
  <h1 style="color: #001F3F;">Model</h1>

</div>

In [None]:
# Build the feature matrix and target, then hold out 20% of the rows for testing.
# "city", "latitude" and "longitude" were added only to draw the folium map.
# All three are derived from "location", and the raw coordinates are unscaled,
# so leaving them in X duplicates the location signal and lets them dominate
# distance-based steps such as KMeans.
map_only_cols = ["city", "latitude", "longitude"]
X = data.drop(columns=["faulty", *map_only_cols])
y = data["faulty"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Random forest with library-default hyperparameters and a fixed seed.
clf = RandomForestClassifier(random_state=42)
# Kept as the cell's last expression so the fitted estimator is displayed.
clf.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out rows and print the per-class metrics.
y_pred = clf.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

In [None]:
# Confusion matrix of the random-forest predictions on the held-out rows,
# drawn as an annotated Plotly heatmap.
cm = confusion_matrix(y_test, y_pred)

predicted_ticks = ["Predicted 0", "Predicted 1"]
actual_ticks = ["Actual 0", "Actual 1"]
heatmap = go.Heatmap(
    z=cm,
    x=predicted_ticks,
    y=actual_ticks,
    colorscale="Viridis",
    text=np.round(cm, 2),
    hoverinfo="z",
)
fig5 = go.Figure(data=heatmap)

# Print each cell's value inside its tile and keep the colour bar visible.
fig5.update_traces(
    texttemplate="%{text}",
    textfont={"size": 12},
    showscale=True,
)
fig5.update_layout(
    title="Confusion Matrix Heatmap",
    xaxis_title="Predicted Label",
    yaxis_title="True Label",
    autosize=True,
)

filename = "cm.html"
plot(fig5, filename=filename, auto_open=False)
# Last expression of the cell, so the notebook renders the IFrame.
IFrame(filename, width=800, height=600)

## Clustering

In [None]:
# Split the feature rows into two KMeans clusters and show the assignment on
# the temperature/pressure plane.
feature_x, feature_y = "temperature", "pressure"

kmeans = KMeans(n_clusters=2, random_state=42)
clusters = kmeans.fit_predict(X)

axis_labels = {
    feature_x: feature_x.capitalize(),
    feature_y: feature_y.capitalize(),
}
fig7 = px.scatter(
    X,
    x=feature_x,
    y=feature_y,
    # Cluster ids are passed as strings rather than integers.
    color=clusters.astype(str),
    title=f"KMeans Clustering ({feature_x} vs {feature_y})",
    labels=axis_labels,
)

filename = "clust_no_pca.html"
plot(fig7, filename=filename, auto_open=False)
# Last expression of the cell, so the notebook renders the IFrame.
IFrame(filename, width=800, height=600)

## Anomaly Detection

In [None]:
# Flag anomalous rows with an Isolation Forest (expected share of outliers: 10%)
# and show them on the temperature/pressure plane.
iso_forest = IsolationForest(contamination=0.1, random_state=42)
data["anomaly"] = iso_forest.fit_predict(X)

# The label only takes two values, so plot a string copy of it: a numeric
# colour column gets a continuous colour bar, a string one gets a discrete
# legend (the same approach as the KMeans cell). data["anomaly"] stays numeric.
anomaly_plot_df = data.assign(anomaly=data["anomaly"].astype(str))
fig8 = px.scatter(
    anomaly_plot_df,
    x="temperature",
    y="pressure",
    color="anomaly",
    title="Anomaly Detection",
)

filename = "ano.html"
plot(fig8, filename=filename, auto_open=False)
display(IFrame(filename, width=800, height=600))

In [None]:
# Small fully connected binary classifier trained on the same split as the
# random forest.
from tensorflow.keras.utils import set_random_seed

# Every other model in the notebook uses a fixed seed of 42. Seed the Python,
# NumPy and TensorFlow generators as well, so weight initialisation and batch
# shuffling are repeatable between runs.
set_random_seed(42)

model = Sequential([
    Input(shape=(X_train.shape[1],)),  # one input unit per feature column
    Dense(32, activation="relu"),
    Dense(16, activation="relu"),
    Dense(1, activation="sigmoid"),  # single sigmoid output, paired with binary cross-entropy
])
model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# 20% of the training rows are held back by Keras as a validation set.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=16,
    verbose=1,
)
loss, accuracy = model.evaluate(X_test, y_test)

In [None]:
# Report the accuracy returned by model.evaluate on the test split.
print(f"Test Accuracy: {accuracy}")

In [None]:
# Training and validation accuracy recorded by Keras for each epoch, drawn as
# two line traces on one figure.
training_log = history.history
curve_specs = (
    ("accuracy", "Training Accuracy"),
    ("val_accuracy", "Validation Accuracy"),
)

fig10 = go.Figure()
for metric_key, trace_name in curve_specs:
    fig10.add_trace(go.Scatter(y=training_log[metric_key], mode="lines", name=trace_name))
fig10.update_layout(
    title="Model Accuracy Over Epochs",
    xaxis_title="Epochs",
    yaxis_title="Accuracy",
)

filename = "ann.html"
plot(fig10, filename=filename, auto_open=False)
display(IFrame(filename, width=800, height=600))

<div align="center" style="background-color: #C4E1F6; padding: 20px; border-radius: 10px;">
  <h1 style="color: blue;">Thank You 🙇‍♂️ for Visiting My Notebook!</h1>

  <p style="font-size: 18px; color: black;">
    If you found this content valuable, please consider giving it an upvote <span style="color: blue;">👍</span>.
    <br>Your support is greatly appreciated and motivates me to continue developing more valuable and informative notebooks.
  </p>
</div>
