In [22]:
# Import Libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
import os
import matplotlib.pyplot as plt

# Load every CSV in the data folder into a single DataFrame.
# The files are read without a header row; columns 0 and 4 are discarded and
# the remaining columns are named current_1..current_3.
datafolder = 'data/s3Files/'
# os.listdir returns entries in arbitrary order. Sort them so the concatenated
# sequence (which later receives a synthetic time index) is identical on every
# run. NOTE(review): this is lexicographic order -- confirm that it matches the
# acquisition order of the files.
data_files = sorted(os.listdir(datafolder))
# Read each file once and concatenate in a single call. Growing the frame with
# pd.concat inside the loop re-copies all previously loaded rows for every file.
df = pd.concat(
    [pd.read_csv(os.path.join(datafolder, file), header=None, sep=',') for file in data_files],
    ignore_index=True,
)
df = df.drop(columns=[0, 4]).dropna()
df.columns = ['current_1', 'current_2', 'current_3']



In [23]:
# Replace the index with a fresh 0..n-1 range, discarding the old labels.
df = df.reset_index(drop=True)

In [27]:
# Data Preprocessing
# Attach a synthetic, evenly spaced time index so the samples can be resampled.
# NOTE(review): the previous comment claimed 15K samples per second, but the
# period actually used (1/10000 s) corresponds to 10,000 samples per second.
# The comment now matches the code -- confirm the real acquisition rate.
sample_rate_hz = 10_000
freq = 1 / sample_rate_hz  # sample period in seconds
n_samples = len(df)
index = pd.date_range(start='2023-02-27 00:00:00', periods=n_samples, freq=pd.Timedelta(freq, unit='s'))
df = df.set_index(index)
# Average each 100 ms window; forward-fill any window whose mean is NaN.
df_resampled = df.resample('100ms').mean().ffill()



In [28]:
# Sanity check: (rows, columns) of the resampled frame.
df_resampled.shape

(3170, 3)

In [29]:
# Feature Engineering
# Standardise the three current channels and keep the result as a DataFrame
# that shares the column names and index of the unscaled features.
X = df_resampled.loc[:, ['current_1', 'current_2', 'current_3']]
scaler = StandardScaler()
scaler.fit(X)
X_scaled = pd.DataFrame(
    data=scaler.transform(X),
    index=X.index,
    columns=X.columns,
)




In [30]:
# Model Training
# Fix the seed: IsolationForest builds randomised trees, so without
# random_state the fitted model (and the points it flags) changes on every run.
model = IsolationForest(contamination=0.01, random_state=42)
model.fit(X_scaled)
# predict returns -1 for outliers and +1 for inliers. The array has one entry
# per row of X_scaled, which shares df_resampled's index, so it can be assigned
# directly as a column.
df_resampled['anomaly'] = model.predict(X_scaled)



In [31]:
# Peek at the last scaled sample as a 2-D array (one row, one column per channel).
X_scaled.tail(1).to_numpy()

array([[ 0.32351313, -0.53869646, -0.01462173]])

In [32]:

# Real-time Monitoring
# Score the most recent window. Keep it as a one-row DataFrame rather than
# .values: the model was fitted on a DataFrame, and passing a bare ndarray makes
# scikit-learn warn that X does not have valid feature names.
X_latest = X_scaled.iloc[-1:]
is_anomaly = model.predict(X_latest)[0] == -1  # -1 marks an outlier
if is_anomaly:
    print("Anomaly Detected!")
    # Send Alert to Maintenance Team
else:
    print("Motor is Running Normally.")


Motor is Running Normally.




In [38]:
# Import Libraries
import os
import numpy as np
import pandas as pd

# Load Data
data_dir ='data/s3Files/'
# Sort the listing so a given index always refers to the same file
# (os.listdir returns entries in arbitrary order).
file_names = sorted(os.listdir(data_dir))
n_files = len(file_names)
# Seeded generator: the chosen files and the injected noise are the same on
# every run, so re-running overwrites the same output files instead of leaving
# stale ones from an earlier random draw.
rng = np.random.default_rng(42)
anomaly_file_indices = rng.choice(n_files, size=2, replace=False)

# Introduce Anomalies
output_dir = './anamalous/'
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(output_dir, exist_ok=True)
current_columns = ['current_1', 'current_2', 'current_3']
for i in anomaly_file_indices:
    file_path = os.path.join(data_dir, file_names[i])
    df = pd.read_csv(file_path, header=None, sep=',')
    df = df.drop(columns=[0, 4]).dropna()
    df.columns = current_columns
    n_samples = len(df)
    # Corrupt everything after the first fifth of the file.
    anomaly_start = int(n_samples/5)
    anomaly_end = int(n_samples)
    # One (rows, channels) block of unit-variance Gaussian noise.
    noise = rng.normal(loc=0, scale=1, size=(anomaly_end - anomaly_start, len(current_columns)))
    # Positional slicing (.iloc) keeps the row count equal to the noise shape
    # even when dropna() removed rows. The previous label-based .loc slice
    # could select a different number of rows in that case.
    df.iloc[anomaly_start:anomaly_end] = df.iloc[anomaly_start:anomaly_end].to_numpy() + noise
    df.to_csv(os.path.join(output_dir, str(i)), index=False, header=False)

print("Anomalies introduced in files: ", anomaly_file_indices)


Anomalies introduced in files:  [307 127]


In [40]:
# Load every file from the anomalous-data folder into a single DataFrame.
datafolder = './anamalous/'
# Sort the listing so the concatenation order is deterministic
# (os.listdir returns entries in arbitrary order).
data_files = sorted(os.listdir(datafolder))
# Read each file once and concatenate in a single call instead of growing the
# frame inside a loop, which re-copies all previously loaded rows per file.
df = pd.concat(
    [pd.read_csv(os.path.join(datafolder, file), header=None, sep=',') for file in data_files],
    ignore_index=True,
)
df = df.dropna()
df.columns = ['current_1', 'current_2', 'current_3']


In [43]:
# Data Preprocessing
# Attach a synthetic, evenly spaced time index so the samples can be resampled.
# NOTE(review): the previous comment claimed 15K samples per second, but the
# period actually used (1/10000 s) corresponds to 10,000 samples per second.
# The comment now matches the code -- confirm the real acquisition rate.
sample_rate_hz = 10_000
freq = 1 / sample_rate_hz  # sample period in seconds
n_samples = len(df)
index = pd.date_range(start='2023-02-27 00:00:00', periods=n_samples, freq=pd.Timedelta(freq, unit='s'))
df = df.set_index(index)
# Average each 100 ms window; forward-fill any window whose mean is NaN.
df_resampled = df.resample('100ms').mean().ffill()


In [44]:
# Feature Engineering
X = df_resampled[['current_1', 'current_2', 'current_3']]
# Reuse the scaler that was fitted on the training data -- do NOT fit a new one.
# Re-fitting would standardise this data with its own mean/std, so the model
# would see inputs on a different scale from the one it was trained on, and a
# shift in the raw currents could be normalised away before scoring.
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)


In [47]:
# Score every row of the scaled features with the fitted model.
# Elsewhere in this notebook a prediction of -1 is treated as an anomaly.
pred = model.predict(X_scaled)




In [48]:
# Display the array of predictions.
pred

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])