<a href="https://colab.research.google.com/github/aaditya9803/ml/blob/main/Condition_monitoring_of_hydraulic_systems_(different_approach).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


## Importing files from Google drive



In [34]:
!pip install -U -q PyDrive
!pip install -U -q dask
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
import os

# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

# List of file names.
file_names = ['CE.txt', 'CP.txt', 'EPS1.txt', 'FS1.txt', 'FS2.txt',
              'PS1.txt', 'PS2.txt', 'PS3.txt', 'PS4.txt', 'PS5.txt',
              'PS6.txt', 'SE.txt', 'TS1.txt', 'TS2.txt', 'TS3.txt',
              'TS4.txt', 'VS1.txt','profile.txt']

# Get the ID of the current folder (where the notebook is located).
# Assuming the notebook is in "My Drive/Colab Notebooks" or similar
# by default.
# Alternatively, you can get a specific folder ID or use a method to find your folder
current_folder_id = '1_lAGYQ1p9oY9_OM0LMAL3gbKGBY7HuM5' # Replace with your specific folder ID if needed


# Iterate through the file names and download each file.
for file_name in file_names:
    # List files in the current folder.
    file_list = drive.ListFile({'q': "'{}' in parents and trashed=false".format(current_folder_id)}).GetList()

    # Find the file with the matching name.
    file_to_download = next((file for file in file_list if file['title'] == file_name), None)

    if file_to_download:
        # Download the file.
        print(f"Downloading {file_name}...")
        file_to_download.GetContentFile(file_name)  # Downloads to Colab environment
    else:
        print(f"File {file_name} not found in the folder.")

print("All files downloaded (if found).")

Downloading CE.txt...
Downloading CP.txt...
Downloading EPS1.txt...
Downloading FS1.txt...
Downloading FS2.txt...
Downloading PS1.txt...
Downloading PS2.txt...
Downloading PS3.txt...
Downloading PS4.txt...
Downloading PS5.txt...
Downloading PS6.txt...
Downloading SE.txt...
Downloading TS1.txt...
Downloading TS2.txt...
Downloading TS3.txt...
Downloading TS4.txt...
Downloading VS1.txt...
Downloading profile.txt...
All files downloaded (if found).


# Importing python libraries

In [70]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime as dt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelBinarizer
from sklearn.preprocessing import LabelEncoder
import xgboost as xgb
import os
from sklearn.model_selection import KFold, cross_val_predict, cross_val_score
from sklearn.metrics import mean_squared_error, r2_score
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.svm import SVC
from sklearn.metrics import confusion_matrix, classification_report, f1_score, precision_score, accuracy_score
import pickle
from google.colab import files
import gc
from tensorflow.keras.regularizers import l1_l2
from sklearn.utils.class_weight import compute_class_weight


## Features and Targets

In [71]:
def read_file(filename):
    return pd.read_csv((filename), sep='\t', header=None)

pressureFile1 = read_file(filename='PS1.txt')
pressureFile2 = read_file(filename='PS2.txt')
pressureFile3 = read_file(filename='PS3.txt')
pressureFile4 = read_file(filename='PS4.txt')
pressureFile5 = read_file(filename='PS5.txt')
pressureFile6 = read_file(filename='PS6.txt')


volumeFlow1 = read_file(filename='FS1.txt')
volumeFlow2 = read_file(filename='FS2.txt')


temperature1 = read_file(filename='TS1.txt')
temperature2 = read_file(filename='TS2.txt')
temperature3 = read_file(filename='TS3.txt')
temperature4 = read_file(filename='TS4.txt')

pump1 = read_file(filename='EPS1.txt')
vibration1 = read_file(filename='VS1.txt')
coolingE1 = read_file(filename='CE.txt')
coolingP1 = read_file(filename='CP.txt')
effFactor1 = read_file(filename='SE.txt')

profile = read_file(filename='profile.txt')


# for the targets

y_coolerCondition = pd.DataFrame(profile.iloc[:, 0])
y_valveCondition = pd.DataFrame(profile.iloc[:, 1])
y_pumpLeak = pd.DataFrame(profile.iloc[:, 2])
y_hydraulicAcc = pd.DataFrame(profile.iloc[:, 3])
y_stableFlag = pd.DataFrame(profile.iloc[:, 4])

y_pumpLeak = pd.Series(y_pumpLeak.values.flatten(), name='pumpLeak')
y_coolerCondition = pd.Series(y_coolerCondition.values.flatten(), name='coolerCondition')
y_valveCondition = pd.Series(y_valveCondition.values.flatten(), name='valveCondition')
y_hydraulicAcc = pd.Series(y_hydraulicAcc.values.flatten(), name='hydraulicAcc')
y_stableFlag = pd.Series(y_stableFlag.values.flatten(), name='stableFlag')

In [72]:
pressureFile1.shape

(2205, 6000)

##Functions to Upsample and Downsample

In [73]:
#converts 100hz to 10hz
def mean_conversion_100hz(df, chunk_size=10, prefix=''):
    num_chunks = df.shape[1] // chunk_size
    columns = [f'{prefix}_{i}' for i in range(num_chunks)]
    df_mean_chunks = pd.DataFrame(index=df.index, columns=columns)

    for index, row in df.iterrows():
        mean_chunks = row.values.reshape(-1, chunk_size).mean(axis=1)
        df_mean_chunks.loc[index] = mean_chunks

    return df_mean_chunks


#converts 100hz to 1z
def mean_conversion_1hz(df, chunk_size=100, prefix=''):
    num_chunks = df.shape[1] // chunk_size
    columns = [f'{prefix}_{i}' for i in range(num_chunks)]
    df_mean_chunks = pd.DataFrame(index=df.index, columns=columns)

    for index, row in df.iterrows():
        mean_chunks = row.values.reshape(-1, chunk_size).mean(axis=1)
        df_mean_chunks.loc[index] = mean_chunks

    return df_mean_chunks
    expanded_row = np.repeat(row.values, repeat_count)
    all_expanded_rows.append(pd.Series(expanded_row))
    df_expanded = pd.concat(all_expanded_rows, ignore_index=True)  # Concatenate all rows at once

    return df_expanded

#For mean of the row (mean of the cycle)
def mean_conversion(df):
    df1 = pd.DataFrame()
    df1['mean'] = df.mean(axis=1)
    return df1

#For Upsampling
def repeat_values(df, repeat_count=10):
    # For single-column DataFrames, repeat values horizontally
    if df.shape[1] == 1:  # Check if single-column
        df_expanded = pd.DataFrame(
            np.repeat(df.values, repeat_count, axis=1), index=df.index
        )
    else:
        # Otherwise, repeat each value in a row horizontally for all columns
        df_expanded = df.apply(lambda row: np.tile(row.values, repeat_count), axis=1)
        df_expanded = pd.DataFrame(df_expanded.tolist(), index=df.index)

    return df_expanded


# Functions to Train Models

## Using XGB

In [74]:
def xgb_(X, y, num_folds=5):

    kf = KFold(n_splits=num_folds, shuffle=True, random_state=24)

    f1_scores = []
    accuracies = []
    precisions = []

    for fold_idx, (train_idx, test_idx) in enumerate(kf.split(X)):
        print(f"Fold {fold_idx + 1}:")

        X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
        y_train, y_test = y.iloc[train_idx], y.iloc[test_idx]

        # Initialize and train the XGBoost model with regularization
        xgb1 = xgb.XGBClassifier(
            objective="binary:logistic",
            random_state=42,
            alpha=2.0,
            reg_lambda=2.0,
            max_depth=4,
            min_child_weight=10,
            subsample=0.8,
            colsample_bytree=0.8
        )

        # Fit the model
        xgb1.fit(X_train.to_numpy(), y_train.to_numpy().ravel())

        # Make predictions
        y_pred = xgb1.predict(X_test.to_numpy())

        # Ensure y_test is 1D for comparison
        y_test = y_test.to_numpy().ravel().astype(int)
        y_pred = y_pred.astype(int)

        # Calculate and store F1-score for this fold
        f1 = f1_score(y_test, y_pred, average='weighted')
        f1_scores.append(f1)

        # Calculate and store accuracy and precision for this fold
        accuracy = accuracy_score(y_test, y_pred)
        accuracies.append(accuracy)
        precision = precision_score(y_test, y_pred, average='weighted')
        precisions.append(precision)

        print(classification_report(y_test, y_pred))
        print(f"Fold {fold_idx + 1} F1-score: {f1:.4f}")
        print(f"Fold {fold_idx + 1} Accuracy: {accuracy:.4f}")
        print(f"Fold {fold_idx + 1} Precision: {precision:.4f}")

        # Calculate and print confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        print("Confusion Matrix:")
        print(cm)

        print("-" * 30)

    # Calculate and print average metrics across all folds
    avg_f1 = np.mean(f1_scores)
    avg_accuracy = np.mean(accuracies)
    avg_precision = np.mean(precisions)

    print(f"Average F1-score across {num_folds} folds: {avg_f1:.4f}")
    print(f"Average Accuracy across {num_folds} folds: {avg_accuracy:.4f}")
    print(f"Average Precision across {num_folds} folds: {avg_precision:.4f}")

    # Final model: Train on the entire dataset
    print("Training final model on the entire dataset...")
    X_train_final, X_test_final, y_train_final, y_test_final = train_test_split(X, y, test_size=0.2, random_state=42)

    # Final model with regularization
    final_model = xgb.XGBClassifier(
        objective="binary:logistic",
        random_state=42,
        alpha=1.0,
        reg_lambda=1.0,
        max_depth=5,
        min_child_weight=10,
        subsample=0.8,
        colsample_bytree=0.8
    )

    # Train on the entire dataset
    final_model.fit(X_train_final.to_numpy(), y_train_final.to_numpy().ravel())

    # Evaluate the final model on the test set
    y_pred_final = final_model.predict(X_test_final.to_numpy())

    # Calculate accuracy, precision, confusion matrix for final model
    final_accuracy = accuracy_score(y_test_final, y_pred_final)
    final_precision = precision_score(y_test_final, y_pred_final, average='weighted')
    final_cm = confusion_matrix(y_test_final, y_pred_final)

    print(f"Final Model Accuracy: {final_accuracy:.4f}")
    print(f"Final Model Precision: {final_precision:.4f}")
    print("Final Model Confusion Matrix:")
    print(final_cm)

    # Save the final model
    with open('XGB_final_model.pkl', 'wb') as f:
        pickle.dump(final_model, f)
    files.download('XGB_final_model.pkl')



## Using SVM

In [75]:
def svm(X, y):
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Create the SVM model
    model = SVC(kernel='rbf', C=1, gamma='auto')

    # Perform cross-validation with predictions to calculate precision and accuracy
    y_pred_cv = cross_val_predict(model, X_train, y_train, cv=5)

    # Calculate precision for each fold
    precisions = []
    accuracies = []
    for fold in range(5):
        y_true_fold = y_train[fold::5]
        y_pred_fold = y_pred_cv[fold::5]
        precision = precision_score(y_true_fold, y_pred_fold, average='binary')
        precisions.append(precision)

        #accuracy for each fold
        accuracy = accuracy_score(y_true_fold, y_pred_fold)
        accuracies.append(accuracy)

    # average precision, average accuracy
    print("Precision scores for each fold:", precisions)
    print("Average precision:", np.mean(precisions))
    print("Average accuracy:", np.mean(accuracies))

    # Compute and print confusion matrix for cross-validation
    cm = confusion_matrix(y_train, y_pred_cv)
    print("Confusion matrix for training set:\n", cm)

    # Train the model on the entire training set
    model.fit(X_train, y_train)

    # Make predictions on the test set and evaluate
    y_pred = model.predict(X_test)

    # Evaluate on the test set
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='binary')
    cm_test = confusion_matrix(y_test, y_pred)

    print(f"Test set accuracy: {accuracy}")
    print(f"Test set precision: {precision}")
    print("Test set confusion matrix:\n", cm_test)

    with open('svm_model.pkl', 'wb') as f:
        pickle.dump(model, f)

    files.download('svm_model.pkl')

## Using Random forest

In [76]:
def randomforest(X, y, num_folds=5):
    #Split into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    #cross-validation on the training set
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=24)

    precisions = []
    accuracies = []

    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(X_train)):
        X_train_fold, X_val_fold = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_train_fold, y_val_fold = y_train.iloc[train_idx], y_train.iloc[val_idx]

        rf_model = RandomForestClassifier(
            n_estimators=100,
            random_state=42,
            max_depth=8,
            min_samples_leaf=3,
            max_features="sqrt"
        )
        rf_model.fit(X_train_fold, y_train_fold)
        y_pred_val = rf_model.predict(X_val_fold)

        #precision and accuracy for each fold
        fold_precision = precision_score(y_val_fold, y_pred_val, average='binary')
        fold_accuracy = accuracy_score(y_val_fold, y_pred_val)

        precisions.append(fold_precision)
        accuracies.append(fold_accuracy)

        print(f"Fold {fold_idx + 1} Classification Report:")
        print(classification_report(y_val_fold, y_pred_val))

    #Print average precision and accuracy for cross-validation
    print(f"Average Precision across {num_folds} folds: {np.mean(precisions)}")
    print(f"Average Accuracy across {num_folds} folds: {np.mean(accuracies)}")

    #Train the final model on the full training data
    final_rf_model = RandomForestClassifier(
        n_estimators=100,
        random_state=42,
        max_depth=8,  # Use the same tuned parameters
        min_samples_leaf=3,
        max_features="sqrt"
    )
    final_rf_model.fit(X_train, y_train)

    # Evaluate the final model on the test set
    y_test_pred = final_rf_model.predict(X_test)
    print("Final Test Set Evaluation:")
    print(classification_report(y_test, y_test_pred))

    # Calculate accuracy and precision for the final model
    final_accuracy = accuracy_score(y_test, y_test_pred)
    final_precision = precision_score(y_test, y_test_pred, average='binary')  # Use 'binary' or 'macro' based on the task

    print(f"Final Model Accuracy: {final_accuracy}")
    print(f"Final Model Precision: {final_precision}")

    # Print confusion matrix for the test set
    cm_test = confusion_matrix(y_test, y_test_pred)
    print("Confusion Matrix - Final Model (Test Set):")
    print(cm_test)

    with open('rf_model.pkl', 'wb') as f:
      pickle.dump(final_rf_model, f)

    files.download('rf_model.pkl')

## Using Neural Networks

In [77]:
def neuralnetwork(X, y, num_folds=5, patience=5, test_size=0.2):
    # Step 1: Split data into train and test sets
    X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=test_size, random_state=42)

    # Convert DataFrames to numpy arrays if they are pandas DataFrames
    X_train_full = X_train_full.values
    X_test = X_test.values
    y_train_full = y_train_full.values
    y_test = y_test.values

    # Initialize lists to store accuracy and precision for each fold
    accuracies = []
    precisions = []

    # Step 2: Perform cross-validation on the training set
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=24)
    models = []  # List to store models from each fold

    for fold_idx, (train_idx, val_idx) in enumerate(kf.split(X_train_full)):
        X_train, X_val = X_train_full[train_idx], X_train_full[val_idx]
        y_train, y_val = y_train_full[train_idx], y_train_full[val_idx]

        # Flatten y_train to ensure it's a 1D array
        y_train = y_train.flatten()
        y_val = y_val.flatten()

        # Build the model
        model = keras.Sequential([
            keras.layers.Input(shape=(X_train.shape[1],)),
            keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
            keras.layers.Dense(32, activation='relu', kernel_regularizer=keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
            keras.layers.Dense(3, activation='softmax')  # 3 outputs for 3 classes (multiclass classification)
        ])
        model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        # EarlyStopping callback
        early_stopping = EarlyStopping(
            monitor='val_loss',
            patience=patience,
            restore_best_weights=True
        )

        # Train the model
        model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_val, y_val), callbacks=[early_stopping])

        # Append the trained model to the list
        models.append(model)

        # Evaluate on the validation set
        y_val_pred = model.predict(X_val)
        y_val_pred_classes = np.argmax(y_val_pred, axis=1)  # Get the predicted class

        # Compute accuracy and precision for this fold
        fold_accuracy = accuracy_score(y_val, y_val_pred_classes)
        fold_precision = precision_score(y_val, y_val_pred_classes, average='weighted', zero_division=0)

        # Store the accuracy and precision for this fold
        accuracies.append(fold_accuracy)
        precisions.append(fold_precision)

        # Metrics for each fold
        print(f"Fold {fold_idx + 1}:")
        print("Validation Confusion Matrix:")
        print(confusion_matrix(y_val, y_val_pred_classes))
        print(classification_report(y_val, y_val_pred_classes))

    # Print average accuracy and precision across all folds
    print(f"Average Accuracy across all folds: {np.mean(accuracies):.4f}")
    print(f"Average Precision across all folds: {np.mean(precisions):.4f}")

    # Step 3: Train the final model on the full training set
    final_model = keras.Sequential([
        keras.layers.Input(shape=(X_train_full.shape[1],)),
        keras.layers.Dense(64, activation='relu', kernel_regularizer=keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
        keras.layers.Dense(32, activation='relu', kernel_regularizer=keras.regularizers.l1_l2(l1=0.01, l2=0.01)),
        keras.layers.Dense(3, activation='softmax')  # 3 outputs for 3 classes
    ])
    final_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the final model
    early_stopping_final = EarlyStopping(
        monitor='val_loss',
        patience=patience,
        restore_best_weights=True
    )
    final_model.fit(X_train_full, y_train_full, epochs=100, batch_size=32, validation_split=0.2, callbacks=[early_stopping_final])

    # Step 4: Evaluate the final model on the test set
    y_test_pred = final_model.predict(X_test) # if scaled therr will be X test scaled
    y_test_pred_classes = np.argmax(y_test_pred, axis=1)

    print("\nFinal Model Evaluation on Test Set:")
    print("Confusion Matrix:")
    print(confusion_matrix(y_test, y_test_pred_classes))
    print(classification_report(y_test, y_test_pred_classes))

    with open('nn_model.pkl', 'wb') as f:
      pickle.dump(final_model, f)

    files.download('nn_model.pkl')

# Approach 1 - Downsampling to 1hz

In [49]:
#100HZ TO 1HZ
PS1 = mean_conversion_1hz(pressureFile1)
PS1.columns = [f'PS1_{i}' for i in range(PS1.shape[1])]

PS2 = mean_conversion_1hz(pressureFile2)
PS2.columns = [f'PS2_{i}' for i in range(PS2.shape[1])]

PS3 = mean_conversion_1hz(pressureFile3)
PS3.columns = [f'PS3_{i}' for i in range(PS3.shape[1])]

PS4 = mean_conversion_1hz(pressureFile4)
PS4.columns = [f'PS4_{i}' for i in range(PS4.shape[1])]

PS5 = mean_conversion_1hz(pressureFile5)
PS5.columns = [f'PS5_{i}' for i in range(PS5.shape[1])]

PS6 = mean_conversion_1hz(pressureFile6)
PS6.columns = [f'PS6_{i}' for i in range(PS6.shape[1])]

P1 = mean_conversion_1hz(pump1)
P1.columns = [f'P1_{i}' for i in range(P1.shape[1])]

#10hz to 1hz

FS1 = pd.DataFrame(mean_conversion_100hz(volumeFlow1))
FS1.columns = [f'FS1_{i}' for i in range(FS1.shape[1])]

FS2 = pd.DataFrame(mean_conversion_100hz(volumeFlow2))
FS2.columns = [f'FS2_{i}' for i in range(FS2.shape[1])]

#Leaving 1HZ as it is

TS1 = pd.DataFrame(temperature1)
TS1.columns = [f'TS1_{i}' for i in range(TS1.shape[1])]

TS2 = pd.DataFrame(temperature2)
TS2.columns = [f'TS2_{i}' for i in range(TS2.shape[1])]

TS3 = pd.DataFrame(temperature3)
TS3.columns = [f'TS3_{i}' for i in range(TS3.shape[1])]

TS4 = pd.DataFrame(temperature4)
TS4.columns = [f'TS4_{i}' for i in range(TS4.shape[1])]

VS1 = pd.DataFrame(vibration1)
VS1.columns = [f'VS1_{i}' for i in range(VS1.shape[1])]

CE1 = pd.DataFrame(coolingE1)
CE1.columns = [f'CE1_{i}' for i in range(CE1.shape[1])]

CP1 = pd.DataFrame(coolingP1)
CP1.columns = [f'CP1_{i}' for i in range(CP1.shape[1])]

SE1 = pd.DataFrame(effFactor1)
SE1.columns = [f'SE1_{i}' for i in range(SE1.shape[1])]

In [50]:
print(PS1.shape)
print(PS2.shape)
print(PS3.shape)
print(PS4.shape)
print(PS5.shape)
print(PS6.shape)
print(P1.shape)
print(FS1.shape)
print(FS2.shape)
print(TS1.shape)
print(TS2.shape)
print(TS3.shape)
print(TS4.shape)
print(VS1.shape)
print(CE1.shape)
print(CP1.shape)
print(SE1.shape)

X = pd.concat([PS1, PS2, PS3, PS4, PS5, PS6, FS1, FS2, TS1, TS2, TS3, TS4, P1, VS1, CE1, CP1, SE1], axis=1)
# del PS1, PS2, PS3, PS4, PS5, PS6, pressureFile3, pressureFile4, pressureFile5, pressureFile6, pressureFile1, pressureFile2, FS1, FS2, volumeFlow1, volumeFlow2
# del temperature1, temperature2, temperature3, temperature4, pump1, vibration1, coolingE1, coolingP1, effFactor1
# del profile, TS1, TS2, TS3, TS4, P1, VS1, CE1, CP1, SE1
gc.collect()

(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)
(2205, 60)


124

In [51]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
X = pd.DataFrame(X)
print(X.shape)

(2205, 1020)


In [52]:
xgb_(X, y_stableFlag)

Fold 1:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       310
           1       0.97      0.93      0.95       131

    accuracy                           0.97       441
   macro avg       0.97      0.96      0.96       441
weighted avg       0.97      0.97      0.97       441

Fold 1 F1-score: 0.9704
Fold 1 Accuracy: 0.9705
Fold 1 Precision: 0.9705
Confusion Matrix:
[[306   4]
 [  9 122]]
------------------------------
Fold 2:
              precision    recall  f1-score   support

           0       0.95      0.99      0.97       277
           1       0.97      0.92      0.95       164

    accuracy                           0.96       441
   macro avg       0.96      0.95      0.96       441
weighted avg       0.96      0.96      0.96       441

Fold 2 F1-score: 0.9612
Fold 2 Accuracy: 0.9615
Fold 2 Precision: 0.9619
Confusion Matrix:
[[273   4]
 [ 13 151]]
------------------------------
Fold 3:
              precision    recal

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [53]:
svm(X,y_stableFlag)

Precision scores for each fold: [0.8024691358024691, 0.865979381443299, 0.7959183673469388, 0.7551020408163265, 0.7551020408163265]
Average precision: 0.7949141932450718
Average accuracy: 0.8301706384230656
Confusion matrix for training set:
 [[906  97]
 [165 375]]
Test set accuracy: 0.8595166163141994
Test set precision: 0.7374517374517374
Test set confusion matrix:
 [[378  68]
 [ 25 191]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [54]:
randomforest(X,y_stableFlag)

Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       220
           1       1.00      0.92      0.96       133

    accuracy                           0.97       353
   macro avg       0.98      0.96      0.97       353
weighted avg       0.97      0.97      0.97       353

Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.99      1.00      0.99       234
           1       1.00      0.97      0.99       119

    accuracy                           0.99       353
   macro avg       0.99      0.99      0.99       353
weighted avg       0.99      0.99      0.99       353

Fold 3 Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       232
           1       1.00      0.94      0.97       121

    accuracy                           0.98       353
   macro avg       0.99      0.97     

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [55]:
neuralnetwork(X,y_stableFlag)

Epoch 1/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.6370 - loss: 23.6368 - val_accuracy: 0.7309 - val_loss: 9.4082
Epoch 2/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7701 - loss: 6.9525 - val_accuracy: 0.7252 - val_loss: 3.0452
Epoch 3/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7020 - loss: 2.7588 - val_accuracy: 0.6969 - val_loss: 1.9541
Epoch 4/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.7479 - loss: 1.8091 - val_accuracy: 0.6941 - val_loss: 1.4612
Epoch 5/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step - accuracy: 0.8019 - loss: 1.3178 - val_accuracy: 0.8414 - val_loss: 1.0814
Epoch 6/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8223 - loss: 1.0258 - val_accuracy: 0.7592 - val_loss: 1.0193
Epoch 7/100
[1m45/45[0m [32m━

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Approach 2 - Upsampling and Downsampling to 10hz

In [56]:
#100HZ TO 10HZ
PS1 = mean_conversion_100hz(pressureFile1)
PS1.columns = [f'PS1_{i}' for i in range(PS1.shape[1])]

PS2 = mean_conversion_100hz(pressureFile2)
PS2.columns = [f'PS2_{i}' for i in range(PS2.shape[1])]

PS3 = mean_conversion_100hz(pressureFile3)
PS3.columns = [f'PS3_{i}' for i in range(PS3.shape[1])]

PS4 = mean_conversion_100hz(pressureFile4)
PS4.columns = [f'PS4_{i}' for i in range(PS4.shape[1])]

PS5 = mean_conversion_100hz(pressureFile5)
PS5.columns = [f'PS5_{i}' for i in range(PS5.shape[1])]

PS6 = mean_conversion_100hz(pressureFile6)
PS6.columns = [f'PS6_{i}' for i in range(PS6.shape[1])]

P1 = mean_conversion_100hz(pump1)
P1.columns = [f'P1_{i}' for i in range(P1.shape[1])]

#Leaving 10HZ as it is

FS1 = pd.DataFrame(volumeFlow1)
FS1.columns = [f'FS1_{i}' for i in range(FS1.shape[1])]

FS2 = pd.DataFrame(volumeFlow2)
FS2.columns = [f'FS2_{i}' for i in range(FS2.shape[1])]

#1HZ to 10HZ


TS1 = repeat_values(temperature1)
TS1.columns = [f'TS1_{i}' for i in range(TS1.shape[1])]

TS2 = repeat_values(temperature1)
TS2.columns = [f'TS1_{i}' for i in range(TS2.shape[1])]

TS3 = repeat_values(temperature1)
TS3.columns = [f'TS1_{i}' for i in range(TS3.shape[1])]

TS4 = repeat_values(temperature1)
TS4.columns = [f'TS1_{i}' for i in range(TS4.shape[1])]

VS1 = repeat_values(vibration1)
VS1.columns = [f'VS1_{i}' for i in range(VS1.shape[1])]

CE1 = repeat_values(coolingE1)
CE1.columns = [f'CE1_{i}' for i in range(CE1.shape[1])]

CP1 = repeat_values(coolingP1)
CP1.columns = [f'CP1_{i}' for i in range(CP1.shape[1])]

SE1 = repeat_values(effFactor1)
SE1.columns = [f'SE1_{i}' for i in range(SE1.shape[1])]


In [57]:
print(PS1.shape)
print(PS2.shape)
print(PS3.shape)
print(PS4.shape)
print(PS5.shape)
print(PS6.shape)
print(P1.shape)
print(FS1.shape)
print(FS2.shape)
print(TS1.shape)
print(TS2.shape)
print(TS3.shape)
print(TS4.shape)
print(VS1.shape)
print(CE1.shape)
print(CP1.shape)
print(SE1.shape)

X = pd.concat([PS1, PS2, PS3, PS4, PS5, PS6, FS1, FS2, TS1, TS2, TS3, TS4, P1, VS1, CE1, CP1, SE1], axis=1)
# del PS1, PS2, PS3, PS4, PS5, PS6, pressureFile3, pressureFile4, pressureFile5, pressureFile6, pressureFile1, pressureFile2, FS1, FS2, volumeFlow1, volumeFlow2
# del temperature1, temperature2, temperature3, temperature4, pump1, vibration1, coolingE1, coolingP1, effFactor1
# del profile, TS1, TS2, TS3, TS4, P1, VS1, CE1, CP1, SE1
gc.collect()

(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)
(2205, 600)


29329

In [58]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
X = pd.DataFrame(X)
print(X.shape)

(2205, 10200)


In [59]:
xgb_(X, y_stableFlag)

Fold 1:
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       310
           1       0.97      0.94      0.95       131

    accuracy                           0.97       441
   macro avg       0.97      0.96      0.97       441
weighted avg       0.97      0.97      0.97       441

Fold 1 F1-score: 0.9727
Fold 1 Accuracy: 0.9728
Fold 1 Precision: 0.9727
Confusion Matrix:
[[306   4]
 [  8 123]]
------------------------------
Fold 2:
              precision    recall  f1-score   support

           0       0.95      0.97      0.96       277
           1       0.96      0.91      0.93       164

    accuracy                           0.95       441
   macro avg       0.95      0.94      0.95       441
weighted avg       0.95      0.95      0.95       441

Fold 2 F1-score: 0.9498
Fold 2 Accuracy: 0.9501
Fold 2 Precision: 0.9503
Confusion Matrix:
[[270   7]
 [ 15 149]]
------------------------------
Fold 3:
              precision    recal

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [60]:
svm(X,y_stableFlag)

Precision scores for each fold: [0.9130434782608695, 0.8979591836734694, 0.8363636363636363, 0.7543859649122807, 0.8125]
Average precision: 0.842850452642051
Average accuracy: 0.7621233135796242
Confusion matrix for training set:
 [[962  41]
 [326 214]]
Test set accuracy: 0.9018126888217523
Test set precision: 0.8296943231441049
Test set confusion matrix:
 [[407  39]
 [ 26 190]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [61]:
randomforest(X,y_stableFlag)

Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       220
           1       1.00      0.93      0.96       133

    accuracy                           0.97       353
   macro avg       0.98      0.97      0.97       353
weighted avg       0.98      0.97      0.97       353

Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.99      0.99       234
           1       0.98      0.97      0.98       119

    accuracy                           0.99       353
   macro avg       0.99      0.98      0.98       353
weighted avg       0.99      0.99      0.99       353

Fold 3 Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.99       232
           1       1.00      0.94      0.97       121

    accuracy                           0.98       353
   macro avg       0.99      0.97     

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [62]:
neuralnetwork(X,y_stableFlag)

Epoch 1/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.6143 - loss: 50.5408 - val_accuracy: 0.6232 - val_loss: 7.2022
Epoch 2/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6407 - loss: 5.6054 - val_accuracy: 0.6232 - val_loss: 3.5394
Epoch 3/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.7023 - loss: 3.3782 - val_accuracy: 0.4731 - val_loss: 3.1462
Epoch 4/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step - accuracy: 0.6636 - loss: 2.8848 - val_accuracy: 0.7507 - val_loss: 2.5650
Epoch 5/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7351 - loss: 2.3516 - val_accuracy: 0.6261 - val_loss: 2.0978
Epoch 6/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7618 - loss: 2.0444 - val_accuracy: 0.6544 - val_loss: 1.9039
Epoch 7/100
[1m45/45[0m 

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Approach 3 - Mean of a cycle for each sensor value

In [78]:
PS1 = pd.DataFrame(mean_conversion(pressureFile1))
PS1.columns = ['PS1'] # Rename the column to 'PS1'

PS2 = pd.DataFrame(mean_conversion(pressureFile2))
PS2.columns = ['PS2']

PS3 = pd.DataFrame(mean_conversion(pressureFile3))
PS3.columns = ['PS3']

PS4 = pd.DataFrame(mean_conversion(pressureFile4))
PS4.columns = ['PS4']

PS5 = pd.DataFrame(mean_conversion(pressureFile5))
PS5.columns = ['PS5']

PS6 = pd.DataFrame(mean_conversion(pressureFile6))
PS6.columns = ['PS6']

FS1 = pd.DataFrame(mean_conversion(volumeFlow1))
FS1.columns = ['FS1']

FS2 = pd.DataFrame(mean_conversion(volumeFlow2))
FS2.columns = ['FS2']

TS1 = pd.DataFrame(mean_conversion(temperature1))
TS1.columns = ['TS1']

TS2 = pd.DataFrame(mean_conversion(temperature2))
TS2.columns = ['TS2']

TS3 = pd.DataFrame(mean_conversion(temperature3))
TS3.columns = ['TS3']

TS4 = pd.DataFrame(mean_conversion(temperature4))
TS4.columns = ['TS4']

P1 = pd.DataFrame(mean_conversion(pump1))
P1.columns = ['P1']

VS1 = pd.DataFrame(mean_conversion(vibration1))
VS1.columns = ['VS1']

CE1 = pd.DataFrame(mean_conversion(coolingE1))
CE1.columns = ['CE1']

CP1 = pd.DataFrame(mean_conversion(coolingP1))
CP1.columns = ['CP1']

SE1 = pd.DataFrame(mean_conversion(effFactor1))
SE1.columns = ['SE1']

In [79]:
print(PS1.shape)
print(PS2.shape)
print(PS3.shape)
print(PS4.shape)
print(PS5.shape)
print(PS6.shape)
print(P1.shape)
print(FS1.shape)
print(FS2.shape)
print(TS1.shape)
print(TS2.shape)
print(TS3.shape)
print(TS4.shape)
print(VS1.shape)
print(CE1.shape)
print(CP1.shape)
print(SE1.shape)

X = pd.concat([PS1, PS2, PS3, PS4, PS5, PS6, FS1, FS2, TS1, TS2, TS3, TS4, P1, VS1, CE1, CP1, SE1], axis=1)
# del PS1, PS2, PS3, PS4, PS5, PS6, pressureFile3, pressureFile4, pressureFile5, pressureFile6, pressureFile1, pressureFile2, FS1, FS2, volumeFlow1, volumeFlow2
# del temperature1, temperature2, temperature3, temperature4, pump1, vibration1, coolingE1, coolingP1, effFactor1
# del profile, TS1, TS2, TS3, TS4, P1, VS1, CE1, CP1, SE1
gc.collect()

(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)
(2205, 1)


25726

In [80]:
scaler = MinMaxScaler()
X = scaler.fit_transform(X)
X = pd.DataFrame(X)
print(X.shape)

(2205, 17)


In [81]:
xgb_(X, y_stableFlag)

Fold 1:
              precision    recall  f1-score   support

           0       0.96      0.99      0.98       310
           1       0.98      0.90      0.94       131

    accuracy                           0.97       441
   macro avg       0.97      0.95      0.96       441
weighted avg       0.97      0.97      0.97       441

Fold 1 F1-score: 0.9655
Fold 1 Accuracy: 0.9660
Fold 1 Precision: 0.9666
Confusion Matrix:
[[308   2]
 [ 13 118]]
------------------------------
Fold 2:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       277
           1       0.99      0.91      0.95       164

    accuracy                           0.97       441
   macro avg       0.97      0.96      0.96       441
weighted avg       0.97      0.97      0.97       441

Fold 2 F1-score: 0.9657
Fold 2 Accuracy: 0.9660
Fold 2 Precision: 0.9672
Confusion Matrix:
[[276   1]
 [ 14 150]]
------------------------------
Fold 3:
              precision    recal

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [82]:
randomforest(X,y_stableFlag)

Fold 1 Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.98       220
           1       1.00      0.92      0.96       133

    accuracy                           0.97       353
   macro avg       0.98      0.96      0.97       353
weighted avg       0.97      0.97      0.97       353

Fold 2 Classification Report:
              precision    recall  f1-score   support

           0       0.97      1.00      0.98       234
           1       0.99      0.94      0.97       119

    accuracy                           0.98       353
   macro avg       0.98      0.97      0.97       353
weighted avg       0.98      0.98      0.98       353

Fold 3 Classification Report:
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       232
           1       0.99      0.90      0.94       121

    accuracy                           0.96       353
   macro avg       0.97      0.95     

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [83]:
svm(X,y_stableFlag)

Precision scores for each fold: [0.9565217391304348, 1.0, 0.9393939393939394, 0.90625, 0.96]
Average precision: 0.9524331357048748
Average accuracy: 0.7290925902576388
Confusion matrix for training set:
 [[996   7]
 [411 129]]
Test set accuracy: 0.7507552870090635
Test set precision: 0.9180327868852459
Test set confusion matrix:
 [[441   5]
 [160  56]]


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [84]:
neuralnetwork(X,y_stableFlag)

Epoch 1/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.5616 - loss: 5.3362 - val_accuracy: 0.6232 - val_loss: 4.1475
Epoch 2/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6626 - loss: 3.8213 - val_accuracy: 0.6232 - val_loss: 3.0020
Epoch 3/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6545 - loss: 2.7516 - val_accuracy: 0.6232 - val_loss: 2.1332
Epoch 4/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6691 - loss: 1.9323 - val_accuracy: 0.6232 - val_loss: 1.4965
Epoch 5/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6722 - loss: 1.3595 - val_accuracy: 0.6232 - val_loss: 1.0927
Epoch 6/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6435 - loss: 1.0177 - val_accuracy: 0.6232 - val_loss: 0.8880
Epoch 7/100
[1m45/45[0m [32m━━━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.4986 - loss: 5.3759 - val_accuracy: 0.6629 - val_loss: 4.1677
Epoch 2/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6583 - loss: 3.8586 - val_accuracy: 0.6629 - val_loss: 3.0136
Epoch 3/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6341 - loss: 2.7988 - val_accuracy: 0.6629 - val_loss: 2.1586
Epoch 4/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6661 - loss: 1.9900 - val_accuracy: 0.6629 - val_loss: 1.5483
Epoch 5/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6456 - loss: 1.4464 - val_accuracy: 0.6629 - val_loss: 1.1478
Epoch 6/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6551 - loss: 1.0893 - val_accuracy: 0.6629 - val_loss: 0.9225
Epoch 7/100
[1m45/45[0m [32m━━━━━━━━━━━━━━━

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>