In [34]:
import numpy as np
import pandas as pd

In [35]:
# Read the three anomaly CSVs (horizontal, vertical, axial) into separate
# DataFrames, in that order.
df_horizontal, df_vertical, df_axial = map(
    pd.read_csv,
    ("anomaly_horizontal.csv", "anomaly_vertical.csv", "anomaly_axial.csv"),
)

In [36]:
# Show the column index of each loaded frame so the column names used in
# later cells can be checked against the data.
l1 = [df_horizontal, df_vertical, df_axial]
for frame in l1:
    print(frame.columns)

Index(['CF2_MNDE_H_VBT', 'anomaly'], dtype='object')
Index(['CF2_MNDE_V_VBT', 'anomaly'], dtype='object')
Index(['CF2_MOTOR_NDE_AXIAL_VIB', 'anomaly'], dtype='object')


In [37]:
import pandas as pd



# Combine the three per-axis frames side by side into a single DataFrame.
#
# pd.concat(axis=1) aligns rows on the index and pads shorter inputs with NaN.
# A NaN anomaly flag is not equal to -1, so a padded row would be counted as
# non-anomalous when the condition label is derived. Refuse to continue if the
# row counts differ.
# NOTE(review): assumes each frame still has the default positional index that
# pd.read_csv produces -- confirm if the CSVs are ever loaded with index_col.
frame_lengths = {
    'horizontal': len(df_horizontal),
    'vertical': len(df_vertical),
    'axial': len(df_axial),
}
if len(set(frame_lengths.values())) != 1:
    raise ValueError(f"Per-axis frames have different row counts: {frame_lengths}")

# Rename each frame's 'anomaly' column before concatenating so the combined
# frame never holds duplicate column names.
combined_df = pd.concat(
    [
        df_horizontal[['CF2_MNDE_H_VBT', 'anomaly']].rename(columns={'anomaly': 'anomaly_H'}),
        df_vertical[['CF2_MNDE_V_VBT', 'anomaly']].rename(columns={'anomaly': 'anomaly_V'}),
        df_axial[['CF2_MOTOR_NDE_AXIAL_VIB', 'anomaly']].rename(columns={'anomaly': 'anomaly_A'}),
    ],
    axis=1,
)

# Define the condition based on the anomaly values
def determine_condition(row):
    """Map one row's three anomaly flags to a condition label.

    Args:
        row: Any object indexable by 'anomaly_H', 'anomaly_V' and 'anomaly_A'
            (e.g. a pandas Series or a dict).

    Returns:
        'Good', 'Moderate', 'Critical' or 'Failure' when 0, 1, 2 or 3 of the
        flags are equal to -1, respectively.
    """
    # Position in this tuple == number of flags equal to -1.
    labels_by_count = ('Good', 'Moderate', 'Critical', 'Failure')
    flags = [row[key] for key in ('anomaly_H', 'anomaly_V', 'anomaly_A')]
    flagged = sum(1 for flag in flags if flag == -1)
    return labels_by_count[flagged]

# Label every row by how many of its three anomaly flags equal -1.
# This is the vectorized equivalent of
# combined_df.apply(determine_condition, axis=1): the row-wise apply makes one
# Python call per row, which is slow on a frame with millions of rows, while
# the column-wise comparison below produces the same labels.
anomaly_columns = ['anomaly_H', 'anomaly_V', 'anomaly_A']
condition_by_count = {0: 'Good', 1: 'Moderate', 2: 'Critical', 3: 'Failure'}
negative_count = combined_df[anomaly_columns].eq(-1).sum(axis=1)
combined_df['condition'] = negative_count.map(condition_by_count)

# Drop the anomaly columns if not needed anymore
combined_df = combined_df.drop(columns=anomaly_columns)

# Display the final DataFrame
combined_df


Unnamed: 0,CF2_MNDE_H_VBT,CF2_MNDE_V_VBT,CF2_MOTOR_NDE_AXIAL_VIB,condition
0,0.650,0.420,1.000,Good
1,0.646,0.422,1.010,Good
2,0.642,0.424,1.020,Good
3,0.638,0.426,1.030,Good
4,0.634,0.428,1.040,Good
...,...,...,...,...
2591991,0.654,0.408,0.866,Good
2591992,0.648,0.406,0.862,Good
2591993,0.642,0.404,0.858,Good
2591994,0.636,0.402,0.854,Good


In [38]:
# Count how many rows carry each condition label (shows the class balance).
combined_df["condition"].value_counts()

condition
Good        2179880
Moderate     196458
Failure      143975
Critical      71683
Name: count, dtype: int64

In [39]:
from sklearn.model_selection import train_test_split
# Target is the derived condition label; features are the three sensor columns.
y = combined_df.loc[:, 'condition']
X = combined_df.loc[:, ['CF2_MNDE_H_VBT', 'CF2_MNDE_V_VBT', 'CF2_MOTOR_NDE_AXIAL_VIB']]

# Hold out 30% of the rows for testing, stratified on y, with a fixed seed so
# the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)



In [40]:
from catboost import CatBoostClassifier
# Fit a CatBoost classifier on the training split, then predict labels for
# the held-out rows.
model = CatBoostClassifier(
    iterations=100,
    learning_rate=0.1,
    depth=6,
    verbose=False,
)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)



In [41]:
from sklearn.metrics import confusion_matrix, accuracy_score
# Score the held-out predictions: overall accuracy plus the confusion matrix.
# NOTE(review): confusion_matrix is called without `labels`, so the row/column
# order is whatever scikit-learn derives from the data -- confirm before
# reading individual cells.
acc = accuracy_score(y_true=y_test, y_pred=y_pred)
cf = confusion_matrix(y_true=y_test, y_pred=y_pred)

print(acc)
print(cf)

0.9978395033944231
[[ 21220     34      1    250]
 [    41  43152      0      0]
 [     0      0 653835    129]
 [    70      0   1155  57712]]


In [42]:
import joblib
# Persist the fitted model to disk with joblib; the call's return value is
# left as the cell output.
model_filename = 'catboost_model.joblib'
joblib.dump(value=model, filename=model_filename)

['catboost_model.joblib']

In [50]:
# Reload the persisted model and classify a single hand-entered reading.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code -- only load model files from a trusted source.
loaded_model = joblib.load('catboost_model.joblib')

# One-row frame using the same three feature columns the model was trained on.
new_data = pd.DataFrame({
    'CF2_MNDE_H_VBT': [6.695],  # Example value, replace with actual data
    'CF2_MNDE_V_VBT': [0.40],  # Example value, replace with actual data
    'CF2_MOTOR_NDE_AXIAL_VIB': [0.86]  # Example value, replace with actual data
})

predicted_label = loaded_model.predict(new_data)

# predict() returns one array per input row here, so predicted_label[0] is a
# one-element array and used to print as "['Moderate']". Flatten first so the
# bare label is printed.
print(f"Predicted Condition: {np.ravel(predicted_label)[0]}")

Predicted Condition: ['Moderate']


In [52]:
# Write the labelled dataset to train.csv, leaving out the index column.
combined_df.to_csv(path_or_buf="train.csv", index=False)