<a href="https://colab.research.google.com/github/dineshdinz12/Machine_learning/blob/main/Oil_Well_Pump.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Seed the global NumPy RNG so the synthetic dataset is reproducible.
np.random.seed(42)
n_samples = 1000

# (column name, low, high) for the uniformly distributed sensor readings.
# The order matters: each draw consumes the global RNG stream in sequence.
uniform_features = [
    ('pump_speed', 10, 20),
    ('motor_current', 20, 50),
    ('vibration', 0.1, 2.0),
    ('production_rate', 100, 500),
    ('motor_temperature', 50, 100),
]
columns = {
    name: np.random.uniform(low, high, n_samples)
    for name, low, high in uniform_features
}
# Integer day counter drawn from [0, 365), sampled after the uniform columns.
columns['days_since_maintenance'] = np.random.randint(0, 365, n_samples)

data = pd.DataFrame(columns)

# Rule-based target: a sample is flagged (1) when any single reading
# exceeds its threshold, otherwise it is 0.
exceeds_threshold = (
    data['vibration'].gt(1.5)
    | data['motor_current'].gt(45)
    | data['motor_temperature'].gt(90)
    | data['days_since_maintenance'].gt(300)
)
data['maintenance_needed'] = exceeds_threshold.astype(int)

# Feature matrix (all columns except the target) and target vector.
X = data.drop(columns='maintenance_needed')
y = data['maintenance_needed']

In [None]:
# Hold out half of the samples for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)

# Fit the scaler on the training split only, then apply the same transform
# to the test split so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train the classifier on the scaled training features.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Predict on the held-out split.
y_pred = rf_model.predict(X_test_scaled)

print("Classification Report:")
print(classification_report(y_test, y_pred))

# Pin the label order so rows/columns of the matrix always correspond to
# classes [0, 1], even if one class happens to be missing from the split.
class_labels = [0, 1]
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

# Previously the matrix was computed but never drawn, so plt.show() had
# nothing to render. Plot it as an annotated heatmap (rows = true labels,
# columns = predicted labels).
fig, ax = plt.subplots(figsize=(5, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    cbar=False,
    xticklabels=class_labels,
    yticklabels=class_labels,
    ax=ax,
)
ax.set(xlabel='Predicted label', ylabel='True label', title='Confusion Matrix')
plt.show()

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       213
           1       0.99      1.00      0.99       287

    accuracy                           0.99       500
   macro avg       0.99      0.99      0.99       500
weighted avg       0.99      0.99      0.99       500



In [None]:

def predict_maintenance(new_data):
    """Predict the maintenance label and probability for new pump readings.

    Uses the module-level ``scaler`` and ``rf_model`` objects, so both must
    already be fitted before this function is called.

    Parameters
    ----------
    new_data
        Feature rows to score; passed unchanged to ``scaler.transform``.

    Returns
    -------
    tuple
        ``(prediction, probability)`` where ``prediction`` is the output of
        ``rf_model.predict`` and ``probability`` is column 1 of
        ``rf_model.predict_proba`` (the second entry of ``rf_model.classes_``;
        presumably the "maintenance needed" class - confirm against training labels).
    """
    scaled_features = scaler.transform(new_data)
    predicted_labels = rf_model.predict(scaled_features)
    # Keep only the second probability column for every row.
    positive_class_proba = rf_model.predict_proba(scaled_features)[:, 1]
    return predicted_labels, positive_class_proba


# A single example reading, expressed as one record (one row in the frame).
example_reading = {
    'pump_speed': 15,
    'motor_current': 45,
    'vibration': 1.2,
    'production_rate': 300,
    'motor_temperature': 80,
    'days_since_maintenance': 325,
}
new_pump_data = pd.DataFrame([example_reading])

# Score the example with the trained model.
prediction, probability = predict_maintenance(new_pump_data)

# Report the input row followed by the predicted label and its probability.
print("\nNew Pump Data:")
print(new_pump_data)
print(f"Maintenance Needed: {'Yes' if prediction[0] == 1 else 'No'}")
print(f"Probability of Needing Maintenance: {probability[0]:.2f}")


New Pump Data:
   pump_speed  motor_current  vibration  production_rate  motor_temperature  \
0          15             45        1.2              300                 80   

   days_since_maintenance  
0                     325  
Maintenance Needed: Yes
Probability of Needing Maintenance: 0.95
