In [None]:
!pip install pandas scikit-learn matplotlib seaborn

# Predictive Maintenance of Nuclear Reactors

## Introduction

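In this notebook, we develop a machine learning model that predicts potential equipment failures from sensor data, as a proof of concept for predictive maintenance of nuclear reactors. The data comes from the AI4I 2020 Predictive Maintenance Dataset, a synthetic dataset that reflects industrial predictive-maintenance data; it is used here as a stand-in and does not contain measurements from an actual reactor. Predicting failures ahead of time helps schedule maintenance before a breakdown occurs and prevent catastrophic failures.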

### Objectives
- Data Collection and Preprocessing
- Exploratory Data Analysis (EDA)
- Model Selection and Training
- Model Evaluation
- Conclusion and Future Work

### Evaluation Parameters
- Accuracy
- Precision
- Recall
- F1-Score
- ROC-AUC

### Scoring Method
Each model will be scored based on the weighted average of the above metrics, with specific weights assigned based on the hackathon criteria.

In [None]:

## Data Collection
import pandas as pd

# CSV location of the AI4I 2020 dataset on archive.ics.uci.edu
data_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00601/ai4i2020.csv'

# Read the remote CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer=data_url)

# Preview the top of the table (five rows, the default of head())
df.head(n=5)


In [None]:

## Data Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

TARGET_COLUMN = 'Machine failure'
# Row identifiers carry no predictive signal, and one-hot encoding
# 'Product ID' would add roughly one dummy column per row.
ID_COLUMNS = ['UDI', 'Product ID']
# Per-mode failure flags. Per the dataset description the target is set
# whenever one of these is set, so using them as features leaks the label.
FAILURE_MODE_COLUMNS = ['TWF', 'HDF', 'PWF', 'OSF', 'RNF']

# Handle missing values and drop the identifier columns
df = df.dropna().drop(columns=ID_COLUMNS, errors='ignore')

# Encode categorical variables if any
df = pd.get_dummies(df)

# Separate the model inputs from the label (and from the leaking flags)
features = df.drop(columns=[TARGET_COLUMN] + FAILURE_MODE_COLUMNS, errors='ignore')
target = df[TARGET_COLUMN]

# Splitting data into training and testing sets.
# The split happens BEFORE scaling so the scaler never sees test rows, and it
# is stratified so both sides keep the same share of failures.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, target, test_size=0.2, random_state=42, stratify=target
)

# Feature scaling: statistics are learned from the training split only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full scaled matrix, kept under its original name for any later cell that
# still refers to it; it is produced by the train-fitted scaler.
scaled_features = scaler.transform(features)

# Sanity check: every row ended up in exactly one of the two splits
assert len(X_train) + len(X_test) == len(df)


In [None]:

## Exploratory Data Analysis (EDA)
import matplotlib.pyplot as plt
import seaborn as sns

# Continuous sensor readings of the AI4I 2020 dataset. A pair plot over every
# column of the one-hot encoded frame would need one panel per column pair,
# so only these columns are plotted (those missing from df are skipped).
pairplot_columns = [
    col
    for col in [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]',
    ]
    if col in df.columns
]

# Visualizing sensor data, coloured by the failure label
sns.pairplot(
    df[pairplot_columns + ['Machine failure']],
    vars=pairplot_columns,
    hue='Machine failure',
)
plt.show()


In [None]:

# Identifying patterns and anomalies
# Correlate only the sensor readings and the failure label: an annotated
# heatmap over every one-hot encoded column is unreadable and very expensive.
heatmap_columns = [
    col
    for col in [
        'Air temperature [K]',
        'Process temperature [K]',
        'Rotational speed [rpm]',
        'Torque [Nm]',
        'Tool wear [min]',
        'Machine failure',
    ]
    if col in df.columns
]
sns.heatmap(df[heatmap_columns].corr(), annot=True, fmt='.2f', cmap='coolwarm')
plt.title('Correlation between sensor readings and machine failure')
plt.show()


In [None]:

## Model Selection
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)

# Two candidate classifiers, both created with the same fixed seed (42)
rf_model = RandomForestClassifier(random_state=42)
# probability=True is requested so the SVM can provide class probabilities
svc_model = SVC(probability=True, random_state=42)


In [None]:

## Model Training
# Fit the Random Forest first, then the SVM, on the same training split
for _estimator in (rf_model, svc_model):
    _estimator.fit(X_train, y_train)

# fit() returns the estimator itself; keep the fitted SVM as the cell's
# last expression so the cell still displays it
svc_model


In [None]:

## Model Evaluation
def evaluate_model(model, X_test, y_test):
    """Print and return classification metrics for a fitted binary classifier.

    Parameters
    ----------
    model
        Fitted estimator exposing ``predict``. When it also exposes
        ``predict_proba``, the second probability column is used as the
        score for ROC-AUC.
    X_test
        Feature matrix handed to ``model.predict`` / ``model.predict_proba``.
    y_test
        True labels matching the rows of ``X_test``.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1, roc_auc)``. ``roc_auc`` is
        ``None`` when the model has no ``predict_proba``.
    """
    predictions = model.predict(X_test)
    probas = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else None
    accuracy = accuracy_score(y_test, predictions)
    precision = precision_score(y_test, predictions)
    recall = recall_score(y_test, predictions)
    f1 = f1_score(y_test, predictions)
    roc_auc = roc_auc_score(y_test, probas) if probas is not None else None

    print(f"Accuracy: {accuracy:.4f}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1-Score: {f1:.4f}")
    # Compare against None explicitly: a plain truthiness test would also
    # hide a legitimate score of 0.0.
    if roc_auc is not None:
        print(f"ROC-AUC: {roc_auc:.4f}")
    return accuracy, precision, recall, f1, roc_auc


In [None]:

# Evaluate the Random Forest and then the SVM on the test split,
# printing a header line before each report
_reports = []
for _header, _estimator in (
    ("Random Forest Model Performance:", rf_model),
    ("\nSVM Model Performance:", svc_model),
):
    print(_header)
    _reports.append(evaluate_model(_estimator, X_test, y_test))

# Unpack in the order the models were evaluated
rf_metrics, svc_metrics = _reports

In [None]:

# Side-by-side comparison of the metrics computed in the previous cell.
# (This cell used to repeat the SVM evaluation verbatim.)
metric_names = ["Accuracy", "Precision", "Recall", "F1-Score", "ROC-AUC"]
metrics_summary = pd.DataFrame(
    [rf_metrics, svc_metrics],
    index=["Random Forest", "SVM"],
    columns=metric_names,
)
metrics_summary