# In [None]:
# Predictive Maintenance for Base Stations - Notebook

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
import joblib

# 1. Load dataset
# Read the raw CSV; the 'timestamp' column is parsed into datetimes on load.
df = pd.read_csv('base_station_dataset.csv', parse_dates=['timestamp'])
print("Shape:", df.shape)
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell; print it explicitly so the preview is actually shown here.
print(df.head())

# 2. Exploratory Data Analysis (EDA)
# Summary statistics for the loaded frame.
print(df.describe())

# Plot failure distribution
sns.countplot(x='failure', data=df)
plt.title('Failure Class Distribution')
plt.show()

# Correlation heatmap
# Correlate only numeric/boolean columns. Since pandas 2.0, DataFrame.corr()
# no longer silently drops non-numeric columns, so passing the whole frame
# (which holds the parsed 'timestamp' column) may raise or add meaningless
# entries. Selecting the columns explicitly behaves the same on old and new
# pandas versions.
numeric_df = df.select_dtypes(include=['number', 'bool'])
plt.figure(figsize=(10,6))
sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

# 3. Feature Engineering
# The label and the raw timestamp are excluded from the model inputs
# (time-based features can be extracted later if needed).
non_feature_cols = ['failure', 'timestamp']
y = df['failure']
X = df.drop(columns=non_feature_cols)

# Train/test split
# Hold out 20% of the rows, stratified on the label, with a fixed seed.
split_parts = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_test, y_train, y_test = split_parts

# 4. Model Training
# Random forest with balanced class weights and a fixed seed.
forest_params = {
    'n_estimators': 200,
    'class_weight': 'balanced',
    'random_state': 42,
}
# fit() returns the fitted estimator itself, so construction and training
# can be chained into a single assignment.
clf = RandomForestClassifier(**forest_params).fit(X_train, y_train)

# 5. Evaluation
# Hard class predictions, plus the probability column at index 1
# (assumed to correspond to the positive/failure class; confirm label order).
y_pred = clf.predict(X_test)
class_probabilities = clf.predict_proba(X_test)
y_proba = class_probabilities[:, 1]

# Text metrics: per-class report, then ROC AUC from the probabilities.
report_text = classification_report(y_test, y_pred)
print(report_text)
auc_value = roc_auc_score(y_test, y_proba)
print("ROC AUC:", auc_value)

# Confusion matrix rendered as an annotated heatmap of integer counts.
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()

# 6. Save model
# Persist the fitted classifier to disk with joblib.
model_path = 'rf_model.joblib'
joblib.dump(clf, model_path)
print("Model saved as rf_model.joblib")
