In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline  # Added import for Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Seed NumPy's global random number generator so any code that draws from it
# is repeatable between runs.
np.random.seed(42)

# Load the dataset from a local CSV file.
# NOTE: the Kaggle address
# https://www.kaggle.com/datasets/fedesoriano/synchronous-machinedataset
# is the dataset's landing page rather than a raw CSV file, so download the
# file first and point the path below at the local copy.
df = pd.read_csv('SynchronousMachine.csv')  # Update this path

# Rename the columns positionally (names as per the document, Page 24).
# The file must therefore contain exactly these five columns, in this order.
df.columns = [
    'load_current',
    'power_factor',
    'power_factor_error',
    'excitation_current_change',
    'excitation_current',
]

# Target (y) is the excitation current; every remaining column is a feature (X).
# NOTE(review): the recorded run of this cell reports R-squared 1.000000 and
# zero error for Linear Regression, which suggests the target is an exact
# linear function of the features (presumably 'excitation_current_change').
# Confirm that keeping that column as a feature is intended and is not
# target leakage.
y = df['excitation_current']
X = df.drop(columns='excitation_current')

# Hold out 20% of the rows for testing (80-20 split, Page 10); the fixed
# random_state keeps the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Registry of the regressors to compare, keyed by the display name that is
# later used in the results report. Insertion order is the evaluation order.
# Estimators that accept a seed are given random_state=42.
models = {}
models['Linear Regression'] = LinearRegression()
models['Decision Tree'] = DecisionTreeRegressor(random_state=42)
models['Random Forest'] = RandomForestRegressor(random_state=42)
models['XGBoost'] = XGBRegressor(random_state=42)

# Filled in by the evaluation loop: model name -> {metric name: value}.
results = dict()

# Fit every registered model on the training split and score it on the
# held-out test split.
for name, model in models.items():
    # Standardize the features before the estimator (Page 24). Wrapping both
    # steps in a Pipeline means the scaler is fitted on the training data only
    # and then re-applied, unchanged, to the test data at prediction time.
    pipeline = Pipeline(steps=[('scaler', StandardScaler()), ('model', model)])

    # fit() returns the fitted pipeline, so training and prediction can be chained.
    y_pred = pipeline.fit(X_train, y_train).predict(X_test)

    # Error metrics on the test split; RMSE is derived from MSE.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    # scikit-learn returns MAPE as a fraction; scale it to a percentage.
    mape = mean_absolute_percentage_error(y_test, y_pred) * 100

    # Record the metrics under the model's display name, in report order.
    results[name] = dict(
        zip(
            ('R-squared', 'MAE', 'MSE', 'RMSE', 'MAPE'),
            (r2, mae, mse, rmse, mape),
        )
    )

# Print a report: one heading, then for each model a blank line, its name and
# its five metrics rendered with six decimal places.
print("Model Performance Metrics:")
for name, metrics in results.items():
    # A single print call per model; sep="\n" puts each entry on its own line.
    print(
        f"\n{name}:",
        f"R-squared: {metrics['R-squared']:.6f}",
        f"MAE: {metrics['MAE']:.6f}",
        f"MSE: {metrics['MSE']:.6f}",
        f"RMSE: {metrics['RMSE']:.6f}",
        f"MAPE: {metrics['MAPE']:.6f}%",
        sep="\n",
    )

Model Performance Metrics:

Linear Regression:
R-squared: 1.000000
MAE: 0.000000
MSE: 0.000000
RMSE: 0.000000
MAPE: 0.000000%

Decision Tree:
R-squared: 0.999695
MAE: 0.001911
MSE: 0.000010
RMSE: 0.003204
MAPE: 0.122591%

Random Forest:
R-squared: 0.999828
MAE: 0.001302
MSE: 0.000006
RMSE: 0.002407
MAPE: 0.083307%

XGBoost:
R-squared: 0.999514
MAE: 0.002743
MSE: 0.000016
RMSE: 0.004042
MAPE: 0.174964%
