In [2]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR

# Read the raw CSV and discard the target columns that are not modelled here;
# only Target4 is kept as the quantity to predict.
unused_targets = ['Target1', 'Target2', 'Target3', 'Target5']
data = pd.read_csv("/content/Fuel_cell_performance_data-Full.csv").drop(columns=unused_targets)

# Target4 is the response; every remaining column is a predictor
y = data['Target4']
X = data.drop(columns=['Target4'])

# Hold out 30% of the rows for evaluation (fixed seed so the split is repeatable)
split = train_test_split(X, y, test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = split

# Candidate regressors, keyed by the display name used in the results table
# and in the plot file names.
#
# SVR is not scale invariant: with raw, differently-scaled features the RBF
# kernel is dominated by the largest-magnitude columns. It is therefore wrapped
# in a pipeline that standardizes the features first. The scaler is fitted on
# the training data only (inside `fit`), so no test-set statistics leak in.
# Linear regression and random forests do not need this treatment.
models = {
    "Linear Regression": LinearRegression(),
    "Random Forest": RandomForestRegressor(random_state=42),
    "Support Vector Regressor": make_pipeline(StandardScaler(), SVR()),
}

# List of per-model metric records (one dict per fitted model)
results = []

# Fit each candidate, score it on the held-out split, and save a diagnostic plot
for model_name, model in models.items():
    # Fit on the training split, then predict the held-out rows
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Score the predictions against the true held-out targets
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Keep one record per model; the keys become the CSV column headers
    record = {"Model": model_name, "Mean Squared Error": mse, "R-squared": r2}
    results.append(record)

    # Actual-vs-predicted scatter; the dashed diagonal marks a perfect prediction
    diagonal = [y_test.min(), y_test.max()]
    plt.figure(figsize=(8, 6))
    plt.scatter(y_test, y_pred, alpha=0.6, label="Predictions")
    plt.plot(diagonal, diagonal, color='red', linestyle='--', label="Ideal Fit")
    plt.title(f"{model_name}: Actual vs Predicted")
    plt.xlabel("Actual Values")
    plt.ylabel("Predicted Values")
    plt.legend()

    # Write the figure to disk and close it so figures do not pile up across models
    plt.savefig(f"{model_name}_plot.png")
    plt.close()

# Tabulate the collected metrics and persist them (without the index column)
results_df = pd.DataFrame(results)
results_df.to_csv("model_results.csv", index=False)

# The winner is the row with the highest R-squared
best_row_label = results_df['R-squared'].idxmax()
best_model = results_df.loc[best_row_label]
best_model_name, best_model_r2 = best_model['Model'], best_model['R-squared']

print(f"The best model is {best_model_name} with an R-squared value of {best_model_r2:.4f}.")


The best model is Linear Regression with an R-squared value of -0.0170.
