In [3]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Read the cereal dataset from disk
file_path = 'cereal.csv'
data = pd.read_csv(file_path)

# Discard the 'name' and 'mfr' columns; they are not used as features
data = data.drop(columns=['name', 'mfr'])

# Encode 'type' as an integer flag: 'C' becomes 0, every other value becomes 1
data['type'] = data['type'].map(lambda kind: int(kind != 'C'))

# Target is the 'rating' column; every remaining column is a feature
y = data['rating']  # Target variable
X = data.drop(columns='rating')  # Feature variables

# Hold out 20% of the rows for evaluation (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build and fit each regressor in one step (fit() returns the estimator itself)
# NOTE(review): SVR is fitted on the raw, unscaled features. scikit-learn's SVM
# guide recommends standardising inputs, so consider wrapping SVR in a
# StandardScaler pipeline -- this would change the SVM results, so confirm first.
rf_model = RandomForestRegressor(random_state=42).fit(X_train, y_train)
svm_model = SVR().fit(X_train, y_train)
gb_model = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)

# Predict ratings for the held-out rows with every fitted model
rf_predictions, svm_predictions, gb_predictions = (
    fitted.predict(X_test) for fitted in (rf_model, svm_model, gb_model)
)

# Helper that scores one set of predictions against the true values
def calculate_metrics(y_true, y_pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        A 4-tuple ``(mse, rmse, mae, r2)``: mean squared error, its square
        root, mean absolute error, and the R2 score.
    """
    squared_error = mean_squared_error(y_true, y_pred)
    return (
        squared_error,
        np.sqrt(squared_error),  # RMSE is derived from the MSE computed above
        mean_absolute_error(y_true, y_pred),
        r2_score(y_true, y_pred),
    )

# Score each model's held-out predictions against the true ratings
rf_mse, rf_rmse, rf_mae, rf_r2 = calculate_metrics(y_true=y_test, y_pred=rf_predictions)
svm_mse, svm_rmse, svm_mae, svm_r2 = calculate_metrics(y_true=y_test, y_pred=svm_predictions)
gb_mse, gb_rmse, gb_mae, gb_r2 = calculate_metrics(y_true=y_test, y_pred=gb_predictions)

# Report one summary line per model, every value rounded to two decimals
print(
    "Random Forest Metrics: MSE = {:.2f}, RMSE = {:.2f}, MAE = {:.2f}, R2 = {:.2f}".format(
        rf_mse, rf_rmse, rf_mae, rf_r2
    )
)
print(
    "SVM Metrics: MSE = {:.2f}, RMSE = {:.2f}, MAE = {:.2f}, R2 = {:.2f}".format(
        svm_mse, svm_rmse, svm_mae, svm_r2
    )
)
print(
    "Gradient Boosting Metrics: MSE = {:.2f}, RMSE = {:.2f}, MAE = {:.2f}, R2 = {:.2f}".format(
        gb_mse, gb_rmse, gb_mae, gb_r2
    )
)

Random Forest Metrics: MSE = 37.00, RMSE = 6.08, MAE = 5.12, R2 = 0.83
SVM Metrics: MSE = 191.51, RMSE = 13.84, MAE = 11.35, R2 = 0.13
Gradient Boosting Metrics: MSE = 25.50, RMSE = 5.05, MAE = 3.89, R2 = 0.88
