In [None]:
import warnings

import pandas as pd
from skelm import ELMRegressor
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import set_random_seed
from xgboost import XGBRegressor
warnings.filterwarnings('ignore')

# Load the dataset (semicolon-delimited CSV)
df = pd.read_csv("/content/e-shop clothing 2008.csv", delimiter=';')

# Data cleaning
# Columns considered unnecessary for the model; a missing one raises KeyError
columns_to_drop = ['year', 'month', 'day', 'session ID', 'model photography', 'page']
df = df.drop(columns=columns_to_drop)

# 'price 2' is removed only when the file actually contains it
df = df.drop(columns=['price 2'], errors='ignore')

# Data preprocessing
# Label-encode every object-dtype column and keep each fitted encoder,
# keyed by column name, so the integer codes can be mapped back later
encoders = {}
object_columns = [name for name in df.columns if df[name].dtype == 'object']
for name in object_columns:
    encoders[name] = LabelEncoder()
    df[name] = encoders[name].fit_transform(df[name])

# Split the frame into the target ('price') and the candidate features
y = df['price']
X = df.drop(columns='price')

# Selecting best features
selected_features = ['page 1 (main category)', 'page 2 (clothing model)', 'colour', 'location']
X_selected = X[selected_features]

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X_selected, y, test_size=0.2, random_state=42
)

# Min-max scale the features; the scaler learns its ranges from the training
# split only and the same transform is then applied to the test split
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Model Building
# Seed Python, NumPy and TensorFlow up front so that Restart & Run All gives
# repeatable models; without a seed the ELM hidden weights and the network's
# initial weights / batch shuffling differ on every run.
SEED = 42
NN_EPOCHS = 100
set_random_seed(SEED)

# XGBoost
xgb_model = XGBRegressor(random_state=SEED)
xgb_model.fit(X_train, y_train)

# Extreme Learning Machine (ELM)
elm_model = ELMRegressor(random_state=SEED)
elm_model.fit(X_train, y_train)

# Basic feed-forward neural network: two hidden ReLU layers (32 and 16 units)
# followed by a single linear output unit for the regression target
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')
model.fit(X_train, y_train, epochs=NN_EPOCHS, verbose=0)

# Wrapper class for Keras model
class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    """Expose a compiled Keras model through the scikit-learn regressor API.

    Parameters
    ----------
    model : object
        A compiled Keras model (anything offering ``fit(X, y, epochs=, verbose=)``
        and ``predict(X, verbose=)``).
    epochs : int, default=100
        Number of epochs passed to ``model.fit`` on every call to ``fit``.
    verbose : int, default=0
        Verbosity passed to both ``model.fit`` and ``model.predict``; ``0``
        keeps training and prediction silent.

    Notes
    -----
    Constructor arguments are stored unchanged under the same attribute names
    so that ``get_params`` / ``clone`` from ``BaseEstimator`` can rebuild the
    wrapper.
    NOTE(review): ``fit`` does not re-create the wrapped model, so calling it
    on an already trained model presumably continues training from the current
    weights -- confirm this is intended.
    """

    def __init__(self, model, epochs=100, verbose=0):
        self.model = model
        self.epochs = epochs
        self.verbose = verbose

    def fit(self, X, y):
        """Train the wrapped model on ``X``/``y`` and return ``self``."""
        self.model.fit(X, y, epochs=self.epochs, verbose=self.verbose)
        return self

    def predict(self, X):
        """Return the wrapped model's predictions for ``X`` as a 1-D array."""
        predictions = self.model.predict(X, verbose=self.verbose)
        # Flatten the model output so the caller gets one value per row
        return predictions.flatten()

# Adapt the Keras network to the scikit-learn estimator interface
keras_wrapper = KerasRegressorWrapper(model)

# Ensemble of the three models built above: XGBoost, ELM and the neural network
ensemble_members = [
    ('XGBoost', xgb_model),
    ('ELM', elm_model),
    ('NeuralNetwork', keras_wrapper),
]
voting_model = VotingRegressor(estimators=ensemble_members)

# Train the ensemble on the training split
voting_model.fit(X_train, y_train)

1. **XGBoost (Extreme Gradient Boosting)**:
   - XGBoost is an implementation of gradient boosting decision trees designed for speed and performance.
   - It builds multiple decision trees sequentially, where each new tree is fitted to correct the errors of the trees built before it.
   - XGBoost is known for its scalability, efficiency, and effectiveness in various machine learning competitions. (Source: [XGBoost Documentation](https://xgboost.readthedocs.io/en/latest/))

2. **ELM (Extreme Learning Machine)**:
   - ELM is a feedforward neural network with a single hidden layer whose weights are generated randomly and left untrained; only the output weights are learned, and they are computed analytically.
   - It is particularly suited for large-scale learning tasks due to its fast training speed and good generalization performance.
   - ELM is often used in regression and classification tasks, especially in scenarios with a large number of input features. (Source: [ELM Paper](https://ieeexplore.ieee.org/document/6255770))

3. **Neural Network**:
   - Neural networks are a class of machine learning models inspired by the structure and function of the human brain.
   - They consist of interconnected layers of nodes (neurons) that process input data and learn to make predictions through iterative training.
   - Neural networks are highly flexible and can model complex patterns in data, making them suitable for a wide range of tasks including regression, classification, and pattern recognition. (Source: [Neural Networks Overview](https://builtin.com/data-science/neural-network))

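These three models have complementary strengths and were chosen for their performance and suitability for this price-prediction task. The `VotingRegressor` above combines them by averaging their individual predictions.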

In [None]:
# Evaluate the ensemble model on the held-out test split
y_pred = voting_model.predict(X_test)
ensemble_mse = mean_squared_error(y_test, y_pred)
ensemble_mae = mean_absolute_error(y_test, y_pred)
# RMSE is the square root of the MSE computed above. The former
# `mean_squared_error(..., squared=False)` form was deprecated in
# scikit-learn 1.4 and removed in 1.6, so it is avoided here.
ensemble_rmse = ensemble_mse ** 0.5
ensemble_r2 = r2_score(y_test, y_pred)

# Evaluation metrics
print("Ensemble Model MSE:", ensemble_mse)
print("Ensemble Model MAE:", ensemble_mae)
print("Ensemble Model RMSE:", ensemble_rmse)
print("Ensemble Model R2 Score:", ensemble_r2)

# Cross-check: a regressor's `score` re-predicts on X_test and reports R^2,
# so this value is expected to match the R2 Score printed above
ensemble_score = voting_model.score(X_test, y_test)
print("Ensemble Model Score:", ensemble_score)

Ensemble Model MSE: 26.032677640973557
Ensemble Model MAE: 4.102363093551126
Ensemble Model RMSE: 5.102222813732614
Ensemble Model R2 Score: 0.8355782724940249
Ensemble Model Score: 0.8355782724940249
