In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from skelm import ELMRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.base import BaseEstimator, RegressorMixin

import warnings
warnings.filterwarnings('ignore')

# Load the dataset (semicolon-delimited CSV).
df = pd.read_csv("/content/e-shop clothing 2008.csv", delimiter=';')

# Drop columns that are not used as model inputs. Reassigning instead of
# mutating in place keeps the step free of hidden in-place side effects.
columns_to_drop = ['year', 'month', 'day', 'session ID', 'model photography', 'page']
df = df.drop(columns=columns_to_drop)

# 'price 2' is not guaranteed to be present, so only drop it when it is.
if 'price 2' in df.columns:
    df = df.drop(columns='price 2')

# Label-encode every object-dtype column. Each fitted encoder is kept in
# `encoders` (keyed by column name) so the integer codes can be mapped back
# to the original labels later via `inverse_transform`.
encoders = {}
for col in df.columns:
    if df[col].dtype == 'object':
        label_encoder = LabelEncoder()
        df[col] = label_encoder.fit_transform(df[col])
        encoders[col] = label_encoder

# Separate features and target variable
X = df.drop(columns='price')
y = df['price']

# Restrict the feature matrix to the hand-picked subset of columns.
selected_features = ['page 1 (main category)', 'page 2 (clothing model)', 'colour', 'location']
X_selected = X[selected_features]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Min-max scale the features (this is rescaling, not standardization).
# The scaler is fitted on the training split only and then applied to the
# test split. The original row index is preserved so the scaled frames stay
# aligned with y_train / y_test for any later index-based pandas operation.
scaler = MinMaxScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

# Wrapper class for Keras model
class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    """Expose a compiled Keras model through the scikit-learn regressor API.

    This lets the network be used wherever scikit-learn expects an estimator
    with ``fit`` / ``predict`` (for example inside ``VotingRegressor``).

    Parameters
    ----------
    model : compiled Keras model
        The network to train and predict with. It must already be compiled.
    epochs : int, default=100
        Number of training epochs passed to ``model.fit``.
    verbose : int, default=0
        Verbosity passed to ``model.fit`` and ``model.predict``; 0 is silent.

    Notes
    -----
    Constructor arguments are stored unmodified under attributes of the same
    name so that ``get_params`` / ``clone`` from ``BaseEstimator`` work.
    """

    def __init__(self, model, epochs=100, verbose=0):
        self.model = model
        self.epochs = epochs
        self.verbose = verbose

    def fit(self, X, y):
        """Train the wrapped model on ``X`` / ``y`` and return ``self``."""
        self.model.fit(X, y, epochs=self.epochs, verbose=self.verbose)
        return self

    def predict(self, X):
        """Return the model's predictions for ``X`` flattened to a 1-D array."""
        return self.model.predict(X, verbose=self.verbose).flatten()

# Model Building
# XGBoost
xgb_model = XGBRegressor()
xgb_model.fit(X_train, y_train)

# Extreme Machine Learning Model (ELM)
elm_model = ELMRegressor()
elm_model.fit(X_train, y_train)

# Basic Deep Learning Model with two layers (Neural Network)
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Create wrapper for the Keras model
keras_wrapper = KerasRegressorWrapper(model)

# Ensemble model containing the top 3 models overall
voting_model = VotingRegressor(estimators=[
    ('XGBoost', xgb_model),
    ('ELM', elm_model),
    ('NeuralNetwork', keras_wrapper)
])

# Fit the voting regressor on the training data.
# VotingRegressor trains *clones* of the estimators it is given, so this call
# does not train `keras_wrapper` itself.
voting_model.fit(X_train, y_train)

# Train the standalone network explicitly; otherwise the "NeuralNetwork"
# entry below would be evaluated with its untrained initial weights.
# This is done after the ensemble fit so the ensemble's clone starts from
# the same untrained state as before.
keras_wrapper.fit(X_train, y_train)

# Evaluate the models
models = {
    "XGBoost": xgb_model,
    "ELM": elm_model,
    "NeuralNetwork": keras_wrapper,
    "Ensemble": voting_model
}

# The loop variable is deliberately not called `model`, so the Keras network
# bound to `model` above is not shadowed by the last evaluated estimator.
results = {}
for name, estimator in models.items():
    y_pred = estimator.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[name] = {"MSE": mse, "R2": r2}

# Presenting the test results (one row per model, columns MSE and R2)
results_df = pd.DataFrame(results).T
print("Test Results:")
print(results_df)



Test Results:
                       MSE         R2
XGBoost           0.003746   0.999976
ELM             103.544449   0.346016
NeuralNetwork  2060.526055 -12.014230
Ensemble         31.752322   0.799453


In [None]:
# Report every model's `score` on the held-out test split
# (the printed values match the R2 column of the results table).
for label, fitted in models.items():
    test_score = fitted.score(X_test, y_test)
    print(label, " Model Score : \n", test_score)

XGBoost  Model Score : 
 0.9999763409973748
ELM  Model Score : 
 0.34601590429350826
NeuralNetwork  Model Score : 
 -12.014229969501123
Ensemble  Model Score : 
 0.7994531471928811


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from skelm import ELMRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.base import BaseEstimator, RegressorMixin

import warnings
warnings.filterwarnings('ignore')

# Load the dataset (semicolon-delimited CSV).
df = pd.read_csv("/content/e-shop clothing 2008.csv", delimiter=';')

# Data cleaning
# Drop columns that are not used as model inputs. Reassigning instead of
# mutating in place keeps the step free of hidden in-place side effects.
columns_to_drop = ['year', 'month', 'day', 'session ID', 'model photography', 'page']
df = df.drop(columns=columns_to_drop)

# Check for missing values (per-column null counts).
missing_values = df.isnull().sum()
if missing_values.any():
    print("Missing values detected. Handling missing values...")
    # Forward fill missing values. `DataFrame.ffill()` replaces the
    # deprecated `fillna(method='ffill')` spelling.
    # NOTE: forward fill cannot fill nulls in the very first row(s).
    df = df.ffill()

# 'price 2' is not guaranteed to be present, so only drop it when it is.
if 'price 2' in df.columns:
    df = df.drop(columns='price 2')

# Data preprocessing
# Label-encode every object-dtype column. Each fitted encoder is kept in
# `encoders` (keyed by column name) so the integer codes can be mapped back
# to the original labels later via `inverse_transform`.
encoders = {}
for col in df.columns:
    if df[col].dtype == 'object':
        label_encoder = LabelEncoder()
        df[col] = label_encoder.fit_transform(df[col])
        encoders[col] = label_encoder

# Separate features and target variable
X = df.drop(columns='price')
y = df['price']

# Restrict the feature matrix to the hand-picked subset of columns.
selected_features = ['page 1 (main category)', 'page 2 (clothing model)', 'colour', 'location']
X_selected = X[selected_features]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X_selected, y, test_size=0.2, random_state=42)

# Min-max scale the features (this is rescaling, not standardization).
# The scaler is fitted on the training split only and then applied to the
# test split. The original row index is preserved so the scaled frames stay
# aligned with y_train / y_test for any later index-based pandas operation.
scaler = MinMaxScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X_train.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X_test.columns, index=X_test.index)

# Wrapper class for Keras model
class KerasRegressorWrapper(BaseEstimator, RegressorMixin):
    """Expose a compiled Keras model through the scikit-learn regressor API.

    This lets the network be used wherever scikit-learn expects an estimator
    with ``fit`` / ``predict`` (for example inside ``VotingRegressor``).

    Parameters
    ----------
    model : compiled Keras model
        The network to train and predict with. It must already be compiled.
    epochs : int, default=100
        Number of training epochs passed to ``model.fit``.
    verbose : int, default=0
        Verbosity passed to ``model.fit`` and ``model.predict``; 0 is silent.

    Notes
    -----
    Constructor arguments are stored unmodified under attributes of the same
    name so that ``get_params`` / ``clone`` from ``BaseEstimator`` work.
    """

    def __init__(self, model, epochs=100, verbose=0):
        self.model = model
        self.epochs = epochs
        self.verbose = verbose

    def fit(self, X, y):
        """Train the wrapped model on ``X`` / ``y`` and return ``self``."""
        self.model.fit(X, y, epochs=self.epochs, verbose=self.verbose)
        return self

    def predict(self, X):
        """Return the model's predictions for ``X`` flattened to a 1-D array."""
        return self.model.predict(X, verbose=self.verbose).flatten()

# Model Building
# XGBoost
xgb_model = XGBRegressor()
xgb_model.fit(X_train, y_train)

# Extreme Machine Learning Model (ELM)
elm_model = ELMRegressor()
elm_model.fit(X_train, y_train)

# Basic Deep Learning Model with two layers (Neural Network)
model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(16, activation='relu'),
    Dense(1)
])
model.compile(optimizer='adam', loss='mean_squared_error')

# Create wrapper for the Keras model
keras_wrapper = KerasRegressorWrapper(model)

# Ensemble model containing the top 3 models overall
voting_model = VotingRegressor(estimators=[
    ('XGBoost', xgb_model),
    ('ELM', elm_model),
    ('NeuralNetwork', keras_wrapper)
])

# Fit the voting regressor on the training data.
# VotingRegressor trains *clones* of the estimators it is given, so this call
# does not train `keras_wrapper` itself.
voting_model.fit(X_train, y_train)

# Train the standalone network explicitly; otherwise the "NeuralNetwork"
# entry below would be evaluated with its untrained initial weights.
# This is done after the ensemble fit so the ensemble's clone starts from
# the same untrained state as before.
keras_wrapper.fit(X_train, y_train)

# Evaluate the models
models = {
    "XGBoost": xgb_model,
    "ELM": elm_model,
    "NeuralNetwork": keras_wrapper,
    "Ensemble": voting_model
}

# The loop variable is deliberately not called `model`, so the Keras network
# bound to `model` above is not shadowed by the last evaluated estimator.
results = {}
for name, estimator in models.items():
    y_pred = estimator.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    results[name] = {"MSE": mse, "R2": r2}

# Presenting the test results (one row per model, columns MSE and R2)
results_df = pd.DataFrame(results).T
print("Test Results:")
print(results_df)



Test Results:
                       MSE         R2
XGBoost           0.003746   0.999976
ELM             102.310890   0.353807
NeuralNetwork  2075.365198 -12.107954
Ensemble         25.973726   0.835951


In [None]:
# Report every model's `score` on the held-out test split
# (the printed values match the R2 column of the results table).
for label, fitted in models.items():
    test_score = fitted.score(X_test, y_test)
    print(label, " Model Score : \n", test_score)

XGBoost  Model Score : 
 0.9999763409973748
ELM  Model Score : 
 0.353807027808155
NeuralNetwork  Model Score : 
 -12.107953618321693
Ensemble  Model Score : 
 0.8359506102563614
