In [3]:
import numpy as np
import pandas as pd


In [4]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

In [6]:
from catboost import CatBoostRegressor

In [7]:
from xgboost import XGBRegressor

In [8]:
data = pd.read_csv('data/smartwatches_cleaned.csv')

In [9]:
data.shape

(388, 36)

In [10]:
data.drop(['Unnamed: 0'], axis=1, inplace=True)

In [11]:
# data.head()

In [12]:
X = data.drop(['Discount Price'], axis=1)
y = data['Discount Price']

X_train, X_test, y_train , y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f'train_df: {X_train.shape}')
print(f'test_df: {X_test.shape}')
print(f'train_target: {y_train.shape}')
print(f'test_target: {y_test.shape}')

train_df: (310, 34)
test_df: (78, 34)
train_target: (310,)
test_target: (78,)


In [13]:
regressors = [
    LinearRegression(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    CatBoostRegressor(verbose=0),
    XGBRegressor()
]

In [14]:
results = {}

for regressor in regressors:
    model_name = regressor.__class__.__name__
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    
    # Calculate metrics
    mae = mean_squared_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Store metrics
    results[model_name] = {'MAE': mae, 'MSE': mse, 'R2': r2}

# Display results
for model_name, metrics in results.items():
    print(f"{model_name}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value}")
    print()

LinearRegression:
  MAE: 5351097.249935576
  MSE: 5351097.249935576
  R2: 0.615234742627797

DecisionTreeRegressor:
  MAE: 1178869.2435897435
  MSE: 1178869.2435897435
  R2: 0.9152345945640508

RandomForestRegressor:
  MAE: 1021313.7426897439
  MSE: 1021313.7426897439
  R2: 0.9265634641440093

GradientBoostingRegressor:
  MAE: 583223.1629791979
  MSE: 583223.1629791979
  R2: 0.9580639259711038

CatBoostRegressor:
  MAE: 624576.5045401483
  MSE: 624576.5045401483
  R2: 0.9550904556031169

XGBRegressor:
  MAE: 677006.2528464915
  MSE: 677006.2528464915
  R2: 0.9513205473658309



### We will use GradientBoostingRegressor