In [65]:
import numpy as np
import pandas as pd


In [66]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

In [67]:
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor

In [75]:
from catboost import CatBoostRegressor

In [82]:
data = pd.read_csv('data/smartwatches_cleaned.csv')

In [83]:
data.shape

(388, 36)

In [84]:
data.drop(['Unnamed: 0'], axis=1, inplace=True)

In [85]:
# data.head()

In [86]:
X = data.drop(['Discount Price'], axis=1)
y = data['Discount Price']

X_train, X_test, y_train , y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f'train_df: {X_train.shape}')
print(f'test_df: {X_test.shape}')
print(f'train_target: {y_train.shape}')
print(f'test_target: {y_test.shape}')

train_df: (310, 34)
test_df: (78, 34)
train_target: (310,)
test_target: (78,)


In [87]:
regressors = [
    LinearRegression(),
    DecisionTreeRegressor(),
    RandomForestRegressor(),
    GradientBoostingRegressor(),
    CatBoostRegressor(verbose=0),
]

In [88]:
results = {}

for regressor in regressors:
    model_name = regressor.__class__.__name__
    regressor.fit(X_train, y_train)
    y_pred = regressor.predict(X_test)
    
    # Calculate metrics
    mae = mean_squared_error(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    # Store metrics
    results[model_name] = {'MAE': mae, 'MSE': mse, 'R2': r2}

# Display results
for model_name, metrics in results.items():
    print(f"{model_name}:")
    for metric_name, metric_value in metrics.items():
        print(f"  {metric_name}: {metric_value}")
    print()

LinearRegression:
  MAE: 5351097.249935576
  MSE: 5351097.249935576
  R2: 0.615234742627797

DecisionTreeRegressor:
  MAE: 1221333.4487179487
  MSE: 1221333.4487179487
  R2: 0.9121812486702798

RandomForestRegressor:
  MAE: 1021733.1672307692
  MSE: 1021733.1672307692
  R2: 0.9265333058448908

GradientBoostingRegressor:
  MAE: 574099.2064682196
  MSE: 574099.2064682196
  R2: 0.958719974873082

CatBoostRegressor:
  MAE: 624576.5045401483
  MSE: 624576.5045401483
  R2: 0.9550904556031169



### We will use GradientBoostingRegressor