In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import xgboost as xgb
import catboost as cb
import lightgbm as lgb
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
import seaborn as sns
from imblearn.under_sampling import TomekLinks
from sklearn.ensemble import IsolationForest
from scipy import stats
from sklearn.preprocessing import label_binarize

In [2]:
df = pd.read_csv('phone_classification.csv')

In [3]:
# X/y split etc.
target = "price_range"

categorical_features = ['blue','dual_sim','four_g','three_g','touch_screen','wifi']

X = df.drop(columns=[target])
y = df[target]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# SCALING => some of the algorithms require this
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [4]:
from sklearn.ensemble import StackingClassifier

# 'Random Forest': RandomForestRegressor(),
# 'XGBoost': xgb.XGBRegressor(enable_categorical=True, objective='reg:squarederror'),
# 'LightGBM': lgb.LGBMRegressor(objective='regression')

# meta model is the judge or optimizer of the stacking algorithm
# => it assigns weights to the models above based on their performance
meta_model = LogisticRegression()

# stacking regression algorithm
stacking_model = StackingClassifier(
    estimators=[('xgb', xgb.XGBClassifier(enable_categorical=True, objective='multi:softprobar', num_class=len(X.columns))),
                ('randomforest', RandomForestClassifier(n_estimators=100)),
                ('lgbm', lgb.LGBMClassifier(objective='multiclass'))
                ],
    final_estimator = meta_model,
    cv=5
)

# fit the model
stacking_model.fit(X_train, y_train)

# test predictions and metrics
predictions = stacking_model.predict(X_test)

accuracy = stacking_model.score(X_test, y_test)
print()
print(f"Accuracy {accuracy}")

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000165 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1361
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 20
[LightGBM] [Info] Start training from score -1.371406
[LightGBM] [Info] Start training from score -1.396345
[LightGBM] [Info] Start training from score -1.373872
[LightGBM] [Info] Start training from score -1.403949
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000139 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1359
[LightGBM] [Info] Number of data points in the train set: 1280, number of used features: 20
[LightGBM] [Info] Start training from score -1.373872
[LightGBM] [Info] Start training from score -1.395714
[LightGBM] [Info] Start training from score -1.373872
[LightGBM] [Info] Start training from score -1

In [None]:
#mark 1