In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.feature_selection import SelectKBest, SelectPercentile, f_classif
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report

In [2]:
# Membaca dataset
data = pd.read_csv('Dataset UTS_Gasal 2425.csv')
data.head()

Unnamed: 0,squaremeters,numberofrooms,hasyard,haspool,floors,citycode,citypartrange,numprevowners,made,isnewbuilt,hasstormprotector,basement,attic,garage,hasstorageroom,hasguestroom,price,category
0,75523,3,no,yes,63,9373,3,8,2005,old,yes,4313,9005,956,no,7,7559081.5,Luxury
1,55712,58,no,yes,19,34457,6,8,2021,old,no,2937,8852,135,yes,9,5574642.1,Middle
2,86929,100,yes,no,11,98155,3,4,2003,new,no,6326,4748,654,no,10,8696869.3,Luxury
3,51522,3,no,no,61,9047,8,3,2012,new,yes,632,5792,807,yes,5,5154055.2,Middle
4,96470,74,yes,no,21,92029,4,2,2011,new,yes,5414,1172,716,yes,9,9652258.1,Luxury


In [3]:
# Memisahkan fitur (X) dan target (y)
X = data.drop(columns=['category'])  # Fitur
y = data['category']  # Target
X = pd.get_dummies(X, drop_first=True)  # Mengubah kategori menjadi numerik

In [4]:
# Membagi dataset menjadi data latih dan uji (80:20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Mendefinisikan metode seleksi fitur (SelectKBest dan SelectPercentile)
feature_selection_methods = {
    'SelectKBest': SelectKBest(score_func=f_classif),
    'SelectPercentile': SelectPercentile(score_func=f_classif)
}

# Mendefinisikan metode scaling untuk SVM
scalers = {
    'StandardScaler': StandardScaler(),
    'MinMaxScaler': MinMaxScaler()
}

In [6]:
# Pipeline untuk Gradient Boosting Classifier
gb_pipeline = Pipeline([
    ('feature_selection', SelectKBest(score_func=f_classif)),  # Seleksi fitur menggunakan KBest
    ('classifier', GradientBoostingClassifier())  # Model Gradient Boosting
])

# Grid search parameter untuk Gradient Boosting
gb_param_grid = {
    'feature_selection__k': [10, 15, 17],  # Jumlah fitur yang dipilih
    'classifier__n_estimators': [100, 200],  # Jumlah estimators
    'classifier__learning_rate': [0.01, 0.1],  # Learning rate
    'classifier__max_depth': [3, 5]  # Kedalaman maksimal
}

# Pipeline untuk SVM
svm_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Penskalaan fitur menggunakan StandardScaler
    ('feature_selection', SelectKBest(score_func=f_classif)),  # Seleksi fitur
    ('classifier', SVC())  # Model SVM
])

# Grid search parameter untuk SVM
svm_param_grid = {
    'scaler': [StandardScaler(), MinMaxScaler()],  # Mencoba StandardScaler dan MinMaxScaler
    'feature_selection__k': [10, 15, 17],  # Jumlah fitur yang dipilih
    'classifier__C': [0.1, 1, 10],  # Parameter regularisasi C
    'classifier__kernel': ['linear', 'rbf']  #merupakan Kernel yang digunakan
}

In [7]:
# Melakukan GridSearchCV untuk Gradient Boosting
gb_grid = GridSearchCV(gb_pipeline, param_grid=gb_param_grid, cv=5, scoring='accuracy')
gb_grid.fit(X_train, y_train)

# Melakukan GridSearchCV untuk SVM
svm_grid = GridSearchCV(svm_pipeline, param_grid=svm_param_grid, cv=5, scoring='accuracy')
svm_grid.fit(X_train, y_train)

In [8]:
# Menampilkan parameter terbaik untuk masing-masing model
print("Gradient Boosting Best Params:", gb_grid.best_params_)
print("SVM Best Params:", svm_grid.best_params_)

Gradient Boosting Best Params: {'classifier__learning_rate': 0.01, 'classifier__max_depth': 5, 'classifier__n_estimators': 100, 'feature_selection__k': 15}
SVM Best Params: {'classifier__C': 10, 'classifier__kernel': 'rbf', 'feature_selection__k': 10, 'scaler': StandardScaler()}


In [9]:
# Prediksi menggunakan model terbaik
gb_pred = gb_grid.predict(X_test)
svm_pred = svm_grid.predict(X_test)

In [10]:
# Menampilkan laporan klasifikasi untuk masing-masing model
print("Gradient Boosting Classification Report:")
print(classification_report(y_test, gb_pred))

print("SVM Classification Report:")
print(classification_report(y_test, svm_pred))

Gradient Boosting Classification Report:
              precision    recall  f1-score   support

       Basic       1.00      1.00      1.00       865
      Luxury       1.00      1.00      1.00       602
      Middle       1.00      1.00      1.00       533

    accuracy                           1.00      2000
   macro avg       1.00      1.00      1.00      2000
weighted avg       1.00      1.00      1.00      2000

SVM Classification Report:
              precision    recall  f1-score   support

       Basic       0.99      0.99      0.99       865
      Luxury       0.98      0.98      0.98       602
      Middle       0.97      0.97      0.97       533

    accuracy                           0.98      2000
   macro avg       0.98      0.98      0.98      2000
weighted avg       0.98      0.98      0.98      2000

