In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import Ridge
from sklearn.svm import SVR
from sklearn.feature_selection import SelectKBest, SelectPercentile, f_regression
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error

In [2]:
# Membaca dataset
data = pd.read_csv('Dataset UTS_Gasal 2425.csv')
data.head()

Unnamed: 0,squaremeters,numberofrooms,hasyard,haspool,floors,citycode,citypartrange,numprevowners,made,isnewbuilt,hasstormprotector,basement,attic,garage,hasstorageroom,hasguestroom,price,category
0,75523,3,no,yes,63,9373,3,8,2005,old,yes,4313,9005,956,no,7,7559081.5,Luxury
1,55712,58,no,yes,19,34457,6,8,2021,old,no,2937,8852,135,yes,9,5574642.1,Middle
2,86929,100,yes,no,11,98155,3,4,2003,new,no,6326,4748,654,no,10,8696869.3,Luxury
3,51522,3,no,no,61,9047,8,3,2012,new,yes,632,5792,807,yes,5,5154055.2,Middle
4,96470,74,yes,no,21,92029,4,2,2011,new,yes,5414,1172,716,yes,9,9652258.1,Luxury


In [3]:
# Memisahkan fitur (X) dan target (y)
X = data.drop(columns=['price']) 
y = data['price']  # Target kolom
X = pd.get_dummies(X, drop_first=True)  # Mengubah kategori menjadi numerik

In [4]:
# Membagi dataset menjadi data latih dan uji (80:20)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Mendefinisikan metode seleksi fitur (SelectKBest dan SelectPercentile)
feature_selection_methods = {
    'SelectKBest': SelectKBest(score_func=f_regression),
    'SelectPercentile': SelectPercentile(score_func=f_regression)
}

# Mendefinisikan metode scaling untuk model regresi
scalers = {
    'StandardScaler': StandardScaler(),
    'MinMaxScaler': MinMaxScaler()
}

In [6]:
# Pipeline untuk Ridge Regression
ridge_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Penskalaan fitur
    ('feature_selection', SelectKBest(score_func=f_regression)),  # Seleksi fitur menggunakan KBest
    ('regressor', Ridge())  # Model Ridge Regression
])

# Grid search parameter untuk Ridge Regression
ridge_param_grid = {
    'scaler': [StandardScaler(), MinMaxScaler()],  # Mencoba StandardScaler dan MinMaxScaler
    'feature_selection__k': [10, 15, 18],  # Jumlah fitur yang dipilih
    'regressor__alpha': [0.1, 1, 10]  # Parameter alpha pada Ridge
}

# Pipeline untuk Support Vector Regressor
svr_pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Penskalaan fitur
    ('feature_selection', SelectKBest(score_func=f_regression)),  # Seleksi fitur
    ('regressor', SVR())  # Model Support Vector Regressor
])

# Grid search parameter untuk Support Vector Regressor
svr_param_grid = {
    'scaler': [StandardScaler(), MinMaxScaler()],  # Mencoba StandardScaler dan MinMaxScaler
    'feature_selection__k': [10, 15, 18],  # Jumlah fitur yang dipilih
    'regressor__C': [0.1, 1, 10],  # Parameter C untuk regularisasi
    'regressor__kernel': ['linear', 'rbf']  # Jenis kernel yang digunakan
}

In [7]:
# Melakukan GridSearchCV untuk Ridge Regression
ridge_grid = GridSearchCV(ridge_pipeline, param_grid=ridge_param_grid, cv=5, scoring='neg_mean_squared_error')
ridge_grid.fit(X_train, y_train)

# Melakukan GridSearchCV untuk Support Vector Regressor
svr_grid = GridSearchCV(svr_pipeline, param_grid=svr_param_grid, cv=5, scoring='neg_mean_squared_error')
svr_grid.fit(X_train, y_train)

In [8]:
# Menampilkan parameter terbaik untuk masing-masing model
print("Ridge Regression Best Params:", ridge_grid.best_params_)
print("Support Vector Regressor Best Params:", svr_grid.best_params_)

Ridge Regression Best Params: {'feature_selection__k': 18, 'regressor__alpha': 0.1, 'scaler': StandardScaler()}
Support Vector Regressor Best Params: {'feature_selection__k': 18, 'regressor__C': 10, 'regressor__kernel': 'linear', 'scaler': StandardScaler()}


In [9]:
# Prediksi menggunakan model terbaik
ridge_pred = ridge_grid.predict(X_test)
svr_pred = svr_grid.predict(X_test)

# Menghitung Mean Squared Error untuk masing-masing model
ridge_mse = mean_squared_error(y_test, ridge_pred)
svr_mse = mean_squared_error(y_test, svr_pred)

In [10]:
# Menampilkan hasil Mean Squared Error
print("Ridge Regression MSE:", ridge_mse)
print("Support Vector Regressor MSE:", svr_mse)

Ridge Regression MSE: 3593757.043201725
Support Vector Regressor MSE: 7583744349459.359
