In [None]:
# EXTERNAL DATA TEST
# Imports grouped as: standard library, third-party packages, Colab helper.
import time

import joblib
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score

from google.colab import drive

# Google Drive has to be mounted before the paths below can be read.
drive.mount('/content/drive')

# PATHS
# MODEL_DIR keeps its trailing slash: model file names are appended to it
# by plain string concatenation in the evaluation cell.
MODEL_DIR     = "/content/drive/MyDrive/Semester 7/Skripsi/Code/Final Model/"
DATA_EXT_PATH = "/content/drive/MyDrive/Semester 7/Skripsi/Dataset/DATA_UJI_EKSTERNAL.csv"

# LOAD EXTERNAL DATA
# Read the external test CSV and preview its first rows.
df_ext = pd.read_csv(DATA_EXT_PATH)
df_ext.head()

Mounted at /content/drive


Unnamed: 0,HARGA,KT,KM,LT,LB,LOKASI,DIPERBARUI
0,850 juta,3,2,82,130,Buah Batu,23/12/2025
1,"1,9 miliar",3,2,79,140,Rancasari,5/12/2025
2,"1,5 miliar",3,2,80,120,Bandung Kidul,15/12/2025
3,"19,5 miliar",5,5,822,927,Sukajadi,7/11/2025
4,"2,2 miliar",5,3,174,300,Lengkong,7/11/2025


In [None]:
# Positional rename of the seven raw CSV headers to the names used by the rest
# of the notebook.
# NOTE(review): the raw header order is HARGA, KT, KM, LT, LB, LOKASI,
# DIPERBARUI, so LT becomes 'luas_bangunan' and LB becomes 'luas_tanah'.
# Confirm this is the intended assignment and matches the training pipeline.
renamed_columns = ["harga", "kt", "km", "luas_bangunan", "luas_tanah", "lokasi", "cut of date"]
df_ext.columns = renamed_columns
df_ext.head()

Unnamed: 0,harga,kt,km,luas_bangunan,luas_tanah,lokasi,cut of date
0,850 juta,3,2,82,130,Buah Batu,23/12/2025
1,"1,9 miliar",3,2,79,140,Rancasari,5/12/2025
2,"1,5 miliar",3,2,80,120,Bandung Kidul,15/12/2025
3,"19,5 miliar",5,5,822,927,Sukajadi,7/11/2025
4,"2,2 miliar",5,3,174,300,Lengkong,7/11/2025


In [None]:
# PREPROCESSING
def parse_harga(val):
    """Convert an Indonesian-formatted price into millions of rupiah.

    The text is lower-cased; the 'rp' prefix, dots (thousands separators) and
    spaces are removed; the decimal comma is turned into a decimal point.

    Recognised forms:
      * '<number> miliar' (also spelled 'milyar') -> number * 1000
      * '<number> juta'                           -> number
      * a bare number                             -> number / 1,000,000
        (treated as a full rupiah amount)

    Parameters
    ----------
    val : object
        Raw price value, typically a string such as '1,9 miliar'.

    Returns
    -------
    float or None
        The price in millions of rupiah, or None when `val` is missing
        (per `pd.isnull`) or cannot be parsed.
    """
    if pd.isnull(val):
        return None

    raw = str(val).lower()
    normalized = (
        raw.replace('rp', '')
           .replace('.', '')
           .replace(',', '.')
           .replace(' ', '')
           .strip()
    )

    # (unit keyword, multiplier that converts the number to millions).
    # Checked in order, so 'miliar'/'milyar' win over 'juta'.
    units = (('miliar', 1000), ('milyar', 1000), ('juta', 1))

    try:
        for keyword, multiplier in units:
            if keyword in raw:
                return float(normalized.replace(keyword, '')) * multiplier
        # Plain number with at most one decimal point: a full rupiah amount.
        if normalized.replace('.', '', 1).isdigit():
            return float(normalized) / 1_000_000
    except ValueError:
        # float() rejected the remaining text; only this failure is treated as
        # "unparseable" so unrelated errors are no longer silently swallowed.
        return None
    return None

def clean_luas(val):
    """Extract a numeric area from a value such as 82, '82', '82 m2' or '82 m²'.

    Parameters
    ----------
    val : object
        Raw area value; may carry a square-metre unit suffix.

    Returns
    -------
    float or None
        The parsed number, or None when `val` is missing (per `pd.isnull`),
        contains no digits, or the remaining digits/dots do not form a number.
    """
    if pd.isnull(val):
        return None

    # Drop the ASCII unit first so the '2' of 'm2' is not read as part of the number.
    text = str(val).lower().replace('m2', '')

    # isdecimal() rather than isdigit(): isdigit() also accepts superscripts
    # such as '²', which float() cannot parse. Every other non-numeric
    # character (letters, spaces, mis-encoded unit symbols) is discarded here.
    number = ''.join(ch for ch in text if ch.isdecimal() or ch == '.')
    if number == '':
        return None
    try:
        return float(number)
    except ValueError:
        # e.g. several dots left over; report as unparseable like parse_harga does.
        return None

# Numeric price column (parse_harga returns millions of rupiah).
df_ext['harga_rumah'] = df_ext['harga'].apply(parse_harga)

# Both area columns go through the same cleaner, overwriting the raw values.
for area_col in ('luas_bangunan', 'luas_tanah'):
    df_ext[area_col] = df_ext[area_col].apply(clean_luas)

# The listing date column is dropped before the preview.
df_ext = df_ext.drop(columns='cut of date')
df_ext.head()

Unnamed: 0,harga,kt,km,luas_bangunan,luas_tanah,lokasi,harga_rumah
0,850 juta,3,2,82.0,130.0,Buah Batu,850.0
1,"1,9 miliar",3,2,79.0,140.0,Rancasari,1900.0
2,"1,5 miliar",3,2,80.0,120.0,Bandung Kidul,1500.0
3,"19,5 miliar",5,5,822.0,927.0,Sukajadi,19500.0
4,"2,2 miliar",5,3,174.0,300.0,Lengkong,2200.0


In [None]:
# Normalise district (kecamatan) names
# Whole-value aliases applied after the text clean-up below; the keys are the
# spellings left over once case, ', bandung', hyphens and repeated spaces are handled.
district_aliases = {
    'buahbatu': 'buah batu', 'ujungberung': 'ujung berung',
    'sumurbandung': 'sumur bandung', 'gede bage': 'gedebage',
    'babakanciparay': 'babakan ciparay',
    'bojongloa kaler bandung': 'bojongloa kaler',
    'cibeunying kaler bandung': 'cibeunying kaler',
    'cinambo bandung': 'cinambo'
}

lokasi_text = df_ext['lokasi'].str.lower().str.strip()
lokasi_text = lokasi_text.str.replace(', bandung', '', regex=False)
lokasi_text = lokasi_text.str.replace('-', ' ', regex=False)
lokasi_text = lokasi_text.str.replace(r'\s+', ' ', regex=True)

# Series.replace (not .str.replace): only values equal to a key are substituted.
df_ext['lokasi_clean'] = lokasi_text.replace(district_aliases)

# Map each district to a region
# Districts are listed per region (utara/tengah/barat/selatan/timur) and the
# grouping is then inverted into the district -> region lookup.
districts_by_region = {
    'utara': [
        'cidadap', 'coblong', 'sukajadi', 'sukasari', 'cibeunying kaler',
    ],
    'tengah': [
        'andir', 'astanaanyar', 'cicendo', 'sumur bandung',
        'bandung wetan', 'regol', 'lengkong',
    ],
    'barat': [
        'bandung kulon', 'bojongloa kaler', 'bojongloa kidul', 'babakan ciparay',
    ],
    'selatan': [
        'bandung kidul', 'batununggal', 'buah batu', 'kiaracondong',
        'cibeunying kidul',
    ],
    'timur': [
        'antapani', 'arcamanik', 'cibiru', 'gedebage', 'mandalajati',
        'panyileukan', 'rancasari', 'ujung berung', 'cinambo',
    ],
}

kecamatan_to_wilayah = {
    district: region
    for region, districts in districts_by_region.items()
    for district in districts
}

# Integer code per region.
# NOTE(review): presumably has to match the encoding used when the models were
# trained -- confirm against the training notebook.
wilayah_mapping = {'utara':1,'tengah':2,'barat':3,'selatan':4,'timur':5}

# District -> region name; districts missing from the lookup become NaN.
wilayah_names = df_ext['lokasi_clean'].map(kecamatan_to_wilayah)

# Report rows that received no region instead of letting <NA> flow silently
# into the model features.
missing_region = wilayah_names.isna()
if missing_region.any():
    unknown_lokasi = sorted(df_ext.loc[missing_region, 'lokasi_clean'].dropna().unique())
    print(f"{int(missing_region.sum())} baris tanpa wilayah; lokasi tidak dikenal: {unknown_lokasi}")

# Nullable integer dtype so rows without a region can hold <NA>.
df_ext['wilayah_encoded'] = wilayah_names.map(wilayah_mapping).astype('Int64')

In [None]:
# FEATURE ENGINEERING
# Force the raw inputs to numeric; anything unparseable becomes NaN.
numeric_inputs = ['kt', 'km', 'luas_bangunan', 'luas_tanah']
for name in numeric_inputs:
    df_ext[name] = pd.to_numeric(df_ext[name], errors='coerce')

# Derived columns are added in order, so 'kepadatan_kamar' can use the
# 'total_rooms' column created just before it.
df_ext = df_ext.assign(
    total_rooms=lambda d: d['kt'] + d['km'],
    luas_ratio=lambda d: d['luas_bangunan'] / d['luas_tanah'],
    kepadatan_kamar=lambda d: d['total_rooms'] / d['luas_bangunan'],
    sisa_lahan=lambda d: d['luas_tanah'] - d['luas_bangunan'],
    bangunan_log=lambda d: np.log1p(d['luas_bangunan']),
    # Built from the target price, so it must not be fed to the models.
    harga_per_m2=lambda d: d['harga_rumah'] / d['luas_bangunan'],
)

In [None]:
# FINAL FEATURES
# NOTE(review): column order presumably has to match the order the saved
# models were fitted with -- confirm against the training notebook.
final_features = [
    'kt', 'km', 'luas_bangunan', 'luas_tanah',          # raw listing attributes
    'total_rooms', 'kepadatan_kamar', 'bangunan_log',   # engineered features
    'wilayah_encoded',                                  # region code
]

print(final_features)
print("Jumlah fitur:", len(final_features))

# Feature matrix (independent copy) and the true prices as a NumPy array.
X_ext = df_ext.loc[:, final_features].copy()
y_true = df_ext['harga_rumah'].to_numpy()

['kt', 'km', 'luas_bangunan', 'luas_tanah', 'total_rooms', 'kepadatan_kamar', 'bangunan_log', 'wilayah_encoded']
Jumlah fitur: 8


In [None]:
# LOAD MODELS AND EVALUATE
# Display name -> pickle file name. Every file is the lower-cased display name
# followed by '_svr.pkl'; the names are the baseline plus each search-method
# prefix (GS, RS, BO) combined with each CV fold count (3, 5, 10).
model_names = ["Baseline"] + [
    f"{search}_CV{folds}"
    for search in ("GS", "RS", "BO")
    for folds in (3, 5, 10)
]
model_files = {label: f"{label.lower()}_svr.pkl" for label in model_names}

# One record per model: R2, MSE, RMSE and prediction time on the external set.
results = []

for name, file in model_files.items():
    # joblib.load unpickles the file, which can execute arbitrary code:
    # only load model files from a trusted location.
    model = joblib.load(MODEL_DIR + file)

    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # short durations; time.time() is wall-clock time and can jump.
    start = time.perf_counter()
    y_pred_log = model.predict(X_ext)
    end = time.perf_counter()

    # Predictions are inverted with expm1.
    # NOTE(review): assumes the models were fitted on log1p(price) -- confirm.
    y_pred = np.expm1(y_pred_log)

    r2   = r2_score(y_true, y_pred)
    mse  = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)

    results.append({
        "Model": name,
        "R2": r2,
        "MSE": mse,
        "RMSE": rmse,
        "Waktu_Prediksi": end - start  # seconds spent inside predict()
    })

# FINAL RESULTS
# Show floats with thousands separators and two decimals (notebook-wide setting).
pd.set_option('display.float_format', '{:,.2f}'.format)

# One row per model, best R2 first.
hasil_evaluasi = pd.DataFrame(results).sort_values(by="R2", ascending=False)

hasil_evaluasi


Unnamed: 0,Model,R2,MSE,RMSE,Waktu_Prediksi
4,RS_CV3,0.45,31177476.78,5583.68,0.01
8,BO_CV5,0.45,31232740.08,5588.63,0.01
7,BO_CV3,0.45,31440535.84,5607.19,0.01
1,GS_CV3,0.45,31440535.84,5607.19,0.01
6,RS_CV10,0.44,31692951.94,5629.65,0.01
2,GS_CV5,0.44,31878910.98,5646.14,0.01
5,RS_CV5,0.44,31878910.98,5646.14,0.01
3,GS_CV10,0.44,32073838.31,5663.38,0.01
9,BO_CV10,0.43,32373187.54,5689.74,0.01
0,Baseline,0.41,33788398.32,5812.78,0.01
