In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.base import BaseEstimator, TransformerMixin

In [2]:
# Custom transformer for interaction features
class InteractionFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a building-area x land-area column.

    ``transform`` returns a copy of its input carrying an extra
    ``bangunan_tanah`` column, the element-wise product of the
    ``luas_bangunan`` and ``luas_tanah`` columns.
    """

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X):
        """Return a new frame with ``bangunan_tanah`` added; ``X`` is not modified."""
        interaction = X['luas_bangunan'] * X['luas_tanah']
        # assign() works on a copy, so the caller's frame stays untouched.
        return X.assign(bangunan_tanah=interaction)


# Restore the previously saved model from disk. joblib unpickles the file,
# so only load artifacts that come from a trusted source.
# NOTE(review): presumably the pickle references InteractionFeatures, which
# would be why the class is defined in this cell before loading - confirm.
model_path = 'balikpapan_model.pkl'
loaded_model = joblib.load(filename=model_path)

In [3]:
# Load the cleaned dataset from CSV
file_path = '../Data_Clean/balikpapan_clean.csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataframe to understand its structure
print("First few rows of the dataset:")
print(data.head())

# Sampling parameters, named so the comment and the code cannot drift apart.
N_INFERENCE_ROWS = 100  # number of rows drawn for inference
SAMPLE_SEED = 10        # fixed seed so the same rows are drawn on every run

# Draw a reproducible random sample for inference. The size is capped at the
# dataset length because sample() without replacement raises ValueError when
# n exceeds the number of available rows.
data_inference = data.sample(
    n=min(N_INFERENCE_ROWS, len(data)),
    random_state=SAMPLE_SEED,
)

print("\nRandomly selected data for inference:")
print(data_inference)


First few rows of the dataset:
     harga              lokasi  luas_bangunan  luas_tanah sertifikat  \
0  1650000  Balikpapan Selatan          180.0         141        SHM   
1  2100000  Balikpapan Selatan           50.0         776        SHM   
2  1950000    Balikpapan Utara          139.7         206        SHM   
3  1180000    Balikpapan Utara           47.0          78        SHM   
4  2500000     Balikpapan Baru          160.0         169        HGB   

          furnish  kamar_tidur  kamar_mandi  listrik  lantai  
0     Unfurnished            4            3     2200       2  
1     Unfurnished            3            1      900       1  
2     Unfurnished            5            2     2200       1  
3  Semi-furnished            2            2     1300       1  
4     Unfurnished            4            3     4400       1  

Randomly selected data for inference:
        harga              lokasi  luas_bangunan  luas_tanah sertifikat  \
1340  1500000   Balikpapan Tengah           

In [4]:
def _format_rupiah(value):
    """Render a numeric price as 'Rp 1,234,500'; missing values become 'N/A'."""
    # int() cannot convert NaN, which pd.to_numeric(errors='coerce') may produce.
    return "N/A" if pd.isna(value) else f"Rp {int(value):,}"


# Rebuild the model input from the untouched rows of `data` instead of reading
# `data_inference` back: this cell replaces `data_inference` with display
# strings ("Rp ...", "12.34%"), so predicting from it on a re-run would feed
# those strings (and the earlier predictions) into the model and then crash
# while formatting.
inference_rows = data.loc[data_inference.index].copy()

# Perform prediction
predicted_prices = loaded_model.predict(inference_rows)

# Keep numeric working copies; formatting happens only at the very end.
actual_price = pd.to_numeric(inference_rows['harga'], errors='coerce')
predicted_price = pd.Series(
    np.asarray(predicted_prices, dtype=float).ravel(),
    index=inference_rows.index,
)

# Signed error relative to the actual price, in percent. It is computed from
# the unrounded prediction; a positive value means the model predicted less
# than the actual price.
diff_in_percent = (actual_price - predicted_price) / actual_price * 100

# Round the predicted price to the nearest hundred
predicted_price = predicted_price.round(-2)

# Format 'harga', 'predicted_price', and 'diff_in_percent' as display strings.
# assign() keeps 'harga' in its original position and appends the two new
# columns at the end.
data_inference = inference_rows.assign(
    harga=actual_price.map(_format_rupiah),
    predicted_price=predicted_price.map(_format_rupiah),
    diff_in_percent=diff_in_percent.map("{:.2f}%".format),
)

# Display the inference data with rounded predicted prices
print("\nData Inferensi dengan Harga Prediksi:")
data_inference


Data Inferensi dengan Harga Prediksi:


Unnamed: 0,harga,lokasi,luas_bangunan,luas_tanah,sertifikat,furnish,kamar_tidur,kamar_mandi,listrik,lantai,predicted_price,diff_in_percent
1340,"Rp 1,500,000",Balikpapan Tengah,96.0,200,SHM,Unfurnished,3,1,1300,1,"Rp 1,111,500",25.90%
1186,"Rp 2,200,000",Balikpapan Timur,400.0,700,SHM,Unfurnished,4,2,5500,1,"Rp 2,464,100",-12.00%
1367,"Rp 4,300,000",Balikpapan Tengah,400.0,610,SHM,Unfurnished,3,3,10000,1,"Rp 4,383,100",-1.93%
339,"Rp 1,450,000",Balikpapan Utara,300.0,201,SHM,Unfurnished,5,3,4400,2,"Rp 1,724,700",-18.94%
233,"Rp 1,800,000",Balikpapan Tengah,114.0,90,SHM,Unfurnished,7,3,2200,2,"Rp 1,541,300",14.37%
...,...,...,...,...,...,...,...,...,...,...,...,...
234,"Rp 1,800,000",Balikpapan Utara,72.0,800,SHM,Unfurnished,3,1,1300,1,"Rp 1,811,700",-0.65%
1437,"Rp 2,750,000",Balikpapan Baru,298.0,200,SHM,Unfurnished,5,4,6600,2,"Rp 2,726,100",0.87%
97,"Rp 3,500,000",Balikpapan Selatan,260.0,230,SHM,Semi-furnished,5,3,3500,2,"Rp 3,081,200",11.97%
348,"Rp 680,000",Balikpapan Selatan,140.0,150,SHM,Unfurnished,2,2,1300,1,"Rp 888,400",-30.65%


In [5]:
# Persist the inference table, including its row index, next to the cleaned data.
# Note: 'harga', 'predicted_price' and 'diff_in_percent' are written as the
# display strings built above ("Rp ...", "..%"), not as numbers.
data_inference.to_csv(path_or_buf='../Data_Clean/hasil_prediksi.csv', index=True)