# Template Skrip Untuk Eksekusi

## 1. Load Library

In [None]:
import time
import pandas as pd
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

## 2. Load Dataset

In [None]:
# Remote CSV locations of the labelled training split and the public test split.
TRAIN_DATA_URL = 'https://raw.githubusercontent.com/da3gal/ml-casebook/main/machine%20learning/regression/house%20price%20dataset/train_data.csv'
TEST_DATA_URL = 'https://raw.githubusercontent.com/da3gal/ml-casebook/main/machine%20learning/regression/house%20price%20dataset/public_test_data.csv'

# Download both splits into DataFrames (requires network access).
train_data = pd.read_csv(TRAIN_DATA_URL)
test_data = pd.read_csv(TEST_DATA_URL)

## 3. Definisi Fungsi Prediksi

In [None]:
def _add_features(df, reference_year=2024):
    """Return a copy of *df* with the engineered predictor columns added.

    Only predictor columns are read; the target column ``price`` is never
    used here, so no feature can leak the target into the model. The input
    frame is copied first and is therefore left unmodified.

    Args:
        df: Frame containing ``sqft_living``, ``sqft_basement``,
            ``sqft_above``, ``sqft_lot``, ``yr_built``, ``yr_renovated``,
            ``bathrooms``, ``bedrooms``, ``floors``, ``statezip``,
            ``waterfront``, ``view`` and ``condition``.
        reference_year: Year used to turn build/renovation years into ages.

    Returns:
        A new DataFrame with the additional feature columns.
    """
    out = df.copy()
    living = out['sqft_living']
    floors = out['floors']

    out['basement_ratio'] = out['sqft_basement'] / living
    out['above_ratio'] = out['sqft_above'] / living
    out['lot_living_ratio'] = out['sqft_lot'] / living
    out['house_age'] = reference_year - out['yr_built']
    out['is_renovated'] = (out['yr_renovated'] > 0).astype(int)
    # Rows with yr_renovated == 0 yield reference_year here; `is_renovated`
    # is what distinguishes them from genuinely renovated houses.
    out['years_since_renovation'] = reference_year - out['yr_renovated']
    out['bathrooms_per_floor'] = out['bathrooms'] / floors
    out['bedrooms_per_floor'] = out['bedrooms'] / floors
    out['total_rooms'] = out['bedrooms'] + out['bathrooms']
    # Raw string avoids the invalid "\d" escape; expand=False returns a Series.
    out['zip_code'] = out['statezip'].str.extract(r'(\d+)', expand=False).astype(int)
    out['premium_location'] = ((out['waterfront'] == 1) | (out['view'] >= 4)).astype(int)
    out['condition_age_factor'] = out['condition'] * out['house_age']
    out['view_water_score'] = out['view'] * (out['waterfront'] + 1)
    out['total_area'] = living + out['sqft_lot']
    out['room_density'] = out['total_rooms'] / living
    out['renovation_impact'] = (out['yr_renovated'] - out['yr_built']) * out['is_renovated']
    return out


def prediksi(train_df, test_df):
    """Train a gradient-boosting regressor on *train_df* and score it on *test_df*.

    Rows of *train_df* whose ``price`` is 0 are discarded, engineered features
    are added to both frames, ``city`` is one-hot encoded, all predictors are
    min-max scaled (fitted on the training data only) and a
    ``GradientBoostingRegressor(random_state=42)`` is fitted.

    Neither input frame is modified. No feature is derived from ``price``:
    the former ``price_per_sqft`` column leaked the target into the
    predictors and has been removed.

    Args:
        train_df: Training data including the ``price`` target plus the
            ``street``, ``city`` and ``statezip`` columns.
        test_df: Evaluation data with the same columns; ``price`` is used
            only as ground truth for the RMSE.

    Returns:
        dict with ``"execution_time"`` (elapsed seconds, float) and
        ``"RMSE"`` (root mean squared error on *test_df*).
    """
    start_time = time.perf_counter()

    train_df = train_df[train_df['price'] != 0].reset_index(drop=True)
    train_df = _add_features(train_df).drop(columns=['street', 'statezip'])
    test_df = _add_features(test_df).drop(columns=['statezip'])

    train_df = pd.get_dummies(train_df, columns=['city'], drop_first=True)
    # Encode every test city and let reindex pick the training columns.
    # Using drop_first on the test frame independently could drop a city that
    # is NOT the training baseline and silently encode it as the baseline.
    test_df = pd.get_dummies(test_df, columns=['city'])
    # Aligning to the training columns also discards test-only columns
    # (e.g. `street`, unseen cities) and zero-fills cities absent from test.
    test_df = test_df.reindex(columns=train_df.columns, fill_value=0)

    X_train = train_df.drop(columns='price')
    y_train = train_df['price']
    X_test = test_df.drop(columns='price')
    y_test = test_df['price']

    scaler = MinMaxScaler(feature_range=(0, 1))
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    model = GradientBoostingRegressor(random_state=42)
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    # NOTE(review): fixed 3 s pause carried over from the template; it is
    # counted in execution_time — confirm it is intentional before removing.
    time.sleep(3)
    execution_time = time.perf_counter() - start_time

    return {"execution_time": execution_time, "RMSE": rmse}

## 4. Jalankan

In [None]:
# Run the full pipeline; the returned dict (timing and RMSE) is the cell's value.
prediksi(train_df=train_data, test_df=test_data)

{'execution_time': 4.704028844833374, 'RMSE': 38271.8273480759}