<h1 style='color:purple' align='center'>Memprediksi Harga Mobil di Indonesia</h1>

Dataset berisi ratusan data mobil bekas (902 baris) yang dapat diakses melalui <br>
https://www.carsome.id/beli-mobil-bekas

In [91]:
import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Ridge

In [92]:
# Load the car listings from the local JSON file and preview the first rows.
df1 = pd.read_json(path_or_buf="data/data_mobil.json")
df1.head(n=5)

Unnamed: 0,perusahaan,nama_mobil,tahun,odo,jenis_transmisi,harga
0,Toyota,Kijang,2017,76229,Automatic,377000000
1,Honda,Brio,2019,17643,Manual,138000000
2,Mazda,3,2018,33498,Automatic,330000000
3,Mazda,Cx-9,2018,51826,Automatic,550000000
4,Mitsubishi,Pajero,2016,107855,Automatic,379000000


In [93]:
# One-hot encode the model name and the transmission type. No prefix is given,
# so the dummy columns are named after the raw category values.
dummy_mobil = pd.get_dummies(df1["nama_mobil"])
dummy_transmisi = pd.get_dummies(df1["jenis_transmisi"])

# Remove the text columns from df1. `perusahaan` is dropped without being
# encoded, so it does not take part in the features.
# NOTE(review): df1 is rebound here, so re-running this cell on its own fails
# because the source columns are already gone.
df1 = df1.drop(
    labels=['perusahaan', 'nama_mobil', 'jenis_transmisi'],
    axis='columns',
)

In [94]:
# Check what is left in df1 after the text columns were removed.
df1.head(n=5)

Unnamed: 0,tahun,odo,harga
0,2017,76229,377000000
1,2019,17643,138000000
2,2018,33498,330000000
3,2018,51826,550000000
4,2016,107855,379000000


In [95]:
# Place the remaining df1 columns and both sets of dummy columns side by side.
train_df = pd.concat(
    objs=[df1, dummy_mobil, dummy_transmisi],
    axis=1,
)

## Modelling

In [96]:
# Display (number of rows, number of columns) of the assembled training table.
train_df.shape

(902, 81)

In [97]:
# Target is the `harga` column; the features are every other column.
y = train_df['harga']
X = train_df.drop('harga', axis=1)

In [98]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=10,
)

In [99]:
# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)

# NOTE(review): SVC is scikit-learn's support-vector *classifier*, while `y`
# is the `harga` column. Fitting it here treats every distinct price as a
# class label - confirm this is intended, or evaluate a regressor such as
# sklearn.svm.SVR instead.
svm = SVC(C=0.1, kernel='linear', random_state=1)
y_svc = svm.fit(X_train_std, y_train).predict(X_test_std)

mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_svc)
print("MAPE SVC:", mape)

MAPE SVC: 0.11414019773145051


In [100]:
# Ridge regression with alpha=5, trained on the unscaled feature table
# (X_train, not X_train_std).
ridge = Ridge(alpha=5).fit(X_train, y_train)
y_pred = ridge.predict(X_test)

# `mape` is reused from the SVC cell and now holds the Ridge score.
mape = mean_absolute_percentage_error(y_true=y_test, y_pred=y_pred)
print("MAPE ridge:", mape)

MAPE ridge: 0.17645050551321453


In [101]:
# Preview the feature table; predict_price builds its input row in this
# column layout.
X.head(n=5)

Unnamed: 0,tahun,odo,2,3,5,6,Accord,Agya,Almaz,Alphard,...,Vios,X-trail,X1,X5,Xenia,Xl7,Xpander,Yaris,Automatic,Manual
0,2017,76229,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
1,2019,17643,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
2,2018,33498,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
3,2018,51826,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,2016,107855,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0


In [102]:
def predict_price(mobil, tahun, odo, jenis_transmisi):
    """Estimate a car price with the fitted ``ridge`` model.

    A single feature row is built with the same column layout as ``X``:
    ``tahun`` and ``odo`` are filled in directly, and the dummy columns that
    match the requested model and transmission are set to 1. Every other
    column stays 0.

    Parameters
    ----------
    mobil : str or int
        Car model name. It is converted to ``str`` and passed through
        ``str.capitalize()`` before being matched against the column labels
        of ``X`` (so ``"avanza"`` matches ``"Avanza"`` and ``3`` matches
        ``"3"``).
    tahun : int or float
        Value written to the ``tahun`` column.
    odo : int or float
        Value written to the ``odo`` column.
    jenis_transmisi : str
        Transmission label, matched the same way as ``mobil``.

    Returns
    -------
    int
        The model's prediction rounded to the nearest thousand.

    Raises
    ------
    IndexError
        If ``mobil`` or ``jenis_transmisi`` does not match any column of
        ``X``. The type is the same one the positional lookup raised before,
        but the message now names the offending value.
    """
    # Compare on string labels so numeric-looking model names match whether
    # the column label is stored as 3 or "3".
    column_labels = X.columns.astype(str)

    def _position(kind, label):
        # Return the position of the first column whose label equals `label`.
        matches = np.flatnonzero(column_labels == label)
        if matches.size == 0:
            raise IndexError(
                f"Unknown {kind}: {label!r} is not one of the feature columns"
            )
        return matches[0]

    # Resolve both categorical inputs first so bad input fails before any
    # feature row is built.
    mobil_index = _position("mobil", str(mobil).capitalize())
    transmisi_index = _position(
        "jenis_transmisi", str(jenis_transmisi).capitalize()
    )

    x = np.zeros(len(X.columns))
    # Locate the numeric columns by name rather than assuming positions 0/1.
    x[_position("column", "tahun")] = tahun
    x[_position("column", "odo")] = odo
    x[mobil_index] = 1
    x[transmisi_index] = 1

    # The model was fitted on a DataFrame, so hand it a one-row frame with the
    # same column labels instead of a bare list.
    features = pd.DataFrame([x], columns=X.columns)
    return int(round(ridge.predict(features)[0], -3))

In [103]:
# Example: price estimate for a 2019 manual Avanza with 50,000 on the odometer.
predict_price(
    mobil="avanza",
    tahun=2019,
    odo=50000,
    jenis_transmisi="manual",
)

186184000

## Export the trained Ridge model and its feature columns

In [104]:
import pickle

# Serialise the fitted Ridge model into the server's data directory.
with open('../server/models/data/car_price_prediction.pickle', 'wb') as model_file:
    pickle.dump(ridge, model_file)

In [105]:
import json

# Store the feature column labels in training order next to the pickled model
# (presumably so the consumer of the pickle can rebuild an input row with the
# same layout - confirm against the server code).
columns = {'data_columns': list(X.columns)}

with open('../server/models/data/car_columns.json', 'w') as columns_file:
    json.dump(columns, columns_file)