In [6]:
# Import library yang diperlukan
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.impute import SimpleImputer
import pickle

# Load dataset
data = pd.read_csv('insurance_data.csv')

# Preprocessing seperti sebelumnya
data['Gender'] = data['Gender'].map({'Male': 0, 'Female': 1})
data['Vehicle_Age'] = data['Vehicle_Age'].map({'New': 0, '1-2 Year': 1, 'More than 2 Years': 2})
data['Vehicle_Damage'] = data['Vehicle_Damage'].map({'No': 0, 'Yes': 1})

# Drop kolom ID
data = data.drop('id', axis=1)

# Memisahkan fitur dan target
X = data.drop('Response', axis=1)
y = data['Response']

# Mengisi missing values dengan mean untuk data numerik
imputer = SimpleImputer(strategy='mean')
X = imputer.fit_transform(X)

# Membagi data menjadi train dan test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Inisialisasi model Random Forest
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Latih model
model.fit(X_train, y_train)

# Prediksi
y_pred = model.predict(X_test)

# Evaluasi
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report(y_test, y_pred))

# Simpan model dalam format pickle
with open('insurance_model.pkl', 'wb') as file:
    pickle.dump(model, file)

print("Model telah disimpan dalam format pickle.")


Accuracy: 0.8650520846999554
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.97      0.93     66699
           1       0.37      0.12      0.18      9523

    accuracy                           0.87     76222
   macro avg       0.63      0.54      0.55     76222
weighted avg       0.82      0.87      0.83     76222

Model telah disimpan dalam format pickle.
