# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Load the dataset; the script reads the 'age', 'bmi', 'smoker' and 'charges'
# columns from it.
df = pd.read_csv('insurance.csv')

# Encode the smoker flag numerically so it can be used as a regression feature.
df['smoker'] = df['smoker'].map({'yes': 1, 'no': 0})

# Series.map turns every value that is missing from the mapping into NaN.
# Fail here with a clear message instead of passing NaN features on to the
# model.
if df['smoker'].isna().any():
    raise ValueError("Kolom 'smoker' hanya boleh berisi 'yes' atau 'no'.")

# Select the feature matrix X and the target y.
X = df[['age', 'bmi', 'smoker']]
y = df['charges']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only and reuse the fitted statistics
# for the test rows (and later for the user-supplied row).
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build and fit the regression model.
model = LinearRegression()
model.fit(X_train_scaled, y_train)

y_pred = model.predict(X_test_scaled)

# Evaluate the model on the held-out rows.
score = r2_score(y_test, y_pred)
print("R-Squared Score (Akurasi Model):", score)
print("(Nilai mendekati 1.0 berarti model sangat bagus)")


def _prompt_number(prompt, cast, is_valid):
    """Prompt until the user enters a value that parses and passes validation.

    Args:
        prompt: Text passed to ``input``.
        cast: Callable converting the raw string (``int`` or ``float`` here);
            a ``ValueError`` raised by it triggers a re-prompt.
        is_valid: Predicate applied to the converted value; a falsy result
            triggers a re-prompt.

    Returns:
        The first converted value accepted by ``is_valid``.
    """
    while True:
        try:
            value = cast(input(prompt))
        except ValueError:
            print("Input tidak valid, silakan coba lagi.")
            continue
        if is_valid(value):
            return value
        print("Nilai di luar rentang yang diizinkan, silakan coba lagi.")


# ---------------------------------------------------------
# Input
# ---------------------------------------------------------
print()
print("\nMasukan Data Pasien Baru")
input_age = _prompt_number("Input Umur (Age): ", int, lambda v: v >= 0)
# The chained comparison also rejects NaN and infinity, which float() accepts.
input_bmi = _prompt_number("Input BMI: ", float, lambda v: 0 < v < float("inf"))
print("Apakah Perokok? (1 = Ya, 0 = Tidak)")
# The model was trained on a 0/1 smoker flag, so only those two are accepted.
input_smoker = _prompt_number(
    "Input Status Perokok (1/0): ", int, lambda v: v in (0, 1)
)

new_data = pd.DataFrame({
    'age': [input_age],
    'bmi': [input_bmi],
    'smoker': [input_smoker]
})

print()
print("==============================")
print()

# Put the columns in the same order as the training features before scaling.
new_data = new_data[X.columns]

new_data_scaled = scaler.transform(new_data)

# Predict the charges for the new patient.
prediction = model.predict(new_data_scaled)

print()
print(f"Prediksi Biaya Asuransi (Charges): ${prediction[0]:.2f}")
print()