## Tugas: Regresi Linear Berganda untuk Memprediksi `charges` pada Dataset Insurance


In [1]:
# Import pustaka
import pandas as pd

# Load the insurance dataset from its relative CSV path.
data = pd.read_csv(filepath_or_buffer='dataset/insurance.csv')

# Preview the first five rows (head()'s default) as the cell output.
data.head(n=5)


Unnamed: 0,age,sex,bmi,children,smoker,region,charges
0,19,female,27.9,0,yes,southwest,16884.924
1,18,male,33.77,1,no,southeast,1725.5523
2,28,male,33.0,3,no,southeast,4449.462
3,33,male,22.705,0,no,northwest,21984.47061
4,32,male,28.88,0,no,northwest,3866.8552


In [2]:
from sklearn.model_selection import train_test_split

# Target: the "charges" column the model is trained to predict.
y = data["charges"]

# Features: every remaining column once the target has been removed.
X = data.drop("charges", axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Report how many rows ended up in each split.
print("Jumlah data latih:", X_train.shape[0])
print("Jumlah data uji:", X_test.shape[0])


Jumlah data latih: 1070
Jumlah data uji: 268


In [3]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Column groups, split by the kind of preprocessing each one receives.
categorical_features = ["sex", "smoker", "region"]
numeric_features = ["age", "bmi", "children"]

# Categorical columns are one-hot encoded with drop="first" (one level per
# feature is omitted); numeric columns are standardized.
categorical_transformer = OneHotEncoder(drop="first")
numeric_transformer = StandardScaler()

# Route each column group to its transformer. The ("num", "cat") order is
# kept so the transformed columns come out in the same order as before.
preprocessor = ColumnTransformer(
    [
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)


In [4]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

# Chain the preprocessing step and the linear regressor into one estimator,
# so a single fit/predict call runs both stages in order.
model = Pipeline(
    [
        ("preprocessor", preprocessor),
        ("regressor", LinearRegression()),
    ]
)


In [5]:
# Fit the pipeline on the training split, then predict the held-out test rows.
# Pipeline.fit() returns the fitted pipeline itself, so the calls are chained.
y_pred = model.fit(X_train, y_train).predict(X_test)


In [6]:
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Score the test-set predictions against the true target values.
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Print the report in one call: title, separator, then one line per metric.
print(
    "Evaluasi Model Regresi Linear Berganda:",
    "---------------------------------------",
    f"R-squared (R²): {r2:.4f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    sep="\n",
)


Evaluasi Model Regresi Linear Berganda:
---------------------------------------
R-squared (R²): 0.7836
Mean Squared Error (MSE): 33596915.85
Mean Absolute Error (MAE): 4181.19
