In [2]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the California Housing dataset
california = fetch_california_housing()

# Pull the feature matrix (inputs) and the label vector (target: house price)
# out of the loaded dataset object in one step
X, y = california.data, california.target

# Show the feature names, then the first five feature rows and labels.
# Each heading and its value are separated by a newline, so the output is the
# same as printing them with two consecutive print calls.
print("Fitur-fitur (Input):", california.feature_names, sep="\n")
print("\nContoh Data (Fitur):", X[:5], sep="\n")
print("\nLabel (Target) - Harga Rumah:", y[:5], sep="\n")

# Split the data into training and test sets: 20% is held out for testing,
# and the fixed random_state keeps the split reproducible between runs
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Create the linear regression model and train it on the training split
# (fit returns the estimator itself, so `model` is the fitted instance)
model = LinearRegression().fit(X_train, y_train)

# Predict house prices for the held-out test features
y_pred = model.predict(X_test)

# Evaluate the model by comparing test-set predictions with the true labels
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

# Report the evaluation results (one metric per line)
print(
    f"\nMean Squared Error (MSE): {mse}",
    f"R-squared (R2): {r2}",
    sep="\n",
)

# Put actual and predicted values side by side and show the first rows
df = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
print("\nPerbandingan Harga Rumah Asli vs Prediksi:", df.head(), sep="\n")


Fitur-fitur (Input):
['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']

Contoh Data (Fitur):
[[ 8.32520000e+00  4.10000000e+01  6.98412698e+00  1.02380952e+00
   3.22000000e+02  2.55555556e+00  3.78800000e+01 -1.22230000e+02]
 [ 8.30140000e+00  2.10000000e+01  6.23813708e+00  9.71880492e-01
   2.40100000e+03  2.10984183e+00  3.78600000e+01 -1.22220000e+02]
 [ 7.25740000e+00  5.20000000e+01  8.28813559e+00  1.07344633e+00
   4.96000000e+02  2.80225989e+00  3.78500000e+01 -1.22240000e+02]
 [ 5.64310000e+00  5.20000000e+01  5.81735160e+00  1.07305936e+00
   5.58000000e+02  2.54794521e+00  3.78500000e+01 -1.22250000e+02]
 [ 3.84620000e+00  5.20000000e+01  6.28185328e+00  1.08108108e+00
   5.65000000e+02  2.18146718e+00  3.78500000e+01 -1.22250000e+02]]

Label (Target) - Harga Rumah:
[4.526 3.585 3.521 3.413 3.422]

Mean Squared Error (MSE): 0.5558915986952425
R-squared (R2): 0.5757877060324521

Perbandingan Harga Rumah Asli vs Prediksi:
    