In [23]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Fetch the California housing data as a single pandas DataFrame
data = fetch_california_housing(as_frame=True)
df = data.frame

# Predictors are every column except 'MedHouseVal'; the target is that column,
# reshaped into a single-column 2-D array
X = df.drop(columns='MedHouseVal')
y = df['MedHouseVal'].to_numpy().reshape(-1, 1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the same
# transform to both splits (the target y is left unscaled)
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit ordinary least squares on the standardized training features
model = LinearRegression().fit(X_train_scaled, y_train)

# Tabulate one coefficient per feature. y was passed as a 2-D column, so coef_
# is flattened to 1-D before being paired with the column names.
coef_df = pd.DataFrame(
    {
        'Feature': X.columns,
        'Standardized Coefficient': model.coef_.flatten(),
    }
)
# Largest (most positive) coefficient first
coef_df = coef_df.sort_values('Standardized Coefficient', ascending=False)

print(coef_df)


      Feature  Standardized Coefficient
0      MedInc                  0.854383
3   AveBedrms                  0.339259
1    HouseAge                  0.122546
4  Population                 -0.002308
5    AveOccup                 -0.040829
2    AveRooms                 -0.294410
7   Longitude                 -0.869842
6    Latitude                 -0.896929


In [24]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the test data (uses the already-standardized test features)
y_pred = model.predict(X_test_scaled)

# Compute the error (mean squared error) and the goodness of fit (R²)
# of the predictions against the held-out targets
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report both metrics
print("MSE:", mse)
print("R² score:", r2)


MSE: 0.5558915986952444
R² score: 0.5757877060324508
