In [2]:
from sklearn.datasets import fetch_california_housing
import pandas as pd

# Loading dataset
housing = fetch_california_housing()

# Convert to pandas 
df = pd.DataFrame(housing.data, columns=housing.feature_names)
df['Target'] = housing.target  #target variable

In [8]:
# Checking missing values
missing_values = df.isnull().sum()
print("Missing values:\n", missing_values)

df.fillna(df.mean(), inplace=True)

Missing values:
 MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
Target        0
dtype: int64


In [10]:
from sklearn.preprocessing import StandardScaler

# Initialize scaler
scaler = StandardScaler()

# Apply standardization
df[df.columns[:-1]] = scaler.fit_transform(df[df.columns[:-1]])

In [12]:
#linear regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Split data
X = df.drop(columns=['Target'])
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
lr = LinearRegression()
lr.fit(X_train, y_train)

# Evaluate
lr_score = lr.score(X_test, y_test)
print("Linear Regression R² Score:", lr_score)

Linear Regression R² Score: 0.575787706032451


In [14]:
#Decision tree regression
from sklearn.tree import DecisionTreeRegressor

# Train model
dt = DecisionTreeRegressor(random_state=42)
dt.fit(X_train, y_train)

# Evaluate
dt_score = dt.score(X_test, y_test)
print("Decision Tree R² Score:", dt_score)

Decision Tree R² Score: 0.6228111330554302


In [18]:
#random forest regression
from sklearn.ensemble import RandomForestRegressor

# Train model
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Evaluate
rf_score = rf.score(X_test, y_test)
print("Random Forest R² Score:", rf_score)

Random Forest R² Score: 0.805024407701793


In [21]:
#gradient boosting regression
from sklearn.ensemble import GradientBoostingRegressor

# Train model
gb = GradientBoostingRegressor(n_estimators=100, random_state=42)
gb.fit(X_train, y_train)

# Evaluate
gb_score = gb.score(X_test, y_test)
print("Gradient Boosting R² Score:", gb_score)

Gradient Boosting R² Score: 0.7756433164710084


In [23]:
#svr
from sklearn.svm import SVR

# Train model
svr = SVR(kernel='rbf')
svr.fit(X_train, y_train)

# Evaluate
svr_score = svr.score(X_test, y_test)
print("SVR R² Score:", svr_score)

SVR R² Score: 0.7289407597956454


In [25]:
#Evaluation metrics
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Function to evaluate models
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    return mse, mae, r2

# Evaluate all models
models = {
    "Linear Regression": lr,
    "Decision Tree": dt,
    "Random Forest": rf,
    "Gradient Boosting": gb,
    "SVR": svr
}

results = {name: evaluate_model(m, X_test, y_test) for name, m in models.items()}

In [27]:
#display evaluation matrics
import pandas as pd

# Create DataFrame for comparison
df_results = pd.DataFrame(results, index=["MSE", "MAE", "R²"]).T
print(df_results)

                        MSE       MAE        R²
Linear Regression  0.555892  0.533200  0.575788
Decision Tree      0.494272  0.453784  0.622811
Random Forest      0.255498  0.327613  0.805024
Gradient Boosting  0.293999  0.371650  0.775643
SVR                0.355198  0.397763  0.728941
