In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.datasets import fetch_california_housing

In [4]:
california=fetch_california_housing()
x=pd.DataFrame(california.data,columns=california.feature_names)
y=pd.Series(california.target,name="MedHouseValue")
y.head()


Unnamed: 0,MedHouseValue
0,4.526
1,3.585
2,3.521
3,3.413
4,3.422


In [5]:
#missing Values
print(x.isnull().sum())


MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
dtype: int64


In [6]:
from re import X
#train test split
X_train,X_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=42)



In [7]:
#feauture scaling
scaler=StandardScaler()
X_train=scaler.fit_transform(X_train)
X_test=scaler.transform(X_test)
X_train

array([[-0.326196  ,  0.34849025, -0.17491646, ...,  0.05137609,
        -1.3728112 ,  1.27258656],
       [-0.03584338,  1.61811813, -0.40283542, ..., -0.11736222,
        -0.87669601,  0.70916212],
       [ 0.14470145, -1.95271028,  0.08821601, ..., -0.03227969,
        -0.46014647, -0.44760309],
       ...,
       [-0.49697313,  0.58654547, -0.60675918, ...,  0.02030568,
        -0.75500738,  0.59946887],
       [ 0.96545045, -1.07984112,  0.40217517, ...,  0.00707608,
         0.90651045, -1.18553953],
       [-0.68544764,  1.85617335, -0.85144571, ..., -0.08535429,
         0.99543676, -1.41489815]])

In [8]:
#2-Regression and Algorithm Implementation
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.svm import SVR

In [9]:
#initialising Models
models={"Linear Regression":LinearRegression(),"Decision Tree":DecisionTreeRegressor(random_state=42),"Random Forest":RandomForestRegressor(n_estimators=100, random_state=42),"Gradient Boosting":GradientBoostingRegressor(random_state=42),"SVR":SVR(kernel="rbf")}

In [10]:
models

{'Linear Regression': LinearRegression(),
 'Decision Tree': DecisionTreeRegressor(random_state=42),
 'Random Forest': RandomForestRegressor(random_state=42),
 'Gradient Boosting': GradientBoostingRegressor(random_state=42),
 'SVR': SVR()}

In [12]:
# Dictionary to store results
results ={}
#training model
for name,model in models.items():
    model.fit(X_train,y_train)

    y_pred=model.predict(X_test)

In [14]:
#evaluation metrics
mse=mean_squared_error(y_test,y_pred)
mae=mean_absolute_error(y_test,y_pred)
r2=r2_score(y_test,y_pred)

results[name]={"MSE":mse,"MAE":mae,"R2":r2}
results

{'SVR': {'MSE': 0.357004031933865,
  'MAE': 0.39859907695205365,
  'R2': 0.7275628923016773}}

In [15]:
#converting results to df
results_df=pd.DataFrame(results).T
results_df

Unnamed: 0,MSE,MAE,R2
SVR,0.357004,0.398599,0.727563


## Submission Guidelines

This assignment was implemented using Python in a Jupyter Notebook.
The complete code, along with comments and markdown explanations,
has been uploaded to GitHub. The GitHub repository link has been
submitted as per the guidelines.
