In [66]:
from sklearn.datasets import fetch_california_housing
import pandas as pd
import numpy as np
import seaborn as sns

In [67]:
dataset = fetch_california_housing()

In [68]:
print (dataset.DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

In [69]:
print(dataset.feature_names)

['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']


In [70]:
df = pd.DataFrame(dataset.data, columns = dataset.feature_names)

In [71]:
df["Price"] = dataset.target

In [72]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,Price
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [73]:
df.isnull().sum()

MedInc        0
HouseAge      0
AveRooms      0
AveBedrms     0
Population    0
AveOccup      0
Latitude      0
Longitude     0
Price         0
dtype: int64

Linear Regression

In [74]:
## Divide the data into Independent and non- Independent

In [75]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [76]:
from sklearn.model_selection import train_test_split

In [77]:
X_train,X_test,y_train,y_test= train_test_split(X,y,test_size=0.33,random_state=42)

In [78]:
from sklearn.preprocessing import StandardScaler

In [79]:
scaler = StandardScaler()

In [80]:
X_train= scaler.fit_transform(X_train)

In [81]:
X_test = scaler.transform(X_test)

In [82]:
from sklearn.linear_model import LinearRegression, Ridge,Lasso,ElasticNet

In [83]:
regression = LinearRegression()

In [84]:
regression.fit(X_train,y_train)

LinearRegression()

In [85]:
regression.coef_

array([ 8.46603472e-01,  1.20333548e-01, -2.98800785e-01,  3.47243173e-01,
       -8.81413334e-05, -4.17242067e-02, -8.94420371e-01, -8.70401317e-01])

In [86]:
regression.intercept_

2.0666362048018536

In [87]:
#Prediction

y_pred= regression.predict(X_test)

In [88]:
from sklearn.metrics import mean_squared_error,mean_absolute_error

In [89]:
mse = mean_squared_error(y_test,y_pred)
print(mse)

0.5369686543372459


In [90]:
mae = mean_absolute_error(y_test,y_pred)
print(mae)

0.5295710106684453


In [91]:
rmse = np.sqrt(mse)
print(rmse)

0.7327814505957735


In [92]:
## Accuracy R2 and Adj R2

In [93]:
from sklearn.metrics import r2_score

In [94]:
score= r2_score(y_test,y_pred)
print(score)

0.5970494128783952


In [95]:
adjscore = 1-(1-score)*(len(y)-1)/(len(y)-X.shape[1]-1)

In [96]:
print(adjscore)

0.5968931623477872


Ridge Regression

In [97]:
ridge = Ridge()

In [98]:
ridge.fit(X_train,y_train)

Ridge()

In [99]:
y_pred1= ridge.predict(X_test)

In [113]:
mse1 = mean_squared_error(y_test,y_pred1)
print(mse1)

0.5369457054801822


In [114]:
mae1 = mean_absolute_error(y_test,y_pred1)
print(mae1)

0.529566870965708


In [115]:
rmse1 = np.sqrt(mse1)
print(rmse1)

0.7327657916962159


In [103]:
score1= r2_score(y_test,y_pred1)
print(score1)

0.5970666340985782


In [104]:
adjscore1 = 1-(1-score1)*(len(y)-1)/(len(y)-X.shape[1]-1)
print(adjscore1)

0.5969103902457735


Lasso Regression

In [105]:
lasso = Lasso()

In [106]:
lasso.fit(X_train,y_train)

Lasso()

In [107]:
y_pred2 = lasso.predict(X_test)

In [121]:
mse2 = mean_squared_error(y_test,y_pred2)
print(mse2)
mae2 = mean_absolute_error(y_test,y_pred2)
print(mae2)
rmse2 = np.sqrt(mse2)
print(rmse2)

1.3326257277946882
0.9126511897647483
1.15439409553007


Elastic Net Regression

In [117]:
elasticnet = ElasticNet()

In [118]:
elasticnet.fit(X_train,y_train)

ElasticNet()

In [119]:
y_pred3 = elasticnet.predict(X_test)

In [122]:
mse3 = mean_squared_error(y_test,y_pred3)
print(mse3)
mae3 = mean_absolute_error(y_test,y_pred3)
print(mae3)
rmse3 =np.sqrt(mse3)
print(rmse3)

1.061031401183927
0.8119183692654035
1.0300637850074756
