In [4]:


import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import torch
from torch import nn, optim
from torch.utils.data import DataLoader,Dataset
import torch.nn.functional as F

from sklearn.metrics import mean_squared_error

import matplotlib.pyplot as plt

In [9]:
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()

In [10]:
housing

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

In [11]:
df = pd.DataFrame(housing.data)

In [13]:
df.columns = housing.feature_names

In [15]:
df['price'] = housing.target

In [16]:
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,price
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 9 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
 8   price       20640 non-null  float64
dtypes: float64(9)
memory usage: 1.4 MB


In [19]:
X = df.drop(columns='price').to_numpy()

In [26]:
Y = df['price'].to_numpy().reshape((-1,1))

In [27]:
scaler = MinMaxScaler()
scaler.fit(X)
X = scaler.transform(X)

scaler.fit(Y)
Y = scaler.transform(Y)

In [28]:
X

array([[0.53966842, 0.78431373, 0.0435123 , ..., 0.00149943, 0.5674814 ,
        0.21115538],
       [0.53802706, 0.39215686, 0.03822395, ..., 0.00114074, 0.565356  ,
        0.21215139],
       [0.46602805, 1.        , 0.05275646, ..., 0.00169796, 0.5642933 ,
        0.21015936],
       ...,
       [0.08276438, 0.31372549, 0.03090386, ..., 0.0013144 , 0.73219979,
        0.31175299],
       [0.09429525, 0.33333333, 0.03178269, ..., 0.0011515 , 0.73219979,
        0.30179283],
       [0.13025338, 0.29411765, 0.03125246, ..., 0.00154886, 0.72582359,
        0.30976096]])

In [29]:
torch.FloatTensor(X)

tensor([[0.5397, 0.7843, 0.0435,  ..., 0.0015, 0.5675, 0.2112],
        [0.5380, 0.3922, 0.0382,  ..., 0.0011, 0.5654, 0.2122],
        [0.4660, 1.0000, 0.0528,  ..., 0.0017, 0.5643, 0.2102],
        ...,
        [0.0828, 0.3137, 0.0309,  ..., 0.0013, 0.7322, 0.3118],
        [0.0943, 0.3333, 0.0318,  ..., 0.0012, 0.7322, 0.3018],
        [0.1303, 0.2941, 0.0313,  ..., 0.0015, 0.7258, 0.3098]])

In [31]:
torch.FloatTensor(Y).shape

torch.Size([20640, 1])

In [35]:
class TensorData(Dataset):
  def __init__(self,x_data,y_data):
    self.x_data = torch.FloatTensor(x_data)
    self.y_data = torch.FloatTensor(y_data)
    self.len = self.y_data.shape[0]

  def __getitem__(self,index):
    return self.x_data[index],self.y_data[index]
  
  def __len__(self):
    return self.len

In [33]:
X_train,X_test,y_train,y_test = train_test_split(X,Y,test_size=0.2)

In [54]:
trainsets = TensorData(X_train,y_train)
trainloader = DataLoader(trainsets, batch_size=32, shuffle=True)

testsets = TensorData(X_test,y_test)
testloader = DataLoader(testsets, batch_size=32, shuffle=True)

In [59]:
class Regressor(nn.Module):
  def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(8,50,bias=True)
    self.fc2 = nn.Linear(50,30,bias=True)
    self.fc3 = nn.Linear(30,1,bias=True)
    self.dropout = nn.Dropout(0.2)

  def forward(self,x):
    x = F.relu(self.fc1(x))
    x = self.dropout(F.relu(self.fc2(x)))
    x = F.relu(self.fc3(x))

    return x


In [61]:
model = Regressor()
criterion  = nn.MSELoss()

optimizer = optim.Adam(model.parameters(),lr=0.001,weight_decay=1e-7)

In [66]:
loss_ = []
n = len(trainloader)
for epoch in range(400):
   running_loss = 0.0 
   for i, data in enumerate(trainloader,0):
    inputs, values = data
    optimizer.zero_grad()

    outputs = model(inputs)
    loss = criterion(outputs, values)
    loss.backward()
    optimizer.step()

    running_loss += loss.item()

   loss_.append(np.sqrt(running_loss)/n)


In [67]:
loss_

[0.00598754607787858,
 0.005950831848776041,
 0.005960145108937321,
 0.005953363074047841,
 0.005874961663163244,
 0.005891423383726226,
 0.005857331664756232,
 0.005853970731522007,
 0.005853615794613585,
 0.005851757245327792,
 0.005829010008230512,
 0.005819671417440282,
 0.005775655747796205,
 0.0058277720502462995,
 0.005751959039135149,
 0.005761542927599424,
 0.0057039914693390965,
 0.00567229666042104,
 0.005653994769670667,
 0.00567089704391406,
 0.005681235160403746,
 0.005663297867028175,
 0.005609671487524333,
 0.005620144621324494,
 0.00558219753586824,
 0.005576420047515311,
 0.005563417411883449,
 0.005537977045147508,
 0.005541439418215539,
 0.005516335436296699,
 0.005508165047278517,
 0.005532254128299024,
 0.005519582929510125,
 0.00554231342797595,
 0.005514902052358801,
 0.005519738508640026,
 0.005495166721063735,
 0.0055379065045047075,
 0.005498505062633887,
 0.005477223900206867,
 0.005481786339615228,
 0.005463913987678377,
 0.005475024852691905,
 0.0054539732

In [44]:
from sklearn.ensemble import RandomForestRegressor



In [45]:
rf = RandomForestRegressor()
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)

mse = mean_squared_error(y_test,y_pred)

  rf.fit(X_train,y_train)


In [46]:
mse

0.010712649406116548

In [68]:
np.sqrt(mse)

0.10350192948016258