In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.linear_model import SGDRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
import sklearn.metrics as metrics
import pandas as pd
import numpy as np

**Carregamento e processamento do dataset**

In [None]:
# Read the housing CSV from the working directory and print the resulting frame.
caminho_csv = "housing.csv"
dataset = pd.read_csv(caminho_csv)
print(dataset)

       longitude  latitude  ...  median_house_value  ocean_proximity
0        -122.23     37.88  ...            452600.0         NEAR BAY
1        -122.22     37.86  ...            358500.0         NEAR BAY
2        -122.24     37.85  ...            352100.0         NEAR BAY
3        -122.25     37.85  ...            341300.0         NEAR BAY
4        -122.25     37.85  ...            342200.0         NEAR BAY
...          ...       ...  ...                 ...              ...
20635    -121.09     39.48  ...             78100.0           INLAND
20636    -121.21     39.49  ...             77100.0           INLAND
20637    -121.22     39.43  ...             92300.0           INLAND
20638    -121.32     39.43  ...             84700.0           INLAND
20639    -121.24     39.37  ...             89400.0           INLAND

[20640 rows x 10 columns]


In [None]:
# Discard every row that has at least one missing value (dropna's defaults, spelled out).
dataset = dataset.dropna(axis=0, how="any")

In [None]:
# Shuffle the rows before the positional train/test split further down.
# A fixed random_state makes the split (and therefore every reported metric)
# reproducible across kernel restarts.
dataset = shuffle(dataset, random_state=0)

In [None]:
# Replace the textual ocean_proximity labels with their integer category codes.
dataset["ocean_proximity"] = dataset["ocean_proximity"].astype('category').cat.codes
print(dataset.head(10))

       longitude  latitude  ...  median_house_value  ocean_proximity
17765    -121.86     37.35  ...            153300.0                0
20354    -118.97     34.18  ...            293100.0                0
48       -122.26     37.82  ...            187500.0                3
10460    -117.63     33.47  ...            256000.0                4
3536     -118.50     34.26  ...            237300.0                0
17609    -121.92     37.29  ...            344600.0                0
20358    -118.95     34.18  ...            254900.0                0
19585    -120.69     37.59  ...             81800.0                1
10648    -117.78     33.54  ...            450000.0                0
3936     -118.57     34.22  ...            255200.0                0

[10 rows x 10 columns]


In [None]:
# Feature columns = every column except the target, selected by name rather
# than by a hard-coded position.
colunas = [col for col in dataset.columns if col != 'median_house_value']

# MinMaxScaler rescales each column independently to [0, 1], so a single call
# over all feature columns is equivalent to scaling them one at a time.
# NOTE(review): the scalers are fitted on the full dataset, i.e. before the
# train/test split, so test-set min/max values leak into the training
# features. Consider fitting on the training rows only.
dataset[colunas] = MinMaxScaler().fit_transform(dataset[colunas])

# The target gets its own scaler, kept in `normalizador_alvo` so predictions
# can be mapped back to the original scale later. It is fitted on a plain
# (n, 1) NumPy array.
y = np.array(dataset['median_house_value']).reshape(-1, 1)
normalizador_alvo = MinMaxScaler().fit(y)

y_normalized = normalizador_alvo.transform(y)
dataset['median_house_value'] = y_normalized.ravel()

print(dataset.head(10))

       longitude  latitude  ...  median_house_value  ocean_proximity
17765   0.248008  0.511158  ...            0.285156             0.00
20354   0.535857  0.174283  ...            0.573402             0.00
48      0.208167  0.561105  ...            0.355671             0.75
10460   0.669323  0.098831  ...            0.496907             1.00
3536    0.582669  0.182784  ...            0.458351             0.00
17609   0.242032  0.504782  ...            0.679587             0.00
20358   0.537849  0.174283  ...            0.494639             0.00
19585   0.364542  0.536663  ...            0.137733             0.25
10648   0.654382  0.106270  ...            0.896906             0.00
3936    0.575697  0.178533  ...            0.495258             0.00

[10 rows x 10 columns]


In [None]:
# Feature matrix: the nine predictor columns, in this fixed order.
atributos = [
    'longitude',
    'latitude',
    'housing_median_age',
    'total_rooms',
    'total_bedrooms',
    'population',
    'households',
    'median_income',
    'ocean_proximity',
]
X = np.array(dataset[atributos])

# Target as a column vector of shape (n_samples, 1).
Y = np.array(dataset['median_house_value'])[:, np.newaxis]

In [None]:
# Fraction of the rows used for training; the remainder is the test set.
porcentagem = 0.75

# Positional cut points: the first `porcentagem` of the rows go to training.
corte_x = int(porcentagem * X.shape[0])
corte_y = int(porcentagem * Y.shape[0])

X_treino, X_teste = np.split(X, [corte_x])
Y_treino, Y_teste = np.split(Y, [corte_y])

# Show the shapes of the four resulting arrays, training pair first.
for parte in (X_treino, Y_treino, X_teste, Y_teste):
    print(parte.shape)

(15324, 9)
(15324, 1)
(5109, 9)
(5109, 1)


**Árvore de Decisão**

In [None]:
# Fit a depth-limited decision tree on the training split.
# random_state pins the feature permutation scikit-learn uses when several
# splits are equally good, so reruns build the same tree. The target is passed
# as a 1-D array, like in the other model cells.
regressor = DecisionTreeRegressor(max_depth=7, random_state=0).fit(X_treino, Y_treino.ravel())

# Evaluate on the held-out rows; all errors are on the normalized target scale.
y_pred = regressor.predict(X_teste)
score = regressor.score(X_teste,Y_teste)  # R^2 on the test split
ev = metrics.explained_variance_score(Y_teste,y_pred)
mae = metrics.mean_absolute_error(Y_teste,y_pred)
mse = metrics.mean_squared_error(Y_teste, y_pred)
# The recorded output of this cell lists this metric, so compute and print it.
medae = metrics.median_absolute_error(Y_teste, y_pred)

print('Explained Variance: ')
print(ev)

print('Mean Absolute Error: ')
print(mae)

print('Mean Squared Error: ')
print(mse)

print('Median Absolute Error:')
print(medae)

print('R2 Score:')
print(score)

Explained Variance: 
0.6825283794011004
Mean Absolute Error: 
0.09116416030507052
Mean Squared Error: 
0.01756369925759545
Median Absolute Error:
0.06256910642136987
R2 Score:
0.682526333210514


**MLP**

In [None]:
# Fit a multi-layer perceptron with default hyper-parameters.
# random_state fixes the weight initialization and batch sampling so the
# reported metrics are reproducible.
regressor = MLPRegressor(random_state=0).fit(X_treino, Y_treino.ravel())

# Evaluate on the held-out rows; all errors are on the normalized target scale.
y_pred = regressor.predict(X_teste)
score = regressor.score(X_teste,Y_teste)  # R^2 on the test split
ev = metrics.explained_variance_score(Y_teste,y_pred)
mae = metrics.mean_absolute_error(Y_teste,y_pred)
mse = metrics.mean_squared_error(Y_teste, y_pred)
# The recorded output of this cell lists this metric, so compute and print it.
medae = metrics.median_absolute_error(Y_teste, y_pred)

print('Explained Variance: ')
print(ev)

print('Mean Absolute Error: ')
print(mae)

print('Mean Squared Error: ')
print(mse)

print('Median Absolute Error:')
print(medae)

print('R2 Score:')
print(score)

Explained Variance: 
0.711338762896293
Mean Absolute Error: 
0.08740147552097946
Mean Squared Error: 
0.016692445071113938
Median Absolute Error:
0.05879547494333037
R2 Score:
0.709280356619804


**SVM**

In [None]:
# Fit a support vector regressor with default hyper-parameters.
regressor = SVR().fit(X_treino, Y_treino.ravel())

# Evaluate on the held-out rows; all errors are on the normalized target scale.
y_pred = regressor.predict(X_teste)
score = regressor.score(X_teste,Y_teste)  # R^2 on the test split
ev = metrics.explained_variance_score(Y_teste,y_pred)
mae = metrics.mean_absolute_error(Y_teste,y_pred)
mse = metrics.mean_squared_error(Y_teste, y_pred)
# The recorded output of this cell lists this metric, so compute and print it.
medae = metrics.median_absolute_error(Y_teste, y_pred)

print('Explained Variance: ')
print(ev)

print('Mean Absolute Error: ')
print(mae)

print('Mean Squared Error: ')
print(mse)

print('Median Absolute Error:')
print(medae)

print('R2 Score:')
print(score)

Explained Variance: 
0.6360884502835007
Mean Absolute Error: 
0.10011051544135602
Mean Squared Error: 
0.02017684010844289
Median Absolute Error:
0.07490288567332759
R2 Score:
0.6352923538768511


**Floresta Aleatória**

In [None]:
# Fit a 100-tree random forest; random_state=0 keeps the ensemble reproducible.
regressor = RandomForestRegressor(random_state=0, n_estimators=100).fit(X_treino, Y_treino.ravel())

# Evaluate on the held-out rows; all errors are on the normalized target scale.
y_pred = regressor.predict(X_teste)
score = regressor.score(X_teste,Y_teste)  # R^2 on the test split
ev = metrics.explained_variance_score(Y_teste,y_pred)
mae = metrics.mean_absolute_error(Y_teste,y_pred)
mse = metrics.mean_squared_error(Y_teste, y_pred)
# The recorded output of this cell lists this metric, so compute and print it.
medae = metrics.median_absolute_error(Y_teste, y_pred)

print('Explained Variance: ')
print(ev)

print('Mean Absolute Error: ')
print(mae)

print('Mean Squared Error: ')
print(mse)

print('Median Absolute Error:')
print(medae)

print('R2 Score:')
print(score)

Explained Variance: 
0.7967584757184143
Mean Absolute Error: 
0.0670186118254795
Mean Squared Error: 
0.011244086211359671
Median Absolute Error:
0.04096892383948941
R2 Score:
0.7967568661440312


**SGD**

In [None]:
# Fit a linear model by stochastic gradient descent with the
# epsilon-insensitive loss. random_state fixes the per-epoch shuffling of the
# training data so the reported metrics are reproducible.
regressor = SGDRegressor(max_iter=1000, loss='epsilon_insensitive', random_state=0).fit(X_treino, Y_treino.ravel())

# Evaluate on the held-out rows; all errors are on the normalized target scale.
y_pred = regressor.predict(X_teste)
score = regressor.score(X_teste,Y_teste)  # R^2 on the test split
ev = metrics.explained_variance_score(Y_teste,y_pred)
mae = metrics.mean_absolute_error(Y_teste,y_pred)
mse = metrics.mean_squared_error(Y_teste, y_pred)
# The recorded output of this cell lists this metric, so compute and print it.
medae = metrics.median_absolute_error(Y_teste, y_pred)

print('Explained Variance: ')
print(ev)

print('Mean Absolute Error: ')
print(mae)

print('Mean Squared Error: ')
print(mse)

print('Median Absolute Error:')
print(medae)

print('R2 Score:')
print(score)

Explained Variance: 
0.5592644197503023
Mean Absolute Error: 
0.1114569205239704
Mean Squared Error: 
0.024614319588672606
Median Absolute Error:
0.08390006590725335
R2 Score:
0.5550824356113468


**"Desnormalização" do atributo alvo**

In [None]:
# Map the normalized target values back to the original scale with the scaler
# that was fitted on the target column.
# NOTE(review): y_pred holds the predictions of whichever model cell was
# executed last, so the numbers shown here depend on execution order.
alvo_real_coluna = Y_teste.reshape(-1, 1)
alvo_previsto_coluna = y_pred.reshape(-1, 1)

Y_teste_nao_normalizado = normalizador_alvo.inverse_transform(alvo_real_coluna)
y_pred_nao_normalizado = normalizador_alvo.inverse_transform(alvo_previsto_coluna)

# Predictions first, then the ground truth.
for titulo, valores in (
    ("Predição do atributo alvo:", y_pred_nao_normalizado),
    ("Valor real do atributo alvo:", Y_teste_nao_normalizado),
):
    print(titulo)
    print(valores)

Predição do atributo alvo:
[[ 55536.13809524]
 [248673.01      ]
 [105375.35952381]
 ...
 [341205.03769231]
 [155975.        ]
 [229421.45      ]]
Valor real do atributo alvo:
[[ 52200.]
 [442900.]
 [135700.]
 ...
 [301300.]
 [163600.]
 [260100.]]
