In [0]:
import numpy as np
import pandas as pd

from sklearn.datasets import fetch_california_housing
from sklearn.neural_network import MLPRegressor
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

import warnings
# Installs a process-wide filter that ignores every warning category for the
# rest of the session.
# NOTE(review): a blanket filter would also hide any pandas / scikit-learn
# warnings raised by later cells — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

from xgboost import XGBRegressor

# Load the California housing data and keep features and target in one frame.
data = fetch_california_housing()
components = list(data.feature_names)
frame = pd.DataFrame(data.data, columns=data.feature_names)
frame['target'] = data.target

# 50/50 shuffled hold-out split with a fixed seed; `components` names the
# feature columns used as model inputs.
train_X, test_X, train_y, test_y = train_test_split(
    frame[components],
    frame['target'],
    test_size=0.5,
    shuffle=True,
    random_state=0,
)

In [2]:
# Gradient-boosted baseline trained on the raw (unscaled) features.
xgb = XGBRegressor(random_state=0)
xgb.fit(train_X, train_y)

# Score on the held-out half; predictions are reshaped to a single column.
xgb_test_pred = xgb.predict(test_X).reshape(-1, 1)
print("%.4f" % r2_score(test_y, xgb_test_pred))

0.7808


In [3]:
# Baseline MLP: inputs and target are min-max scaled to [0, 1] using
# statistics from the training half only.
scaler_x = MinMaxScaler(feature_range=(0, 1))
scaler_x.fit(train_X)
scaled_train_x, scaled_test_x = (scaler_x.transform(part) for part in (train_X, test_X))

# The target is reshaped to a single column before it is handed to the scaler.
train_y_column = train_y.values.reshape(-1, 1)
scaler_y = MinMaxScaler(feature_range=(0, 1))
scaler_y.fit(train_y_column)
scaled_train_y = scaler_y.transform(train_y_column).ravel()

# Four hidden layers of 180 ReLU units each.
mlp = MLPRegressor(hidden_layer_sizes=[180] * 4, activation='relu', random_state=0)
mlp.fit(scaled_train_x, scaled_train_y)

# Map predictions back to the target's original scale before scoring.
mlp_test_pred = scaler_y.inverse_transform(mlp.predict(scaled_test_x).reshape(-1, 1))
print("%.4f" % r2_score(test_y, mlp_test_pred))

0.7380


In [4]:
def with_center_distances(features, assigned_centers):
    """Return the base features plus one distance-to-center column per feature.

    Parameters
    ----------
    features : pandas.DataFrame
        Frame containing at least the columns listed in `components`.
    assigned_centers : numpy.ndarray
        Array of shape (len(features), len(components)); row i is the cluster
        center assigned to row i of `features`.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the `components` columns followed by columns named
        '0', '1', ... where column k is |center - feature| for components[k].
    """
    base = features[components]
    # The Euclidean norm of a scalar difference is its absolute value, so the
    # whole table is computed in one vectorized step instead of row by row.
    gaps = np.abs(assigned_centers - base.to_numpy())
    gap_frame = pd.DataFrame(
        gaps,
        index=base.index,
        columns=[str(position) for position in range(gaps.shape[1])],
    )
    return pd.concat([base, gap_frame], axis=1)


# Seeded so the clustering (and the score printed below) is reproducible.
# Fitting on `components` only keeps this cell re-runnable once train_X has
# gained the distance columns.
# NOTE(review): clustering runs on the unscaled features — confirm intended.
clusters = KMeans(2, random_state=0).fit(train_X[components])

# Integer-array indexing selects the assigned center row for every sample.
centers_train = clusters.cluster_centers_[clusters.labels_]
centers_test = clusters.cluster_centers_[clusters.predict(test_X[components])]

# Rebind to new frames rather than writing columns into the split frames in
# place; the resulting column layout is components followed by '0'..'7'.
train_X = with_center_distances(train_X, centers_train)
test_X = with_center_distances(test_X, centers_test)

# Re-fit the scalers because the input now has the extra distance columns.
scaler_x = MinMaxScaler((0, 1)).fit(train_X)
scaled_train_x = scaler_x.transform(train_X)
scaled_test_x = scaler_x.transform(test_X)

scaler_y = MinMaxScaler((0, 1)).fit(train_y.values.reshape(-1, 1))
scaled_train_y = scaler_y.transform(train_y.values.reshape(-1, 1)).ravel()

# Same MLP configuration as the baseline so the two scores are comparable.
mlp = MLPRegressor([180] * 4, activation='relu', random_state=0).fit(scaled_train_x, scaled_train_y)
print("%.4f" % r2_score(test_y, scaler_y.inverse_transform(mlp.predict(scaled_test_x).reshape(-1, 1))))

0.7981
