In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.neighbors import NearestNeighbors

In [2]:
class House:
    def __init__(self):
        self.housing = pd.read_csv('housing.csv',sep=',',decimal='.',encoding='utf-8')
        self.housing_encoded = self.encode_fit_transform(self.housing)
        
        self.findKneighbors_fit(self.housing_encoded)
        
    def encode_fit_transform(self, database):
        
        #one hot encoder for ocean_proximity
        self.onehot = OneHotEncoder(sparse=False, categories= 'auto')
        ocean_labels=database.ocean_proximity.values.reshape(-1, 1)
        onehot_ocean_labels = self.onehot.fit_transform(ocean_labels)
        
        #concatenate onehot encoded values to dataframe
        dfOneHot = pd.DataFrame(onehot_ocean_labels,
                                columns = ["Ocean_"+str(int(i)) for i in range(onehot_ocean_labels.shape[1])])
        encoded_database = pd.concat([database, dfOneHot], axis=1)
        
        #clean dataframe
        encoded_database.drop('ocean_proximity', axis = 1, inplace = True) #drop ocean_proximity original column
        encoded_database.dropna(axis=0, how='any',inplace = True) #drop NaN
        
        #dataframe columns
        self.columns = encoded_database.columns.tolist()
        
        #normilize values all columns
        self.scaler = MinMaxScaler()
        encoded_database[self.columns] = self.scaler.fit_transform(encoded_database[self.columns])
        
        return encoded_database
    
    def findKneighbors_fit(self, database):
        self.nNeighbors = NearestNeighbors(n_neighbors=2, algorithm='ball_tree').fit(database)        
    
    def transform(self, sample):
        columns_ = ['longitude', 'latitude', 'housing_median_age', 'total_rooms',
                    'total_bedrooms', 'population', 'households', 'median_income',
                    'median_house_value', 'ocean_proximity']

        #transform list to dataframe
        sample_df = pd.DataFrame(columns = columns_)
        sample_df.loc[0] = sample
        
        #transform
        #onehot encoder
        ocean_labels=sample_df.ocean_proximity.values.reshape(-1, 1)
        onehot_ocean_labels = self.onehot.transform(ocean_labels)
        
        #concatenate onehot encoded values to dataframe
        dfOneHot = pd.DataFrame(onehot_ocean_labels,
                                columns = ["Ocean_"+str(int(i)) for i in range(onehot_ocean_labels.shape[1])])
        encoded_sample = pd.concat([sample_df, dfOneHot], axis=1)
        
        encoded_sample.drop('ocean_proximity', axis = 1, inplace = True) #drop ocean_proximity original column
        
        
        encoded_sample[self.columns] = self.scaler.transform(encoded_sample[self.columns]).astype(float)
        
        return encoded_sample
    
    def findKneighbors(self,sample, n_kneighbors = 5):
        self.encoded_sample = self.transform(sample)
        dist, ind = self.nNeighbors.kneighbors(self.encoded_sample, n_kneighbors)
        return self.housing.loc[ind[0].tolist()]
        

In [3]:
house1 = House()

In [5]:
datatable1 = house1.findKneighbors([-120.24,
 32.85,
 54.0,
 1467.0,
 190.0,
 496.0,
 177.0,
 7.2574,
 352100.0,
 'NEAR BAY'], 10)

In [10]:
datatable1['total_rooms'] / datatable1['population']

16972    2.067485
156      2.607555
158      3.205011
141      2.921279
2        2.957661
15868    1.155675
154      2.866797
15850    1.559322
124      2.899001
401      2.317021
dtype: float64

In [9]:
datatable1

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,ocean_proximity
16972,-122.31,37.56,45.0,1685.0,321.0,815.0,314.0,4.2955,309700.0,NEAR OCEAN
156,-122.24,37.81,52.0,2485.0,313.0,953.0,327.0,6.8591,352400.0,NEAR BAY
158,-122.23,37.81,52.0,2814.0,365.0,878.0,352.0,7.508,348700.0,NEAR BAY
141,-122.21,37.82,52.0,2375.0,333.0,813.0,350.0,7.0549,331400.0,NEAR BAY
2,-122.24,37.85,52.0,1467.0,190.0,496.0,177.0,7.2574,352100.0,NEAR BAY
15868,-122.41,37.76,52.0,3452.0,784.0,2987.0,753.0,2.8135,260300.0,NEAR BAY
154,-122.22,37.81,52.0,2927.0,402.0,1021.0,380.0,8.1564,390100.0,NEAR BAY
15850,-122.44,37.74,23.0,184.0,44.0,118.0,40.0,4.5375,350000.0,NEAR BAY
124,-122.24,37.85,52.0,2612.0,365.0,901.0,367.0,7.2354,391100.0,NEAR BAY
401,-122.29,37.89,52.0,2178.0,421.0,940.0,423.0,5.0551,232200.0,NEAR BAY
