<a href="https://colab.research.google.com/github/anish-t/Intro_to_ML/blob/main/002100_Radius_Nearest_Neighbors_Classifier_from_scratch_and_sklearn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Radius Neighbors Classifier first stores the training examples. During prediction, when it encounters a new instance (or test example) to predict, it finds the neighbors among the training instances that lie within a fixed radius r centered at the test instance, where r is a floating-point value specified by the user. It then assigns the most common class among the training instances within that radius to the test instance.


In [1]:
# Importing libraries
 
import warnings

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import RadiusNeighborsRegressor

In [2]:
# Radius Nearest Neighbors Regression from scratch
 
class Radius_Nearest_Neighbors_Regression() :
    """Radius-based nearest-neighbours regressor written with NumPy.

    fit() stores the training set. predict() returns, for every test row,
    the mean target of the training rows whose Euclidean distance to that
    row is at most ``r``.

    Parameters
    ----------
    r : float
        Radius of the neighbourhood; the comparison is ``distance <= r``.
    """

    def __init__( self, r ) :

        self.r = r

    # Function to store training set

    def fit( self, X_train, Y_train ) :
        """Store the training set.

        Parameters
        ----------
        X_train : array-like of shape (m, n)
            Training features. Converted to a float ndarray so that lists
            and DataFrames are indexed by position.
        Y_train : array-like with m entries along its first axis
            Training targets. Converted to an ndarray so that a pandas
            Series is indexed by position rather than by label.

        Raises
        ------
        ValueError
            If X_train is not 2-D or the number of rows in X_train and
            Y_train differ.
        """

        X_train = np.asarray( X_train, dtype = float )

        Y_train = np.asarray( Y_train )

        if X_train.ndim != 2 :

            raise ValueError( "X_train must be 2-D (examples x features), "
                              "got %d dimension(s)" % X_train.ndim )

        # shape[:1] is () for a 0-d target, so scalars are rejected here too

        if Y_train.shape[:1] != X_train.shape[:1] :

            raise ValueError( "X_train and Y_train must have the same "
                              "number of examples" )

        self.X_train = X_train

        self.Y_train = Y_train

        # no_of_training_examples, no_of_features

        self.m, self.n = X_train.shape

    # Function for prediction

    def predict( self, X_test ) :
        """Predict a target for every row of X_test.

        Parameters
        ----------
        X_test : array-like of shape (m_test, n)
            Test features; n must equal the feature count seen in fit().

        Returns
        -------
        numpy.ndarray of shape (m_test,)
            Mean training target inside the radius of each test row. Rows
            with no training example inside the radius get NaN and a single
            warning is issued for the whole call.

        Raises
        ------
        ValueError
            If X_test is not 2-D or its feature count differs from the
            training data.
        """

        X_test = np.asarray( X_test, dtype = float )

        if X_test.ndim != 2 :

            raise ValueError( "X_test must be 2-D (examples x features), "
                              "got %d dimension(s)" % X_test.ndim )

        # no_of_test_examples, no_of_features

        m_test, n_test = X_test.shape

        # Checked explicitly: a mismatch could otherwise broadcast silently
        # (e.g. one feature against many) and produce wrong distances.
        # self.n keeps the value recorded by fit().

        if n_test != self.n :

            raise ValueError( "X_test has %d feature(s) but the model was "
                              "fitted with %d" % ( n_test, self.n ) )

        self.X_test = X_test

        self.m_test = m_test

        # initialize Y_predict with NaN so that rows without neighbors
        # keep a well-defined value

        Y_predict = np.full( m_test, np.nan )

        found_empty = False

        for i in range( m_test ) :

            # targets of the training examples within radius r
            # of the current test example

            neighbors = self.find_neighbors( X_test[i] )

            if neighbors.size :

                # mean of the neighbors in the circle of fixed radius r
                # drawn around the current test example

                Y_predict[i] = np.mean( neighbors )

            else :

                found_empty = True

        if found_empty :

            warnings.warn( "One or more test samples have no training "
                           "neighbors within radius r; predicting NaN "
                           "for them.", stacklevel = 2 )

        return Y_predict

    # Function to find the neighbors within a fixed
    # radius r of the current test example

    def find_neighbors( self, x ) :
        """Return the targets of the training examples within radius r of x.

        Parameters
        ----------
        x : array-like of shape (n,)
            A single example.

        Returns
        -------
        numpy.ndarray
            Entries of Y_train whose training row lies at Euclidean
            distance <= r from x; empty if there are none.
        """

        # distance from x to every training example in one vectorized step

        distances = np.sqrt( np.sum( np.square( self.X_train - x ), axis = 1 ) )

        return self.Y_train[ distances <= self.r ]

    # Function to calculate euclidean distance

    def euclidean( self, x, x_train ) :
        """Return the Euclidean distance between two feature vectors."""

        return np.sqrt( np.sum( np.square( x - x_train ) ) )
       

In [3]:
 # Importing dataset

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

!kaggle datasets download -d varungitboi/employee-salary-dataset

!unzip employee-salary-dataset.zip

Downloading employee-salary-dataset.zip to /content
  0% 0.00/8.39k [00:00<?, ?B/s]
100% 8.39k/8.39k [00:00<00:00, 7.65MB/s]
Archive:  employee-salary-dataset.zip
  inflating: employee_data.csv       


In [8]:

# Read the CSV extracted from the downloaded archive.
df = pd.read_csv( "employee_data.csv" )

# Select by position: columns 3, 4 and 5 are the features and the last
# column is the target (column meanings are not visible here -- TODO confirm).
feature_frame = df.iloc[:, 3:6]
target_series = df.iloc[:, -1]

# Hand plain NumPy arrays to the models.
X = feature_frame.values
Y = target_series.values

# df

In [13]:
# Hold out one third of the rows as the test set; the fixed random_state
# makes the split repeatable.

X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size = 1/3, random_state = 0
)

# Both models use the same radius.

radius = 5

# From-scratch model

model = Radius_Nearest_Neighbors_Regression( r = radius )
model.fit( X_train, Y_train )

# scikit-learn model; its fit() call stays last so the fitted estimator's
# repr remains the cell output.

model1 = RadiusNeighborsRegressor( radius = radius )
model1.fit( X_train, Y_train )


RadiusNeighborsRegressor(radius=5)

In [12]:
# Predict on the held-out rows with both models

Y_pred = model.predict( X_test )
Y_pred1 = model1.predict( X_test )

# Show the first three targets next to both sets of predictions
# (predictions rounded to two decimals)

comparison = [
    ( "Real values                         : ", Y_test[:3] ),
    ( "Predicted values by our model       : ", np.round( Y_pred[:3], 2 ) ),
    ( "Predicted values by sklearn model   : ", np.round( Y_pred1[:3], 2 ) ),
]

for label, values in comparison :
    print( label, values )

Real values                         :  [2297  662 2056]
Predicted values by our model       :  [2177.27 1738.81 1272.44]
Predicted values by sklearn model   :  [2177.27 1738.81 1272.44]
