## Import required packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
fruits_data = pd.read_csv('fruits_weight_sphercity.csv')
fruits_data.head()

In [None]:
# Replace the categorical Color and labels values with integer codes.
# Values that are not listed in a mapping are left unchanged.
fruits_data['Color'] = fruits_data['Color'].replace({
    'Green': 20,
    'Greenish yellow': 40,
    'Orange': 60,
    'Red': 80,
    'Reddish yellow': 100,
})
fruits_data['labels'] = fruits_data['labels'].replace({'apple': 1, 'orange': 0})

In [None]:
fruits_data.shape

## Take every third data sample for training

In [None]:
# Training set: every third row among the first 60 positions
# (20 rows when the file holds at least 58 rows).
train = fruits_data.iloc[:60:3]
train

## Check the length of the dataset

In [None]:
print(len(fruits_data))
print(len(train))
print(type(train))

In [None]:
# Test set: the rows at positions 1, 11, 21, 31 and 41 (every tenth row,
# starting from the second one).
# NOTE(review): position 21 is also picked by the training slice [0:60:3],
# so one of the test samples is part of the training data.
test = fruits_data.iloc[1:50:10]
test

In [None]:
print(len(test))

In [None]:
# Features of training data and testing data  
traindata = train.iloc[:, 1:4] 
testdata = test.iloc[:, 1:4]

In [None]:
traindata.head()

In [None]:
testdata.head()

In [None]:
traindata.shape, testdata.shape

## Apply KNN Classifier on the data

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Number of neighbours the classifier is configured with.
k = 3
neigh = KNeighborsClassifier(n_neighbors=k)

In [None]:
# Train or fit the model with the train data.
# The plotting step reads a `labels` column from traindata/testdata, so the
# column slice may contain the target itself. Drop it from the feature matrix
# so the classifier cannot read the answer from its own input;
# errors='ignore' makes this a no-op when the column is not present.
train_features = traindata.drop(columns='labels', errors='ignore')
test_features = testdata.drop(columns='labels', errors='ignore')
neigh.fit(train_features, train['labels'])

# Test the trained model
predictions = neigh.predict(test_features)

In [None]:
print(predictions, "predictions")
print(test.labels.values, "Actual_labels")

In [None]:
# Put the first two test columns next to the predicted labels (can be used for plotting).
# NOTE(review): the column names below assume those two columns are Weight and
# Sphericity, in that order — confirm against the CSV header.
first_two_columns = testdata.iloc[:, :2].to_numpy()
predicted_data = np.column_stack((first_two_columns, predictions))

predicted_df = pd.DataFrame(
    predicted_data,
    columns=['Weight', 'Sphericity', 'labels'],
)
predicted_df.head()

## Plot the train, test and predictions before scaling

In [None]:
import matplotlib.pyplot as plt
from mlxtend.plotting import category_scatter

def _stack_with_group_names(groups):
    """Concatenate the frames in *groups*, exposing each row's group name as a 'level_1' column."""
    stacked = pd.concat(list(groups.values()), axis=0, keys=tuple(groups))
    # The keys form the outer index level; swapping the two levels and then
    # resetting level 1 turns the group names into a regular column, which
    # pandas names 'level_1' because the index level has no name.
    return stacked.swaplevel(0, 1, axis=0).reset_index(level=1)


def plotting(traindata, testdata, df_Pred):
    """Draw two Sphericity-vs-Weight scatter plots of the fruit samples.

    The first figure shows the training points together with the test points
    grouped by their actual label. The second shows the same training points
    together with the test points grouped by their predicted label.

    Parameters
    ----------
    traindata, testdata : pandas.DataFrame
        Frames with 'Weight', 'Sphericity' and 'labels' columns. Rows with
        label 0 are treated as oranges, rows with label 1 as apples.
    df_Pred : pandas.DataFrame
        Test points with 'Weight' and 'Sphericity' columns whose third column
        (position 2) holds the predicted label.

    Returns
    -------
    None
        The figures are produced by mlxtend's ``category_scatter``.
    """
    oranges_train = traindata[traindata.labels == 0]
    apples_train = traindata[traindata.labels == 1]
    oranges_test = testdata[testdata.labels == 0]
    apples_test = testdata[testdata.labels == 1]

    # Predictions are addressed by position, not by column name.
    predicted_label = df_Pred.iloc[:, 2]
    oranges_pred = df_Pred[predicted_label == 0]
    apples_pred = df_Pred[predicted_label == 1]

    actual_view = _stack_with_group_names({
        'Oranges_train': oranges_train,
        'Oranges_test': oranges_test,
        'Apples_train': apples_train,
        'Apples_test': apples_test,
    })
    predicted_view = _stack_with_group_names({
        'Oranges_train': oranges_train,
        'Oranges_pred': oranges_pred,
        'Apples_train': apples_train,
        'Apples_pred': apples_pred,
    })

    # One figure per view, drawn with identical styling so they can be compared.
    for view in (actual_view, predicted_view):
        category_scatter(x='Sphericity', y='Weight', label_col='level_1',
                         data=view, markers='*o*o',
                         colors=('red', 'red', 'green', 'green'),
                         markersize=50, legend_loc='upper left')

In [None]:
# plotting() filters both frames on their `labels` column, so hand it the
# labelled train/test rows rather than the feature slices.
plotting(train, test, predicted_df)

## Scaling the data

In [None]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [None]:
# Data Before Scaling
fruits_data.head()

In [None]:
# Overwrite the Sphericity and Weight columns with their min-max scaled values
# (MinMaxScaler's default output range is [0, 1]).
# NOTE(review): the scaler is fitted on the whole dataset, before the
# train/test split below, so the test rows contribute to the fitted min/max.
fruits_data[['Sphericity', 'Weight']] = scaler.fit_transform(fruits_data[['Sphericity', 'Weight']])

In [None]:
# Data After Scaling 
fruits_data.head()

### Take every third data sample for training

In [None]:
# Training set from the scaled data: every third row among the first 60 positions.
train = fruits_data.iloc[:60:3]
train.head()

In [None]:
# Test set from the scaled data: the rows at positions 1, 11, 21, 31 and 41.
# NOTE(review): position 21 is also picked by the training slice [0:60:3],
# so one of the test samples is part of the training data.
test = fruits_data.iloc[1:50:10]
test

In [None]:
print(len(test))

### Apply KNN Classifier on the scaled data

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Number of neighbours the classifier is configured with.
k = 3
# A new classifier instance is created here and rebinds `neigh`.
neigh = KNeighborsClassifier(n_neighbors=k)

In [None]:
# Select the feature columns (positions 1 to 3) of the scaled train and test sets
traindata = train.iloc[:,1:4] 
testdata = test.iloc[:,1:4]

In [None]:
# Train or fit the model with the train data.
# The plotting step reads a `labels` column from traindata/testdata, so the
# column slice may contain the target itself. Drop it from the feature matrix
# so the classifier cannot read the answer from its own input;
# errors='ignore' makes this a no-op when the column is not present.
train_features = traindata.drop(columns='labels', errors='ignore')
test_features = testdata.drop(columns='labels', errors='ignore')
neigh.fit(train_features, train['labels'])

# Test the trained model
scaled_predictions = neigh.predict(test_features)

In [None]:
print(scaled_predictions,"predictions") 
print(test.labels.values,"labels")

In [None]:
# Put the first two scaled test columns next to the predicted labels.
# NOTE(review): the column names below assume those two columns are Weight and
# Sphericity, in that order — confirm against the CSV header.
first_two_columns = testdata.iloc[:, :2].to_numpy()
predicted_data = np.column_stack((first_two_columns, scaled_predictions))

df_Pred_scale = pd.DataFrame(
    predicted_data,
    columns=['Weight', 'Sphericity', 'labels'],
)
df_Pred_scale.head()

### Plot the train and test points after scaling

In [None]:
# plotting() filters both frames on their `labels` column, so hand it the
# labelled (scaled) train/test rows rather than the feature slices.
plotting(train, test, df_Pred_scale)