<a href="https://colab.research.google.com/github/akkirajubhavana/Machine-Learning-/blob/main/KNN_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [58]:
#imports
import numpy as np
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

In [118]:
class KNN:
    """k-nearest-neighbours classifier with a selectable distance metric.

    Parameters
    ----------
    k : int
        Number of nearest training samples that vote on each prediction.
        Must be a positive integer.
    distance_metric : str
        One of "euclidean", "manhattan", "cosine" or "minikowski"
        ("minkowski" is accepted as an alias). The name is validated the
        first time a distance is computed, not at construction time.
    p : float, optional
        Order of the Minkowski distance. Only read by the Minkowski metric,
        so it may be omitted for every other metric.

    Raises
    ------
    ValueError
        If ``k`` is not a positive integer.
    """

    def __init__(self, k, distance_metric, p=None):
        # Reject bad k up front: k=0 would crash later inside argmax, a
        # negative k would silently drop neighbours via slicing, and k=None
        # would silently use every training sample.
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.distance_metric = distance_metric
        self.p = p

    def fit(self, X_train, y_train):
        """Store the training samples and their labels.

        Inputs are passed through ``np.asarray`` so that array-likes work;
        ndarrays are stored as-is, without copying.

        Raises
        ------
        ValueError
            If the training set is empty or X and y differ in length.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if len(X_train) == 0:
            raise ValueError("Cannot fit KNN on an empty training set.")
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X_train and y_train must have the same length, "
                f"got {len(X_train)} and {len(y_train)}."
            )
        self.X_train = X_train
        self.y_train = y_train

    def euclidean_distances(self, x_test):
        """Return the L2 distance from ``x_test`` to every training row."""
        return np.sqrt(np.sum((self.X_train - x_test) ** 2, axis=1))

    def manhattan_distance(self, x_test):
        """Return the L1 distance from ``x_test`` to every training row."""
        return np.sum(np.abs(self.X_train - x_test), axis=1)

    def cosine_distance(self, x_test):
        """Return ``1 - cosine_similarity`` between ``x_test`` and every training row.

        Cosine similarity is undefined when either vector has zero norm. In
        that case the similarity is taken to be 0 (distance 1) instead of
        letting a 0/0 division produce NaN distances.
        """
        x_test = np.asarray(x_test)
        num = np.dot(self.X_train, x_test)
        xtrain_norm = np.sqrt(np.sum(self.X_train ** 2, axis=1))
        xtest_norm = np.sqrt(np.sum(x_test ** 2))
        denom = xtrain_norm * xtest_norm
        # Divide only where the denominator is non-zero; other entries keep
        # the 0.0 they were initialised with.
        cos_sim = np.divide(
            num, denom, out=np.zeros_like(denom, dtype=float), where=denom != 0
        )
        # Rounding can push the ratio marginally outside [-1, 1].
        cos_sim = np.clip(cos_sim, -1.0, 1.0)
        return 1 - cos_sim

    def minikowski_distance(self, x_test):
        """Return the Minkowski distance of order ``self.p`` to every training row.

        Raises
        ------
        ValueError
            If ``p`` was not provided or is not strictly positive.
        """
        if self.p is None:
            raise ValueError("Parameter 'p' must be provided for Minikowski distance.")
        if self.p <= 0:
            raise ValueError(f"Parameter 'p' must be positive, got {self.p!r}.")

        abs_diff = np.abs(self.X_train - x_test)
        return np.power(np.sum(np.power(abs_diff, self.p), axis=1), 1 / self.p)

    def distance(self, x_test):
        """Compute distances from ``x_test`` to all training rows with the configured metric.

        Raises
        ------
        ValueError
            If ``distance_metric`` is not one of the supported names.
        """
        metrics = {
            "euclidean": self.euclidean_distances,
            "manhattan": self.manhattan_distance,
            "cosine": self.cosine_distance,
            "minikowski": self.minikowski_distance,  # historical spelling, kept for callers
            "minkowski": self.minikowski_distance,
        }
        try:
            metric_fn = metrics[self.distance_metric]
        except (KeyError, TypeError):
            # The offending name is carried in the exception itself rather
            # than printed to stdout as a side effect.
            raise ValueError(
                f"Invalid distance metric: {self.distance_metric!r}"
            ) from None
        return metric_fn(x_test)

    def __repr__(self):
        return f"KNN(k={self.k}, distance_metric={self.distance_metric})"

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``; returns a NumPy array."""
        predictions = [self._predict(x) for x in X_test]
        return np.array(predictions)

    def _predict(self, x):
        """Return the majority label among the ``k`` nearest training samples.

        Ties between labels go to the smallest label, which is what
        ``np.bincount(...).argmax()`` gave for non-negative integer labels.
        ``np.unique`` is used so that string, float and negative labels work
        as well.
        """
        distances = self.distance(x)
        k_neighbors_indices = np.argsort(distances)[:self.k]
        k_neighbor_labels = np.asarray(self.y_train)[k_neighbors_indices]
        labels, counts = np.unique(k_neighbor_labels, return_counts=True)
        return labels[np.argmax(counts)]



In [119]:
def k_fold_cross_validation(X, y, distance_metric, k_values, k_fold=5, p=None):
    """Pick the ``k`` with the highest mean stratified k-fold accuracy.

    Parameters
    ----------
    X, y : indexable arrays
        Features and labels; they are indexed with the integer index arrays
        produced by ``StratifiedKFold.split``.
    distance_metric : str
        Metric name forwarded to ``KNN``.
    k_values : iterable of int
        Candidate neighbour counts, evaluated in iteration order.
    k_fold : int, default 5
        Number of stratified folds.
    p : float, optional
        Minkowski order forwarded to ``KNN``.

    Returns
    -------
    The first ``k`` in ``k_values`` that reaches the best mean accuracy, or
    ``None`` when ``k_values`` is empty.
    """
    skf = StratifiedKFold(n_splits=k_fold, shuffle=True, random_state=42)
    # With a fixed random_state every call to split() yields the same folds,
    # so build them once instead of once per candidate k.
    folds = list(skf.split(X, y))

    best_k = None
    # Start below any reachable accuracy so a candidate is still selected
    # when every mean accuracy is 0.0.
    best_accuracy = -np.inf

    for k in k_values:
        accuracies = []

        for train_index, test_index in folds:
            X_train, X_test = X[train_index], X[test_index]
            y_train, y_test = y[train_index], y[test_index]

            knn = KNN(k=k, distance_metric=distance_metric, p=p)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)

            accuracies.append(accuracy_score(y_test, y_pred))

        mean_accuracy = np.mean(accuracies)

        # Strict '>' keeps the earliest k on ties.
        if mean_accuracy > best_accuracy:
            best_accuracy = mean_accuracy
            best_k = k

    return best_k

In [124]:
# Load the Iris dataset
from sklearn.datasets import load_iris
iris = load_iris()
X, y = iris.data, iris.target

# Take user input for distance_metric, k_fold.
# The metric name is normalised so stray whitespace or capitals do not turn
# into an "invalid metric" error later on.
distance_metric = input("Enter distance metric (euclidean, manhattan, cosine, minikowski): ").strip().lower()
k_fold = int(input("Enter k_fold value: "))

# p is only asked for when the Minkowski metric was chosen. An empty answer
# becomes None, as the prompt advertises; the original fed it to float(),
# which raised TypeError.
if distance_metric == 'minikowski':
    p_input = input("Enter the value of p for Minikowski distance (default is None): ").strip()
    p_value = float(p_input) if p_input else None
else:
    p_value = None

# Define a range of k values to try: ten candidates starting at sqrt(n_samples)
initial_k = int(np.sqrt(len(X)))
k_values = range(initial_k, initial_k + 10)

# p_value is already None for every metric other than Minkowski, so a single
# call covers both cases.
optimal_k = k_fold_cross_validation(X, y, distance_metric, k_values, k_fold, p=p_value)

print(f"Optimal k value: {optimal_k}")

# Use the optimal k for the KNN model
knn = KNN(k=optimal_k, distance_metric=distance_metric,p=p_value)
knn.fit(X, y)

# Get accuracy using the entire dataset.
# NOTE: the model is scored on the same samples it was fitted on, so this is
# not a held-out estimate.
y_pred = knn.predict(X)
accuracy = accuracy_score(y, y_pred)
print(f"Accuracy on the entire dataset: {accuracy * 100:.2f}%")

Enter distance metric (euclidean, manhattan, cosine, minikowski): euclidean
Enter k_fold value: 10
Optimal k value: 17
Accuracy on the entire dataset: 98.00%


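**Conclusion**: For the **Iris dataset**, the Euclidean, cosine, and Minkowski (`minikowski`) distances give the best accuracy of 98.00%, compared with 96.67% for Manhattan. Note that this accuracy is measured on the same full dataset the model was fitted on.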


---



```
 Enter distance metric (euclidean, manhattan, cosine, minikowski): euclidean
 Enter k_fold value: 6
 Optimal k value: 17
 Accuracy on the entire dataset: 98.00%
```



---



```
 Enter distance metric (euclidean, manhattan, cosine, minikowski): manhattan
  Enter k_fold value: 6
  Optimal k value: 13
  Accuracy on the entire dataset: 96.67%
```


---



```
Enter distance metric (euclidean, manhattan, cosine, minikowski): cosine
Enter k_fold value: 6
Accuracy on the entire dataset:98.00%
Optimal k value: 19
```



---



```

Enter distance metric (euclidean, manhattan, cosine, minikowski): minikowski
Enter k_fold value: 2
Enter the value of p for Minikowski distance (default is None): 2
Optimal k value: 12
Accuracy on the entire dataset: 98.00%

```



---






```

Enter distance metric (euclidean, manhattan, cosine, minikowski): minikowski
Enter k_fold value: 6
Enter the value of p for Minikowski distance (default is None): 5
Optimal k value: 17
Accuracy on the entire dataset: 98.00%

```






---




