<a href="https://colab.research.google.com/github/bammu27/ML/blob/main/knn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **K-Nearest Neighbors**

In [84]:
import numpy as np
import pandas as pd

def euclidean_distance(x, y):
    """Return the Euclidean (L2) distance between two points.

    Args:
        x: Scalar or array-like of numbers (list, tuple, NumPy array, ...).
        y: Scalar or array-like broadcast-compatible with ``x``.

    Returns:
        The square root of the sum of squared element-wise differences,
        as a NumPy float.
    """
    # Work on float arrays: plain lists/tuples do not support ``-``, and
    # unsigned integer arrays would wrap around on subtraction and squaring.
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

def generate_data(n):
    """Build a random movie table with ``n`` rows.

    Columns:
        imdb: integers drawn with ``np.random.randint(3, 10, n)``.
        no_vote: integers drawn with ``np.random.randint(1, 1000, n)``.
        genre: ``'Action'`` or ``'Comedy'``, picked with ``np.random.choice``.

    Uses NumPy's global random state, so seed it beforehand for
    reproducible output.
    """
    # The draws happen in column order (imdb, no_vote, genre); keeping that
    # order means a seeded global RNG produces the same frame as before.
    imdb_scores = np.random.randint(3, 10, n)
    vote_counts = np.random.randint(1, 1000, n)
    genres = np.random.choice(['Action', 'Comedy'], n)

    columns = {'imdb': imdb_scores, 'no_vote': vote_counts, 'genre': genres}
    return pd.DataFrame(columns)

def calculate_distances(test, train):
    """Compute all pairwise Euclidean distances between two sets of points.

    Args:
        test: 2-D array-like (rows are points), e.g. a DataFrame slice.
        train: 2-D array-like with the same number of columns as ``test``.

    Returns:
        A 2-D array whose entry ``[i, j]`` is the distance between
        ``test`` row ``i`` and ``train`` row ``j``.
    """
    queries = np.array(test)
    references = np.array(train)

    # Singleton axes make the subtraction broadcast to one difference
    # vector per (test row, train row) pair.
    deltas = queries[:, np.newaxis, :] - references[np.newaxis, :, :]

    # Collapse the feature axis: sum of squares, then square root.
    squared_sums = np.sum(deltas ** 2, axis=2)
    return np.sqrt(squared_sums)

def get_k_neighbors_indices(distances, k):
    """Return, for each row, the column indices of the ``k`` smallest distances.

    Args:
        distances: 2-D array-like; each row holds one query's distances to
            every candidate neighbour.
        k: Number of neighbours to keep per row. If ``k`` exceeds the number
            of columns, all columns are returned.

    Returns:
        Integer array with one row per input row and up to ``k`` columns,
        ordered from nearest to farthest.

    Raises:
        ValueError: If ``k`` is negative.
    """
    if k < 0:
        # A negative k would slice off the tail instead of selecting the
        # nearest entries, silently yielding the wrong neighbour set.
        raise ValueError(f"k must be non-negative, got {k}")

    # A stable sort makes ties deterministic: equidistant candidates keep
    # their original column order.
    ranked = np.argsort(np.asarray(distances), axis=1, kind="stable")
    return ranked[:, :k]

from collections import Counter

def find_mode(sublist):
    """Return the single most frequent value in ``sublist`` with its count.

    Args:
        sublist: Iterable of hashable values.

    Returns:
        ``(value, count)`` when exactly one value has the highest frequency;
        ``("No unique mode", 0)`` when the highest frequency is shared by
        several values or when ``sublist`` is empty.
    """
    # Only the two most frequent entries are needed to detect a tie.
    top_two = Counter(sublist).most_common(2)

    if not top_two:
        # Empty input has no mode; report it like a tie rather than failing
        # on the index lookups below.
        return "No unique mode", 0

    best_value, best_count = top_two[0]
    if len(top_two) == 1 or best_count > top_two[1][1]:
        return best_value, best_count
    return "No unique mode", 0

def main():
    """Run a small k-nearest-neighbours demo on randomly generated data.

    Generates 5 query rows and 10 reference rows, finds each query's 3
    nearest reference rows by distance over ``imdb`` and ``no_vote``, and
    prints the neighbours' genres together with the majority genre per query.
    """
    np.random.seed(42)  # fixed seed so every run prints the same result
    feature_cols = ['imdb', 'no_vote']

    # All three frames draw from the global RNG, so the call order is kept.
    test_input = generate_data(5)
    input_data = generate_data(10)
    # Labels are read from this separately generated frame, matched to
    # input_data by row position only.
    target_train = generate_data(10)

    distances = calculate_distances(test_input[feature_cols], input_data[feature_cols])
    nearest = get_k_neighbors_indices(distances, k=3)

    # Look up every neighbour's genre in one pass, then restore the
    # (query, neighbour) layout.
    flat_genres = target_train['genre'].iloc[nearest.flatten()].tolist()
    genre_list = np.array(flat_genres).reshape(nearest.shape)

    print("Genre List:")
    print(genre_list)

    result = list(map(find_mode, genre_list))
    for i, (mode_genre, count) in enumerate(result, start=1):
        print(f"List {i}: Mode Genre: {mode_genre}, Count: {count}")


# Run the demo immediately when this cell/module is executed.
main()


Genre List:
[['Comedy' 'Action' 'Comedy']
 ['Comedy' 'Action' 'Comedy']
 ['Comedy' 'Action' 'Comedy']
 ['Comedy' 'Comedy' 'Action']
 ['Action' 'Comedy' 'Comedy']]
List 1: Mode Genre: Comedy, Count: 2
List 2: Mode Genre: Comedy, Count: 2
List 3: Mode Genre: Comedy, Count: 2
List 4: Mode Genre: Comedy, Count: 2
List 5: Mode Genre: Comedy, Count: 2
