<a href="https://colab.research.google.com/github/benasphy/KNN/blob/main/Movie%20Recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors # Fixed import: KNeighborsClassifier is for classification, NearestNeighbors is for finding similar items
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

# Read the tab-separated ratings file and the first two columns (id, title)
# of the pipe-separated, latin-1 encoded item file.
ratings = pd.read_csv(
  '/content/u.data',
  sep='\t',
  names=['user_id', 'movie_id', 'rating', 'timestamp'],
)
movies = pd.read_csv(
  '/content/u.item',
  sep='|',
  encoding='latin-1',
  names=['movie_id', 'title'],
  usecols=[0, 1],
)

# Attach a title to every rating row via the shared movie_id column.
data = ratings.merge(movies, on='movie_id')

# One row per user, one column per title; ratings that land in the same
# (user, title) cell are averaged.
user_movie_matrix = pd.pivot_table(
  data,
  index='user_id',
  columns='title',
  values='rating',
  aggfunc='mean',
)

# Cells with no rating are filled with the constant 0.
imputer = SimpleImputer(strategy='constant', fill_value=0)
user_movie_matrix_imputed = imputer.fit_transform(user_movie_matrix)

# Standardize the zero-filled matrix.
scaler = StandardScaler()
user_movie_matrix_scaled = scaler.fit_transform(user_movie_matrix_imputed)

# Fit on the transpose so that every sample handed to the model is one movie
# (its vector across all users); neighbours are ranked by cosine distance.
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(user_movie_matrix_scaled.T)

def recommend_movie(movie_title, n_recommendations = 5):
  """Return the movies closest to ``movie_title`` according to ``model_knn``.

  The title is looked up in the columns of the module-level
  ``user_movie_matrix``; that movie's column of ``user_movie_matrix_scaled``
  is then used as the query for ``model_knn.kneighbors``.

  Args:
    movie_title: Title exactly as it appears in ``user_movie_matrix.columns``.
    n_recommendations: Maximum number of movies to return.

  Returns:
    A list of ``(title, distance)`` tuples in the order produced by
    ``kneighbors``, never containing ``movie_title`` itself. The list is
    empty (and a message is printed) when the title is unknown, and may be
    shorter than ``n_recommendations`` when fewer other movies exist.
  """
  if movie_title not in user_movie_matrix.columns:
    print(f"Movie '{movie_title}' not found in the dataset.")
    return []

  movie_idx = user_movie_matrix.columns.get_loc(movie_title)
  print(f"Finding Recommendation for {movie_title}...")

  # Ask for one extra neighbour because the query movie is itself part of the
  # fitted data, but never for more neighbours than there are movies, which
  # would make kneighbors raise.
  n_movies = user_movie_matrix_scaled.shape[1]
  n_neighbors = min(n_recommendations + 1, n_movies)
  distances, indices = model_knn.kneighbors(
    [user_movie_matrix_scaled[:, movie_idx]], n_neighbors = n_neighbors
  )

  # Remove the query movie by index rather than by position: when several
  # movies tie on distance it is not guaranteed to be the first result.
  recommendations = [
    (user_movie_matrix.columns[idx], dist)
    for idx, dist in zip(indices.flatten(), distances.flatten())
    if idx != movie_idx
  ]

  # If the query movie was not among the results, one surplus entry remains.
  return recommendations[:n_recommendations]


# Demo run: fetch the neighbours of a single title and list them with their
# distances, one per line, formatted to four decimals.
movie_title = "Toy Story (1995)"
recommendations = recommend_movie(movie_title)

print("Recommeded Movies:")
for entry in recommendations:
  movie, dist = entry
  print(f"{movie}, Distance: {dist:.4f}")

Finding Recommendation for Toy Story (1995)...
Recommeded Movies:
Star Wars (1977), Distance: 0.5423
Independence Day (ID4) (1996), Distance: 0.5455
Rock, The (1996), Distance: 0.5682
Willy Wonka and the Chocolate Factory (1971), Distance: 0.5760
Return of the Jedi (1983), Distance: 0.5770
