In [7]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.neighbors import NearestNeighbors

# Load data into a pandas DataFrame with NaN values
data = pd.read_csv('anime.csv')

# Introduce NaN values in the 'rating' column
data.loc[data['anime_id'] == 28977, 'rating'] = None
data.loc[data['anime_id'] == 820, 'rating'] = None

# Handle missing values using SimpleImputer
imputer = SimpleImputer(strategy='mean')
data['rating'] = imputer.fit_transform(data[['rating']])

# Extract features for collaborative filtering
X = data[['anime_id', 'rating', 'name']]  # Include 'name' column

# Build and train the Nearest Neighbors model
knn_model = NearestNeighbors(n_neighbors=3, metric='cosine')
knn_model.fit(X[['anime_id', 'rating']])  # Exclude 'name' column from model training

# Make recommendations for a given anime (anime_id = 32281 for example)
anime_id_to_recommend = 32281
anime_to_recommend = data[data['anime_id'] == anime_id_to_recommend][['anime_id', 'rating', 'name']]  # Include 'name' column

# Find the nearest neighbors
distances, indices = knn_model.kneighbors(anime_to_recommend[['anime_id', 'rating']], n_neighbors=5)

# Print recommended anime
print("Top 5 recommended animes for", anime_to_recommend['name'].values[0])
for i, idx in enumerate(indices.flatten()):
    recommended_anime = data.iloc[idx]
    print(f"{recommended_anime['name']} (Anime ID: {recommended_anime['anime_id']}), Similarity: {1 - distances.flatten()[i]}")


Top 5 recommended animes for Kimi no Na wa.
Shounen Ninja Kaze no Fujimaru: Nazo no Arabiya Ningyou (Anime ID: 19983), Similarity: 1.0
Kimi no Na wa. (Anime ID: 32281), Similarity: 1.0
Sekai Seifuku: Bouryaku no Zvezda - Shin Zvezda Daisakusen (Anime ID: 23459), Similarity: 0.9999999999999996
Cross Ange: Tenshi to Ryuu no Rondo (Anime ID: 25731), Similarity: 0.9999999999999989
Ore no Imouto ga Konnani Kawaii Wake ga Nai.: Ore no Imouto ga Konnani Eroge na Wake Nai (Anime ID: 23363), Similarity: 0.9999999999999982
