In [34]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import CountVectorizer

# Load the dataset
file_path = 'final_movies_.csv'  # Replace with your dataset file path
movies = pd.read_csv(file_path)

# Features to use
numerical_features = ["popularity", "runtime"]
# Listed for reference only: each categorical column is encoded by its own
# dedicated step below rather than through this list.
categorical_features = ["genres", "original_language"]

### **1. Scale Numerical Features**
# Min-max scaling rescales each numerical column to the scaler's default
# [0, 1] range so that neither column dominates because of its units.
scaler = MinMaxScaler()
numerical_scaled = scaler.fit_transform(movies[numerical_features])

### **2. Use CountVectorizer for 'genres'**
# Every token found in the 'genres' text becomes one count column.
vectorizer = CountVectorizer()
genres_encoded = vectorizer.fit_transform(movies['genres'])

# Convert genres to a DataFrame for better visibility. Reusing movies.index
# keeps its rows aligned with the other feature frames when they are
# concatenated below, even if `movies` does not carry the default 0..n-1 index.
genres_df = pd.DataFrame(
    genres_encoded.toarray(),
    columns=vectorizer.get_feature_names_out(),
    index=movies.index,
)

### **3. One-Hot Encode 'original_language'**
# One indicator column per language, named 'lang_<code>'.
onehot_encoded_language = pd.get_dummies(movies['original_language'], prefix='lang')

### **4. Combine Numerical, Genres, and Language Encoded Features**
# All three frames share movies.index, so the column-wise concat lines up
# row by row. get_dummies may yield boolean columns (the default in recent
# pandas), and a frame mixing bool with numeric columns would otherwise
# convert to an object array; requesting float gives one numeric matrix.
features_combined = pd.concat(
    [
        pd.DataFrame(numerical_scaled, columns=numerical_features, index=movies.index),
        genres_df,
        onehot_encoded_language,
    ],
    axis=1,
).to_numpy(dtype=float)

### **5. Train the KNN Model**
# Cosine distance compares the direction of the feature vectors rather than
# their magnitude.
knn = NearestNeighbors(metric='cosine')
knn.fit(features_combined)

### **6. Function to Recommend Movies**
def recommend_movies_knn(movie_title: str, n_recommendations: int = 5):
    """Recommend movies whose feature vectors are nearest to the given title.

    Looks up ``movie_title`` case-insensitively in the module-level ``movies``
    frame, queries the fitted module-level ``knn`` model with that movie's row
    of ``features_combined`` and returns the titles of the closest other rows.

    Args:
        movie_title: Title to search for, compared case-insensitively against
            ``movies['title']``. When several rows share the title, the first
            one is used.
        n_recommendations: Maximum number of titles to return.

    Returns:
        A list of at most ``n_recommendations`` titles ordered from nearest to
        farthest. When the title is not in the dataset, a message string is
        returned instead (kept for backward compatibility with callers).
    """
    # Resolve the title to row *positions*: features_combined is a plain
    # array, so it must be addressed by position rather than by index label.
    title_matches = movies['title'].str.lower() == movie_title.lower()
    match_positions = title_matches.to_numpy().nonzero()[0]
    if len(match_positions) == 0:
        return f"Movie '{movie_title}' not found in the dataset."
    movie_idx = match_positions[0]

    # Get the feature vector for the movie as a single-row 2-D query.
    movie_vector = features_combined[movie_idx].reshape(1, -1)

    # Ask for one extra neighbour because the query movie is itself part of
    # the fitted data, but never for more rows than the model was fitted on.
    n_neighbors = min(n_recommendations + 1, features_combined.shape[0])
    indices = knn.kneighbors(
        movie_vector, n_neighbors=n_neighbors, return_distance=False
    )

    # Drop the query movie by position instead of assuming it is ranked
    # first: rows with tied distances can be returned ahead of it.
    neighbour_positions = [pos for pos in indices[0] if pos != movie_idx]
    neighbour_positions = neighbour_positions[:n_recommendations]

    return movies.iloc[neighbour_positions]['title'].tolist()


# Title to look up; replace with a valid movie title from the dataset.
movie_to_search = "X-Men"
recommended_movies = recommend_movies_knn(movie_to_search)

### **7. Output Recommendations**
# Header line first, then the result on its own line.
print(f"Movies similar to '{movie_to_search}':", recommended_movies, sep="\n")


Movies similar to 'X-Men':
["The Secret of the Incas' Empire", 'Clash of the Warlords', 'Miles', 'Mount Adams', 'Space Fury']
